1   /*
2    *  SerialCorpusImpl.java
3    *
4    *  Copyright (c) 1998-2001, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Kalina Bontcheva, 19/Oct/2001
12   *
13   *  $Id: SerialCorpusImpl.java,v 1.29 2002/05/27 13:21:56 nasso Exp $
14   */
15  
16  package gate.corpora;
17  
18  import java.util.*;
19  
20  import gate.*;
21  import gate.util.*;
22  import gate.annotation.*;
23  import gate.persist.*;
24  import java.io.*;
25  import java.net.*;
26  import gate.event.*;
27  import gate.creole.*;
28  import gate.creole.ir.*;
29  import gate.creole.ir.lucene.*;
30  import gate.security.SecurityException;
31  
32  //The initial design was to implement this on the basis of a WeakValueHashMap.
33  //However this creates problems, because the user might e.g., add a transient
34  //document to the corpus and then if the Document variable goes out of scope
35  //before sync() is called, nothing will be saved of the new document. Bad!
36  //Instead, to cope with the unloading for memory saving use, I implemented
37  //a documentUnload() method, which sets the in-memory copy to null but can
38  //always restore the doc, because it has its persistence ID.
39  
40  public class SerialCorpusImpl extends
41            AbstractLanguageResource
42                        implements Corpus, CreoleListener,
43                                   DatastoreListener, IndexedCorpus {
44  
  /** Debug flag */
  private static final boolean DEBUG = false;

  /** Version identifier used by Java serialisation. */
  static final long serialVersionUID = 3632609241787241616L;

  /**
   * The registered corpus listeners. The vector is never modified in place:
   * add/removeCorpusListener replace it with a modified copy.
   */
  protected transient Vector corpusListeners;

  /**
   * One DocumentData entry (document name and persistent ID) per document;
   * the order of this list defines the order of the documents in the corpus.
   */
  protected java.util.List docDataList = null;

  //the documents that are currently in memory, stored at the same position
  //as their entry in docDataList; a position holds null while the
  //corresponding document is unloaded
  protected transient List documents = null;

  /** The index manager; stays null unless an index definition is present. */
  protected transient IndexManager indexManager= null;
  /** Documents added since the lists were created; handed to the index manager. */
  protected transient List addedDocs = null;
  /** Persistent IDs (as Strings) of removed documents; handed to the index manager. */
  protected transient List removedDocIDs = null;
  /** Loaded documents found modified when this corpus is written; handed to the index manager. */
  protected transient List changedDocs = null;
61  
  /**
   * Creates an empty corpus object. None of the internal lists is
   * initialised here.
   */
  public SerialCorpusImpl() {
  }
64  
65    /**
66     * Constructor to create a SerialCorpus from a transient one.
67     * This is called by adopt() to store the transient corpus
68     * and re-route the methods calls to it, until the corpus is
69     * sync-ed on disk. After that, the transientCorpus will always
70     * be null, so the new functionality will be used instead.
71     */
72    protected SerialCorpusImpl(Corpus tCorpus){
73      //copy the corpus name and features from the one in memory
74      this.setName(tCorpus.getName());
75      this.setFeatures(tCorpus.getFeatures());
76  
77      docDataList = new ArrayList();
78      //now cache the names of all docs for future use
79      Iterator iter = tCorpus.getDocumentNames().iterator();
80      while (iter.hasNext())
81        docDataList.add(new DocumentData((String) iter.next(), null));
82  
83      //copy all the documents from the transient corpus
84      documents = new ArrayList();
85      documents.addAll(tCorpus);
86  
87      //make sure we fire events when docs are added/removed/etc
88      Gate.getCreoleRegister().addCreoleListener(this);
89    }
90  
91    /**
92     * Gets the names of the documents in this corpus.
93     * @return a {@link List} of Strings representing the names of the documents
94     * in this corpus.
95     */
96    public List getDocumentNames(){
97      List docsNames = new ArrayList();
98      if(docDataList == null)
99        return docsNames;
100     Iterator iter = docDataList.iterator();
101     while (iter.hasNext()) {
102       DocumentData data = (DocumentData) iter.next();
103       docsNames.add(data.getDocumentName());
104     }
105     return docsNames;
106   }
107 
108   /**
109    * This method should only be used by the Serial Datastore to set
110    */
111   public void setDocumentPersistentID(int index, Object persID){
112     if (index >= docDataList.size()) return;
113     ((DocumentData)docDataList.get(index)).setPersistentID(persID);
114     if (DEBUG) Out.prln("IDs are now: " + docDataList);
115   }
116 
117   /**
118    * Gets the name of a document in this corpus.
119    * @param index the index of the document
120    * @return a String value representing the name of the document at
121    * <tt>index</tt> in this corpus.<P>
122    */
123   public String getDocumentName(int index){
124     if (index >= docDataList.size()) return "No such document";
125 
126     return ((DocumentData) docDataList.get(index)).getDocumentName();
127   }
128 
129   /**
130    * Unloads the document from memory, but calls sync() first, to store the
131    * changes
132    */
133   public void unloadDocument(int index) {
134     //1. check whether its been loaded and is a persistent one
135     // if a persistent doc is not loaded, there's nothing we need to do
136     if ( (! isDocumentLoaded(index)) && isPersistentDocument(index))
137       return;
138 
139     //2. sync the document before releasing it from memory, because the
140     //creole register garbage collects all LRs which are not used any more
141     Document doc = (Document) documents.get(index);
142     try {
143       //if the document is not already adopted, we need to do that first
144       if (doc.getLRPersistenceId() == null) {
145         doc = (Document) this.getDataStore().adopt(doc, null);
146         this.getDataStore().sync(doc);
147         this.setDocumentPersistentID(index, doc.getLRPersistenceId());
148       } else //if it is adopted, just sync it
149         this.getDataStore().sync(doc);
150 
151       //3. remove the document from the memory
152       //do this, only if the saving has succeeded
153       documents.set(index, null);
154 
155     } catch (PersistenceException ex) {
156         throw new GateRuntimeException("Error unloading document from corpus"
157                       + "because document sync failed: " + ex.getMessage());
158     } catch (gate.security.SecurityException ex1) {
159         throw new GateRuntimeException("Error unloading document from corpus"
160                       + "because of document access error: " + ex1.getMessage());
161     }
162 
163   }
164 
165   /**
166    * Unloads a document from memory
167    */
168   public void unloadDocument(Document doc) {
169     if (DEBUG) Out.prln("Document to be unloaded :" + doc.getName());
170     //1. determine the index of the document; if not there, do nothing
171     int index = findDocument(doc);
172     if (index == -1)
173       return;
174     if (DEBUG) Out.prln("Index of doc: " + index);
175     if (DEBUG) Out.prln("Size of corpus: " + documents.size());
176     unloadDocument(index);
177 //    documents.remove(new Integer(index));
178   }
179 
180   /**
181    * This method returns true when the document is already loaded in memory
182    */
183   public boolean isDocumentLoaded(int index) {
184     if (documents == null || documents.isEmpty()) return false;
185     return documents.get(index) != null;
186   }
187 
188   /**
189    * This method returns true when the document is already stored on disk
190    * i.e., is not transient
191    */
192   public boolean isPersistentDocument(int index) {
193     if (documents == null || documents.isEmpty()) return false;
194     return (((DocumentData)docDataList.get(index)).getPersistentID() != null);
195   }
196 
197   /**
198    * Every LR that is a CreoleListener (and other Listeners too) must
199    * override this method and make sure it removes itself from the
200    * objects which it has been listening to. Otherwise, the object will
201    * not be released from memory (memory leak!).
202    */
203   public void cleanup() {
204     if (DEBUG) Out.prln("serial corpus cleanup called");
205     if (corpusListeners != null)
206       corpusListeners = null;
207     if (documents != null)
208       documents.clear();
209     docDataList.clear();
210     Gate.getCreoleRegister().removeCreoleListener(this);
211     if (this.dataStore != null) {
212       this.dataStore.removeDatastoreListener(this);
213     }
214   }
215 
  /**
   * Fills this corpus with documents created from files in a directory.
   * The work is delegated to the static method
   * {@link gate.corpora.CorpusImpl#populate(Corpus,URL,FileFilter,String,boolean)}.
   * @param directory the directory from which the files will be picked. This
   * parameter is an URL for uniformity; it is expected to be a URL of type
   * file (how other URLs are rejected is decided by the delegate).
   * @param filter the file filter used to select files from the target
   * directory. If the filter is <tt>null</tt> all the files will be accepted.
   * @param encoding the encoding to be used for reading the documents
   * @param recurseDirectories should the directory be parsed recursively?. If
   * <tt>true</tt> all the files from the provided directory and all its
   * children directories (on as many levels as necessary) will be picked if
   * accepted by the filter otherwise the children directories will be ignored.
   */
  public void populate(URL directory, FileFilter filter, String encoding,
                       boolean recurseDirectories)
              throws IOException, ResourceInstantiationException{
    CorpusImpl.populate(this, directory, filter, encoding, recurseDirectories);
  }
236 
237 
238   public synchronized void removeCorpusListener(CorpusListener l) {
239     if (corpusListeners != null && corpusListeners.contains(l)) {
240       Vector v = (Vector) corpusListeners.clone();
241       v.removeElement(l);
242       corpusListeners = v;
243     }
244   }
245   public synchronized void addCorpusListener(CorpusListener l) {
246     Vector v = corpusListeners == null ? new Vector(2) : (Vector) corpusListeners.clone();
247     if (!v.contains(l)) {
248       v.addElement(l);
249       corpusListeners = v;
250     }
251   }
252   protected void fireDocumentAdded(CorpusEvent e) {
253     if (corpusListeners != null) {
254       Vector listeners = corpusListeners;
255       int count = listeners.size();
256       for (int i = 0; i < count; i++) {
257         ((CorpusListener) listeners.elementAt(i)).documentAdded(e);
258       }
259     }
260   }
261   protected void fireDocumentRemoved(CorpusEvent e) {
262     if (corpusListeners != null) {
263       Vector listeners = corpusListeners;
264       int count = listeners.size();
265       for (int i = 0; i < count; i++) {
266         ((CorpusListener) listeners.elementAt(i)).documentRemoved(e);
267       }
268     }
269   }
  /**
   * Called when a resource has been loaded; this corpus does nothing then.
   */
  public void resourceLoaded(CreoleEvent e) {
  }
272 
  /**
   * Called when a resource has been renamed; this corpus does nothing then.
   * NOTE(review): the name cached in docDataList is not updated here -
   * confirm that this is intended.
   */
  public void resourceRenamed(Resource resource, String oldName,
                              String newName){}
275 
276   public void resourceUnloaded(CreoleEvent e) {
277     Resource res = e.getResource();
278     if (res instanceof Document) {
279       Document doc = (Document) res;
280       if (DEBUG)
281         Out.prln("resource Unloaded called ");
282       //remove from the corpus too, if a transient one
283       if (doc.getDataStore() != this.getDataStore()) {
284         this.remove(doc);
285       } else {
286         //unload all occurences
287         int index = indexOf(res);
288         if (index < 0)
289           return;
290         documents.set(index, null);
291         if (DEBUG)
292           Out.prln("corpus: document "+ index + " unloaded and set to null");
293       } //if
294     }
295   }
  /**
   * Called when a datastore has been opened; this corpus does nothing then.
   */
  public void datastoreOpened(CreoleEvent e) {
  }
  /**
   * Called when a datastore has been created; this corpus does nothing then.
   */
  public void datastoreCreated(CreoleEvent e) {
  }
300   public void datastoreClosed(CreoleEvent e) {
301     if (! e.getDatastore().equals(this.getDataStore()))
302       return;
303     if (this.getDataStore() != null)
304       this.getDataStore().removeDatastoreListener(this);
305     //close this corpus, since it cannot stay open when the DS it comes from
306     //is closed
307     Factory.deleteResource(this);
308   }
  /**
   * Called by a datastore when a new resource has been adopted; this corpus
   * does nothing then.
   */
  public void resourceAdopted(DatastoreEvent evt){
  }
314 
315   /**
316    * Called by a datastore when a resource has been deleted
317    */
318   public void resourceDeleted(DatastoreEvent evt){
319     DataStore ds = (DataStore)evt.getSource();
320     //1. check whether this datastore fired the event. If not, return.
321     if (!ds.equals(this.dataStore))
322       return;
323 
324     Object docID = evt.getResourceID();
325     if (docID == null)
326       return;
327 
328     if (DEBUG) Out.prln("Resource deleted called for: " + docID);
329     //first check if it is this corpus that's been deleted, it must be
330     //unloaded immediately
331     if (docID.equals(this.getLRPersistenceId())) {
332       Factory.deleteResource(this);
333       return;
334     }//if
335 
336     boolean isDirty=false;
337     //the problem here is that I only have the doc persistent ID
338     //and nothing else, so I need to determine the index of the doc first
339     for (int i=0; i< docDataList.size(); i++) {
340       DocumentData docData = (DocumentData)docDataList.get(i);
341       //we've found the correct document
342       //don't break the loop, because it might appear more than once
343       if (docID.equals(docData.getPersistentID())) {
344         remove(i);
345         isDirty = true;
346       }//if
347     }//for loop through the doc data
348 
349     if (isDirty)
350       try {
351         this.dataStore.sync(this);
352       } catch (PersistenceException ex) {
353         throw new GateRuntimeException("SerialCorpusImpl: " + ex.getMessage());
354       } catch (SecurityException sex) {
355         throw new GateRuntimeException("SerialCorpusImpl: " + sex.getMessage());
356       }
357   }//resourceDeleted
358 
359   /**
360    * Called by a datastore when a resource has been wrote into the datastore
361    */
362   public void resourceWritten(DatastoreEvent evt){
363     if (evt.getResourceID().equals(this.getLRPersistenceId())) {
364       thisResourceWritten();
365     }
366   }
367 
368 
369 
370   //List methods
371   //java docs will be automatically copied from the List interface.
372 
  /**
   * @return the number of documents in this corpus, loaded or not, i.e.
   * the number of entries in the document data list
   */
  public int size() {
    return docDataList.size();
  }
376 
  /**
   * @return true if the document data list has no entries
   */
  public boolean isEmpty() {
    return docDataList.isEmpty();
  }
380 
381   public boolean contains(Object o){
382     //return true if:
383     // - the document data list contains a document with such a name
384     //   and persistent id
385 
386     if(! (o instanceof Document))
387       return false;
388 
389     int index = findDocument((Document) o);
390     if (index < 0)
391       return false;
392     else
393       return true;
394   }
395 
396   public Iterator iterator(){
397     return new Iterator(){
398       Iterator docDataIter = docDataList.iterator();
399 
400       public boolean hasNext() {
401         return docDataIter.hasNext();
402       }
403 
404       public Object next(){
405 
406         //try finding a document with the same name and persistent ID
407         DocumentData docData = (DocumentData) docDataIter.next();
408         int index = docDataList.indexOf(docData);
409         return SerialCorpusImpl.this.get(index);
410       }
411 
412       public void remove() {
413         throw new UnsupportedOperationException("SerialCorpusImpl does not " +
414                     "support remove in the iterators");
415       }
416     }; //return
417 
418   }//iterator
419 
  /**
   * @return a String listing the document data entries followed by the
   * in-memory documents list
   */
  public String toString() {
    return "document data " + docDataList.toString() + " documents " + documents;
  }
423 
  /**
   * Not implemented: always throws a MethodNotImplementedException.
   */
  public Object[] toArray(){
    //there is a problem here, because some docs might not be instantiated
    throw new MethodNotImplementedException(
                "toArray() is not implemented for SerialCorpusImpl");
  }
429 
  /**
   * Not implemented: always throws a MethodNotImplementedException.
   */
  public Object[] toArray(Object[] a){
    //there is a problem here, because some docs might not be instantiated
    throw new MethodNotImplementedException(
                "toArray(Object[] a) is not implemented for SerialCorpusImpl");
  }
435 
436   public boolean add(Object o){
437     if (! (o instanceof Document) || o == null)
438       return false;
439     Document doc = (Document) o;
440 
441     //make it accept only docs from its own datastore
442     if (doc.getDataStore() != null
443         && !this.dataStore.equals(doc.getDataStore())) {
444       Err.prln("Error: Persistent corpus can only accept documents " +
445                "from its own datastore!");
446       return false;
447     }//if
448 
449     //add the document with its index in the docDataList
450     //in this case, since it's going to be added to the end
451     //the index will be the size of the docDataList before
452     //the addition
453     DocumentData docData = new DocumentData(doc.getName(),
454                                             doc.getLRPersistenceId());
455     boolean result = docDataList.add(docData);
456     documents.add(doc);
457     documentAdded(doc);
458     fireDocumentAdded(new CorpusEvent(SerialCorpusImpl.this,
459                                       doc,
460                                       docDataList.size()-1,
461                                       CorpusEvent.DOCUMENT_ADDED));
462 
463     return result;
464   }
465 
466   public boolean remove(Object o){
467     if (DEBUG) Out.prln("SerialCorpus:Remove object called");
468     if (! (o instanceof Document))
469       return false;
470     Document doc = (Document) o;
471 
472     //see if we can find it first. If not, then judt return
473     int index = findDocument(doc);
474     if (index == -1)
475       return false;
476 
477     if(index < docDataList.size()) { //we found it, so remove it
478       docDataList.remove(index);
479       Document oldDoc =  (Document) documents.remove(index);
480       if (DEBUG) Out.prln("documents after remove of " + oldDoc.getName()
481                           + " are " + documents);
482       documentRemoved(oldDoc.getLRPersistenceId().toString());
483       fireDocumentRemoved(new CorpusEvent(SerialCorpusImpl.this,
484                                           oldDoc,
485                                           index,
486                                           CorpusEvent.DOCUMENT_REMOVED));
487     }
488 
489     return true;
490   }
491 
492   public int findDocument(Document doc) {
493     boolean found = false;
494     DocumentData docData = null;
495 
496     //first try finding the document in memory
497     int index = documents.indexOf(doc);
498     if (index > -1 && index < docDataList.size())
499       return index;
500 
501     //else try finding a document with the same name and persistent ID
502     Iterator iter = docDataList.iterator();
503     for (index = 0;  iter.hasNext(); index++) {
504       docData = (DocumentData) iter.next();
505       if (docData.getDocumentName().equals(doc.getName()) &&
506           docData.getPersistentID().equals(doc.getLRPersistenceId())) {
507         found = true;
508         break;
509       }
510     }
511     if (found && index < docDataList.size())
512       return index;
513     else
514       return -1;
515   }//findDocument
516 
517   public boolean containsAll(Collection c){
518     Iterator iter = c.iterator();
519     while (iter.hasNext()) {
520       if (! contains(iter.next()))
521         return false;
522     }
523     return true;
524   }
525 
526   public boolean addAll(Collection c){
527     boolean allAdded = true;
528     Iterator iter = c.iterator();
529     while (iter.hasNext()) {
530       if (! add(iter.next()))
531         allAdded = false;
532     }
533     return allAdded;
534   }
535 
  /**
   * Not supported: always throws an UnsupportedOperationException.
   */
  public boolean addAll(int index, Collection c){
    throw new UnsupportedOperationException();
  }
539 
540   public boolean removeAll(Collection c){
541     boolean allRemoved = true;
542     Iterator iter = c.iterator();
543     while (iter.hasNext()) {
544       if (! remove(iter.next()))
545         allRemoved = false;
546     }
547     return allRemoved;
548 
549   }
550 
  /**
   * Not supported: always throws an UnsupportedOperationException.
   */
  public boolean retainAll(Collection c){
    throw new UnsupportedOperationException();
  }
554 
  /**
   * Empties both the in-memory documents list and the document data list.
   * No corpus events are fired and nothing is recorded for the index here.
   */
  public void clear(){
    documents.clear();
    docDataList.clear();
  }
559 
560   public boolean equals(Object o){
561     if (! (o instanceof SerialCorpusImpl))
562       return false;
563     SerialCorpusImpl oCorpus = (SerialCorpusImpl) o;
564     if ((this == null && oCorpus != null) || (oCorpus == null && this != null))
565       return false;
566     if (oCorpus == this)
567       return true;
568     if ((oCorpus.lrPersistentId == this.lrPersistentId ||
569           ( this.lrPersistentId != null &&
570             this.lrPersistentId.equals(oCorpus.lrPersistentId))
571           )
572         &&
573         oCorpus.name.equals(this.name)
574         &&
575         (oCorpus.dataStore == this.dataStore
576           || oCorpus.dataStore.equals(this.dataStore))
577         &&
578         oCorpus.docDataList.equals(docDataList))
579       return true;
580     return false;
581   }
582 
583   public int hashCode(){
584     return docDataList.hashCode();
585   }
586 
587   public Object get(int index){
588       if (index >= docDataList.size())
589         return null;
590 
591       Object res = documents.get(index);
592 
593       if (DEBUG)
594         Out.prln("SerialCorpusImpl: get(): index " + index + "result: " + res);
595 
596       //if the document is null, then I must get it from the DS
597       if (res == null) {
598         FeatureMap features = Factory.newFeatureMap();
599         features.put(DataStore.DATASTORE_FEATURE_NAME, this.dataStore);
600         try {
601           features.put(DataStore.LR_ID_FEATURE_NAME,
602                       ((DocumentData)docDataList.get(index)).getPersistentID());
603           Resource lr = Factory.createResource( "gate.corpora.DocumentImpl",
604                                                 features);
605           if (DEBUG)
606             Out.prln("Loaded document :" + lr.getName());
607           //change the result to the newly loaded doc
608           res = lr;
609 
610           //finally replace the doc with the instantiated version
611           documents.set(index, lr);
612         } catch (ResourceInstantiationException ex) {
613           Err.prln("Error reading document inside a serialised corpus.");
614           throw new GateRuntimeException(ex.getMessage());
615         }
616       }
617 
618       return res;
619   }
620 
  /**
   * Not implemented: always throws a MethodNotImplementedException.
   */
  public Object set(int index, Object element){
    throw new gate.util.MethodNotImplementedException();
  }
634 
635   public void add(int index, Object o){
636     if (! (o instanceof Document) || o == null)
637       return;
638     Document doc = (Document) o;
639 
640     DocumentData docData = new DocumentData(doc.getName(),
641                                             doc.getLRPersistenceId());
642     docDataList.add(index, docData);
643 
644     documents.add(index, doc);
645     documentAdded(doc);
646     fireDocumentAdded(new CorpusEvent(SerialCorpusImpl.this,
647                                       doc,
648                                       index,
649                                       CorpusEvent.DOCUMENT_ADDED));
650 
651   }
652 
653   public Object remove(int index){
654     if (DEBUG) Out.prln("Remove index called");
655 
656     boolean isLoaded = isDocumentLoaded(index);
657     Document removed = (Document) get(index);
658     documentRemoved(removed.getLRPersistenceId().toString());
659     if (!isLoaded){
660       unloadDocument(removed);
661     }
662 
663     docDataList.remove(index);
664     Document res = (Document) documents.remove(index);
665     fireDocumentRemoved(new CorpusEvent(SerialCorpusImpl.this,
666                                         res,
667                                         index,
668                                         CorpusEvent.DOCUMENT_REMOVED));
669     return res;
670 
671   }
672 
673   public int indexOf(Object o){
674     if (o instanceof Document)
675       return findDocument((Document) o);
676 
677     return -1;
678   }
679 
  /**
   * Not implemented: always throws a MethodNotImplementedException.
   */
  public int lastIndexOf(Object o){
    throw new gate.util.MethodNotImplementedException();
  }
683 
  /**
   * Not implemented: always throws a MethodNotImplementedException.
   */
  public ListIterator listIterator(){
    throw new gate.util.MethodNotImplementedException();
  }
687 
  /**
   * Not implemented: always throws a MethodNotImplementedException.
   */
  public ListIterator listIterator(int index){
    throw new gate.util.MethodNotImplementedException();
  }
691 
  /**
   * A persistent corpus does not support this method, as all the documents
   * might not be in memory: always throws a MethodNotImplementedException.
   */
  public List subList(int fromIndex, int toIndex){
    throw new gate.util.MethodNotImplementedException();
  }
699 
  /**
   * Sets the datastore of this corpus (through the superclass) and, if the
   * resulting datastore is not null, registers this corpus as one of its
   * listeners.
   * @param dataStore the datastore this corpus belongs to
   */
  public void setDataStore(DataStore dataStore)
                throws gate.persist.PersistenceException {
    super.setDataStore( dataStore);
    if (this.dataStore != null)
      this.dataStore.addDatastoreListener(this);
  }
706 
707   public void setTransientSource(Object source) {
708     if (! (source instanceof Corpus))
709       return;
710 
711     //the following initialisation is only valid when we're constructing
712     //this object from a transient one. If it has already been stored in
713     //a datastore, then the initialisation is done in readObject() since
714     //this method is the one called by serialisation, when objects
715     //are restored.
716     if (this.dataStore != null && this.lrPersistentId != null)
717       return;
718 
719     Corpus tCorpus = (Corpus) source;
720 
721     //copy the corpus name and features from the one in memory
722     this.setName(tCorpus.getName());
723     this.setFeatures(tCorpus.getFeatures());
724 
725     docDataList = new ArrayList();
726     //now cache the names of all docs for future use
727     Iterator iter = tCorpus.getDocumentNames().iterator();
728     while (iter.hasNext())
729       docDataList.add(new DocumentData((String) iter.next(), null));
730 
731     //copy all the documents from the transient corpus
732     documents = new ArrayList();
733     documents.addAll(tCorpus);
734 
735     this.addedDocs = new Vector();
736     this.removedDocIDs = new Vector();
737     this.changedDocs = new Vector();
738 
739     //make sure we fire events when docs are added/removed/etc
740     Gate.getCreoleRegister().addCreoleListener(this);
741 
742   }
743 
  //we don't keep the transient source, so always return null.
  //Still, this must be implemented, because of the GUI and the Factory.
  public Object getTransientSource() {
    return null;
  }
749 
750 
  /**
   * Initialises this resource by calling the superclass' init().
   * @return this corpus
   */
  public Resource init() throws gate.creole.ResourceInstantiationException {
    super.init();

    return this;

  }
757 
758 
759   /**
760    * readObject - calls the default readObject() and then initialises the
761    * transient data
762    *
763    * @serialData Read serializable fields. No optional data read.
764    */
765   private void readObject(ObjectInputStream s)
766       throws IOException, ClassNotFoundException {
767     s.defaultReadObject();
768     documents = new ArrayList(docDataList.size());
769     for (int i = 0; i < docDataList.size(); i++)
770       documents.add(null);
771     corpusListeners = new Vector();
772     //finally set the creole listeners if the LR is like that
773     Gate.getCreoleRegister().addCreoleListener(this);
774     if (this.dataStore != null)
775       this.dataStore.addDatastoreListener(this);
776 
777     //if indexed construct the manager.
778     IndexDefinition  definition = (IndexDefinition) this.getFeatures().get(
779                 GateConstants.CORPUS_INDEX_DEFINITION_FEATURE_KEY);
780     if (definition != null){
781       String className = definition.getIrEngineClassName();
782       try{
783         Class aClass = Class.forName(className);
784         IREngine engine = (IREngine)aClass.newInstance();
785         this.indexManager = engine.getIndexmanager();
786         this.indexManager.setIndexDefinition(definition);
787         this.indexManager.setCorpus(this);
788       }catch(Exception e){
789         e.printStackTrace(Err.getPrintWriter());
790       }
791 //      switch (definition.getIndexType()) {
792 //        case GateConstants.IR_LUCENE_INVFILE:
793 //          this.indexManager = new LuceneIndexManager();
794 //          this.indexManager.setIndexDefinition(definition);
795 //          this.indexManager.setCorpus(this);
796 //          break;
797 //      }
798       this.addedDocs = new Vector();
799       this.removedDocIDs = new Vector();
800       this.changedDocs = new Vector();
801     }
802   }//readObject
803 
804   public void setIndexDefinition(IndexDefinition definition) {
805     if (definition != null){
806       this.getFeatures().put(GateConstants.CORPUS_INDEX_DEFINITION_FEATURE_KEY,
807                               definition);
808 
809       String className = definition.getIrEngineClassName();
810       try{
811         Class aClass = Class.forName(className);
812         IREngine engine = (IREngine)aClass.newInstance();
813         this.indexManager = engine.getIndexmanager();
814         this.indexManager.setIndexDefinition(definition);
815         this.indexManager.setCorpus(this);
816       }catch(Exception e){
817         e.printStackTrace(Err.getPrintWriter());
818       }
819 //    switch (definition.getIndexType()) {
820 //      case GateConstants.IR_LUCENE_INVFILE:
821 //        this.indexManager = new LuceneIndexManager();
822 //        this.indexManager.setIndexDefinition(definition);
823 //        this.indexManager.setCorpus(this);
824 //        break;
825 //    }
826       this.addedDocs = new Vector();
827       this.removedDocIDs = new Vector();
828       this.changedDocs = new Vector();
829     }
830   }
831 
  /**
   * @return the index definition stored in the features of this corpus
   * under CORPUS_INDEX_DEFINITION_FEATURE_KEY; null if there is none
   */
  public IndexDefinition getIndexDefinition() {
    return (IndexDefinition) this.getFeatures().get(
                           GateConstants.CORPUS_INDEX_DEFINITION_FEATURE_KEY);
  }
836 
  /**
   * @return the index manager of this corpus; null if none has been set up
   */
  public IndexManager getIndexManager() {
    return this.indexManager;
  }
840 
  /**
   * @return the index statistics stored in the features of this corpus
   * under CORPUS_INDEX_STATISTICS_FEATURE_KEY; null if there are none
   */
  public IndexStatistics getIndexStatistics(){
    return (IndexStatistics) this.getFeatures().get(
                           GateConstants.CORPUS_INDEX_STATISTICS_FEATURE_KEY);
  }
845 
  /**
   * Records an added document for the index; does nothing when this corpus
   * has no index manager.
   */
  private void documentAdded(Document doc) {
    if (indexManager != null){
      addedDocs.add(doc);
    }
  }
851 
  /**
   * Records the ID of a removed document for the index; does nothing when
   * this corpus has no index manager.
   */
  private void documentRemoved(String lrID) {
    if (indexManager != null) {
      removedDocIDs.add(lrID);
    }
  }
857 
858   private void thisResourceWritten() {
859     if (indexManager != null) {
860       try {
861         for (int i = 0; i<documents.size(); i++) {
862           if (documents.get(i) != null) {
863             Document doc = (Document) documents.get(i);
864             if (!addedDocs.contains(doc) && doc.isModified()) {
865               changedDocs.add(doc);
866             }
867           }
868         }
869         indexManager.sync(addedDocs, removedDocIDs, changedDocs);
870       } catch (IndexException ie) {
871         ie.printStackTrace();
872       }
873     }
874   }
875 
876 }