1   /*
2    *  DatabaseCorpusImpl.java
3    *
4    *  Copyright (c) 1998-2001, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Marin Dimitrov, 05/Nov/2001
12   *
13   *  $Id: DatabaseCorpusImpl.java,v 1.17 2003/07/08 08:36:58 marin Exp $
14   */
15  
16  package gate.corpora;
17  
18  import java.util.*;
19  
20  import junit.framework.*;
21  
22  import gate.*;
23  import gate.persist.*;
24  import gate.annotation.*;
25  import gate.creole.*;
26  import gate.event.*;
27  import gate.util.*;
28  import gate.security.SecurityInfo;
29  
30  
31  public class DatabaseCorpusImpl extends CorpusImpl
32                                  implements DatastoreListener,
33                                             EventAwareCorpus {
34  
35    /** Debug flag */
36    private static final boolean DEBUG = false;
37  
38    private boolean featuresChanged;
39    private boolean nameChanged;
40    /**
41     * The listener for the events coming from the features.
42     */
43    protected EventsHandler eventHandler;
44    protected List documentData;
45    protected List removedDocuments;
46    protected List  addedDocuments;
47  
48    public DatabaseCorpusImpl() {
49      super();
50    }
51  
52  
53    public DatabaseCorpusImpl(String _name,
54                              DatabaseDataStore _ds,
55                              Long _persistenceID,
56                              FeatureMap _features,
57                              Vector _dbDocs) {
58  
59      super();
60  
61      this.name = _name;
62      this.dataStore = _ds;
63      this.lrPersistentId = _persistenceID;
64      this.features = _features;
65      this.documentData =  _dbDocs;
66      this.supportList = new ArrayList(this.documentData.size());
67      this.removedDocuments = new ArrayList();
68      //just allocate space for this one, don't initialize it -
69      //invokations of add() will add elements to it
70      this.addedDocuments = new ArrayList();
71  
72      //init the document list
73      for (int i=0; i< this.documentData.size(); i++) {
74        this.supportList.add(null);
75      }
76  
77      this.featuresChanged = false;
78      this.nameChanged = false;
79  
80      //3. add the listeners for the features
81      if (eventHandler == null)
82        eventHandler = new EventsHandler();
83      this.features.addFeatureMapListener(eventHandler);
84  
85  
86      //4. add self as listener for the data store, so that we'll know when the DS is
87      //synced and we'll clear the isXXXChanged flags
88      this.dataStore.addDatastoreListener(this);
89    }
90  
91  
92    public boolean add(Object o){
93  
94      Assert.assertNotNull(o);
95      boolean result = false;
96  
97      //accept only documents
98      if (false == o instanceof Document) {
99        throw new IllegalArgumentException();
100     }
101 
102     Document doc = (Document)o;
103 
104     //assert docs are either transient or from the same datastore
105     if (isValidForAdoption(doc)) {
106       result = super.add(doc);
107     }
108     else {
109       return false;
110     }
111 
112     //add to doc data too
113 /* Was:
114     DocumentData newDocData = new DocumentData(doc.getName(),null);
115 */
116     DocumentData newDocData = new DocumentData(doc.getName(),
117                                                doc.getLRPersistenceId());
118 
119     this.documentData.add(newDocData);
120 
121     //add the LRID to the set of newly added documents so that upon sync() a reference
122     // from the doc to the corpus will be added in the database
123     if (null != doc.getLRPersistenceId()) {
124       this.addedDocuments.add(doc.getLRPersistenceId());
125 //Out.prln("adding [" + doc.getLRPersistenceId() + "] to NewlyAddedDocs...");
126     }
127 
128     if (result) {
129       fireDocumentAdded(new CorpusEvent(this,
130                                         doc,
131                                         this.supportList.size()-1,
132                                         CorpusEvent.DOCUMENT_ADDED));
133     }
134 
135     return result;
136   }
137 
138 
139   public void add(int index, Object element){
140 
141     Assert.assertNotNull(element);
142     Assert.assertTrue(index >= 0);
143 
144     long    collInitialSize = this.supportList.size();
145 
146     //accept only documents
147     if (false == element instanceof Document) {
148       throw new IllegalArgumentException();
149     }
150 
151     Document doc = (Document)element;
152 
153     //assert docs are either transient or from the same datastore
154     if (isValidForAdoption(doc)) {
155       super.add(index,doc);
156 
157       //add to doc data too
158       DocumentData newDocData = new DocumentData(doc.getName(),null);
159       this.documentData.add(index,newDocData);
160 
161       //add the LRID to the set of newly added documents so that upon sync() a reference
162       // from the doc to the corpus will be added in the database
163       if (null != doc.getLRPersistenceId()) {
164 
165       this.addedDocuments.add(doc.getLRPersistenceId());
166 //Out.prln("adding ["+doc.getLRPersistenceId()+"] to NewlyAddedDocs...");
167       }
168 
169       //if added then fire event
170       if (this.supportList.size() > collInitialSize) {
171         fireDocumentAdded(new CorpusEvent(this,
172                                           doc,
173                                           index,
174                                           CorpusEvent.DOCUMENT_ADDED));
175       }
176     }
177   }
178 
179 
180 
181   public boolean addAll(Collection c){
182 
183     boolean collectionChanged = false;
184 
185     Iterator it = c.iterator();
186     while (it.hasNext()) {
187       Document doc = (Document)it.next();
188       if (isValidForAdoption(doc)) {
189         collectionChanged |= add(doc);
190       }
191     }
192 
193     return collectionChanged;
194   }
195 
196 
197   public boolean addAll(int index, Collection c){
198 
199     Assert.assertTrue(index >=0);
200 
201     //funny enough add(index,element) returns void and not boolean
202     //so we can't use it
203     boolean collectionChanged = false;
204     int collInitialSize = this.supportList.size();
205     int currIndex = index;
206 
207     Iterator it = c.iterator();
208     while (it.hasNext()) {
209       Document doc = (Document)it.next();
210       if (isValidForAdoption(doc)) {
211         add(currIndex++,doc);
212       }
213     }
214 
215     return (this.supportList.size() > collInitialSize);
216   }
217 
218 
219   private boolean isValidForAdoption(LanguageResource lr) {
220 
221     Long lrID = (Long)lr.getLRPersistenceId();
222 
223     if (null == lrID ||
224         (this.getDataStore() != null && lr.getDataStore().equals(this.getDataStore()))) {
225       return true;
226     }
227     else {
228       return false;
229     }
230   }
231 
232   public void resourceAdopted(DatastoreEvent evt){
233   }
234 
235   public void resourceDeleted(DatastoreEvent evt){
236 
237     Assert.assertNotNull(evt);
238     Long  deletedID = (Long)evt.getResourceID();
239     Assert.assertNotNull(deletedID);
240 
241     //unregister self as listener from the DataStore
242     if (deletedID.equals(this.getLRPersistenceId())) {
243       //someone deleted this corpus
244       this.supportList.clear();
245       getDataStore().removeDatastoreListener(this);
246     }
247 
248     //check if the ID is of a document the corpus contains
249     Iterator it = this.supportList.iterator();
250     while (it.hasNext()) {
251       Document doc = (Document)it.next();
252       if (doc.getLRPersistenceId().equals(deletedID)) {
253         this.supportList.remove(doc);
254         break;
255       }
256     }
257   }
258 
259   public void resourceWritten(DatastoreEvent evt){
260     Assert.assertNotNull(evt);
261     Assert.assertNotNull(evt.getResourceID());
262 
263     //is the event for us?
264     if (evt.getResourceID().equals(this.getLRPersistenceId())) {
265       //wow, the event is for me
266       //clear all flags, the content is synced with the DB
267       this.featuresChanged =
268         this.nameChanged = false;
269 
270       this.removedDocuments.clear();
271       this.addedDocuments.clear();
272     }
273   }
274 
275 
276   public void resourceUnloaded(CreoleEvent e) {
277 
278     Assert.assertNotNull(e);
279     Assert.assertNotNull(e.getResource());
280 
281     Resource res = e.getResource();
282 
283     if (res instanceof Document) {
284 
285       Document doc = (Document) res;
286 
287       if (DEBUG) {
288         Out.prln("resource Unloaded called ");
289       }
290 
291       //remove from the corpus too, if a transient one
292       if (null == doc.getLRPersistenceId()) {
293         //@FIXME - not sure we need this
294         super.remove(doc);
295       }
296       else {
297         //unload all occurences
298         //see if we can find it first. If not, then judt return
299         int index = findDocument(doc);
300         if (index == -1) {
301           //not our document
302           return;
303         }
304         else {
305           //3. unload from internal data structures
306 
307           //@FIXME - not sure we need this
308           //super.remove(doc);
309 
310           //remove from the list of loaded documents
311 //System.out.println("resourceUnloaded called -- removing doc[" + index +"] from supportList...");
312           //WAS: Document oldDoc = (Document)this.supportList.remove(index);
313           this.supportList.set(index, null);
314 
315           if (DEBUG)
316             Out.prln("corpus: document " + index + " unloaded and set to null");
317         } //else
318       } //else
319     } //if
320   }
321 
322 
323   public boolean isResourceChanged(int changeType) {
324 
325     switch(changeType) {
326 
327       case EventAwareLanguageResource.RES_FEATURES:
328         return this.featuresChanged;
329       case EventAwareLanguageResource.RES_NAME:
330         return this.nameChanged;
331       default:
332         throw new IllegalArgumentException();
333     }
334   }
335 
336   /**
337    * Returns true of an LR has been modified since the last sync.
338    * Always returns false for transient LRs.
339    */
340   public boolean isModified() {
341     return this.isResourceChanged(EventAwareLanguageResource.RES_FEATURES) ||
342             this.isResourceChanged(EventAwareLanguageResource.RES_NAME);
343   }
344 
345 
346 
347   /** Sets the name of this resource*/
348   public void setName(String name){
349     super.setName(name);
350 
351     this.nameChanged = true;
352   }
353 
354 
355   /** Set the feature set */
356   public void setFeatures(FeatureMap features) {
357     //1. save them first, so we can remove the listener
358     FeatureMap oldFeatures = this.features;
359 
360     super.setFeatures(features);
361 
362     this.featuresChanged = true;
363 
364     //4. sort out the listeners
365     if (eventHandler != null)
366       oldFeatures.removeFeatureMapListener(eventHandler);
367     else
368       eventHandler = new EventsHandler();
369     this.features.addFeatureMapListener(eventHandler);
370   }
371 
372 
373   /**
374    * All the events from the features are handled by
375    * this inner class.
376    */
377   class EventsHandler implements gate.event.FeatureMapListener {
378     public void featureMapUpdated(){
379       //tell the document that its features have been updated
380       featuresChanged = true;
381     }
382   }
383 
384   /**
385    * Overriden to remove the features listener, when the document is closed.
386    */
387   public void cleanup() {
388     super.cleanup();
389     if (eventHandler != null)
390       this.features.removeFeatureMapListener(eventHandler);
391   }///inner class EventsHandler
392 
393 
394 
395   public void setInitData__$$__(Object data) {
396 
397     HashMap initData = (HashMap)data;
398 
399     this.name = (String)initData.get("CORP_NAME");
400     this.dataStore = (DatabaseDataStore)initData.get("DS");
401     this.lrPersistentId = (Long)initData.get("LR_ID");
402     this.features = (FeatureMap)initData.get("CORP_FEATURES");
403     this.supportList = new ArrayList((List)initData.get("CORP_SUPPORT_LIST"));
404 
405     this.documentData = new ArrayList(this.supportList.size());
406     this.removedDocuments = new ArrayList();
407     this.addedDocuments = new ArrayList();
408 
409     //init the documentData list
410     for (int i=0; i< this.supportList.size(); i++) {
411       Document dbDoc = (Document)this.supportList.get(i);
412       DocumentData dd = new DocumentData(dbDoc.getName(),dbDoc.getLRPersistenceId());
413       this.documentData.add(dd);
414     }
415 
416     this.featuresChanged = false;
417     this.nameChanged = false;
418 
419      //3. add the listeners for the features
420     if (eventHandler == null)
421       eventHandler = new EventsHandler();
422     this.features.addFeatureMapListener(eventHandler);
423 
424 
425     //4. add self as listener for the data store, so that we'll know when the DS is
426     //synced and we'll clear the isXXXChanged flags
427     this.dataStore.addDatastoreListener(this);
428   }
429 
430   public Object getInitData__$$__(Object initData) {
431     return null;
432   }
433 
434   /**
435    * Gets the names of the documents in this corpus.
436    * @return a {@link List} of Strings representing the names of the documents
437    * in this corpus.
438    */
439   public List getDocumentNames(){
440 
441     List docsNames = new ArrayList();
442 
443     if(this.documentData == null)
444       return docsNames;
445 
446     Iterator iter = this.documentData.iterator();
447     while (iter.hasNext()) {
448       DocumentData data = (DocumentData)iter.next();
449       docsNames.add(data.getDocumentName());
450     }
451 
452     return docsNames;
453   }
454 
455 
456   /**
457    * Gets the name of a document in this corpus.
458    * @param index the index of the document
459    * @return a String value representing the name of the document at
460    * <tt>index</tt> in this corpus.<P>
461    */
462   public String getDocumentName(int index){
463 
464     if (index >= this.documentData.size()) return "No such document";
465 
466     return ((DocumentData)this.documentData.get(index)).getDocumentName();
467   }
468 
469   /**
470    * returns a document in the coprus by index
471    * @param index the index of the document
472    * @return an Object value representing DatabaseDocumentImpl
473    */
474   public Object get(int index){
475 
476     //0. preconditions
477     Assert.assertTrue(index >= 0);
478     Assert.assertTrue(index < this.documentData.size());
479     Assert.assertTrue(index < this.supportList.size());
480     Assert.assertTrue(this.documentData.size() == this.supportList.size());
481 
482     if (index >= this.documentData.size())
483       return null;
484 
485     Object res = this.supportList.get(index);
486 
487     //if the document is null, then I must get it from the database
488     if (null == res) {
489       Long currLRID = (Long)((DocumentData)this.documentData.get(index)).getPersistentID();
490       FeatureMap params = Factory.newFeatureMap();
491       params.put(DataStore.DATASTORE_FEATURE_NAME, this.getDataStore());
492       params.put(DataStore.LR_ID_FEATURE_NAME, currLRID);
493 
494       try {
495         Document dbDoc = (Document)Factory.createResource(DBHelper.DOCUMENT_CLASS, params);
496 
497         if (DEBUG) {
498           Out.prln("Loaded document :" + dbDoc.getName());
499         }
500 
501         //change the result to the newly loaded doc
502         res = dbDoc;
503 
504         //finally replace the doc with the instantiated version
505         Assert.assertNull(this.supportList.get(index));
506         this.supportList.set(index, dbDoc);
507       }
508       catch (ResourceInstantiationException ex) {
509         Err.prln("Error reading document inside a serialised corpus.");
510         throw new GateRuntimeException(ex.getMessage());
511       }
512     }
513 
514     return res;
515   }
516 
517   public Object remove(int index){
518 
519     //1. get the persistent id and add it to the removed list
520     DocumentData docData = (DocumentData)this.documentData.get(index);
521     Long removedID = (Long)docData.getPersistentID();
522 //    Assert.assertTrue(null != removedID);
523     //removedID may be NULL if the doc is still transient
524 
525     //2. add to the list of removed documents but only if it's not newly added
526     //othewrwise just ignore
527     if (null != removedID && false == this.addedDocuments.contains(removedID)) {
528       this.removedDocuments.add(removedID);
529 //Out.prln("adding ["+removedID+"] to RemovedDocs...");
530     }
531 
532     //3. delete
533     this.documentData.remove(index);
534     Document res = (Document)this.supportList.remove(index);
535 
536     //4. remove the LRID to the set of newly added documents (if there) so that upon sync() a reference
537     // from the doc to the corpus will NOT be added in the database
538     if (this.addedDocuments.contains(removedID)) {
539       this.addedDocuments.remove(removedID);
540 //Out.prln("removing ["+removedID+"] from NewlyAddedDocs...");
541     }
542 
543     //5, fire events
544     fireDocumentRemoved(new CorpusEvent(DatabaseCorpusImpl.this,
545                                         res,
546                                         index,
547                                         CorpusEvent.DOCUMENT_REMOVED));
548     return res;
549 
550   }
551 
552 
553   public boolean remove(Object obj){
554 
555     //0. preconditions
556     Assert.assertNotNull(obj);
557     Assert.assertTrue(obj instanceof DatabaseDocumentImpl);
558 
559     if (false == obj instanceof Document) {
560       return false;
561     }
562 
563     Document doc = (Document) obj;
564 
565     //see if we can find it first. If not, then judt return
566     int index = findDocument(doc);
567     if (index == -1) {
568       return false;
569     }
570 
571     if(index < this.documentData.size()) {
572       //we found it, so remove it
573 
574       //1. get the persistent id and add it to the removed list
575       DocumentData docData = (DocumentData)this.documentData.get(index);
576       Long removedID = (Long)docData.getPersistentID();
577       //Assert.assertTrue(null != removedID);
578       //removed ID may be null - doc is still transient
579 
580       //2. add to the list of removed documents
581       if (null != removedID && false == this.addedDocuments.contains(removedID)) {
582         this.removedDocuments.add(removedID);
583 //Out.prln("adding ["+removedID+"] to RemovedDocs...");
584       }
585 
586       //3. delete
587       this.documentData.remove(index);
588       Document oldDoc = (Document) this.supportList.remove(index);
589 
590       //4. remove the LRID to the set of newly added documents (if there) so that upon sync() a reference
591       // from the doc to the corpus will NOT be added in the database
592       if (this.addedDocuments.contains(removedID)) {
593         this.addedDocuments.remove(removedID);
594 //Out.prln("removing ["+removedID+"] from NewlyAddedDocs...");
595       }
596 
597       //5. fire events
598       fireDocumentRemoved(new CorpusEvent(DatabaseCorpusImpl.this,
599                                           oldDoc,
600                                           index,
601                                           CorpusEvent.DOCUMENT_REMOVED));
602     }
603 
604     return true;
605   }
606 
607 
608   public int findDocument(Document doc) {
609 
610     boolean found = false;
611     DocumentData docData = null;
612 
613     //first try finding the document in memory
614     int index = this.supportList.indexOf(doc);
615 
616     if (index > -1 && index < this.documentData.size()) {
617       return index;
618     }
619 
620     //else try finding a document with the same name and persistent ID
621     Iterator iter = this.documentData.iterator();
622 
623     for (index = 0;  iter.hasNext(); index++) {
624       docData = (DocumentData) iter.next();
625       if (docData.getDocumentName().equals(doc.getName()) &&
626           docData.getPersistentID().equals(doc.getLRPersistenceId())) {
627         found = true;
628         break;
629       }
630     }
631 
632     if (found && index < this.documentData.size()) {
633       return index;
634     }
635     else {
636       return -1;
637     }
638   }//findDocument
639 
640 
641   public boolean contains(Object o){
642     //return true if:
643     // - the document data list contains a document with such a name
644     //   and persistent id
645 
646     if(false == o instanceof Document)
647       return false;
648 
649     int index = findDocument((Document) o);
650 
651     if (index < 0) {
652       return false;
653     }
654     else {
655       return true;
656     }
657   }
658 
659   public Iterator iterator(){
660     return new DatabaseCorpusIterator(this.documentData);
661   }
662 
663   public List getLoadedDocuments() {
664     return new ArrayList(this.supportList);
665   }
666 
667   public List getRemovedDocuments() {
668     return new ArrayList(this.removedDocuments);
669   }
670 
671   public List getAddedDocuments() {
672     return new ArrayList(this.addedDocuments);
673   }
674 
675   private class DatabaseCorpusIterator implements Iterator {
676 
677       private Iterator docDataIter;
678       private List docDataList;
679 
680       public DatabaseCorpusIterator(List docDataList) {
681         this.docDataList = docDataList;
682         this.docDataIter = this.docDataList.iterator();
683       }
684 
685       public boolean hasNext() {
686         return docDataIter.hasNext();
687       }
688 
689       public Object next(){
690 
691         //try finding a document with the same name and persistent ID
692         DocumentData docData = (DocumentData)docDataIter.next();
693         int index = this.docDataList.indexOf(docData);
694         return DatabaseCorpusImpl.this.get(index);
695       }
696 
697       public void remove() {
698         throw new UnsupportedOperationException("DatabaseCorpusImpl does not " +
699                     "support remove in the iterators");
700       }
701   }
702 
703 
704   /**
705    * Unloads the document from memory, but calls sync() first, to store the
706    * changes
707    */
708   public void unloadDocument(int index) {
709 
710     //preconditions
711     Assert.assertTrue(index >= 0);
712 
713     //1. check whether its been loaded and is a persistent one
714     // if a persistent doc is not loaded, there's nothing we need to do
715     if ( (! isDocumentLoaded(index)) && isPersistentDocument(index)) {
716       return;
717     }
718 
719     //2. sync the document before releasing it from memory, because the
720     //creole register garbage collects all LRs which are not used any more
721     Document doc = (Document)this.supportList.get(index);
722     Assert.assertNotNull(doc);
723 
724     try {
725 
726       //if the document is not already adopted, we need to do that first
727       if (doc.getLRPersistenceId() == null) {
728 
729         //3.2 get the security info for the corpus
730         SecurityInfo si = this.getDataStore().getSecurityInfo(this);
731         Document dbDoc = (Document) this.getDataStore().adopt(doc, si);
732       }
733       else {
734         //if it is adopted, just sync it
735         this.getDataStore().sync(doc);
736       }
737 
738       //3. remove the document from the memory
739       //do this, only if the saving has succeeded
740       // WAS this.supportList.remove(index);
741       this.supportList.set(index,null);
742     }
743     catch (PersistenceException pex) {
744       throw new GateRuntimeException("Error unloading document from corpus"
745                       + "because document sync failed: " + pex.getMessage());
746     }
747     catch (gate.security.SecurityException sex) {
748       throw new GateRuntimeException("Error unloading document from corpus"
749                       + "because of document access error: " + sex.getMessage());
750     }
751 
752   }
753 
754   /**
755    * Unloads a document from memory
756    */
757   public void unloadDocument(Document doc) {
758 
759     Assert.assertNotNull(doc);
760 
761     //1. determine the index of the document; if not there, do nothing
762     int index = findDocument(doc);
763 
764     if (index == -1) {
765       return;
766     }
767 
768     unloadDocument(index);
769   }
770 
771 
772   /**
773    * This method returns true when the document is already loaded in memory
774    */
775   public boolean isDocumentLoaded(int index) {
776 
777     //preconditions
778     Assert.assertTrue(index >= 0);
779 
780     if (this.supportList == null || this.supportList.isEmpty()) {
781       return false;
782     }
783 
784     return this.supportList.get(index) != null;
785   }
786 
787   /**
788    * This method returns true when the document is already stored on disk
789    * i.e., is not transient
790    */
791   public boolean isPersistentDocument(int index) {
792 
793     //preconditions
794     Assert.assertTrue(index >= 0);
795 
796     if (this.supportList == null || this.supportList.isEmpty()) {
797       return false;
798     }
799 
800     return (((DocumentData)this.documentData.get(index)).getPersistentID() != null);
801   }
802 
803 
804   public boolean equals(Object o){
805 
806     if (! (o instanceof DatabaseCorpusImpl))
807       return false;
808 
809     DatabaseCorpusImpl dbCorp = (DatabaseCorpusImpl)o;
810 
811     if (this.getDataStore() != null && this.getDataStore() != dbCorp.getDataStore()) {
812       return false;
813     }
814 
815     if (this.getLRPersistenceId() != null && this.getLRPersistenceId() != dbCorp.getLRPersistenceId()) {
816       return false;
817     }
818 
819     return supportList.equals(o);
820   }
821 
822 
823 }