|
SerialCorpusImpl |
|
1 /* 2 * SerialCorpusImpl.java 3 * 4 * Copyright (c) 1998-2001, The University of Sheffield. 5 * 6 * This file is part of GATE (see http://gate.ac.uk/), and is free 7 * software, licenced under the GNU Library General Public License, 8 * Version 2, June 1991 (in the distribution as file licence.html, 9 * and also available at http://gate.ac.uk/gate/licence.html). 10 * 11 * Kalina Bontcheva, 19/Oct/2001 12 * 13 * $Id: SerialCorpusImpl.java,v 1.29 2002/05/27 13:21:56 nasso Exp $ 14 */ 15 16 package gate.corpora; 17 18 import java.util.*; 19 20 import gate.*; 21 import gate.util.*; 22 import gate.annotation.*; 23 import gate.persist.*; 24 import java.io.*; 25 import java.net.*; 26 import gate.event.*; 27 import gate.creole.*; 28 import gate.creole.ir.*; 29 import gate.creole.ir.lucene.*; 30 import gate.security.SecurityException; 31 32 //The initial design was to implement this on the basis of a WeakValueHashMap. 33 //However this creates problems, because the user might e.g., add a transient 34 //document to the corpus and then if the Document variable goes out of scope 35 //before sync() is called, nothing will be saved of the new document. Bad! 36 //Instead, to cope with the unloading for memory saving use, I implemented 37 //a documentUnload() method, which sets the in-memory copy to null but can 38 //always restore the doc, because it has its persistence ID. 39 40 public class SerialCorpusImpl extends 41 AbstractLanguageResource 42 implements Corpus, CreoleListener, 43 DatastoreListener, IndexedCorpus { 44 45 /** Debug flag */ 46 private static final boolean DEBUG = false; 47 48 static final long serialVersionUID = 3632609241787241616L; 49 50 protected transient Vector corpusListeners; 51 protected java.util.List docDataList = null; 52 53 //here I keep document index as key (same as the index in docDataList 54 //which defines the document order) and Documents as value 55 protected transient List documents = null; 56 57 protected transient IndexManager indexManager= null; 58 protected transient List addedDocs = null; 59 protected transient List removedDocIDs = null; 60 protected transient List changedDocs = null; 61 62 public SerialCorpusImpl() { 63 } 64 65 /** 66 * Constructor to create a SerialCorpus from a transient one. 67 * This is called by adopt() to store the transient corpus 68 * and re-route the methods calls to it, until the corpus is 69 * sync-ed on disk. After that, the transientCorpus will always 70 * be null, so the new functionality will be used instead. 71 */ 72 protected SerialCorpusImpl(Corpus tCorpus){ 73 //copy the corpus name and features from the one in memory 74 this.setName(tCorpus.getName()); 75 this.setFeatures(tCorpus.getFeatures()); 76 77 docDataList = new ArrayList(); 78 //now cache the names of all docs for future use 79 Iterator iter = tCorpus.getDocumentNames().iterator(); 80 while (iter.hasNext()) 81 docDataList.add(new DocumentData((String) iter.next(), null)); 82 83 //copy all the documents from the transient corpus 84 documents = new ArrayList(); 85 documents.addAll(tCorpus); 86 87 //make sure we fire events when docs are added/removed/etc 88 Gate.getCreoleRegister().addCreoleListener(this); 89 } 90 91 /** 92 * Gets the names of the documents in this corpus. 93 * @return a {@link List} of Strings representing the names of the documents 94 * in this corpus. 95 */ 96 public List getDocumentNames(){ 97 List docsNames = new ArrayList(); 98 if(docDataList == null) 99 return docsNames; 100 Iterator iter = docDataList.iterator(); 101 while (iter.hasNext()) { 102 DocumentData data = (DocumentData) iter.next(); 103 docsNames.add(data.getDocumentName()); 104 } 105 return docsNames; 106 } 107 108 /** 109 * This method should only be used by the Serial Datastore to set 110 */ 111 public void setDocumentPersistentID(int index, Object persID){ 112 if (index >= docDataList.size()) return; 113 ((DocumentData)docDataList.get(index)).setPersistentID(persID); 114 if (DEBUG) Out.prln("IDs are now: " + docDataList); 115 } 116 117 /** 118 * Gets the name of a document in this corpus. 119 * @param index the index of the document 120 * @return a String value representing the name of the document at 121 * <tt>index</tt> in this corpus.<P> 122 */ 123 public String getDocumentName(int index){ 124 if (index >= docDataList.size()) return "No such document"; 125 126 return ((DocumentData) docDataList.get(index)).getDocumentName(); 127 } 128 129 /** 130 * Unloads the document from memory, but calls sync() first, to store the 131 * changes 132 */ 133 public void unloadDocument(int index) { 134 //1. check whether its been loaded and is a persistent one 135 // if a persistent doc is not loaded, there's nothing we need to do 136 if ( (! isDocumentLoaded(index)) && isPersistentDocument(index)) 137 return; 138 139 //2. sync the document before releasing it from memory, because the 140 //creole register garbage collects all LRs which are not used any more 141 Document doc = (Document) documents.get(index); 142 try { 143 //if the document is not already adopted, we need to do that first 144 if (doc.getLRPersistenceId() == null) { 145 doc = (Document) this.getDataStore().adopt(doc, null); 146 this.getDataStore().sync(doc); 147 this.setDocumentPersistentID(index, doc.getLRPersistenceId()); 148 } else //if it is adopted, just sync it 149 this.getDataStore().sync(doc); 150 151 //3. remove the document from the memory 152 //do this, only if the saving has succeeded 153 documents.set(index, null); 154 155 } catch (PersistenceException ex) { 156 throw new GateRuntimeException("Error unloading document from corpus" 157 + "because document sync failed: " + ex.getMessage()); 158 } catch (gate.security.SecurityException ex1) { 159 throw new GateRuntimeException("Error unloading document from corpus" 160 + "because of document access error: " + ex1.getMessage()); 161 } 162 163 } 164 165 /** 166 * Unloads a document from memory 167 */ 168 public void unloadDocument(Document doc) { 169 if (DEBUG) Out.prln("Document to be unloaded :" + doc.getName()); 170 //1. determine the index of the document; if not there, do nothing 171 int index = findDocument(doc); 172 if (index == -1) 173 return; 174 if (DEBUG) Out.prln("Index of doc: " + index); 175 if (DEBUG) Out.prln("Size of corpus: " + documents.size()); 176 unloadDocument(index); 177 // documents.remove(new Integer(index)); 178 } 179 180 /** 181 * This method returns true when the document is already loaded in memory 182 */ 183 public boolean isDocumentLoaded(int index) { 184 if (documents == null || documents.isEmpty()) return false; 185 return documents.get(index) != null; 186 } 187 188 /** 189 * This method returns true when the document is already stored on disk 190 * i.e., is not transient 191 */ 192 public boolean isPersistentDocument(int index) { 193 if (documents == null || documents.isEmpty()) return false; 194 return (((DocumentData)docDataList.get(index)).getPersistentID() != null); 195 } 196 197 /** 198 * Every LR that is a CreoleListener (and other Listeners too) must 199 * override this method and make sure it removes itself from the 200 * objects which it has been listening to. Otherwise, the object will 201 * not be released from memory (memory leak!). 202 */ 203 public void cleanup() { 204 if (DEBUG) Out.prln("serial corpus cleanup called"); 205 if (corpusListeners != null) 206 corpusListeners = null; 207 if (documents != null) 208 documents.clear(); 209 docDataList.clear(); 210 Gate.getCreoleRegister().removeCreoleListener(this); 211 if (this.dataStore != null) { 212 this.dataStore.removeDatastoreListener(this); 213 } 214 } 215 216 /** 217 * Fills this corpus with documents created from files in a directory. 218 * @param filter the file filter used to select files from the target 219 * directory. If the filter is <tt>null</tt> all the files will be accepted. 220 * @param directory the directory from which the files will be picked. This 221 * parameter is an URL for uniformity. It needs to be a URL of type file 222 * otherwise an InvalidArgumentException will be thrown. 223 * An implementation for this method is provided as a static method at 224 * {@link gate.corpora.CorpusImpl#populate(Corpus,URL,FileFilter,boolean)}. 225 * @param encoding the encoding to be used for reading the documents 226 * @param recurseDirectories should the directory be parsed recursively?. If 227 * <tt>true</tt> all the files from the provided directory and all its 228 * children directories (on as many levels as necessary) will be picked if 229 * accepted by the filter otherwise the children directories will be ignored. 230 */ 231 public void populate(URL directory, FileFilter filter, String encoding, 232 boolean recurseDirectories) 233 throws IOException, ResourceInstantiationException{ 234 CorpusImpl.populate(this, directory, filter, encoding, recurseDirectories); 235 } 236 237 238 public synchronized void removeCorpusListener(CorpusListener l) { 239 if (corpusListeners != null && corpusListeners.contains(l)) { 240 Vector v = (Vector) corpusListeners.clone(); 241 v.removeElement(l); 242 corpusListeners = v; 243 } 244 } 245 public synchronized void addCorpusListener(CorpusListener l) { 246 Vector v = corpusListeners == null ? new Vector(2) : (Vector) corpusListeners.clone(); 247 if (!v.contains(l)) { 248 v.addElement(l); 249 corpusListeners = v; 250 } 251 } 252 protected void fireDocumentAdded(CorpusEvent e) { 253 if (corpusListeners != null) { 254 Vector listeners = corpusListeners; 255 int count = listeners.size(); 256 for (int i = 0; i < count; i++) { 257 ((CorpusListener) listeners.elementAt(i)).documentAdded(e); 258 } 259 } 260 } 261 protected void fireDocumentRemoved(CorpusEvent e) { 262 if (corpusListeners != null) { 263 Vector listeners = corpusListeners; 264 int count = listeners.size(); 265 for (int i = 0; i < count; i++) { 266 ((CorpusListener) listeners.elementAt(i)).documentRemoved(e); 267 } 268 } 269 } 270 public void resourceLoaded(CreoleEvent e) { 271 } 272 273 public void resourceRenamed(Resource resource, String oldName, 274 String newName){} 275 276 public void resourceUnloaded(CreoleEvent e) { 277 Resource res = e.getResource(); 278 if (res instanceof Document) { 279 Document doc = (Document) res; 280 if (DEBUG) 281 Out.prln("resource Unloaded called "); 282 //remove from the corpus too, if a transient one 283 if (doc.getDataStore() != this.getDataStore()) { 284 this.remove(doc); 285 } else { 286 //unload all occurences 287 int index = indexOf(res); 288 if (index < 0) 289 return; 290 documents.set(index, null); 291 if (DEBUG) 292 Out.prln("corpus: document "+ index + " unloaded and set to null"); 293 } //if 294 } 295 } 296 public void datastoreOpened(CreoleEvent e) { 297 } 298 public void datastoreCreated(CreoleEvent e) { 299 } 300 public void datastoreClosed(CreoleEvent e) { 301 if (! e.getDatastore().equals(this.getDataStore())) 302 return; 303 if (this.getDataStore() != null) 304 this.getDataStore().removeDatastoreListener(this); 305 //close this corpus, since it cannot stay open when the DS it comes from 306 //is closed 307 Factory.deleteResource(this); 308 } 309 /** 310 * Called by a datastore when a new resource has been adopted 311 */ 312 public void resourceAdopted(DatastoreEvent evt){ 313 } 314 315 /** 316 * Called by a datastore when a resource has been deleted 317 */ 318 public void resourceDeleted(DatastoreEvent evt){ 319 DataStore ds = (DataStore)evt.getSource(); 320 //1. check whether this datastore fired the event. If not, return. 321 if (!ds.equals(this.dataStore)) 322 return; 323 324 Object docID = evt.getResourceID(); 325 if (docID == null) 326 return; 327 328 if (DEBUG) Out.prln("Resource deleted called for: " + docID); 329 //first check if it is this corpus that's been deleted, it must be 330 //unloaded immediately 331 if (docID.equals(this.getLRPersistenceId())) { 332 Factory.deleteResource(this); 333 return; 334 }//if 335 336 boolean isDirty=false; 337 //the problem here is that I only have the doc persistent ID 338 //and nothing else, so I need to determine the index of the doc first 339 for (int i=0; i< docDataList.size(); i++) { 340 DocumentData docData = (DocumentData)docDataList.get(i); 341 //we've found the correct document 342 //don't break the loop, because it might appear more than once 343 if (docID.equals(docData.getPersistentID())) { 344 remove(i); 345 isDirty = true; 346 }//if 347 }//for loop through the doc data 348 349 if (isDirty) 350 try { 351 this.dataStore.sync(this); 352 } catch (PersistenceException ex) { 353 throw new GateRuntimeException("SerialCorpusImpl: " + ex.getMessage()); 354 } catch (SecurityException sex) { 355 throw new GateRuntimeException("SerialCorpusImpl: " + sex.getMessage()); 356 } 357 }//resourceDeleted 358 359 /** 360 * Called by a datastore when a resource has been wrote into the datastore 361 */ 362 public void resourceWritten(DatastoreEvent evt){ 363 if (evt.getResourceID().equals(this.getLRPersistenceId())) { 364 thisResourceWritten(); 365 } 366 } 367 368 369 370 //List methods 371 //java docs will be automatically copied from the List interface. 372 373 public int size() { 374 return docDataList.size(); 375 } 376 377 public boolean isEmpty() { 378 return docDataList.isEmpty(); 379 } 380 381 public boolean contains(Object o){ 382 //return true if: 383 // - the document data list contains a document with such a name 384 // and persistent id 385 386 if(! (o instanceof Document)) 387 return false; 388 389 int index = findDocument((Document) o); 390 if (index < 0) 391 return false; 392 else 393 return true; 394 } 395 396 public Iterator iterator(){ 397 return new Iterator(){ 398 Iterator docDataIter = docDataList.iterator(); 399 400 public boolean hasNext() { 401 return docDataIter.hasNext(); 402 } 403 404 public Object next(){ 405 406 //try finding a document with the same name and persistent ID 407 DocumentData docData = (DocumentData) docDataIter.next(); 408 int index = docDataList.indexOf(docData); 409 return SerialCorpusImpl.this.get(index); 410 } 411 412 public void remove() { 413 throw new UnsupportedOperationException("SerialCorpusImpl does not " + 414 "support remove in the iterators"); 415 } 416 }; //return 417 418 }//iterator 419 420 public String toString() { 421 return "document data " + docDataList.toString() + " documents " + documents; 422 } 423 424 public Object[] toArray(){ 425 //there is a problem here, because some docs might not be instantiated 426 throw new MethodNotImplementedException( 427 "toArray() is not implemented for SerialCorpusImpl"); 428 } 429 430 public Object[] toArray(Object[] a){ 431 //there is a problem here, because some docs might not be instantiated 432 throw new MethodNotImplementedException( 433 "toArray(Object[] a) is not implemented for SerialCorpusImpl"); 434 } 435 436 public boolean add(Object o){ 437 if (! (o instanceof Document) || o == null) 438 return false; 439 Document doc = (Document) o; 440 441 //make it accept only docs from its own datastore 442 if (doc.getDataStore() != null 443 && !this.dataStore.equals(doc.getDataStore())) { 444 Err.prln("Error: Persistent corpus can only accept documents " + 445 "from its own datastore!"); 446 return false; 447 }//if 448 449 //add the document with its index in the docDataList 450 //in this case, since it's going to be added to the end 451 //the index will be the size of the docDataList before 452 //the addition 453 DocumentData docData = new DocumentData(doc.getName(), 454 doc.getLRPersistenceId()); 455 boolean result = docDataList.add(docData); 456 documents.add(doc); 457 documentAdded(doc); 458 fireDocumentAdded(new CorpusEvent(SerialCorpusImpl.this, 459 doc, 460 docDataList.size()-1, 461 CorpusEvent.DOCUMENT_ADDED)); 462 463 return result; 464 } 465 466 public boolean remove(Object o){ 467 if (DEBUG) Out.prln("SerialCorpus:Remove object called"); 468 if (! (o instanceof Document)) 469 return false; 470 Document doc = (Document) o; 471 472 //see if we can find it first. If not, then judt return 473 int index = findDocument(doc); 474 if (index == -1) 475 return false; 476 477 if(index < docDataList.size()) { //we found it, so remove it 478 docDataList.remove(index); 479 Document oldDoc = (Document) documents.remove(index); 480 if (DEBUG) Out.prln("documents after remove of " + oldDoc.getName() 481 + " are " + documents); 482 documentRemoved(oldDoc.getLRPersistenceId().toString()); 483 fireDocumentRemoved(new CorpusEvent(SerialCorpusImpl.this, 484 oldDoc, 485 index, 486 CorpusEvent.DOCUMENT_REMOVED)); 487 } 488 489 return true; 490 } 491 492 public int findDocument(Document doc) { 493 boolean found = false; 494 DocumentData docData = null; 495 496 //first try finding the document in memory 497 int index = documents.indexOf(doc); 498 if (index > -1 && index < docDataList.size()) 499 return index; 500 501 //else try finding a document with the same name and persistent ID 502 Iterator iter = docDataList.iterator(); 503 for (index = 0; iter.hasNext(); index++) { 504 docData = (DocumentData) iter.next(); 505 if (docData.getDocumentName().equals(doc.getName()) && 506 docData.getPersistentID().equals(doc.getLRPersistenceId())) { 507 found = true; 508 break; 509 } 510 } 511 if (found && index < docDataList.size()) 512 return index; 513 else 514 return -1; 515 }//findDocument 516 517 public boolean containsAll(Collection c){ 518 Iterator iter = c.iterator(); 519 while (iter.hasNext()) { 520 if (! contains(iter.next())) 521 return false; 522 } 523 return true; 524 } 525 526 public boolean addAll(Collection c){ 527 boolean allAdded = true; 528 Iterator iter = c.iterator(); 529 while (iter.hasNext()) { 530 if (! add(iter.next())) 531 allAdded = false; 532 } 533 return allAdded; 534 } 535 536 public boolean addAll(int index, Collection c){ 537 throw new UnsupportedOperationException(); 538 } 539 540 public boolean removeAll(Collection c){ 541 boolean allRemoved = true; 542 Iterator iter = c.iterator(); 543 while (iter.hasNext()) { 544 if (! remove(iter.next())) 545 allRemoved = false; 546 } 547 return allRemoved; 548 549 } 550 551 public boolean retainAll(Collection c){ 552 throw new UnsupportedOperationException(); 553 } 554 555 public void clear(){ 556 documents.clear(); 557 docDataList.clear(); 558 } 559 560 public boolean equals(Object o){ 561 if (! (o instanceof SerialCorpusImpl)) 562 return false; 563 SerialCorpusImpl oCorpus = (SerialCorpusImpl) o; 564 if ((this == null && oCorpus != null) || (oCorpus == null && this != null)) 565 return false; 566 if (oCorpus == this) 567 return true; 568 if ((oCorpus.lrPersistentId == this.lrPersistentId || 569 ( this.lrPersistentId != null && 570 this.lrPersistentId.equals(oCorpus.lrPersistentId)) 571 ) 572 && 573 oCorpus.name.equals(this.name) 574 && 575 (oCorpus.dataStore == this.dataStore 576 || oCorpus.dataStore.equals(this.dataStore)) 577 && 578 oCorpus.docDataList.equals(docDataList)) 579 return true; 580 return false; 581 } 582 583 public int hashCode(){ 584 return docDataList.hashCode(); 585 } 586 587 public Object get(int index){ 588 if (index >= docDataList.size()) 589 return null; 590 591 Object res = documents.get(index); 592 593 if (DEBUG) 594 Out.prln("SerialCorpusImpl: get(): index " + index + "result: " + res); 595 596 //if the document is null, then I must get it from the DS 597 if (res == null) { 598 FeatureMap features = Factory.newFeatureMap(); 599 features.put(DataStore.DATASTORE_FEATURE_NAME, this.dataStore); 600 try { 601 features.put(DataStore.LR_ID_FEATURE_NAME, 602 ((DocumentData)docDataList.get(index)).getPersistentID()); 603 Resource lr = Factory.createResource( "gate.corpora.DocumentImpl", 604 features); 605 if (DEBUG) 606 Out.prln("Loaded document :" + lr.getName()); 607 //change the result to the newly loaded doc 608 res = lr; 609 610 //finally replace the doc with the instantiated version 611 documents.set(index, lr); 612 } catch (ResourceInstantiationException ex) { 613 Err.prln("Error reading document inside a serialised corpus."); 614 throw new GateRuntimeException(ex.getMessage()); 615 } 616 } 617 618 return res; 619 } 620 621 public Object set(int index, Object element){ 622 throw new gate.util.MethodNotImplementedException(); 623 //fire the 2 events 624 /* fireDocumentRemoved(new CorpusEvent(SerialCorpusImpl.this, 625 oldDoc, 626 ((Integer) key).intValue(), 627 CorpusEvent.DOCUMENT_REMOVED)); 628 fireDocumentAdded(new CorpusEvent(SerialCorpusImpl.this, 629 newDoc, 630 ((Integer) key).intValue(), 631 CorpusEvent.DOCUMENT_ADDED)); 632 */ 633 } 634 635 public void add(int index, Object o){ 636 if (! (o instanceof Document) || o == null) 637 return; 638 Document doc = (Document) o; 639 640 DocumentData docData = new DocumentData(doc.getName(), 641 doc.getLRPersistenceId()); 642 docDataList.add(index, docData); 643 644 documents.add(index, doc); 645 documentAdded(doc); 646 fireDocumentAdded(new CorpusEvent(SerialCorpusImpl.this, 647 doc, 648 index, 649 CorpusEvent.DOCUMENT_ADDED)); 650 651 } 652 653 public Object remove(int index){ 654 if (DEBUG) Out.prln("Remove index called"); 655 656 boolean isLoaded = isDocumentLoaded(index); 657 Document removed = (Document) get(index); 658 documentRemoved(removed.getLRPersistenceId().toString()); 659 if (!isLoaded){ 660 unloadDocument(removed); 661 } 662 663 docDataList.remove(index); 664 Document res = (Document) documents.remove(index); 665 fireDocumentRemoved(new CorpusEvent(SerialCorpusImpl.this, 666 res, 667 index, 668 CorpusEvent.DOCUMENT_REMOVED)); 669 return res; 670 671 } 672 673 public int indexOf(Object o){ 674 if (o instanceof Document) 675 return findDocument((Document) o); 676 677 return -1; 678 } 679 680 public int lastIndexOf(Object o){ 681 throw new gate.util.MethodNotImplementedException(); 682 } 683 684 public ListIterator listIterator(){ 685 throw new gate.util.MethodNotImplementedException(); 686 } 687 688 public ListIterator listIterator(int index){ 689 throw new gate.util.MethodNotImplementedException(); 690 } 691 692 /** 693 * persistent Corpus does not support this method as all 694 * the documents might no be in memory 695 */ 696 public List subList(int fromIndex, int toIndex){ 697 throw new gate.util.MethodNotImplementedException(); 698 } 699 700 public void setDataStore(DataStore dataStore) 701 throws gate.persist.PersistenceException { 702 super.setDataStore( dataStore); 703 if (this.dataStore != null) 704 this.dataStore.addDatastoreListener(this); 705 } 706 707 public void setTransientSource(Object source) { 708 if (! (source instanceof Corpus)) 709 return; 710 711 //the following initialisation is only valid when we're constructing 712 //this object from a transient one. If it has already been stored in 713 //a datastore, then the initialisation is done in readObject() since 714 //this method is the one called by serialisation, when objects 715 //are restored. 716 if (this.dataStore != null && this.lrPersistentId != null) 717 return; 718 719 Corpus tCorpus = (Corpus) source; 720 721 //copy the corpus name and features from the one in memory 722 this.setName(tCorpus.getName()); 723 this.setFeatures(tCorpus.getFeatures()); 724 725 docDataList = new ArrayList(); 726 //now cache the names of all docs for future use 727 Iterator iter = tCorpus.getDocumentNames().iterator(); 728 while (iter.hasNext()) 729 docDataList.add(new DocumentData((String) iter.next(), null)); 730 731 //copy all the documents from the transient corpus 732 documents = new ArrayList(); 733 documents.addAll(tCorpus); 734 735 this.addedDocs = new Vector(); 736 this.removedDocIDs = new Vector(); 737 this.changedDocs = new Vector(); 738 739 //make sure we fire events when docs are added/removed/etc 740 Gate.getCreoleRegister().addCreoleListener(this); 741 742 } 743 744 //we don't keep the transient source, so always return null 745 //Sill this must be implemented, coz of the GUI and Factory 746 public Object getTransientSource() { 747 return null; 748 } 749 750 751 public Resource init() throws gate.creole.ResourceInstantiationException { 752 super.init(); 753 754 return this; 755 756 } 757 758 759 /** 760 * readObject - calls the default readObject() and then initialises the 761 * transient data 762 * 763 * @serialData Read serializable fields. No optional data read. 764 */ 765 private void readObject(ObjectInputStream s) 766 throws IOException, ClassNotFoundException { 767 s.defaultReadObject(); 768 documents = new ArrayList(docDataList.size()); 769 for (int i = 0; i < docDataList.size(); i++) 770 documents.add(null); 771 corpusListeners = new Vector(); 772 //finally set the creole listeners if the LR is like that 773 Gate.getCreoleRegister().addCreoleListener(this); 774 if (this.dataStore != null) 775 this.dataStore.addDatastoreListener(this); 776 777 //if indexed construct the manager. 778 IndexDefinition definition = (IndexDefinition) this.getFeatures().get( 779 GateConstants.CORPUS_INDEX_DEFINITION_FEATURE_KEY); 780 if (definition != null){ 781 String className = definition.getIrEngineClassName(); 782 try{ 783 Class aClass = Class.forName(className); 784 IREngine engine = (IREngine)aClass.newInstance(); 785 this.indexManager = engine.getIndexmanager(); 786 this.indexManager.setIndexDefinition(definition); 787 this.indexManager.setCorpus(this); 788 }catch(Exception e){ 789 e.printStackTrace(Err.getPrintWriter()); 790 } 791 // switch (definition.getIndexType()) { 792 // case GateConstants.IR_LUCENE_INVFILE: 793 // this.indexManager = new LuceneIndexManager(); 794 // this.indexManager.setIndexDefinition(definition); 795 // this.indexManager.setCorpus(this); 796 // break; 797 // } 798 this.addedDocs = new Vector(); 799 this.removedDocIDs = new Vector(); 800 this.changedDocs = new Vector(); 801 } 802 }//readObject 803 804 public void setIndexDefinition(IndexDefinition definition) { 805 if (definition != null){ 806 this.getFeatures().put(GateConstants.CORPUS_INDEX_DEFINITION_FEATURE_KEY, 807 definition); 808 809 String className = definition.getIrEngineClassName(); 810 try{ 811 Class aClass = Class.forName(className); 812 IREngine engine = (IREngine)aClass.newInstance(); 813 this.indexManager = engine.getIndexmanager(); 814 this.indexManager.setIndexDefinition(definition); 815 this.indexManager.setCorpus(this); 816 }catch(Exception e){ 817 e.printStackTrace(Err.getPrintWriter()); 818 } 819 // switch (definition.getIndexType()) { 820 // case GateConstants.IR_LUCENE_INVFILE: 821 // this.indexManager = new LuceneIndexManager(); 822 // this.indexManager.setIndexDefinition(definition); 823 // this.indexManager.setCorpus(this); 824 // break; 825 // } 826 this.addedDocs = new Vector(); 827 this.removedDocIDs = new Vector(); 828 this.changedDocs = new Vector(); 829 } 830 } 831 832 public IndexDefinition getIndexDefinition() { 833 return (IndexDefinition) this.getFeatures().get( 834 GateConstants.CORPUS_INDEX_DEFINITION_FEATURE_KEY); 835 } 836 837 public IndexManager getIndexManager() { 838 return this.indexManager; 839 } 840 841 public IndexStatistics getIndexStatistics(){ 842 return (IndexStatistics) this.getFeatures().get( 843 GateConstants.CORPUS_INDEX_STATISTICS_FEATURE_KEY); 844 } 845 846 private void documentAdded(Document doc) { 847 if (indexManager != null){ 848 addedDocs.add(doc); 849 } 850 } 851 852 private void documentRemoved(String lrID) { 853 if (indexManager != null) { 854 removedDocIDs.add(lrID); 855 } 856 } 857 858 private void thisResourceWritten() { 859 if (indexManager != null) { 860 try { 861 for (int i = 0; i<documents.size(); i++) { 862 if (documents.get(i) != null) { 863 Document doc = (Document) documents.get(i); 864 if (!addedDocs.contains(doc) && doc.isModified()) { 865 changedDocs.add(doc); 866 } 867 } 868 } 869 indexManager.sync(addedDocs, removedDocIDs, changedDocs); 870 } catch (IndexException ie) { 871 ie.printStackTrace(); 872 } 873 } 874 } 875 876 }
|
SerialCorpusImpl |
|