|
DatabaseCorpusImpl |
|
/*
 *  DatabaseCorpusImpl.java
 *
 *  Copyright (c) 1998-2001, The University of Sheffield.
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Marin Dimitrov, 05/Nov/2001
 *
 *  $Id: DatabaseCorpusImpl.java,v 1.17 2003/07/08 08:36:58 marin Exp $
 */

package gate.corpora;

import java.util.*;

import junit.framework.*;

import gate.*;
import gate.persist.*;
import gate.annotation.*;
import gate.creole.*;
import gate.event.*;
import gate.util.*;
import gate.security.SecurityInfo;


/**
 * A corpus backed by a {@link DatabaseDataStore}.
 *
 * Two parallel lists describe the content of the corpus:
 * <ul>
 * <li><tt>supportList</tt> (inherited) holds the documents that are currently
 * in memory; a <tt>null</tt> entry marks a document that is not loaded and
 * that {@link #get(int)} will instantiate from the datastore on demand;</li>
 * <li><tt>documentData</tt> holds, at the same index, the name and the
 * persistent ID of each document (the ID is <tt>null</tt> for transient
 * documents).</li>
 * </ul>
 * Both lists must always have the same size; {@link #get(int)} asserts it.
 *
 * The class also records which persistent documents were added to or removed
 * from the corpus and whether the name or the features changed. This
 * bookkeeping is reset in {@link #resourceWritten(DatastoreEvent)}.
 */
public class DatabaseCorpusImpl extends CorpusImpl
                                implements DatastoreListener,
                                           EventAwareCorpus {

  /** Debug flag */
  private static final boolean DEBUG = false;

  /** Set when the feature map was replaced or updated since the last write. */
  private boolean featuresChanged;

  /** Set when the name was changed since the last write. */
  private boolean nameChanged;

  /**
   * The listener for the events coming from the features.
   */
  protected EventsHandler eventHandler;

  /** {@link DocumentData} entries, parallel to <tt>supportList</tt>. */
  protected List documentData;

  /** Persistent IDs of the documents removed since the last write. */
  protected List removedDocuments;

  /** Persistent IDs of the documents added since the last write. */
  protected List addedDocuments;


  /**
   * Creates an uninitialised corpus; the bookkeeping lists are only
   * allocated by {@link #setInitData__$$__(Object)}.
   */
  public DatabaseCorpusImpl() {
    super();
  }


  /**
   * Creates a corpus whose documents are all initially unloaded.
   *
   * @param _name the name of the corpus
   * @param _ds the datastore the corpus belongs to
   * @param _persistenceID the persistent ID of the corpus
   * @param _features the features of the corpus
   * @param _dbDocs the {@link DocumentData} entries of the documents in the
   * corpus; this very list is used as the internal document data list
   */
  public DatabaseCorpusImpl(String _name,
                            DatabaseDataStore _ds,
                            Long _persistenceID,
                            FeatureMap _features,
                            Vector _dbDocs) {

    super();

    this.name = _name;
    this.dataStore = _ds;
    this.lrPersistentId = _persistenceID;
    this.features = _features;
    this.documentData = _dbDocs;
    this.supportList = new ArrayList(this.documentData.size());
    this.removedDocuments = new ArrayList();
    //just allocate space for this one, don't initialize it -
    //invocations of add() will add elements to it
    this.addedDocuments = new ArrayList();

    //init the document list: one null (i.e. "not loaded") slot per document
    for (int i = 0; i < this.documentData.size(); i++) {
      this.supportList.add(null);
    }

    this.featuresChanged = false;
    this.nameChanged = false;

    //3. add the listeners for the features
    if (eventHandler == null)
      eventHandler = new EventsHandler();
    this.features.addFeatureMapListener(eventHandler);

    //4. add self as listener for the data store, so that we'll know when the
    //DS is synced and we'll clear the isXXXChanged flags
    this.dataStore.addDatastoreListener(this);
  }


  /**
   * Appends a document to the corpus.
   *
   * @param o the document to add; must be a {@link Document}
   * @return <tt>true</tt> if the corpus changed; <tt>false</tt> if the
   * document was rejected by {@link #isValidForAdoption(LanguageResource)}
   * or the superclass reported no change
   * @throws IllegalArgumentException if <tt>o</tt> is not a {@link Document}
   */
  public boolean add(Object o){

    Assert.assertNotNull(o);

    //accept only documents
    if (false == o instanceof Document) {
      throw new IllegalArgumentException();
    }

    Document doc = (Document)o;

    //docs must be either transient or from the same datastore
    if (false == isValidForAdoption(doc)) {
      return false;
    }

    boolean result = super.add(doc);
    if (false == result) {
      //nothing was appended to supportList, so documentData must not grow
      //either - the two lists have to stay parallel
      return false;
    }

    //add to doc data too
    this.documentData.add(new DocumentData(doc.getName(),
                                           doc.getLRPersistenceId()));

    //add the LRID to the set of newly added documents so that upon sync()
    //a reference from the doc to the corpus will be added in the database
    if (null != doc.getLRPersistenceId()) {
      this.addedDocuments.add(doc.getLRPersistenceId());
    }

    fireDocumentAdded(new CorpusEvent(this,
                                      doc,
                                      this.supportList.size()-1,
                                      CorpusEvent.DOCUMENT_ADDED));

    return result;
  }


  /**
   * Inserts a document at the given position. Documents rejected by
   * {@link #isValidForAdoption(LanguageResource)} are silently ignored.
   *
   * @param index the position to insert at
   * @param element the document to insert; must be a {@link Document}
   * @throws IllegalArgumentException if <tt>element</tt> is not a
   * {@link Document}
   */
  public void add(int index, Object element){

    Assert.assertNotNull(element);
    Assert.assertTrue(index >= 0);

    //accept only documents
    if (false == element instanceof Document) {
      throw new IllegalArgumentException();
    }

    Document doc = (Document)element;

    //docs must be either transient or from the same datastore
    if (false == isValidForAdoption(doc)) {
      return;
    }

    int collInitialSize = this.supportList.size();

    super.add(index, doc);

    //add to doc data too; keep the persistent ID (same as add(Object)),
    //otherwise an already persistent document could not be reloaded once
    //it is unloaded from memory
    this.documentData.add(index, new DocumentData(doc.getName(),
                                                  doc.getLRPersistenceId()));

    //add the LRID to the set of newly added documents so that upon sync()
    //a reference from the doc to the corpus will be added in the database
    if (null != doc.getLRPersistenceId()) {
      this.addedDocuments.add(doc.getLRPersistenceId());
    }

    //if added then fire event
    if (this.supportList.size() > collInitialSize) {
      fireDocumentAdded(new CorpusEvent(this,
                                        doc,
                                        index,
                                        CorpusEvent.DOCUMENT_ADDED));
    }
  }


  /**
   * Appends all the documents of a collection, skipping the ones rejected
   * by {@link #isValidForAdoption(LanguageResource)}.
   *
   * @param c a collection of {@link Document}s
   * @return <tt>true</tt> if at least one document was added
   */
  public boolean addAll(Collection c){

    boolean collectionChanged = false;

    Iterator it = c.iterator();
    while (it.hasNext()) {
      Document doc = (Document)it.next();
      if (isValidForAdoption(doc)) {
        collectionChanged |= add(doc);
      }
    }

    return collectionChanged;
  }


  /**
   * Inserts all the documents of a collection starting at the given position,
   * skipping the ones rejected by
   * {@link #isValidForAdoption(LanguageResource)}.
   *
   * @param index the position of the first inserted document
   * @param c a collection of {@link Document}s
   * @return <tt>true</tt> if the corpus grew
   */
  public boolean addAll(int index, Collection c){

    Assert.assertTrue(index >=0);

    //add(index,element) returns void and not boolean, so the change is
    //detected by comparing the sizes
    int collInitialSize = this.supportList.size();
    int currIndex = index;

    Iterator it = c.iterator();
    while (it.hasNext()) {
      Document doc = (Document)it.next();
      if (isValidForAdoption(doc)) {
        add(currIndex++, doc);
      }
    }

    return (this.supportList.size() > collInitialSize);
  }


  /**
   * Checks whether a resource may become a member of this corpus: it has to
   * be transient (no persistent ID) or to belong to the datastore of this
   * corpus.
   */
  private boolean isValidForAdoption(LanguageResource lr) {

    if (null == lr.getLRPersistenceId()) {
      //transient resources are always welcome
      return true;
    }

    //compare starting from our own datastore so that a persistent resource
    //reporting no datastore is rejected instead of causing an NPE
    DataStore ownDataStore = this.getDataStore();
    return ownDataStore != null && ownDataStore.equals(lr.getDataStore());
  }


  /** Does nothing. */
  public void resourceAdopted(DatastoreEvent evt){
  }


  /**
   * Reacts to the deletion of a resource from the datastore. If this corpus
   * was deleted, its content is dropped and it stops listening to the
   * datastore; if a member document was deleted, every occurrence of it is
   * dropped from the corpus.
   */
  public void resourceDeleted(DatastoreEvent evt){

    Assert.assertNotNull(evt);
    Long deletedID = (Long)evt.getResourceID();
    Assert.assertNotNull(deletedID);

    if (deletedID.equals(this.getLRPersistenceId())) {
      //someone deleted this corpus: drop the content (both lists, so that
      //they stay parallel) and unregister self as listener from the DataStore
      this.supportList.clear();
      if (this.documentData != null) {
        this.documentData.clear();
      }
      getDataStore().removeDatastoreListener(this);
      return;
    }

    if (this.documentData == null) {
      return;
    }

    //check if the ID is of a document the corpus contains. The lookup is
    //done on the document data because supportList holds null for unloaded
    //documents and transient documents have no persistent ID.
    //Walk backwards so that removals do not shift the entries still to visit.
    for (int i = this.documentData.size() - 1; i >= 0; i--) {
      DocumentData docData = (DocumentData)this.documentData.get(i);
      if (deletedID.equals(docData.getPersistentID())) {
        this.documentData.remove(i);
        this.supportList.remove(i);
      }
    }
  }


  /**
   * Reacts to a write to the datastore: if this corpus was the resource
   * written, all the "changed" bookkeeping is reset.
   */
  public void resourceWritten(DatastoreEvent evt){
    Assert.assertNotNull(evt);
    Assert.assertNotNull(evt.getResourceID());

    //is the event for us?
    if (evt.getResourceID().equals(this.getLRPersistenceId())) {
      //clear all flags, the content is synced with the DB
      this.featuresChanged = false;
      this.nameChanged = false;

      this.removedDocuments.clear();
      this.addedDocuments.clear();
    }
  }


  /**
   * Reacts to a resource being unloaded: a transient member document is
   * removed from the corpus, a persistent member document is only marked as
   * not loaded.
   */
  public void resourceUnloaded(CreoleEvent e) {

    Assert.assertNotNull(e);
    Assert.assertNotNull(e.getResource());

    Resource res = e.getResource();

    if (false == res instanceof Document) {
      return;
    }

    Document doc = (Document) res;

    if (DEBUG) {
      Out.prln("resource Unloaded called ");
    }

    if (null == doc.getLRPersistenceId()) {
      //transient one: remove from the corpus too.
      //Goes through remove(Object) of this class (and not of the superclass)
      //so that the document data entry is removed as well.
      //@FIXME - not sure we need this
      this.remove(doc);
      return;
    }

    //see if we can find it first. If not, then just return
    int index = findDocument(doc);
    if (index == -1) {
      //not our document
      return;
    }

    //mark as not loaded; the document data entry stays, so the document
    //can be loaded again by get()
    this.supportList.set(index, null);

    if (DEBUG)
      Out.prln("corpus: document " + index + " unloaded and set to null");
  }


  /**
   * Tells whether the given aspect of the corpus changed.
   *
   * @param changeType {@link EventAwareLanguageResource#RES_FEATURES} or
   * {@link EventAwareLanguageResource#RES_NAME}
   * @throws IllegalArgumentException for any other value
   */
  public boolean isResourceChanged(int changeType) {

    switch(changeType) {

      case EventAwareLanguageResource.RES_FEATURES:
        return this.featuresChanged;
      case EventAwareLanguageResource.RES_NAME:
        return this.nameChanged;
      default:
        throw new IllegalArgumentException();
    }
  }


  /**
   * Returns true if the name or the features of the corpus have been
   * modified since the flags were last cleared.
   */
  public boolean isModified() {
    return this.isResourceChanged(EventAwareLanguageResource.RES_FEATURES) ||
            this.isResourceChanged(EventAwareLanguageResource.RES_NAME);
  }


  /** Sets the name of this resource and marks the name as changed. */
  public void setName(String name){
    super.setName(name);

    this.nameChanged = true;
  }


  /**
   * Sets the feature map, marks the features as changed and moves the
   * feature listener from the old map to the new one.
   */
  public void setFeatures(FeatureMap features) {
    //1. save the old ones first, so we can remove the listener
    FeatureMap oldFeatures = this.features;

    super.setFeatures(features);

    this.featuresChanged = true;

    //2. sort out the listeners; either map may be missing
    if (eventHandler == null) {
      eventHandler = new EventsHandler();
    }
    else if (oldFeatures != null) {
      oldFeatures.removeFeatureMapListener(eventHandler);
    }

    if (this.features != null) {
      this.features.addFeatureMapListener(eventHandler);
    }
  }


  /**
   * All the events from the features are handled by
   * this inner class.
   */
  class EventsHandler implements gate.event.FeatureMapListener {
    public void featureMapUpdated(){
      //remember that the features have been updated
      featuresChanged = true;
    }
  }///inner class EventsHandler


  /**
   * Overridden to remove the features listener when the corpus is cleaned up.
   */
  public void cleanup() {
    super.cleanup();
    if (eventHandler != null && this.features != null)
      this.features.removeFeatureMapListener(eventHandler);
  }


  /**
   * Initialises the corpus from a map with the keys <tt>CORP_NAME</tt>,
   * <tt>DS</tt>, <tt>LR_ID</tt>, <tt>CORP_FEATURES</tt> and
   * <tt>CORP_SUPPORT_LIST</tt> (a list of documents, all of them loaded).
   *
   * @param data a {@link HashMap} with the entries listed above
   */
  public void setInitData__$$__(Object data) {

    HashMap initData = (HashMap)data;

    this.name = (String)initData.get("CORP_NAME");
    this.dataStore = (DatabaseDataStore)initData.get("DS");
    this.lrPersistentId = (Long)initData.get("LR_ID");
    this.features = (FeatureMap)initData.get("CORP_FEATURES");
    this.supportList = new ArrayList((List)initData.get("CORP_SUPPORT_LIST"));

    this.documentData = new ArrayList(this.supportList.size());
    this.removedDocuments = new ArrayList();
    this.addedDocuments = new ArrayList();

    //init the documentData list
    for (int i = 0; i < this.supportList.size(); i++) {
      Document dbDoc = (Document)this.supportList.get(i);
      this.documentData.add(new DocumentData(dbDoc.getName(),
                                             dbDoc.getLRPersistenceId()));
    }

    this.featuresChanged = false;
    this.nameChanged = false;

    //3. add the listeners for the features
    if (eventHandler == null)
      eventHandler = new EventsHandler();
    this.features.addFeatureMapListener(eventHandler);

    //4. add self as listener for the data store, so that we'll know when the
    //DS is synced and we'll clear the isXXXChanged flags
    this.dataStore.addDatastoreListener(this);
  }


  /** Always returns <tt>null</tt>. */
  public Object getInitData__$$__(Object initData) {
    return null;
  }


  /**
   * Gets the names of the documents in this corpus.
   * @return a {@link List} of Strings representing the names of the documents
   * in this corpus (empty if the corpus was not initialised).
   */
  public List getDocumentNames(){

    List docsNames = new ArrayList();

    if(this.documentData == null)
      return docsNames;

    Iterator iter = this.documentData.iterator();
    while (iter.hasNext()) {
      DocumentData data = (DocumentData)iter.next();
      docsNames.add(data.getDocumentName());
    }

    return docsNames;
  }


  /**
   * Gets the name of a document in this corpus.
   * @param index the index of the document
   * @return a String value representing the name of the document at
   * <tt>index</tt> in this corpus, or the text "No such document" if
   * <tt>index</tt> is past the end of the corpus.<P>
   */
  public String getDocumentName(int index){

    if (index >= this.documentData.size()) return "No such document";

    return ((DocumentData)this.documentData.get(index)).getDocumentName();
  }


  /**
   * Returns a document in the corpus by index, loading it from the datastore
   * if it is not in memory.
   * @param index the index of the document
   * @return the document at <tt>index</tt>
   * @throws GateRuntimeException if the document cannot be instantiated
   */
  public Object get(int index){

    //0. preconditions
    Assert.assertTrue(index >= 0);
    Assert.assertTrue(index < this.documentData.size());
    Assert.assertTrue(index < this.supportList.size());
    Assert.assertTrue(this.documentData.size() == this.supportList.size());

    if (index >= this.documentData.size())
      return null;

    Object res = this.supportList.get(index);

    if (null != res) {
      //already in memory
      return res;
    }

    //the document is not loaded, so get it from the database
    Long currLRID =
        (Long)((DocumentData)this.documentData.get(index)).getPersistentID();
    FeatureMap params = Factory.newFeatureMap();
    params.put(DataStore.DATASTORE_FEATURE_NAME, this.getDataStore());
    params.put(DataStore.LR_ID_FEATURE_NAME, currLRID);

    try {
      Document dbDoc =
          (Document)Factory.createResource(DBHelper.DOCUMENT_CLASS, params);

      if (DEBUG) {
        Out.prln("Loaded document :" + dbDoc.getName());
      }

      //finally replace the empty slot with the instantiated version
      Assert.assertNull(this.supportList.get(index));
      this.supportList.set(index, dbDoc);

      return dbDoc;
    }
    catch (ResourceInstantiationException ex) {
      Err.prln("Error reading document inside a serialised corpus.");
      throw new GateRuntimeException(ex.getMessage());
    }
  }


  /**
   * Removes the document at the given position.
   *
   * @param index the position of the document
   * @return the removed document, or <tt>null</tt> if it was not loaded
   */
  public Object remove(int index){

    //1. get the persistent id; it is null if the doc is still transient
    DocumentData docData = (DocumentData)this.documentData.get(index);
    Long removedID = (Long)docData.getPersistentID();

    //2. a document added since the last write is simply forgotten (so that
    //upon sync() no reference from the doc to the corpus is added in the
    //database); any other persistent document is recorded as removed
    if (null != removedID) {
      if (this.addedDocuments.contains(removedID)) {
        this.addedDocuments.remove(removedID);
      }
      else {
        this.removedDocuments.add(removedID);
      }
    }

    //3. delete from both lists
    this.documentData.remove(index);
    Document res = (Document)this.supportList.remove(index);

    //4. fire events
    fireDocumentRemoved(new CorpusEvent(DatabaseCorpusImpl.this,
                                        res,
                                        index,
                                        CorpusEvent.DOCUMENT_REMOVED));
    return res;
  }


  /**
   * Removes a document from the corpus. Both transient and persistent
   * documents are accepted.
   *
   * @param obj the document to remove
   * @return <tt>true</tt> if the document was found (see
   * {@link #findDocument(Document)}) and removed
   */
  public boolean remove(Object obj){

    //0. preconditions
    Assert.assertNotNull(obj);

    if (false == obj instanceof Document) {
      return false;
    }

    //see if we can find it first. If not, then just return
    int index = findDocument((Document) obj);
    if (index == -1) {
      return false;
    }

    //we found it, so remove it; the bookkeeping is done by remove(int)
    remove(index);

    return true;
  }


  /**
   * Finds the position of a document in the corpus. The loaded documents are
   * searched first; failing that, a persistent document is looked up by name
   * and persistent ID among the document data entries.
   *
   * @param doc the document to look for
   * @return the index of the document or -1 if it is not in the corpus
   */
  public int findDocument(Document doc) {

    //first try finding the document in memory
    int index = this.supportList.indexOf(doc);

    if (index > -1 && index < this.documentData.size()) {
      return index;
    }

    //a transient document has no persistent ID to match on; matching it by
    //name alone could pick up an unrelated document with the same name
    Object docID = doc.getLRPersistenceId();
    if (null == docID) {
      return -1;
    }

    //else try finding a document with the same name and persistent ID
    String docName = doc.getName();

    for (index = 0; index < this.documentData.size(); index++) {
      DocumentData docData = (DocumentData)this.documentData.get(index);
      String dataName = docData.getDocumentName();

      boolean sameName = (null == dataName) ? (null == docName)
                                            : dataName.equals(docName);

      //docID goes first: entries of transient documents have a null ID
      if (sameName && docID.equals(docData.getPersistentID())) {
        return index;
      }
    }

    return -1;
  }//findDocument


  /**
   * Returns true if {@link #findDocument(Document)} can locate the given
   * object in the corpus.
   */
  public boolean contains(Object o){

    if(false == o instanceof Document)
      return false;

    return findDocument((Document) o) >= 0;
  }


  /**
   * Returns an iterator over the documents of the corpus; each document is
   * obtained through {@link #get(int)}. Removal is not supported.
   */
  public Iterator iterator(){
    return new DatabaseCorpusIterator(this.documentData);
  }


  /**
   * Returns a copy of the internal document list; it contains <tt>null</tt>
   * at the positions of the documents that are not loaded.
   */
  public List getLoadedDocuments() {
    return new ArrayList(this.supportList);
  }


  /** Returns a copy of the list of persistent IDs recorded as removed. */
  public List getRemovedDocuments() {
    return new ArrayList(this.removedDocuments);
  }


  /** Returns a copy of the list of persistent IDs recorded as added. */
  public List getAddedDocuments() {
    return new ArrayList(this.addedDocuments);
  }


  /**
   * Iterator over the corpus. It walks the document data list and hands out
   * the document at the matching position.
   */
  private class DatabaseCorpusIterator implements Iterator {

    /** Iterator over the document data; drives hasNext() and next(). */
    private Iterator docDataIter;

    /** Index of the document that the next call to next() will return. */
    private int nextIndex;

    public DatabaseCorpusIterator(List docDataList) {
      this.docDataIter = docDataList.iterator();
      this.nextIndex = 0;
    }

    public boolean hasNext() {
      return docDataIter.hasNext();
    }

    public Object next(){
      //advance the underlying iterator; this is what raises the
      //end-of-iteration and concurrent modification errors
      docDataIter.next();

      //the position is tracked directly instead of being searched for in
      //the list on every step
      return DatabaseCorpusImpl.this.get(nextIndex++);
    }

    public void remove() {
      throw new UnsupportedOperationException("DatabaseCorpusImpl does not " +
                                              "support remove in the iterators");
    }
  }


  /**
   * Unloads the document from memory, but stores it first: a transient
   * document is adopted by the datastore, a persistent one is synced.
   *
   * @param index the position of the document
   * @throws GateRuntimeException if the document cannot be stored
   */
  public void unloadDocument(int index) {

    //preconditions
    Assert.assertTrue(index >= 0);

    //1. check whether its been loaded and is a persistent one
    // if a persistent doc is not loaded, there's nothing we need to do
    if ( (! isDocumentLoaded(index)) && isPersistentDocument(index)) {
      return;
    }

    //2. store the document before releasing it from memory
    Document doc = (Document)this.supportList.get(index);
    Assert.assertNotNull(doc);

    try {

      if (doc.getLRPersistenceId() == null) {
        //the document is not already adopted, we need to do that first

        //2.1 get the security info for the corpus
        SecurityInfo si = this.getDataStore().getSecurityInfo(this);
        Document dbDoc = (Document) this.getDataStore().adopt(doc, si);

        //2.2 remember the persistent ID the document has just got;
        //without it get() could not load the document again once its
        //slot is emptied below
        //NOTE(review): the new ID is not recorded in addedDocuments -
        //confirm how the datastore links freshly adopted documents to
        //the corpus on sync
        this.documentData.set(index,
                              new DocumentData(dbDoc.getName(),
                                               dbDoc.getLRPersistenceId()));
      }
      else {
        //if it is adopted, just sync it
        this.getDataStore().sync(doc);
      }

      //3. remove the document from the memory
      //do this, only if the saving has succeeded
      this.supportList.set(index, null);
    }
    catch (PersistenceException pex) {
      throw new GateRuntimeException("Error unloading document from corpus "
                      + "because document sync failed: " + pex.getMessage());
    }
    catch (gate.security.SecurityException sex) {
      throw new GateRuntimeException("Error unloading document from corpus "
                      + "because of document access error: " + sex.getMessage());
    }
  }


  /**
   * Unloads a document from memory; does nothing if the document is not in
   * the corpus.
   */
  public void unloadDocument(Document doc) {

    Assert.assertNotNull(doc);

    //1. determine the index of the document; if not there, do nothing
    int index = findDocument(doc);

    if (index == -1) {
      return;
    }

    unloadDocument(index);
  }


  /**
   * This method returns true when the document is already loaded in memory
   */
  public boolean isDocumentLoaded(int index) {

    //preconditions
    Assert.assertTrue(index >= 0);

    if (this.supportList == null || this.supportList.isEmpty()) {
      return false;
    }

    return this.supportList.get(index) != null;
  }


  /**
   * This method returns true when the document has a persistent ID,
   * i.e., is not transient
   */
  public boolean isPersistentDocument(int index) {

    //preconditions
    Assert.assertTrue(index >= 0);

    if (this.supportList == null || this.supportList.isEmpty()) {
      return false;
    }

    return (((DocumentData)this.documentData.get(index)).getPersistentID() != null);
  }


  /**
   * Two database corpora are equal if they belong to the same datastore
   * instance, have equal persistent IDs and the same list of loaded
   * documents. The datastore and the ID are only checked when this corpus
   * has one.
   */
  public boolean equals(Object o){

    if (! (o instanceof DatabaseCorpusImpl))
      return false;

    DatabaseCorpusImpl dbCorp = (DatabaseCorpusImpl)o;

    if (this.getDataStore() != null &&
        this.getDataStore() != dbCorp.getDataStore()) {
      return false;
    }

    //IDs are objects: compare them by value, not by reference
    if (this.getLRPersistenceId() != null &&
        false == this.getLRPersistenceId().equals(dbCorp.getLRPersistenceId())) {
      return false;
    }

    return supportList.equals(o);
  }


}
|
DatabaseCorpusImpl |
|