|
LuceneIndexManager |
|
1 /* 2 * LuceneIndexManager.java 3 * 4 * Copyright (c) 1998-2001, The University of Sheffield. 5 * 6 * This file is part of GATE (see http://gate.ac.uk/), and is free 7 * software, licenced under the GNU Library General Public License, 8 * Version 2, June 1991 (in the distribution as file licence.html, 9 * and also available at http://gate.ac.uk/gate/licence.html). 10 * 11 * Rosen Marinov, 19/Apr/2002 12 * 13 */ 14 15 package gate.creole.ir.lucene; 16 17 import gate.*; 18 import gate.util.*; 19 import gate.creole.ir.*; 20 21 import org.apache.lucene.index.*; 22 import org.apache.lucene.analysis.*; 23 import org.apache.lucene.document.*; 24 import org.apache.lucene.search.*; 25 import org.apache.lucene.queryParser.*; 26 import org.apache.lucene.store.*; 27 28 import java.io.*; 29 import java.util.*; 30 31 /** This class represents Lucene implementation of IndexManeager interface.*/ 32 public class LuceneIndexManager implements IndexManager{ 33 34 /** used in Lucene Documents as a key for gate document ID value. */ 35 public final static String DOCUMENT_ID = "DOCUMENT_ID"; 36 37 /** IndexDefinition - location, type, fields, etc.*/ 38 private IndexDefinition indexDefinition; 39 40 /** An corpus for indexing*/ 41 private Corpus corpus; 42 43 /** Constructor of the class. */ 44 public LuceneIndexManager(){ 45 } 46 47 /** Creates index directory and indexing all 48 * documents in the corpus. */ 49 public void createIndex() throws IndexException{ 50 if(indexDefinition == null) 51 throw new GateRuntimeException("Index definition is null!"); 52 if(corpus == null) 53 throw new GateRuntimeException("Corpus is null!"); 54 55 String location = indexDefinition.getIndexLocation(); 56 try { 57 File file = new File(location); 58 if (file.exists()){ 59 if (file.isDirectory() && file.listFiles().length>0) { 60 throw new IndexException(location+ " is not empty directory"); 61 } 62 if (!file.isDirectory()){ 63 throw new IndexException("Only empty directory can be index path"); 64 } 65 } 66 67 IndexWriter writer = new IndexWriter(location, 68 new SimpleAnalyzer(), true); 69 70 for(int i = 0; i<corpus.size(); i++) { 71 boolean isLoaded = corpus.isDocumentLoaded(i); 72 gate.Document gateDoc = (gate.Document) corpus.get(i); 73 writer.addDocument(getLuceneDoc(gateDoc)); 74 if (!isLoaded) { 75 corpus.unloadDocument(gateDoc); 76 } 77 }//for (all documents) 78 79 writer.close(); 80 corpus.sync(); 81 } catch (java.io.IOException ioe){ 82 throw new IndexException(ioe.getMessage()); 83 } catch (gate.persist.PersistenceException pe){ 84 pe.printStackTrace(); 85 } catch (gate.security.SecurityException se){ 86 se.printStackTrace(); 87 } 88 } 89 90 /** Optimize existing index. */ 91 public void optimizeIndex() throws IndexException{ 92 if(indexDefinition == null) 93 throw new GateRuntimeException("Index definition is null!"); 94 try { 95 IndexWriter writer = new IndexWriter(indexDefinition.getIndexLocation(), 96 new SimpleAnalyzer(), false); 97 writer.optimize(); 98 writer.close(); 99 } catch (java.io.IOException ioe){ 100 throw new IndexException(ioe.getMessage()); 101 } 102 } 103 104 /** Delete index. */ 105 public void deleteIndex() throws IndexException{ 106 if(indexDefinition == null) 107 throw new GateRuntimeException("Index definition is null!"); 108 boolean isDeleted = true; 109 File dir = new File(indexDefinition.getIndexLocation()); 110 if (dir.exists() && dir.isDirectory()) { 111 File[] files = dir.listFiles(); 112 for (int i =0; i<files.length; i++){ 113 File f = files[i]; 114 isDeleted = f.delete(); 115 } 116 } 117 dir.delete(); 118 if (!isDeleted) { 119 throw new IndexException("Can't delete directory" 120 + indexDefinition.getIndexLocation()); 121 } 122 } 123 124 /** Reindexing changed documents, removing removed documents and 125 * add to the index new corpus documents. */ 126 public void sync(List added, List removedIDs, List changed) throws IndexException{ 127 String location = indexDefinition.getIndexLocation(); 128 try { 129 130 IndexReader reader = IndexReader.open(location); 131 132 for (int i = 0; i<removedIDs.size(); i++) { 133 String id = removedIDs.get(i).toString(); 134 org.apache.lucene.index.Term term = 135 new org.apache.lucene.index.Term(DOCUMENT_ID,id); 136 reader.delete(term); 137 }//for (remove all removed documents) 138 139 for (int i = 0; i<changed.size(); i++) { 140 gate.Document gateDoc = (gate.Document) changed.get(i); 141 String id = gateDoc.getLRPersistenceId().toString(); 142 org.apache.lucene.index.Term term = 143 new org.apache.lucene.index.Term(DOCUMENT_ID,id); 144 reader.delete(term); 145 }//for (remove all changed documents) 146 147 reader.close(); 148 149 IndexWriter writer = new IndexWriter(location, 150 new SimpleAnalyzer(), false); 151 152 for(int i = 0; i<added.size(); i++) { 153 gate.Document gateDoc = (gate.Document) added.get(i); 154 writer.addDocument(getLuceneDoc(gateDoc)); 155 }//for (add all added documents) 156 157 for(int i = 0; i<changed.size(); i++) { 158 gate.Document gateDoc = (gate.Document) changed.get(i); 159 writer.addDocument(getLuceneDoc(gateDoc)); 160 }//for (add all changed documents) 161 162 writer.close(); 163 } catch (java.io.IOException ioe) { 164 throw new IndexException(ioe.getMessage()); 165 } 166 } 167 168 private org.apache.lucene.document.Document getLuceneDoc(gate.Document gateDoc){ 169 org.apache.lucene.document.Document luceneDoc = 170 new org.apache.lucene.document.Document(); 171 Iterator fields = indexDefinition.getIndexFields(); 172 173 luceneDoc.add(Field.Keyword(DOCUMENT_ID, 174 gateDoc.getLRPersistenceId().toString())); 175 176 while (fields.hasNext()) { 177 IndexField field = (IndexField) fields.next(); 178 String valueForIndexing; 179 180 if (field.getReader() == null){ 181 valueForIndexing = gateDoc.getFeatures().get(field.getName()).toString(); 182 } else { 183 valueForIndexing = field.getReader().getPropertyValue(gateDoc); 184 } //if-else reader or feature 185 186 if (field.isPreseved()) { 187 luceneDoc.add(Field.Keyword(field.getName(),valueForIndexing)); 188 } else { 189 luceneDoc.add(Field.UnStored(field.getName(),valueForIndexing)); 190 } // if-else keyword or text 191 192 }// while (add all fields) 193 194 return luceneDoc; 195 } 196 197 public Corpus getCorpus() { 198 return corpus; 199 } 200 public void setCorpus(Corpus corpus) { 201 this.corpus = corpus; 202 } 203 public IndexDefinition getIndexDefinition() { 204 return indexDefinition; 205 } 206 public void setIndexDefinition(IndexDefinition indexDefinition) { 207 this.indexDefinition = indexDefinition; 208 } 209 210 }
|
LuceneIndexManager |
|