1
14
15 package gate.creole.ir.lucene;
16
17 import java.io.File;
18 import java.util.Iterator;
19 import java.util.List;
20
21 import org.apache.lucene.analysis.SimpleAnalyzer;
22 import org.apache.lucene.document.Field;
23 import org.apache.lucene.index.IndexReader;
24 import org.apache.lucene.index.IndexWriter;
25
26 import gate.Corpus;
27 import gate.creole.ir.*;
28 import gate.util.GateRuntimeException;
29
30
31 public class LuceneIndexManager implements IndexManager{
32
33
34 public final static String DOCUMENT_ID = "DOCUMENT_ID";
35
36
37 private IndexDefinition indexDefinition;
38
39
40 private Corpus corpus;
41
42
43
44 public final static String CORPUS_INDEX_FEATURE = "CorpusIndexFeature";
45 public final static String CORPUS_INDEX_FEATURE_VALUE = "IR";
46
47
48
49 public LuceneIndexManager(){
50 }
51
52
54 public void createIndex() throws IndexException{
55 if(indexDefinition == null)
56 throw new GateRuntimeException("Index definition is null!");
57 if(corpus == null)
58 throw new GateRuntimeException("Corpus is null!");
59
60 String location = indexDefinition.getIndexLocation();
61 try {
62 File file = new File(location);
63 if (file.exists()){
64 if (file.isDirectory() && file.listFiles().length>0) {
65 throw new IndexException(location+ " is not empty directory");
66 }
67 if (!file.isDirectory()){
68 throw new IndexException("Only empty directory can be index path");
69 }
70 }
71
72
73 corpus.getFeatures().put(CORPUS_INDEX_FEATURE, CORPUS_INDEX_FEATURE_VALUE);
75
76
77 IndexWriter writer = new IndexWriter(location,
78 new SimpleAnalyzer(), true);
79
80 for(int i = 0; i<corpus.size(); i++) {
81 boolean isLoaded = corpus.isDocumentLoaded(i);
82 gate.Document gateDoc = (gate.Document) corpus.get(i);
83 writer.addDocument(getLuceneDoc(gateDoc));
84 if (!isLoaded) {
85 corpus.unloadDocument(gateDoc);
86 }
87 }
89 writer.close();
90 corpus.sync();
91 } catch (java.io.IOException ioe){
92 throw new IndexException(ioe.getMessage());
93 } catch (gate.persist.PersistenceException pe){
94 pe.printStackTrace();
95 } catch (gate.security.SecurityException se){
96 se.printStackTrace();
97 }
98 }
99
100
101 public void optimizeIndex() throws IndexException{
102 if(indexDefinition == null)
103 throw new GateRuntimeException("Index definition is null!");
104 try {
105 IndexWriter writer = new IndexWriter(indexDefinition.getIndexLocation(),
106 new SimpleAnalyzer(), false);
107 writer.optimize();
108 writer.close();
109 } catch (java.io.IOException ioe){
110 throw new IndexException(ioe.getMessage());
111 }
112 }
113
114
115 public void deleteIndex() throws IndexException{
116 if(indexDefinition == null)
117 throw new GateRuntimeException("Index definition is null!");
118 boolean isDeleted = true;
119 File dir = new File(indexDefinition.getIndexLocation());
120 if (dir.exists() && dir.isDirectory()) {
121 File[] files = dir.listFiles();
122 for (int i =0; i<files.length; i++){
123 File f = files[i];
124 isDeleted = f.delete();
125 }
126 }
127 dir.delete();
128 if (!isDeleted) {
129 throw new IndexException("Can't delete directory"
130 + indexDefinition.getIndexLocation());
131 }
132 }
133
134
136 public void sync(List added, List removedIDs, List changed) throws IndexException{
137 String location = indexDefinition.getIndexLocation();
138 try {
139
140 IndexReader reader = IndexReader.open(location);
141
142 for (int i = 0; i<removedIDs.size(); i++) {
143 String id = removedIDs.get(i).toString();
144 org.apache.lucene.index.Term term =
145 new org.apache.lucene.index.Term(DOCUMENT_ID,id);
146 reader.delete(term);
147 }
149 for (int i = 0; i<changed.size(); i++) {
150 gate.Document gateDoc = (gate.Document) changed.get(i);
151 String id = gateDoc.getLRPersistenceId().toString();
152 org.apache.lucene.index.Term term =
153 new org.apache.lucene.index.Term(DOCUMENT_ID,id);
154 reader.delete(term);
155 }
157 reader.close();
158
159 IndexWriter writer = new IndexWriter(location,
160 new SimpleAnalyzer(), false);
161
162 for(int i = 0; i<added.size(); i++) {
163 gate.Document gateDoc = (gate.Document) added.get(i);
164 writer.addDocument(getLuceneDoc(gateDoc));
165 }
167 for(int i = 0; i<changed.size(); i++) {
168 gate.Document gateDoc = (gate.Document) changed.get(i);
169 writer.addDocument(getLuceneDoc(gateDoc));
170 }
172 writer.close();
173 } catch (java.io.IOException ioe) {
174 throw new IndexException(ioe.getMessage());
175 }
176 }
177
178 private org.apache.lucene.document.Document getLuceneDoc(gate.Document gateDoc){
179 org.apache.lucene.document.Document luceneDoc =
180 new org.apache.lucene.document.Document();
181 Iterator fields = indexDefinition.getIndexFields();
182
183 luceneDoc.add(Field.Keyword(DOCUMENT_ID,
184 gateDoc.getLRPersistenceId().toString()));
185
186 while (fields.hasNext()) {
187 IndexField field = (IndexField) fields.next();
188 String valueForIndexing;
189
190 if (field.getReader() == null){
191 valueForIndexing = gateDoc.getFeatures().get(field.getName()).toString();
192 } else {
193 valueForIndexing = field.getReader().getPropertyValue(gateDoc);
194 }
196 if (field.isPreseved()) {
197 luceneDoc.add(Field.Keyword(field.getName(),valueForIndexing));
198 } else {
199 luceneDoc.add(Field.UnStored(field.getName(),valueForIndexing));
200 }
202 }
204 return luceneDoc;
205 }
206
207 public Corpus getCorpus() {
208 return corpus;
209 }
210 public void setCorpus(Corpus corpus) {
211 this.corpus = corpus;
212 }
213 public IndexDefinition getIndexDefinition() {
214 return indexDefinition;
215 }
216 public void setIndexDefinition(IndexDefinition indexDefinition) {
217 this.indexDefinition = indexDefinition;
218 }
219
220 }