1
14
15 package gate.creole.ir.lucene;
16
17 import java.io.File;
18 import java.util.Iterator;
19 import java.util.List;
20
21 import org.apache.lucene.analysis.SimpleAnalyzer;
22 import org.apache.lucene.document.Field;
23 import org.apache.lucene.index.IndexReader;
24 import org.apache.lucene.index.IndexWriter;
25
26 import gate.Corpus;
27 import gate.creole.ir.*;
28 import gate.util.GateRuntimeException;
29
30
31 public class LuceneIndexManager implements IndexManager{
32
33
34 public final static String DOCUMENT_ID = "DOCUMENT_ID";
35
36
37 private IndexDefinition indexDefinition;
38
39
40 private Corpus corpus;
41
42
43 public LuceneIndexManager(){
44 }
45
46
48 public void createIndex() throws IndexException{
49 if(indexDefinition == null)
50 throw new GateRuntimeException("Index definition is null!");
51 if(corpus == null)
52 throw new GateRuntimeException("Corpus is null!");
53
54 String location = indexDefinition.getIndexLocation();
55 try {
56 File file = new File(location);
57 if (file.exists()){
58 if (file.isDirectory() && file.listFiles().length>0) {
59 throw new IndexException(location+ " is not empty directory");
60 }
61 if (!file.isDirectory()){
62 throw new IndexException("Only empty directory can be index path");
63 }
64 }
65
66 IndexWriter writer = new IndexWriter(location,
67 new SimpleAnalyzer(), true);
68
69 for(int i = 0; i<corpus.size(); i++) {
70 boolean isLoaded = corpus.isDocumentLoaded(i);
71 gate.Document gateDoc = (gate.Document) corpus.get(i);
72 writer.addDocument(getLuceneDoc(gateDoc));
73 if (!isLoaded) {
74 corpus.unloadDocument(gateDoc);
75 }
76 }
78 writer.close();
79 corpus.sync();
80 } catch (java.io.IOException ioe){
81 throw new IndexException(ioe.getMessage());
82 } catch (gate.persist.PersistenceException pe){
83 pe.printStackTrace();
84 } catch (gate.security.SecurityException se){
85 se.printStackTrace();
86 }
87 }
88
89
90 public void optimizeIndex() throws IndexException{
91 if(indexDefinition == null)
92 throw new GateRuntimeException("Index definition is null!");
93 try {
94 IndexWriter writer = new IndexWriter(indexDefinition.getIndexLocation(),
95 new SimpleAnalyzer(), false);
96 writer.optimize();
97 writer.close();
98 } catch (java.io.IOException ioe){
99 throw new IndexException(ioe.getMessage());
100 }
101 }
102
103
104 public void deleteIndex() throws IndexException{
105 if(indexDefinition == null)
106 throw new GateRuntimeException("Index definition is null!");
107 boolean isDeleted = true;
108 File dir = new File(indexDefinition.getIndexLocation());
109 if (dir.exists() && dir.isDirectory()) {
110 File[] files = dir.listFiles();
111 for (int i =0; i<files.length; i++){
112 File f = files[i];
113 isDeleted = f.delete();
114 }
115 }
116 dir.delete();
117 if (!isDeleted) {
118 throw new IndexException("Can't delete directory"
119 + indexDefinition.getIndexLocation());
120 }
121 }
122
123
125 public void sync(List added, List removedIDs, List changed) throws IndexException{
126 String location = indexDefinition.getIndexLocation();
127 try {
128
129 IndexReader reader = IndexReader.open(location);
130
131 for (int i = 0; i<removedIDs.size(); i++) {
132 String id = removedIDs.get(i).toString();
133 org.apache.lucene.index.Term term =
134 new org.apache.lucene.index.Term(DOCUMENT_ID,id);
135 reader.delete(term);
136 }
138 for (int i = 0; i<changed.size(); i++) {
139 gate.Document gateDoc = (gate.Document) changed.get(i);
140 String id = gateDoc.getLRPersistenceId().toString();
141 org.apache.lucene.index.Term term =
142 new org.apache.lucene.index.Term(DOCUMENT_ID,id);
143 reader.delete(term);
144 }
146 reader.close();
147
148 IndexWriter writer = new IndexWriter(location,
149 new SimpleAnalyzer(), false);
150
151 for(int i = 0; i<added.size(); i++) {
152 gate.Document gateDoc = (gate.Document) added.get(i);
153 writer.addDocument(getLuceneDoc(gateDoc));
154 }
156 for(int i = 0; i<changed.size(); i++) {
157 gate.Document gateDoc = (gate.Document) changed.get(i);
158 writer.addDocument(getLuceneDoc(gateDoc));
159 }
161 writer.close();
162 } catch (java.io.IOException ioe) {
163 throw new IndexException(ioe.getMessage());
164 }
165 }
166
167 private org.apache.lucene.document.Document getLuceneDoc(gate.Document gateDoc){
168 org.apache.lucene.document.Document luceneDoc =
169 new org.apache.lucene.document.Document();
170 Iterator fields = indexDefinition.getIndexFields();
171
172 luceneDoc.add(Field.Keyword(DOCUMENT_ID,
173 gateDoc.getLRPersistenceId().toString()));
174
175 while (fields.hasNext()) {
176 IndexField field = (IndexField) fields.next();
177 String valueForIndexing;
178
179 if (field.getReader() == null){
180 valueForIndexing = gateDoc.getFeatures().get(field.getName()).toString();
181 } else {
182 valueForIndexing = field.getReader().getPropertyValue(gateDoc);
183 }
185 if (field.isPreseved()) {
186 luceneDoc.add(Field.Keyword(field.getName(),valueForIndexing));
187 } else {
188 luceneDoc.add(Field.UnStored(field.getName(),valueForIndexing));
189 }
191 }
193 return luceneDoc;
194 }
195
196 public Corpus getCorpus() {
197 return corpus;
198 }
199 public void setCorpus(Corpus corpus) {
200 this.corpus = corpus;
201 }
202 public IndexDefinition getIndexDefinition() {
203 return indexDefinition;
204 }
205 public void setIndexDefinition(IndexDefinition indexDefinition) {
206 this.indexDefinition = indexDefinition;
207 }
208
209 }