1   package gate.util.web;
2   
3   import java.io.IOException;
4   import java.net.MalformedURLException;
5   import java.net.URL;
6   import java.util.*;
7   
8   import javax.servlet.ServletContext;
9   
10  import gate.*;
11  import gate.annotation.AnnotationSetImpl;
12  import gate.corpora.RepositioningInfo;
13  import gate.creole.SerialAnalyserController;
14  import gate.util.GateException;
15  
16  
17  public class WebCrimeReportAnalyser {
18  
19      public static final String SOCIS_CONTROLLER_KEY = "socis.controller";
20      public static final String GATE_INIT_KEY = "gate.init";
21  
22      
23      public String filePath = "";
24      
25      private SerialAnalyserController controller;
26  
27      public void initCrimeReportAnalyser() throws GateException {
28          controller = (SerialAnalyserController)
29              Factory.createResource("gate.creole.SerialAnalyserController",
30                                     Factory.newFeatureMap(),
31                                     Factory.newFeatureMap(),
32                                     "Crime Report Analyser");
33  
34          ProcessingResource tokeniser = (ProcessingResource)
35              Factory.createResource("gate.creole.tokeniser.DefaultTokeniser",
36                                     Factory.newFeatureMap());
37  
38          controller.add(tokeniser);
39          
40          ProcessingResource split = (ProcessingResource)
41              Factory.createResource("gate.creole.splitter.SentenceSplitter",
42                                     Factory.newFeatureMap());
43          
44          controller.add(split);
45          
46          ProcessingResource postagger = (ProcessingResource)
47              Factory.createResource("gate.creole.POSTagger",
48                                     Factory.newFeatureMap());
49          
50          controller.add(postagger);
51         
52          /* ProcessingResource prechunking = (ProcessingResource)
53              Factory.createResource("chunking.PreChunking",
54                                     Factory.newFeatureMap());
55          
56          controller.add(prechunking);
57               System.out.println("after tokeniser");
58              System.out.println("Freemem: " + Runtime.getRuntime().freeMemory());
59         
60          FeatureMap fm = Factory.newFeatureMap();
61          fm.put("inputASName","ChunkAnnotations");
62          fm.put("outputASName","ChunkAnnotations");
63  
64          try {
65              URL urlnp = new URL("jar:file:" + filePath + "files.jar!/resources/grammars/Chunk/mainNPChunk.jape");
66              fm.put("grammarURL",urlnp);
67          } catch(MalformedURLException e) {
68              
69              e.printStackTrace();
70          }
71          
72          ProcessingResource npchunk = (ProcessingResource)
73              Factory.createResource("gate.creole.ANNIETransducer",
74                                     fm);
75          
76          controller.add(npchunk);
77                  
78          FeatureMap fm1 = Factory.newFeatureMap();
79          fm1.put("inputASName","ChunkAnnotations");
80          fm1.put("outputASName","ChunkAnnotations");
81  
82          try {
83              URL urlvp = new URL("jar:file:" + filePath + "files.jar!/resources/grammars/Chunk/mainNPChunk.jape");
84              fm1.put("grammarURL",urlvp);
85          } catch(MalformedURLException e) {
86              e.printStackTrace();
87          }
88          
89          ProcessingResource vpchunk = (ProcessingResource)
90              Factory.createResource("gate.creole.ANNIETransducer",
91                                     fm1);
92          
93          controller.add(vpchunk);*/
94          
95          FeatureMap fm_gaz = Factory.newFeatureMap();
96          fm_gaz.put("encoding","ISO-8859-1");
97  
98          try {
99              URL urlgaz = new URL("jar:file:" + filePath + "files.jar!/resources/gazetters/general/lists.def");
100             fm_gaz.put("listsURL",urlgaz);
101         } catch(MalformedURLException e) {
102             e.printStackTrace();
103         }
104         
105         ProcessingResource gazetteer = (ProcessingResource)
106             Factory.createResource("gate.creole.gazetteer.DefaultGazetteer",
107                                    fm_gaz);
108         
109         controller.add(gazetteer);        
110 
111         FeatureMap fm_gra = Factory.newFeatureMap();
112         
113         try {
114             URL urlgra = new URL("jar:file:" + filePath + "files.jar!/resources/grammars/NamedEntities/socismain.jape");
115             fm_gra.put("grammarURL",urlgra);
116         } catch(MalformedURLException e) {
117             e.printStackTrace();
118         }
119         
120         ProcessingResource grammar = (ProcessingResource)
121             Factory.createResource("gate.creole.ANNIETransducer",
122                                    fm_gra);
123         
124         controller.add(grammar);
125         
126     } // initIndexAnalyser()
127     
128     public String process(ServletContext app, String url, String[] annotations)
129         throws GateException, IOException {
130 
131         long start;
132 
133         // Is this the first time a gate demo has been run? If so, 
134         // initiali[s|z]e gate. It's a very heavy process, so only do
135         // it once.
136 
137         if (app.getAttribute(GATE_INIT_KEY) == null) {
138             Gate.setLocalWebServer(false);
139             Gate.setNetConnected(false);
140 
141             System.setProperty("java.protocol.handler.pkgs",
142                                "gate.util.protocols");
143             
144             // Do the deed
145             Gate.init();
146 
147             app.setAttribute(GATE_INIT_KEY, "true");
148         }
149 
150         // Now do the same for the SOCIS controller
151 
152         if (app.getAttribute(SOCIS_CONTROLLER_KEY) == null) {
153 
154             CreoleRegister reg = Gate.getCreoleRegister();
155 
156             filePath = app.getInitParameter("files.path");
157             
158 //            URL filesURL = new URL("jar:file:" + filePath + "files.jar!/");
159 //            try {
160 //                reg.registerDirectories(filesURL);
161 //            } catch(GateException e) {
162 //                System.out.println(e.getMessage());
163 //            }
164             
165             initCrimeReportAnalyser();
166 
167             app.setAttribute(SOCIS_CONTROLLER_KEY, controller);
168         }
169         else {
170             // The SOCIS demo has already run, so take the existing
171             // controller from the application attribute hash
172             
173             controller = (SerialAnalyserController) 
174                 app.getAttribute(SOCIS_CONTROLLER_KEY);
175         }
176 
177         Corpus corpus =
178             (Corpus) Factory.createResource("gate.corpora.CorpusImpl");
179 
180         /* here the url specified by the user */
181         URL textURL = new URL(url);
182         
183         FeatureMap params = Factory.newFeatureMap();
184         params.put("sourceUrl", textURL);
185         params.put("preserveOriginalContent", new Boolean(true));
186         params.put("collectRepositioningInfo", new Boolean(true));
187         
188         Document doc = (Document)
189             Factory.createResource("gate.corpora.DocumentImpl",params);
190 
191         corpus.add(doc);
192         
193         controller.setCorpus(corpus);
194         controller.execute();
195         
196         AnnotationSet defaultAnnotSet = doc.getAnnotations();
197         AnnotationSet chunkAnnotSet = doc.getAnnotations("ChunkAnnotations");
198         Set annotTypesRequired = new HashSet();
199         Set chunkTypesRequired = new HashSet();
200 
201         for (int i=0;i<annotations.length;i++) {
202             annotTypesRequired.add(annotations[i]);
203         }
204 
205         /* socis stuff */
206         /*annotTypesRequired.add("Location");
207         annotTypesRequired.add("Time");
208         annotTypesRequired.add("Organization");
209         annotTypesRequired.add("Person");
210         annotTypesRequired.add("Id_No");
211         annotTypesRequired.add("Date");
212         annotTypesRequired.add("Money");
213         annotTypesRequired.add("Percent");
214         annotTypesRequired.add("Conv_make");
215         annotTypesRequired.add("Offence");
216         annotTypesRequired.add("Age");
217         annotTypesRequired.add("Drug");
218         annotTypesRequired.add("Address"); */
219 
220         /* required chunks */
221         /*
222         chunkTypesRequired.add("NPCHUNK");
223         chunkTypesRequired.add("VPCHUNK"); */
224         
225         AnnotationSet socis = defaultAnnotSet.get(annotTypesRequired);
226 
227         //AnnotationSet chunks = chunkAnnotSet.get(chunkTypesRequired);
228         
229         FeatureMap features = doc.getFeatures();
230         String originalContent = (String)
231             features.get(GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME);
232 
233         RepositioningInfo info = (RepositioningInfo)
234             features.get(GateConstants.DOCUMENT_REPOSITIONING_INFO_FEATURE_NAME);
235         
236         Annotation currAnnot;
237         SortedAnnotationList sortedAnnotationsNamedEntities =
238             new SortedAnnotationList();
239         
240         // The AnnotationSet socis can be null if no annotations have
241         // been found
242         if (socis != null) {
243             Iterator it = socis.iterator();
244             while(it.hasNext()) {
245                 currAnnot = (Annotation) it.next();
246                 sortedAnnotationsNamedEntities.addSortedExclusive(currAnnot);
247             }
248         }
249         
250         AnnotationSet uniqueNamedEntities =
251             new AnnotationSetImpl(doc);
252         
253         uniqueNamedEntities.addAll(sortedAnnotationsNamedEntities);
254         
255         SortedAnnotationList sortedAnnotationsChunks =
256             new SortedAnnotationList();
257         
258         /*it = chunks.iterator();
259         while(it.hasNext()) {
260             currAnnot = (Annotation) it.next();
261             sortedAnnotationsChunks.addSortedExclusive(currAnnot);
262             } //while
263         
264         AnnotationSet uniqueChunks = new AnnotationSetImpl((Document) null);
265         
266         uniqueChunks.addAll(sortedAnnotationsChunks); */
267 
268         String xmlDocumentNamedEntities = doc.toXml(uniqueNamedEntities, true);
269         //String xmlDocumentChunks = doc.toXml(uniqueChunks,true);
270         
271         //delete the used resources 
272         Factory.deleteResource(doc);
273         Factory.deleteResource(corpus);
274         return xmlDocumentNamedEntities;
275         
276     }
277     
278     public static class SortedAnnotationList extends Vector {
279         
280         public SortedAnnotationList() {
281             super();
282         }
283         public boolean addSortedExclusive(Annotation annot) {
284             Annotation currAnnot = null;
285             for(int i=0; i<size() ; ++i) {
286                 currAnnot = (Annotation) get(i);
287                 if(annot.overlaps(currAnnot)) {
288                     return false;
289                     
290                 } //if
291                 
292             } //for
293             long annotStart = annot.getStartNode().getOffset().longValue();
294             long currStart;
295             for (int i=0; i < size(); ++i) {
296                 currAnnot = (Annotation) get(i);
297                 currStart = currAnnot.getStartNode().getOffset().longValue();
298                 if(annotStart < currStart) {
299                     insertElementAt(annot, i);
300                     return true;
301                     
302                 } //if
303                 
304             } //for
305             
306             int size = size();
307             insertElementAt(annot, size);
308             return true;
309         } //addSortedExclusive
310         
311     } //SortedAnnotationList
312     
313 }
314 
315