|
WebCrimeReportAnalyser |
|
package gate.util.web;

import gate.*;
import gate.creole.*;
import gate.util.*;
import gate.corpora.RepositioningInfo;
import gate.annotation.*;

import java.util.*;
import java.net.*;
import java.util.jar.*;
import java.io.IOException;

import javax.servlet.*;

/**
 * Runs the "Crime Report Analyser" GATE pipeline (tokeniser, sentence
 * splitter, POS tagger, gazetteer and a JAPE named-entity grammar) over a
 * document fetched from a URL and returns the document as XML with the
 * requested annotation types inlined.
 *
 * GATE itself and the pipeline are initialised lazily on the first call to
 * {@link #process} and cached as attributes of the servlet context.
 */
public class WebCrimeReportAnalyser {

  /** Servlet-context attribute under which the built pipeline is cached. */
  public static final String SOCIS_CONTROLLER_KEY = "socis.controller";

  /** Servlet-context attribute that marks GATE as already initialised. */
  public static final String GATE_INIT_KEY = "gate.init";

  /**
   * Prefix of the location of <code>files.jar</code>; it is concatenated
   * directly in front of the literal <code>files.jar</code>, so it must end
   * with a path separator. Set from the <code>files.path</code> init
   * parameter by {@link #process}.
   */
  public String filePath = "";

  private SerialAnalyserController controller;

  /**
   * Builds the pipeline and stores it in this instance.
   *
   * Resources bundled in <code>files.jar</code> are located relative to
   * {@link #filePath}, which must be set before this method is called.
   *
   * @throws GateException if a processing resource cannot be created or a
   *         bundled resource URL cannot be formed
   */
  public void initCrimeReportAnalyser() throws GateException {
    // Build into a local so a failure part-way through never leaves a
    // half-assembled pipeline in the field.
    SerialAnalyserController pipeline = (SerialAnalyserController)
      Factory.createResource("gate.creole.SerialAnalyserController",
                             Factory.newFeatureMap(),
                             Factory.newFeatureMap(),
                             "Crime Report Analyser");

    pipeline.add(createStage("gate.creole.tokeniser.DefaultTokeniser",
                             Factory.newFeatureMap()));
    pipeline.add(createStage("gate.creole.splitter.SentenceSplitter",
                             Factory.newFeatureMap()));
    pipeline.add(createStage("gate.creole.POSTagger",
                             Factory.newFeatureMap()));

    // NOTE: a chunking stage ("chunking.PreChunking" followed by NP/VP chunk
    // transducers reading and writing the "ChunkAnnotations" set) used to be
    // sketched here in commented-out form; it is not part of the pipeline.

    FeatureMap gazetteerParams = Factory.newFeatureMap();
    gazetteerParams.put("encoding", "ISO-8859-1");
    gazetteerParams.put("listsURL",
        bundledResourceUrl("resources/gazetters/general/lists.def"));
    pipeline.add(createStage("gate.creole.gazetteer.DefaultGazetteer",
                             gazetteerParams));

    FeatureMap grammarParams = Factory.newFeatureMap();
    grammarParams.put("grammarURL",
        bundledResourceUrl("resources/grammars/NamedEntities/socismain.jape"));
    pipeline.add(createStage("gate.creole.ANNIETransducer", grammarParams));

    controller = pipeline;
  } // initCrimeReportAnalyser()

  /** Creates one processing resource of the given class with the given parameters. */
  private static ProcessingResource createStage(String className,
                                                FeatureMap params)
      throws GateException {
    return (ProcessingResource) Factory.createResource(className, params);
  }

  /**
   * Forms the <code>jar:</code> URL of an entry inside <code>files.jar</code>.
   * A malformed URL is reported as a GateException instead of being printed
   * and ignored, so the caller never builds a resource from an incomplete
   * parameter map.
   */
  private URL bundledResourceUrl(String pathInJar) throws GateException {
    try {
      return new URL("jar:file:" + filePath + "files.jar!/" + pathInJar);
    } catch (MalformedURLException e) {
      throw new GateException(e);
    }
  }

  /**
   * Fetches the document at <code>url</code>, runs the pipeline over it and
   * returns the document as XML containing only non-overlapping annotations
   * of the requested types.
   *
   * @param app servlet context used to cache the GATE-initialised flag and
   *        the pipeline, and to read the <code>files.path</code> init
   *        parameter
   * @param url location of the document to analyse
   * @param annotations annotation type names to keep in the output;
   *        <code>null</code> is treated as "none"
   * @return the XML produced by <code>Document.toXml</code> for the selected
   *         annotations
   * @throws GateException if GATE or the pipeline cannot be initialised or
   *         run, or if <code>files.path</code> is not configured
   * @throws IOException if <code>url</code> (or the jar URL) is malformed
   */
  public String process(ServletContext app, String url, String[] annotations)
      throws GateException, IOException {

    // Is this the first time a gate demo has been run? If so,
    // initialise gate. It's a very heavy process, so only do
    // it once.
    if (app.getAttribute(GATE_INIT_KEY) == null) {
      Gate.setLocalWebServer(false);
      Gate.setNetConnected(false);

      System.setProperty("java.protocol.handler.pkgs",
                         "gate.util.protocols");

      Gate.init();

      app.setAttribute(GATE_INIT_KEY, "true");
    }

    // Now do the same for the SOCIS controller.
    if (app.getAttribute(SOCIS_CONTROLLER_KEY) == null) {
      CreoleRegister reg = Gate.getCreoleRegister();

      filePath = app.getInitParameter("files.path");
      if (filePath == null) {
        // Without this check the URLs below would silently become
        // "jar:file:nullfiles.jar!/...".
        throw new GateException(
            "Servlet init parameter 'files.path' is not set");
      }

      URL filesURL = new URL("jar:file:" + filePath + "files.jar!/");
      try {
        reg.registerDirectories(filesURL);
      } catch (GateException e) {
        // Best effort, as before: report and carry on. If the plugin
        // directory really is unusable, pipeline construction below fails.
        System.out.println(e.getMessage());
      }

      initCrimeReportAnalyser();

      app.setAttribute(SOCIS_CONTROLLER_KEY, controller);
    } else {
      // The SOCIS demo has already run, so take the existing
      // controller from the application attribute hash.
      controller = (SerialAnalyserController)
        app.getAttribute(SOCIS_CONTROLLER_KEY);
    }

    final SerialAnalyserController pipeline = controller;

    Set annotTypesRequired = new HashSet();
    if (annotations != null) {
      for (int i = 0; i < annotations.length; i++) {
        annotTypesRequired.add(annotations[i]);
      }
    }

    Corpus corpus =
      (Corpus) Factory.createResource("gate.corpora.CorpusImpl");
    Document doc = null;

    try {
      // NOTE(review): the URL comes from the caller and is fetched as-is;
      // confirm that callers restrict which hosts/schemes may be requested.
      URL textURL = new URL(url);

      FeatureMap params = Factory.newFeatureMap();
      params.put("sourceUrl", textURL);
      params.put("preserveOriginalContent", Boolean.TRUE);
      params.put("collectRepositioningInfo", Boolean.TRUE);

      doc = (Document)
        Factory.createResource("gate.corpora.DocumentImpl", params);

      corpus.add(doc);

      // The controller is shared through the servlet context, so the
      // set-corpus / execute / reset sequence must not interleave between
      // concurrent requests.
      synchronized (pipeline) {
        pipeline.setCorpus(corpus);
        try {
          pipeline.execute();
        } finally {
          // Do not let the cached controller keep this request's corpus.
          pipeline.setCorpus(null);
        }
      }

      AnnotationSet socis = doc.getAnnotations().get(annotTypesRequired);

      SortedAnnotationList sortedAnnotationsNamedEntities =
        new SortedAnnotationList();

      // Guard against a null result: some GATE releases return null rather
      // than an empty set when no annotation matches.
      if (socis != null) {
        Iterator it = socis.iterator();
        while (it.hasNext()) {
          sortedAnnotationsNamedEntities
              .addSortedExclusive((Annotation) it.next());
        }
      }

      AnnotationSet uniqueNamedEntities =
        new AnnotationSetImpl((Document) null);
      uniqueNamedEntities.addAll(sortedAnnotationsNamedEntities);

      return doc.toXml(uniqueNamedEntities, true);
    } finally {
      // Release the per-request resources; otherwise every call leaves a
      // document and a corpus registered with GATE.
      if (doc != null) {
        Factory.deleteResource(doc);
      }
      Factory.deleteResource(corpus);
    }
  }

  /**
   * A list of annotations kept in ascending start-offset order in which no
   * two members overlap.
   */
  public static class SortedAnnotationList extends Vector {

    public SortedAnnotationList() {
      super();
    }

    /**
     * Inserts <code>annot</code> at its start-offset position unless it
     * overlaps an annotation already in the list.
     *
     * @param annot the annotation to insert
     * @return <code>true</code> if the annotation was inserted,
     *         <code>false</code> if it overlapped an existing member and was
     *         rejected
     */
    public boolean addSortedExclusive(Annotation annot) {
      // First pass: reject anything that overlaps an existing member.
      for (int i = 0; i < size(); ++i) {
        if (annot.overlaps((Annotation) get(i))) {
          return false;
        }
      }

      // Second pass: insert before the first member that starts later.
      long annotStart = annot.getStartNode().getOffset().longValue();
      for (int i = 0; i < size(); ++i) {
        Annotation member = (Annotation) get(i);
        if (annotStart < member.getStartNode().getOffset().longValue()) {
          insertElementAt(annot, i);
          return true;
        }
      }

      // Starts after (or level with) every member: append.
      insertElementAt(annot, size());
      return true;
    } //addSortedExclusive

  } //SortedAnnotationList

}
|
WebCrimeReportAnalyser |
|