|
WebAnnie |
|
package gate.util.web;

import java.util.*;
import java.io.*;
import java.net.*;

import gate.*;
import gate.creole.*;
import gate.util.*;
import gate.gui.*;

import javax.servlet.*;

/**
 * This class illustrates how to use ANNIE as a sausage machine
 * in another application - put ingredients in one end (a URL pointing
 * to a document) and get sausages (e.g. Named Entities) out the
 * other end.
 * <P><B>NOTE:</B><BR>
 * For simplicity's sake, we don't do any exception handling: every
 * failure is propagated to the caller.
 * <P><B>NOTE(review):</B><BR>
 * The ANNIE controller is cached in the {@link ServletContext} and is
 * therefore shared by every caller using the same context, while
 * {@link #process} calls <code>setCorpus</code> and <code>execute</code>
 * on it without any synchronisation. Callers that may run concurrently
 * should serialise their calls - confirm against the servlet that uses
 * this class.
 */
public class WebAnnie {

  /** Context attribute set (to "true") once GATE has been initialised. */
  public static final String GATE_INIT_KEY = "gate.init";

  /** Context attribute under which the ANNIE controller is cached. */
  public static final String ANNIE_CONTROLLER_KEY = "annie.controller";

  /** The Corpus Pipeline application to contain ANNIE */
  private SerialAnalyserController annieController;

  /**
   * Initialise the ANNIE system. This creates a "corpus pipeline"
   * application that can be used to run sets of documents through
   * the extraction system.
   * <P>
   * The controller is only stored in this object once every processing
   * resource has been loaded, so a failure part-way through does not
   * leave a half-built pipeline behind in the field.
   *
   * @throws GateException if the controller or any of the processing
   *         resources cannot be created
   */
  public void initAnnie() throws GateException {

    // create a serial analyser controller to run ANNIE with
    SerialAnalyserController controller = (SerialAnalyserController)
      Factory.createResource("gate.creole.SerialAnalyserController",
                             Factory.newFeatureMap(),
                             Factory.newFeatureMap(),
                             "ANNIE_" + Gate.genSym()
      );

    /*
     * Processing resources listed by the original author (presumably
     * the contents of ANNIEConstants.PR_NAMES at the time - verify):
     *   "gate.creole.tokeniser.DefaultTokeniser",
     *   "gate.creole.gazetteer.DefaultGazetteer",
     *   "gate.creole.splitter.SentenceSplitter",
     *   "gate.creole.POSTagger",
     *   "gate.creole.ANNIETransducer",
     *   "gate.creole.orthomatcher.OrthoMatcher"
     */

    // load each PR as defined in ANNIEConstants
    for (int i = 0; i < ANNIEConstants.PR_NAMES.length; i++) {
      // use default parameters
      FeatureMap params = Factory.newFeatureMap();
      ProcessingResource pr = (ProcessingResource)
        Factory.createResource(ANNIEConstants.PR_NAMES[i], params);

      // add the PR to the pipeline controller
      controller.add(pr);
    } // for each ANNIE PR

    annieController = controller;

  } // initAnnie()

  /**
   * Run ANNIE over the document found at <code>url</code> and return the
   * document as XML, marked up with the requested annotation types.
   * <P>
   * GATE and the ANNIE controller are initialised lazily on the first
   * call and remembered in <code>app</code> under {@link #GATE_INIT_KEY}
   * and {@link #ANNIE_CONTROLLER_KEY}, so later calls sharing the same
   * context reuse them.
   * <P>
   * The corpus and document created for the call are released again
   * before the method returns (whether normally or with an exception),
   * so that documents do not pile up in memory over the life of the
   * web application.
   *
   * @param app the servlet context used to cache the GATE initialisation
   *        flag and the ANNIE controller
   * @param url the location of the document to process
   * @param annotations the annotation types to include in the output;
   *        must not be <code>null</code>
   * @return the document as XML containing the requested annotations
   * @throws GateException if GATE, ANNIE or any resource fails to
   *         initialise, or the pipeline fails to execute
   * @throws IOException if <code>url</code> is not a valid URL
   */
  public String process(ServletContext app, String url, String[] annotations)
      throws GateException, IOException {

    ensureGateInitialised(app);
    ensureAnnieController(app);

    Corpus corpus = null;
    Document doc = null;
    try {
      // create a GATE corpus holding just the requested document
      corpus = (Corpus) Factory.createResource("gate.corpora.CorpusImpl");
      URL u = new URL(url);
      FeatureMap params = Factory.newFeatureMap();
      params.put("sourceUrl", u);

      doc = (Document)
        Factory.createResource("gate.corpora.DocumentImpl", params);
      corpus.add(doc);

      // tell the pipeline about the corpus and run it
      annieController.setCorpus(corpus);
      annieController.execute();

      // get the XML marked up document, restricted to the requested types
      Set annotTypesRequired = new HashSet(Arrays.asList(annotations));
      AnnotationSet requestedAnnotations =
        doc.getAnnotations().get(annotTypesRequired);
      return doc.toXml(requestedAnnotations, true);
    }
    finally {
      // the controller outlives this call: do not let it pin the corpus
      annieController.setCorpus(null);
      // the XML has already been produced (or the call failed), so the
      // per-request resources can be handed back to GATE
      if (doc != null) {
        Factory.deleteResource(doc);
      }
      if (corpus != null) {
        Factory.deleteResource(corpus);
      }
    }

  } // process

  /**
   * Initialise GATE unless <code>app</code> already carries the
   * {@link #GATE_INIT_KEY} attribute, then set that attribute.
   */
  private void ensureGateInitialised(ServletContext app)
      throws GateException {
    if (app.getAttribute(GATE_INIT_KEY) != null) {
      return;
    }

    Gate.setLocalWebServer(false);
    Gate.setNetConnected(false);

    System.setProperty("java.protocol.handler.pkgs",
                       "gate.util.protocols");

    logStage("before gate init");

    // Do the deed
    Gate.init();
    logStage("after gate init");

    app.setAttribute(GATE_INIT_KEY, "true");
  }

  /**
   * Make {@link #annieController} refer to the controller cached in
   * <code>app</code>, building and caching a new one if none is there.
   */
  private void ensureAnnieController(ServletContext app)
      throws GateException {
    Object cached = app.getAttribute(ANNIE_CONTROLLER_KEY);
    if (cached != null) {
      annieController = (SerialAnalyserController) cached;
      return;
    }

    // initialise ANNIE (this may take several minutes)
    logStage("before annie init");
    this.initAnnie();
    logStage("after annie init");

    app.setAttribute(ANNIE_CONTROLLER_KEY, annieController);
  }

  /** Print a progress marker followed by the JVM's current free memory. */
  private static void logStage(String stage) {
    System.out.println(stage);
    System.out.println("Freemem: " + Runtime.getRuntime().freeMemory());
  }

} // class WebAnnie
|
WebAnnie |
|