1   package gate.util.web;
2   
3   import java.util.*;
4   import java.io.*;
5   import java.net.*;
6   
7   import gate.*;
8   import gate.creole.*;
9   import gate.util.*;
10  import gate.gui.*;
11  
12  import javax.servlet.*;
13  
14  /**
15   * This class illustrates how to use ANNIE as a sausage machine
16   * in another application - put ingredients in one end (URLs pointing
17   * to documents) and get sausages (e.g. Named Entities) out the
18   * other end.
19   * <P><B>NOTE:</B><BR>
20   * For simplicity's sake, we don't do any exception handling.
21   */
22  public class WebAnnie  {
23      
24      public static final String GATE_INIT_KEY = "gate.init";
25      public static final String ANNIE_CONTROLLER_KEY = "annie.controller";
26  
27      /** The Corpus Pipeline application to contain ANNIE */
28      private SerialAnalyserController annieController;
29      
30      /**
31       * Initialise the ANNIE system. This creates a "corpus pipeline"
32       * application that can be used to run sets of documents through
33       * the extraction system.
34       */
35      public void initAnnie() throws GateException {
36          
37          // create a serial analyser controller to run ANNIE with
38          annieController = (SerialAnalyserController)
39              Factory.createResource("gate.creole.SerialAnalyserController",
40                                     Factory.newFeatureMap(),
41                                     Factory.newFeatureMap(),
42                                     "ANNIE_" + Gate.genSym()
43                                     );
44  
45          /*
46      "gate.creole.tokeniser.DefaultTokeniser",
47      "gate.creole.gazetteer.DefaultGazetteer",
48      "gate.creole.splitter.SentenceSplitter",
49      "gate.creole.POSTagger",
50      "gate.creole.ANNIETransducer",
51      "gate.creole.orthomatcher.OrthoMatcher"
52          */
53          
54          // load each PR as defined in ANNIEConstants
55          for (int i = 0; i < ANNIEConstants.PR_NAMES.length; i++) {
56              // use default parameters
57              FeatureMap params = Factory.newFeatureMap(); 
58              ProcessingResource pr = (ProcessingResource)
59                  Factory.createResource(ANNIEConstants.PR_NAMES[i], params);
60              
61              // add the PR to the pipeline controller
62              annieController.add(pr);
63          } // for each ANNIE PR
64          
65      } // initAnnie()
66      
67      /**
68       * Run from the command-line, with a list of URLs as argument.
69       * <P><B>NOTE:</B><BR>
70       * This code will run with all the documents in memory - if you
71       * want to unload each from memory after use, add code to store
72       * the corpus in a DataStore.
73       */
74      public String process(ServletContext app, String url, String[] annotations)
75          throws GateException, IOException {
76  
77          if (app.getAttribute(GATE_INIT_KEY) == null) {
78              Gate.setLocalWebServer(false);
79              Gate.setNetConnected(false);
80  
81              System.setProperty("java.protocol.handler.pkgs",
82                                 "gate.util.protocols");
83              
84              System.out.println("before gate init");
85              System.out.println("Freemem: " + Runtime.getRuntime().freeMemory());
86  
87              // Do the deed
88              Gate.init();
89              System.out.println("after gate init");
90              System.out.println("Freemem: " + Runtime.getRuntime().freeMemory());
91  
92              app.setAttribute(GATE_INIT_KEY, "true");
93          }
94  
95          if (app.getAttribute(ANNIE_CONTROLLER_KEY) == null) {
96              // initialise ANNIE (this may take several minutes)
97              System.out.println("before annie init");
98              System.out.println("Freemem: " + Runtime.getRuntime().freeMemory());
99              this.initAnnie();
100             System.out.println("after annie init");
101             System.out.println("Freemem: " + Runtime.getRuntime().freeMemory());
102 
103             app.setAttribute(ANNIE_CONTROLLER_KEY, annieController);
104         }
105         else {
106             annieController = (SerialAnalyserController) 
107                 app.getAttribute(ANNIE_CONTROLLER_KEY);
108         }
109 
110         
111         // create a GATE corpus and add a document for each command-line
112         // argument
113         Corpus corpus =
114             (Corpus) Factory.createResource("gate.corpora.CorpusImpl");
115         URL u = new URL(url);
116         FeatureMap params = Factory.newFeatureMap();
117         params.put("sourceUrl", u);
118 
119         Document doc = (Document)
120             Factory.createResource("gate.corpora.DocumentImpl", params);
121         corpus.add(doc);
122         
123         // tell the pipeline about the corpus and run it
124 
125         annieController.setCorpus(corpus);
126         annieController.execute();
127         
128         // Get XML marked up document
129 
130         AnnotationSet defaultAnnotSet = doc.getAnnotations();
131         Set annotTypesRequired = new HashSet();
132 
133         for (int i=0;i<annotations.length;i++) {
134             annotTypesRequired.add(annotations[i]);
135         }
136         AnnotationSet peopleAndPlaces =
137             defaultAnnotSet.get(annotTypesRequired);
138         return doc.toXml(peopleAndPlaces, true);
139 
140     } // process
141     
142 } // class WebAnnie
143