1   package gate.util.web;
2   
3   import java.io.IOException;
4   import java.net.MalformedURLException;
5   import java.net.URL;
6   import java.util.HashSet;
7   import java.util.Set;
8   
9   import javax.servlet.ServletContext;
10  
11  import gate.*;
12  import gate.creole.SerialAnalyserController;
13  import gate.util.GateException;
14  
15  /**
16   * This class is designed to demonstrate ANNIE in a web context. It should be
17   * called from either a servlet or a JSP.
18   */
19  public class WebAnnie  {
20      
21      public static final String GATE_INIT_KEY = "gate.init";
22      public static final String ANNIE_CONTROLLER_KEY = "annie.controller";
23  
24      /** The Corpus Pipeline application to contain ANNIE */
25      private SerialAnalyserController annieController;
26      
27      private String filePath = "";
28  
29      /**
30       * Initialise the ANNIE system. This creates a "corpus pipeline"
31       * application that can be used to run sets of documents through
32       * the extraction system.
33       */
34      private void initAnnie() throws GateException {
35          
36          // create a serial analyser controller to run ANNIE with
37          annieController = (SerialAnalyserController)
38              Factory.createResource("gate.creole.SerialAnalyserController",
39                                     Factory.newFeatureMap(),
40                                     Factory.newFeatureMap(),
41                                     "ANNIE_" + Gate.genSym()
42                                     );
43          
44          // Load tokenizer
45          ProcessingResource tokeniser = (ProcessingResource)
46              Factory.createResource("gate.creole.tokeniser.DefaultTokeniser",
47                                     Factory.newFeatureMap());
48          
49          annieController.add(tokeniser);
50          
51          // Load sentence splitter
52          ProcessingResource split = (ProcessingResource)
53              Factory.createResource("gate.creole.splitter.SentenceSplitter",
54                                     Factory.newFeatureMap());
55          
56          annieController.add(split);
57          
58          // Load POS tagger
59          ProcessingResource postagger = (ProcessingResource)
60              Factory.createResource("gate.creole.POSTagger",
61                                     Factory.newFeatureMap());
62          
63          annieController.add(postagger);
64  
65  
66          // Load Gazetteer -- this is a two step process
67          FeatureMap gazetteerFeatures = Factory.newFeatureMap();
68          gazetteerFeatures.put("encoding","ISO-8859-1");
69  
70          // Step one: Locate the gazetteer file
71          try {
72              URL gazetteerURL =
73                  new URL("jar:file:" + filePath +
74                          "muse.jar!/muse/resources/gazetteer/lists.def");
75              gazetteerFeatures.put("listsURL", gazetteerURL);
76          } catch(MalformedURLException e) {
77              e.printStackTrace();
78          }
79          
80          // Step two: Load the gazetteer from the file
81          ProcessingResource gazetteer = (ProcessingResource)
82              Factory.createResource("gate.creole.gazetteer.DefaultGazetteer",
83                                     gazetteerFeatures);
84          
85          annieController.add(gazetteer);        
86  
87          // Load Grammar -- similar to gazetteer
88          FeatureMap grammarFeatures = Factory.newFeatureMap();
89          
90          try {
91              URL grammarURL =
92                  new URL("jar:file:" + filePath +
93                          "muse.jar!/muse/resources/grammar/main/main.jape");
94              grammarFeatures.put("grammarURL", grammarURL);
95          } catch(MalformedURLException e) {
96              e.printStackTrace();
97          }
98          
99          ProcessingResource grammar = (ProcessingResource)
100             Factory.createResource("gate.creole.ANNIETransducer",
101                                    grammarFeatures);
102         
103         annieController.add(grammar);
104 
105         // Load Ortho Matcher
106         ProcessingResource orthoMatcher = (ProcessingResource)
107             Factory.createResource("gate.creole.orthomatcher.OrthoMatcher",
108                                    Factory.newFeatureMap());
109         
110         annieController.add(orthoMatcher);
111 
112     } // initAnnie()
113     
114     /**
115      * This method should be called from a servlet or JSP.
116      * @param app The current servlet context, eg the JSP implicit variable "application"
117      * @param url The url of the file to be analysed
118      * @param annotations An array of annotations
119      */
120     public String process(ServletContext app, String url, String[] annotations)
121         throws GateException, IOException {
122 
123         if (app.getAttribute(GATE_INIT_KEY) == null) {
124             Gate.setLocalWebServer(false);
125             Gate.setNetConnected(false);
126 
127             System.setProperty("java.protocol.handler.pkgs",
128                                "gate.util.protocols");
129             
130             // Do the deed
131             Gate.init();
132 
133             app.setAttribute(GATE_INIT_KEY, "true");
134         }
135 
136         if (app.getAttribute(ANNIE_CONTROLLER_KEY) == null) {
137             // initialise ANNIE (this may take several minutes)
138 
139             filePath = app.getInitParameter("muse.path");
140             this.initAnnie();
141 
142             app.setAttribute(ANNIE_CONTROLLER_KEY, annieController);
143         }
144         else {
145             annieController = (SerialAnalyserController) 
146                 app.getAttribute(ANNIE_CONTROLLER_KEY);
147         }
148 
149         
150         // create a GATE corpus and add a document from the URL specified
151         Corpus corpus =
152             (Corpus) Factory.createResource("gate.corpora.CorpusImpl");
153         URL u = new URL(url);
154         FeatureMap params = Factory.newFeatureMap();
155         params.put("sourceUrl", u);
156 
157         Document doc = (Document)
158             Factory.createResource("gate.corpora.DocumentImpl", params);
159         corpus.add(doc);
160             
161         
162         // tell the pipeline about the corpus and run it
163         annieController.setCorpus(corpus);
164         annieController.execute();
165         
166         // Get XML marked up document
167         AnnotationSet defaultAnnotSet = doc.getAnnotations();
168         Set annotTypesRequired = new HashSet();
169 
170         String output = null;
171         if (annotations != null) {
172             for (int i=0;i<annotations.length;i++) {
173                 annotTypesRequired.add(annotations[i]);
174             }
175             AnnotationSet selectedAnnotations =
176                 defaultAnnotSet.get(annotTypesRequired);
177             output = doc.toXml(selectedAnnotations, true);
178         }
179         else {
180             output = doc.toXml();
181         }
182         //delete the used resources
183         Factory.deleteResource(doc);
184         Factory.deleteResource(corpus);
185         return output;
186     } // process
187     
188 } // class WebAnnie
189