1   package gate.util.web;
2   
3   import java.util.*;
4   import java.io.*;
5   import java.net.*;
6   
7   import gate.*;
8   import gate.creole.*;
9   import gate.util.*;
10  import gate.gui.*;
11  
12  import javax.servlet.*;
13  
14  /**
15   * This class is designed to demonstrate ANNIE in a web context. It should be
16   * called from either a servlet or a JSP.
17   */
18  public class WebAnnie  {
19      
20      public static final String GATE_INIT_KEY = "gate.init";
21      public static final String ANNIE_CONTROLLER_KEY = "annie.controller";
22  
23      /** The Corpus Pipeline application to contain ANNIE */
24      private SerialAnalyserController annieController;
25      
26      private String filePath = "";
27  
28      /**
29       * Initialise the ANNIE system. This creates a "corpus pipeline"
30       * application that can be used to run sets of documents through
31       * the extraction system.
32       */
33      private void initAnnie() throws GateException {
34          
35          // create a serial analyser controller to run ANNIE with
36          annieController = (SerialAnalyserController)
37              Factory.createResource("gate.creole.SerialAnalyserController",
38                                     Factory.newFeatureMap(),
39                                     Factory.newFeatureMap(),
40                                     "ANNIE_" + Gate.genSym()
41                                     );
42          
43          // Load tokenizer
44          ProcessingResource tokeniser = (ProcessingResource)
45              Factory.createResource("gate.creole.tokeniser.DefaultTokeniser",
46                                     Factory.newFeatureMap());
47          
48          annieController.add(tokeniser);
49          
50          // Load sentence splitter
51          ProcessingResource split = (ProcessingResource)
52              Factory.createResource("gate.creole.splitter.SentenceSplitter",
53                                     Factory.newFeatureMap());
54          
55          annieController.add(split);
56          
57          // Load POS tagger
58          ProcessingResource postagger = (ProcessingResource)
59              Factory.createResource("gate.creole.POSTagger",
60                                     Factory.newFeatureMap());
61          
62          annieController.add(postagger);
63  
64  
65          // Load Gazetteer -- this is a two step process
66          FeatureMap gazetteerFeatures = Factory.newFeatureMap();
67          gazetteerFeatures.put("encoding","ISO-8859-1");
68  
69          // Step one: Locate the gazetteer file
70          try {
71              URL gazetteerURL =
72                  new URL("jar:file:" + filePath +
73                          "muse.jar!/muse/resources/gazetteer/lists.def");
74              gazetteerFeatures.put("listsURL", gazetteerURL);
75          } catch(MalformedURLException e) {
76              e.printStackTrace();
77          }
78          
79          // Step two: Load the gazetteer from the file
80          ProcessingResource gazetteer = (ProcessingResource)
81              Factory.createResource("gate.creole.gazetteer.DefaultGazetteer",
82                                     gazetteerFeatures);
83          
84          annieController.add(gazetteer);        
85  
86          // Load Grammar -- similar to gazetteer
87          FeatureMap grammarFeatures = Factory.newFeatureMap();
88          
89          try {
90              URL grammarURL =
91                  new URL("jar:file:" + filePath +
92                          "muse.jar!/muse/resources/grammar/main/main.jape");
93              grammarFeatures.put("grammarURL", grammarURL);
94          } catch(MalformedURLException e) {
95              e.printStackTrace();
96          }
97          
98          ProcessingResource grammar = (ProcessingResource)
99              Factory.createResource("gate.creole.ANNIETransducer",
100                                    grammarFeatures);
101         
102         annieController.add(grammar);
103 
104         // Load Ortho Matcher
105         ProcessingResource orthoMatcher = (ProcessingResource)
106             Factory.createResource("gate.creole.orthomatcher.OrthoMatcher",
107                                    Factory.newFeatureMap());
108         
109         annieController.add(orthoMatcher);
110 
111     } // initAnnie()
112     
113     /**
114      * This method should be called from a servlet or JSP.
115      * @param app The current servlet context, eg the JSP implicit variable "application"
116      * @param url The url of the file to be analysed
117      * @param annotations An array of annotations
118      */
119     public String process(ServletContext app, String url, String[] annotations)
120         throws GateException, IOException {
121 
122         if (app.getAttribute(GATE_INIT_KEY) == null) {
123             Gate.setLocalWebServer(false);
124             Gate.setNetConnected(false);
125 
126             System.setProperty("java.protocol.handler.pkgs",
127                                "gate.util.protocols");
128             
129             // Do the deed
130             Gate.init();
131 
132             app.setAttribute(GATE_INIT_KEY, "true");
133         }
134 
135         if (app.getAttribute(ANNIE_CONTROLLER_KEY) == null) {
136             // initialise ANNIE (this may take several minutes)
137 
138             filePath = app.getInitParameter("muse.path");
139             this.initAnnie();
140 
141             app.setAttribute(ANNIE_CONTROLLER_KEY, annieController);
142         }
143         else {
144             annieController = (SerialAnalyserController) 
145                 app.getAttribute(ANNIE_CONTROLLER_KEY);
146         }
147 
148         
149         // create a GATE corpus and add a document from the URL specified
150         Corpus corpus =
151             (Corpus) Factory.createResource("gate.corpora.CorpusImpl");
152         URL u = new URL(url);
153         FeatureMap params = Factory.newFeatureMap();
154         params.put("sourceUrl", u);
155 
156         Document doc = (Document)
157             Factory.createResource("gate.corpora.DocumentImpl", params);
158         corpus.add(doc);
159             
160         
161         // tell the pipeline about the corpus and run it
162         annieController.setCorpus(corpus);
163         annieController.execute();
164         
165         // Get XML marked up document
166         AnnotationSet defaultAnnotSet = doc.getAnnotations();
167         Set annotTypesRequired = new HashSet();
168 
169         if (annotations != null) {
170             for (int i=0;i<annotations.length;i++) {
171                 annotTypesRequired.add(annotations[i]);
172             }
173             AnnotationSet selectedAnnotations =
174                 defaultAnnotSet.get(annotTypesRequired);
175             return doc.toXml(selectedAnnotations, true);
176         }
177         else {
178             return doc.toXml();
179         }
180      
181     } // process
182     
183 } // class WebAnnie
184