1   /*
2    *  Scratch.java
3    *
4    *  Copyright (c) 1998-2001, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Hamish Cunningham, 22/03/00
12   *
13   *  $Id: Scratch.java,v 1.60 2002/07/12 13:24:30 valyt Exp $
14   */
15  
16  
17  package gate.util;
18  
19  import java.util.*;
20  import java.net.*;
21  import java.io.*;
22  import java.util.zip.*;
23  
24  import gate.*;
25  import gate.creole.*;
26  import gate.creole.ir.*;
27  import gate.creole.tokeniser.*;
28  import gate.creole.gazetteer.*;
29  import gate.persist.*;
30  import gate.gui.*;
31  
32  import org.xml.sax.*;
33  import javax.xml.parsers.*;
34  import org.w3c.www.mime.*;
35  
36  /** A scratch pad for experimenting.
37    */
38  public class Scratch
39  {
40    /** Debug flag */
41    private static final boolean DEBUG = false;
42  
43    public static void main(String args[]) throws Exception {
44  //    Gate.init();
45  //
46  //    List classes = Tools.findSubclasses(gate.creole.ir.Search.class);
47  //    if(classes != null) for(int i = 0; i < classes.size(); i++){
48  //      Out.prln(classes.get(i).toString());
49  //    }
50  //    createIndex();
51  //    URL anURL = new URL("file:/z:/a/b/c/d.txt");
52  //    URL anotherURL = new URL("file:/z:/a/b/c/d.txt");
53  //    String relPath = gate.util.persistence.PersistenceManager.
54  //                     getRelativePath(anURL, anotherURL);
55  //    Out.prln("Context: " + anURL);
56  //    Out.prln("Target: " + anotherURL);
57  //    Out.prln("Relative path: " + relPath);
58  //    Out.prln("Result " + new URL(anURL, relPath));
59  //    javax.swing.text.FlowView fv;
60  //    javax.swing.UIManager.setLookAndFeel(javax.swing.UIManager.getSystemLookAndFeelClassName());
61  //    Map uidefaults  = (Map)javax.swing.UIManager.getDefaults();
62  //    List keys = new ArrayList(uidefaults.keySet());
63  //    Collections.sort(keys);
64  //    Iterator keyIter = keys.iterator();
65  //    while(keyIter.hasNext()){
66  //      Object key = keyIter.next();
67  //      System.out.println(key + " : " + uidefaults.get(key));
68  //    }
69  
70      // initialise the thing
71  //    Gate.setNetConnected(false);
72  //    Gate.setLocalWebServer(false);
73  //    Gate.init();
74  
75  //    Scratch oneOfMe = new Scratch();
76  //    try{
77  //      oneOfMe.runNerc();
78  //    } catch (Exception e) {
79  //      e.printStackTrace(Out.getPrintWriter());
80  //    }
81  
82  
83  //    CreoleRegister reg = Gate.getCreoleRegister();
84  //System.out.println("Instances for " + reg.getLrInstances("gate.creole.AnnotationSchema"));
85  //System.out.println("Instances for " + reg.getAllInstances ("gate.creole.AnnotationSchema"));
86  
87  //System.out.println("VRs for " + reg.getAnnotationVRs("Tree"));
88  //System.out.println("VRs for " + reg.getAnnotationVRs());
89  
90  //System.out.println(reg.getLargeVRsForResource("gate.corpora.DocumentImpl"));
91  
92    } // main
93  
94    /** Example of using an exit-time hook. */
95    public static void exitTimeHook() {
96      Runtime.getRuntime().addShutdownHook(new Thread() {
97        public void run() {
98          System.out.println("shutting down");
99          System.out.flush();
100 
101         // create a File to store the state in
102         File stateFile = new File("z:\\tmp", "GateGuiState.gzsr");
103 
104         // dump the state into the new File
105         try {
106           ObjectOutputStream oos = new ObjectOutputStream(
107             new GZIPOutputStream(new FileOutputStream(stateFile))
108           );
109           System.out.println("writing main frame");
110           System.out.flush();
111           oos.writeObject(Main.getMainFrame());
112           oos.close();
113         } catch(Exception e) {
114           System.out.println("Couldn't write to state file: " + e);
115         }
116 
117         System.out.println("done");
118         System.out.flush();
119       }
120     });
121   } // exitTimeHook()
122 
123   /**
124    * ***** <B>Failed</B> *****
125    * attempt to serialise whole gui state - various swing components
126    * don't like to be serialised :-(. might be worth trying again when
127    * jdk1.4 arrives.
128    */
129   public static void dumpGuiState() {
130     System.out.println("dumping gui state...");
131     System.out.flush();
132 
133     // create a File to store the state in
134     File stateFile = new File("z:\\tmp", "GateGuiState.gzsr");
135 
136     // dump the state into the new File
137     try {
138       ObjectOutputStream oos = new ObjectOutputStream(
139         new GZIPOutputStream(new FileOutputStream(stateFile))
140       );
141       MainFrame mf = Main.getMainFrame();
142 
143       // wait for 1 sec
144       long startTime = System.currentTimeMillis();
145       long timeNow = System.currentTimeMillis();
146       while(timeNow - startTime < 3000){
147         try {
148           Thread.sleep(150);
149           timeNow = System.currentTimeMillis();
150         } catch(InterruptedException ie) {}
151       }
152 
153       System.out.println("writing main frame");
154       System.out.flush();
155       oos.writeObject(mf);
156       oos.close();
157     } catch(Exception e) {
158       System.out.println("Couldn't write to state file: " + e);
159     }
160 
161     System.out.println("...done gui dump");
162     System.out.flush();
163   } // dumpGuiState
164 
165   /**
166    * Run NERC and print out the various stages (doesn't actually
167    * use Nerc but the individual bits), and serialise then deserialise
168    * the NERC system.
169    */
170   public void runNerc() throws Exception {
171     long startTime = System.currentTimeMillis();
172 
173     Out.prln("gate init");
174     Gate.setLocalWebServer(false);
175     Gate.setNetConnected(false);
176     Gate.init();
177 
178     Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
179     Out.prln("creating resources");
180 
181     // a controller
182     Controller c1 = (Controller) Factory.createResource(
183       "gate.creole.SerialController",
184       Factory.newFeatureMap()
185     );
186     c1.setName("Scratch controller");
187 
188     //get a document
189     FeatureMap params = Factory.newFeatureMap();
190     params.put(Document.DOCUMENT_URL_PARAMETER_NAME, Gate.getUrl("tests/doc0.html"));
191     params.put(Document.DOCUMENT_MARKUP_AWARE_PARAMETER_NAME, "false");
192     Document doc = (Document)Factory.createResource("gate.corpora.DocumentImpl",
193                                                     params);
194 
195     //create a default tokeniser
196     params = Factory.newFeatureMap();
197     params.put(DefaultTokeniser.DEF_TOK_TOKRULES_URL_PARAMETER_NAME,
198       "gate:/creole/tokeniser/DefaultTokeniser.rules");
199     params.put(DefaultTokeniser.DEF_TOK_ENCODING_PARAMETER_NAME, "UTF-8");
200     params.put(DefaultTokeniser.DEF_TOK_DOCUMENT_PARAMETER_NAME, doc);
201     ProcessingResource tokeniser = (ProcessingResource) Factory.createResource(
202       "gate.creole.tokeniser.DefaultTokeniser", params
203     );
204 
205     //create a default gazetteer
206     params = Factory.newFeatureMap();
207     params.put(DefaultGazetteer.DEF_GAZ_DOCUMENT_PARAMETER_NAME, doc);
208     params.put(DefaultGazetteer.DEF_GAZ_LISTS_URL_PARAMETER_NAME,
209       "gate:/creole/gazeteer/default/lists.def");
210     ProcessingResource gaz = (ProcessingResource) Factory.createResource(
211       "gate.creole.gazetteer.DefaultGazetteer", params
212     );
213 
214     //create a default transducer
215     params = Factory.newFeatureMap();
216     params.put(Transducer.TRANSD_DOCUMENT_PARAMETER_NAME, doc);
217     //params.put("grammarURL", new File("z:\\tmp\\main.jape").toURL());
218     ProcessingResource trans = (ProcessingResource) Factory.createResource(
219       "gate.creole.Transducer", params
220     );
221 
222     // get the controller to encapsulate the tok and gaz
223     c1.getPRs().add(tokeniser);
224     c1.getPRs().add(gaz);
225     c1.getPRs().add(trans);
226 
227     Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
228     Out.prln("dumping state");
229 
230     // create a File to store the state in
231     File stateFile = new File("z:\\tmp", "SerialisedGateState.gzsr");
232 
233     // dump the state into the new File
234     try {
235       ObjectOutputStream oos = new ObjectOutputStream(
236         new GZIPOutputStream(new FileOutputStream(stateFile))
237       );
238       oos.writeObject(new SessionState());
239       oos.close();
240     } catch(IOException e) {
241       throw new GateException("Couldn't write to state file: " + e);
242     }
243 
244     Out.prln(System.getProperty("user.home"));
245 
246     Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
247     Out.prln("reinstating");
248 
249     try {
250       FileInputStream fis = new FileInputStream(stateFile);
251       GZIPInputStream zis = new GZIPInputStream(fis);
252       ObjectInputStream ois = new ObjectInputStream(zis);
253       SessionState state = (SessionState) ois.readObject();
254       ois.close();
255     } catch(IOException e) {
256       throw
257         new GateException("Couldn't read file "+stateFile+": "+e);
258     } catch(ClassNotFoundException ee) {
259       throw
260         new GateException("Couldn't find class: "+ee);
261     }
262 
263     Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
264     Out.prln("done");
265   } // runNerc()
266 
267 
268   /** Inner class for holding CR and DSR for serialisation experiments */
269   class SessionState implements Serializable {
270     SessionState() {
271       cr = Gate.getCreoleRegister();
272       dsr = Gate.getDataStoreRegister();
273     }
274 
275     CreoleRegister cr;
276 
277     DataStoreRegister dsr;
278 
279     // other state from Gate? and elsewhere?
280   } // SessionState
281 
282   /** Generate a random integer for file naming. */
283   protected static int random() {
284     return randomiser.nextInt(9999);
285   } // random
286 
287   /**
288    * Generates an index for a corpus in a datastore on Valy's computer in order
289    * to have some test data.
290    */
291   public static void createIndex() throws Exception{
292     String dsURLString = "file:///d:/temp/ds";
293     String indexLocation = "d:/temp/ds.idx";
294 
295     Gate.init();
296 
297     //open the datastore
298     SerialDataStore sds = (SerialDataStore)Factory.openDataStore(
299                             "gate.persist.SerialDataStore", dsURLString);
300     sds.open();
301     List corporaIds = sds.getLrIds("gate.corpora.SerialCorpusImpl");
302     IndexedCorpus corpus = (IndexedCorpus)
303                            sds.getLr("gate.corpora.SerialCorpusImpl",
304 
305                                      corporaIds.get(0));
306     DefaultIndexDefinition did = new DefaultIndexDefinition();
307     did.setIrEngineClassName(gate.creole.ir.lucene.
308                              LuceneIREngine.class.getName());
309 
310     did.setIndexLocation(indexLocation);
311     did.addIndexField(new IndexField("body", new ContentPropertyReader(), false));
312 
313     corpus.setIndexDefinition(did);
314 
315     Out.prln("removing old index");
316     corpus.getIndexManager().deleteIndex();
317     Out.prln("building new index");
318     corpus.getIndexManager().createIndex();
319     Out.prln("optimising new index");
320     corpus.getIndexManager().optimizeIndex();
321     Out.prln("saving corpus");
322     sds.sync(corpus);
323     Out.prln("done!");
324   }
325 
326   /**
327    *
328    * @param file a TXT file containing the text
329    */
330   public static void tokeniseFile(File file) throws Exception{
331     //initialise GATE (only call it once!!)
332     Gate.init();
333     //create the document
334     Document doc = Factory.newDocument(file.toURL());
335     //create the tokeniser
336     DefaultTokeniser tokeniser = (DefaultTokeniser)Factory.createResource(
337       "gate.creole.tokeniser.DefaultTokeniser");
338 
339     //tokenise the document
340     tokeniser.setParameterValue(DefaultTokeniser.DEF_TOK_DOCUMENT_PARAMETER_NAME, doc);
341     tokeniser.execute();
342 
343     //extract data from document
344     //we need tokens and spaces
345     Set annotationTypes = new HashSet();
346     annotationTypes.add(ANNIEConstants.TOKEN_ANNOTATION_TYPE);
347     annotationTypes.add(ANNIEConstants.SPACE_TOKEN_ANNOTATION_TYPE);
348 
349     List tokenList = new ArrayList(doc.getAnnotations().get(annotationTypes));
350     Collections.sort(tokenList, new OffsetComparator());
351 
352     //iterate through the tokens
353     Iterator tokIter = tokenList.iterator();
354     while(tokIter.hasNext()){
355       Annotation anAnnotation = (Annotation)tokIter.next();
356       System.out.println("Annotation: (" +
357                         anAnnotation.getStartNode().getOffset().toString() +
358                         ", " + anAnnotation.getEndNode().getOffset().toString() +
359                         "[type: " + anAnnotation.getType() +
360                          ", features: " + anAnnotation.getFeatures().toString()+
361                          "]" );
362     }
363   }
364 
365 
366   public static class ContentPropertyReader implements PropertyReader{
367     public String getPropertyValue(gate.Document doc){
368       return doc.getContent().toString();
369     }
370   }
371 
372   /** Random number generator */
373   protected static Random randomiser = new Random();
374 
375 } // class Scratch
376 
377