1   /*
2    *  Scratch.java
3    *
4    *  Copyright (c) 1998-2001, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Hamish Cunningham, 22/03/00
12   *
13   *  $Id: Scratch.java,v 1.68 2003/07/29 12:46:53 valyt Exp $
14   */
15  
16  
17  package gate.util;
18  
19  import java.util.*;
20  import java.net.*;
21  import java.io.*;
22  import java.util.zip.*;
23  
24  import gate.*;
25  import gate.creole.*;
26  import gate.creole.ir.*;
27  import gate.creole.tokeniser.*;
28  import gate.creole.gazetteer.*;
29  import gate.persist.*;
30  import gate.gui.*;
31  
32  import org.xml.sax.*;
33  import javax.xml.parsers.*;
34  import org.w3c.www.mime.*;
35  
36  /** A scratch pad for experimenting.
37    */
38  public class Scratch
39  {
  /**
   * Debug flag. Declared as a constant {@code false}; no other member of
   * this class reads it.
   */
  private static final boolean DEBUG = false;
42  
43    public static void main(String args[]) throws Exception {
44  
45      Gate.init();
46      Document doc = Factory.newDocument("The quick brown fox jumped over the lazy dog");
47      AnnotationSet annSet1 = doc.getAnnotations("Set1");
48      annSet1.add(new Long(1), new Long(5), "Foo", Factory.newFeatureMap());
49  
50      AnnotationSet annSet2 = doc.getAnnotations("Set2");
51      annSet2.add(new Long(1), new Long(5), "Bar", Factory.newFeatureMap());
52      annSet2.addAll(annSet1);
53  
54      List annotations = new ArrayList(annSet2);
55      Collections.sort(annotations, new OffsetComparator());
56      Iterator annIter = annotations.iterator();
57      while(annIter.hasNext()){
58        Annotation ann =(Annotation)annIter.next();
59        System.out.print("Start node: ID = " + ann.getStartNode().getId());
60        System.out.println(" Offset = " + ann.getStartNode().getOffset());
61        System.out.print("End node: ID = " + ann.getEndNode().getId());
62        System.out.println(" Offset = " + ann.getEndNode().getOffset());
63  
64      }
65  //    File tempFile = File.createTempFile("gaga", "");
66  //    tempFile.delete();
67  //    tempFile.mkdir();
68  //    tempFile.deleteOnExit();
69  //    File tempFile2 = File.createTempFile("fil", ".tmp", tempFile);
70  //    tempFile2.deleteOnExit();
71  //System.out.println(tempFile.getCanonicalPath());
72  //    Thread.sleep(100000);
73  //
74  //    Map charsets = java.nio.charset.Charset.availableCharsets();
75  //    Iterator namesIter = charsets.keySet().iterator();
76  //    while(namesIter.hasNext()){
77  //      String name = (String)namesIter.next();
78  //      System.out.println(name + " : " + charsets.get(name));
79  //    }
80  //    System.out.println(System.getProperty("file.encoding"));
81  //    System.out.println(java.nio.charset.Charset.forName(System.getProperty("file.encoding")).name());
82  //    System.out.println(new Character((char)0xa3));
83  //    Gate.init();
84  //
85  //    List classes = Tools.findSubclasses(gate.creole.ir.Search.class);
86  //    if(classes != null) for(int i = 0; i < classes.size(); i++){
87  //      Out.prln(classes.get(i).toString());
88  //    }
89  //    createIndex();
90  //    URL anURL = new URL("file:/z:/a/b/c/d.txt");
91  //    URL anotherURL = new URL("file:/z:/a/b/c/d.txt");
92  //    String relPath = gate.util.persistence.PersistenceManager.
93  //                     getRelativePath(anURL, anotherURL);
94  //    Out.prln("Context: " + anURL);
95  //    Out.prln("Target: " + anotherURL);
96  //    Out.prln("Relative path: " + relPath);
97  //    Out.prln("Result " + new URL(anURL, relPath));
98  //    javax.swing.text.FlowView fv;
99  //    javax.swing.UIManager.setLookAndFeel(javax.swing.UIManager.getSystemLookAndFeelClassName());
100 //    Map uidefaults  = (Map)javax.swing.UIManager.getDefaults();
101 //    List keys = new ArrayList(uidefaults.keySet());
102 //    Collections.sort(keys);
103 //    Iterator keyIter = keys.iterator();
104 //    while(keyIter.hasNext()){
105 //      Object key = keyIter.next();
106 //      System.out.println(key + " : " + uidefaults.get(key));
107 //    }
108 
109     // initialise the thing
110 //    Gate.setNetConnected(false);
111 //    Gate.setLocalWebServer(false);
112 //    Gate.init();
113 
114 //    Scratch oneOfMe = new Scratch();
115 //    try{
116 //      oneOfMe.runNerc();
117 //    } catch (Exception e) {
118 //      e.printStackTrace(Out.getPrintWriter());
119 //    }
120 
121 
122 //    CreoleRegister reg = Gate.getCreoleRegister();
123 //System.out.println("Instances for " + reg.getLrInstances("gate.creole.AnnotationSchema"));
124 //System.out.println("Instances for " + reg.getAllInstances ("gate.creole.AnnotationSchema"));
125 
126 //System.out.println("VRs for " + reg.getAnnotationVRs("Tree"));
127 //System.out.println("VRs for " + reg.getAnnotationVRs());
128 
129 //System.out.println(reg.getLargeVRsForResource("gate.corpora.DocumentImpl"));
130 
131   } // main
132 
133   /** Example of using an exit-time hook. */
134   public static void exitTimeHook() {
135     Runtime.getRuntime().addShutdownHook(new Thread() {
136       public void run() {
137         System.out.println("shutting down");
138         System.out.flush();
139 
140         // create a File to store the state in
141         File stateFile = new File("z:\\tmp", "GateGuiState.gzsr");
142 
143         // dump the state into the new File
144         try {
145           ObjectOutputStream oos = new ObjectOutputStream(
146             new GZIPOutputStream(new FileOutputStream(stateFile))
147           );
148           System.out.println("writing main frame");
149           System.out.flush();
150           oos.writeObject(Main.getMainFrame());
151           oos.close();
152         } catch(Exception e) {
153           System.out.println("Couldn't write to state file: " + e);
154         }
155 
156         System.out.println("done");
157         System.out.flush();
158       }
159     });
160   } // exitTimeHook()
161 
162   /**
163    * ***** <B>Failed</B> *****
164    * attempt to serialise whole gui state - various swing components
165    * don't like to be serialised :-(. might be worth trying again when
166    * jdk1.4 arrives.
167    */
168   public static void dumpGuiState() {
169     System.out.println("dumping gui state...");
170     System.out.flush();
171 
172     // create a File to store the state in
173     File stateFile = new File("z:\\tmp", "GateGuiState.gzsr");
174 
175     // dump the state into the new File
176     try {
177       ObjectOutputStream oos = new ObjectOutputStream(
178         new GZIPOutputStream(new FileOutputStream(stateFile))
179       );
180       MainFrame mf = Main.getMainFrame();
181 
182       // wait for 1 sec
183       long startTime = System.currentTimeMillis();
184       long timeNow = System.currentTimeMillis();
185       while(timeNow - startTime < 3000){
186         try {
187           Thread.sleep(150);
188           timeNow = System.currentTimeMillis();
189         } catch(InterruptedException ie) {}
190       }
191 
192       System.out.println("writing main frame");
193       System.out.flush();
194       oos.writeObject(mf);
195       oos.close();
196     } catch(Exception e) {
197       System.out.println("Couldn't write to state file: " + e);
198     }
199 
200     System.out.println("...done gui dump");
201     System.out.flush();
202   } // dumpGuiState
203 
204   /**
205    * Run NERC and print out the various stages (doesn't actually
206    * use Nerc but the individual bits), and serialise then deserialise
207    * the NERC system.
208    */
209   public void runNerc() throws Exception {
210     long startTime = System.currentTimeMillis();
211 
212     Out.prln("gate init");
213     Gate.setLocalWebServer(false);
214     Gate.setNetConnected(false);
215     Gate.init();
216 
217     Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
218     Out.prln("creating resources");
219 
220     // a controller
221     Controller c1 = (Controller) Factory.createResource(
222       "gate.creole.SerialController",
223       Factory.newFeatureMap()
224     );
225     c1.setName("Scratch controller");
226 
227     //get a document
228     FeatureMap params = Factory.newFeatureMap();
229     params.put(Document.DOCUMENT_URL_PARAMETER_NAME, Gate.getUrl("tests/doc0.html"));
230     params.put(Document.DOCUMENT_MARKUP_AWARE_PARAMETER_NAME, "false");
231     Document doc = (Document)Factory.createResource("gate.corpora.DocumentImpl",
232                                                     params);
233 
234     //create a default tokeniser
235     params = Factory.newFeatureMap();
236     params.put(DefaultTokeniser.DEF_TOK_TOKRULES_URL_PARAMETER_NAME,
237       "gate:/creole/tokeniser/DefaultTokeniser.rules");
238     params.put(DefaultTokeniser.DEF_TOK_ENCODING_PARAMETER_NAME, "UTF-8");
239     params.put(DefaultTokeniser.DEF_TOK_DOCUMENT_PARAMETER_NAME, doc);
240     ProcessingResource tokeniser = (ProcessingResource) Factory.createResource(
241       "gate.creole.tokeniser.DefaultTokeniser", params
242     );
243 
244     //create a default gazetteer
245     params = Factory.newFeatureMap();
246     params.put(DefaultGazetteer.DEF_GAZ_DOCUMENT_PARAMETER_NAME, doc);
247     params.put(DefaultGazetteer.DEF_GAZ_LISTS_URL_PARAMETER_NAME,
248       "gate:/creole/gazeteer/default/lists.def");
249     ProcessingResource gaz = (ProcessingResource) Factory.createResource(
250       "gate.creole.gazetteer.DefaultGazetteer", params
251     );
252 
253     //create a default transducer
254     params = Factory.newFeatureMap();
255     params.put(Transducer.TRANSD_DOCUMENT_PARAMETER_NAME, doc);
256     //params.put("grammarURL", new File("z:\\tmp\\main.jape").toURL());
257     ProcessingResource trans = (ProcessingResource) Factory.createResource(
258       "gate.creole.Transducer", params
259     );
260 
261     // get the controller to encapsulate the tok and gaz
262     c1.getPRs().add(tokeniser);
263     c1.getPRs().add(gaz);
264     c1.getPRs().add(trans);
265 
266     Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
267     Out.prln("dumping state");
268 
269     // create a File to store the state in
270     File stateFile = new File("z:\\tmp", "SerialisedGateState.gzsr");
271 
272     // dump the state into the new File
273     try {
274       ObjectOutputStream oos = new ObjectOutputStream(
275         new GZIPOutputStream(new FileOutputStream(stateFile))
276       );
277       oos.writeObject(new SessionState());
278       oos.close();
279     } catch(IOException e) {
280       throw new GateException("Couldn't write to state file: " + e);
281     }
282 
283     Out.prln(System.getProperty("user.home"));
284 
285     Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
286     Out.prln("reinstating");
287 
288     try {
289       FileInputStream fis = new FileInputStream(stateFile);
290       GZIPInputStream zis = new GZIPInputStream(fis);
291       ObjectInputStream ois = new ObjectInputStream(zis);
292       SessionState state = (SessionState) ois.readObject();
293       ois.close();
294     } catch(IOException e) {
295       throw
296         new GateException("Couldn't read file "+stateFile+": "+e);
297     } catch(ClassNotFoundException ee) {
298       throw
299         new GateException("Couldn't find class: "+ee);
300     }
301 
302     Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
303     Out.prln("done");
304   } // runNerc()
305 
306 
307   /** Inner class for holding CR and DSR for serialisation experiments */
308   class SessionState implements Serializable {
309     SessionState() {
310       cr = Gate.getCreoleRegister();
311       dsr = Gate.getDataStoreRegister();
312     }
313 
314     CreoleRegister cr;
315 
316     DataStoreRegister dsr;
317 
318     // other state from Gate? and elsewhere?
319   } // SessionState
320 
321   /** Generate a random integer for file naming. */
322   protected static int random() {
323     return randomiser.nextInt(9999);
324   } // random
325 
326   /**
327    * Generates an index for a corpus in a datastore on Valy's computer in order
328    * to have some test data.
329    */
330   public static void createIndex() throws Exception{
331     String dsURLString = "file:///d:/temp/ds";
332     String indexLocation = "d:/temp/ds.idx";
333 
334     Gate.init();
335 
336     //open the datastore
337     SerialDataStore sds = (SerialDataStore)Factory.openDataStore(
338                             "gate.persist.SerialDataStore", dsURLString);
339     sds.open();
340     List corporaIds = sds.getLrIds("gate.corpora.SerialCorpusImpl");
341     IndexedCorpus corpus = (IndexedCorpus)
342                            sds.getLr("gate.corpora.SerialCorpusImpl",
343 
344                                      corporaIds.get(0));
345     DefaultIndexDefinition did = new DefaultIndexDefinition();
346     did.setIrEngineClassName(gate.creole.ir.lucene.
347                              LuceneIREngine.class.getName());
348 
349     did.setIndexLocation(indexLocation);
350     did.addIndexField(new IndexField("body", new ContentPropertyReader(), false));
351 
352     corpus.setIndexDefinition(did);
353 
354     Out.prln("removing old index");
355     corpus.getIndexManager().deleteIndex();
356     Out.prln("building new index");
357     corpus.getIndexManager().createIndex();
358     Out.prln("optimising new index");
359     corpus.getIndexManager().optimizeIndex();
360     Out.prln("saving corpus");
361     sds.sync(corpus);
362     Out.prln("done!");
363   }
364 
365   /**
366    *
367    * @param file a TXT file containing the text
368    */
369   public static void tokeniseFile(File file) throws Exception{
370     //initialise GATE (only call it once!!)
371     Gate.init();
372     //create the document
373     Document doc = Factory.newDocument(file.toURL());
374     //create the tokeniser
375     DefaultTokeniser tokeniser = (DefaultTokeniser)Factory.createResource(
376       "gate.creole.tokeniser.DefaultTokeniser");
377 
378     //tokenise the document
379     tokeniser.setParameterValue(DefaultTokeniser.DEF_TOK_DOCUMENT_PARAMETER_NAME, doc);
380     tokeniser.execute();
381 
382     //extract data from document
383     //we need tokens and spaces
384     Set annotationTypes = new HashSet();
385     annotationTypes.add(ANNIEConstants.TOKEN_ANNOTATION_TYPE);
386     annotationTypes.add(ANNIEConstants.SPACE_TOKEN_ANNOTATION_TYPE);
387 
388     List tokenList = new ArrayList(doc.getAnnotations().get(annotationTypes));
389     Collections.sort(tokenList, new OffsetComparator());
390 
391     //iterate through the tokens
392     Iterator tokIter = tokenList.iterator();
393     while(tokIter.hasNext()){
394       Annotation anAnnotation = (Annotation)tokIter.next();
395       System.out.println("Annotation: (" +
396                         anAnnotation.getStartNode().getOffset().toString() +
397                         ", " + anAnnotation.getEndNode().getOffset().toString() +
398                         "[type: " + anAnnotation.getType() +
399                          ", features: " + anAnnotation.getFeatures().toString()+
400                          "]" );
401     }
402   }
403 
404 
405   public static class ContentPropertyReader implements PropertyReader{
406     public String getPropertyValue(gate.Document doc){
407       return doc.getContent().toString();
408     }
409   }
410 
  /**
   * Random number generator shared by the whole class; {@link #random()}
   * draws its values from it. Created once, with the default seed, when the
   * class is initialised.
   */
  protected static Random randomiser = new Random();
413 
414 } // class Scratch
415 
416 
417