1   /*
2    *  Scratch.java
3    *
4    *  Copyright (c) 1998-2004, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Hamish Cunningham, 22/03/00
12   *
13   *  $Id: Scratch.java,v 1.78 2004/07/23 16:23:24 valyt Exp $
14   */
15  
16  
17  package gate.util;
18  
19  import java.awt.Color;
20  import java.io.*;
21  import java.util.*;
22  import java.util.prefs.Preferences;
23  import java.util.zip.GZIPInputStream;
24  import java.util.zip.GZIPOutputStream;
25  
26  import javax.swing.UIManager;
27  
28  import gate.*;
29  import gate.creole.ANNIEConstants;
30  import gate.creole.Transducer;
31  import gate.creole.gazetteer.DefaultGazetteer;
32  import gate.creole.ir.*;
33  import gate.creole.tokeniser.DefaultTokeniser;
34  import gate.gui.MainFrame;
35  import gate.gui.docview.AnnotationSetsView;
36  import gate.persist.SerialDataStore;
37  
38  /** A scratch pad for experimenting.
39    */
40  public class Scratch
41  {
  /**
   * Debug flag. Compile-time constant, currently <code>false</code>; it is
   * not referenced by any of the code visible in this class.
   */
  private static final boolean DEBUG = false;
44  
45    public static void main(String args[]) throws Exception {
46      Map listsMap = new HashMap();
47      listsMap.put("blah", new ArrayList());
48      List theList = (List)listsMap.get("blah");
49      System.out.println(theList);
50      theList.add("object");
51      theList = (List)listsMap.get("blah");
52      System.out.println(theList);
53      
54      
55      
56      File home = new File("z:/gate/plugins");
57      File tok = new File(home, "ANNIE/resources/tokeniser/Default.rul");
58      System.out.println(tok);
59      
60      Preferences prefRoot = Preferences.userNodeForPackage(AnnotationSetsView.class);
61      System.out.println(prefRoot.keys().length);
62      prefRoot.removeNode();
63      prefRoot = Preferences.userNodeForPackage(AnnotationSetsView.class);
64      System.out.println(prefRoot.keys().length);
65      Color col = new Color(100, 101, 102, 103);
66      int rgb = col.getRGB();
67      int alpha = col.getAlpha();
68      int rgba = rgb | (alpha << 24);
69      Color col1 = new Color(rgba, true);
70      System.out.println(col + " a: " + col.getAlpha());
71      System.out.println(col1+ " a: " + col1.getAlpha());
72      System.out.println(col.equals(col1));
73  //    Map defaultsMap = UIManager.getLookAndFeelDefaults();
74  //    System.out.println(defaultsMap.keySet());
75      
76      
77  //    double a = 16.99;
78  //    double b = 9.99;
79  //    double c = a - b;
80  //    System.out.println(c);
81  
82  //    Runtime.getRuntime().exec(new String[]{"cmd",
83  //                                           "C:\\Program Files\\GATE 2.2\\bin\\gate.bat"},
84  //                              null,
85  //                              new File("C:\\Program Files\\GATE 2.2\\bin"));
86  
87  //    Gate.init();
88  //    Document doc = Factory.newDocument("The quick brown fox jumped over the lazy dog");
89  //    AnnotationSet annSet1 = doc.getAnnotations("Set1");
90  //    annSet1.add(new Long(1), new Long(5), "Foo", Factory.newFeatureMap());
91  //
92  //    AnnotationSet annSet2 = doc.getAnnotations("Set2");
93  //    annSet2.add(new Long(1), new Long(5), "Bar", Factory.newFeatureMap());
94  //    annSet2.addAll(annSet1);
95  //
96  //    List annotations = new ArrayList(annSet2);
97  //    Collections.sort(annotations, new OffsetComparator());
98  //    Iterator annIter = annotations.iterator();
99  //    while(annIter.hasNext()){
100 //      Annotation ann =(Annotation)annIter.next();
101 //      System.out.print("Start node: ID = " + ann.getStartNode().getId());
102 //      System.out.println(" Offset = " + ann.getStartNode().getOffset());
103 //      System.out.print("End node: ID = " + ann.getEndNode().getId());
104 //      System.out.println(" Offset = " + ann.getEndNode().getOffset());
105 //
106 //    }
107 //    File tempFile = File.createTempFile("gaga", "");
108 //    tempFile.delete();
109 //    tempFile.mkdir();
110 //    tempFile.deleteOnExit();
111 //    File tempFile2 = File.createTempFile("fil", ".tmp", tempFile);
112 //    tempFile2.deleteOnExit();
113 //System.out.println(tempFile.getCanonicalPath());
114 //    Thread.sleep(100000);
115 //
116 //    Map charsets = java.nio.charset.Charset.availableCharsets();
117 //    Iterator namesIter = charsets.keySet().iterator();
118 //    while(namesIter.hasNext()){
119 //      String name = (String)namesIter.next();
120 //      System.out.println(name + " : " + charsets.get(name));
121 //    }
122 //    System.out.println(System.getProperty("file.encoding"));
123 //    System.out.println(java.nio.charset.Charset.forName(System.getProperty("file.encoding")).name());
124 //    System.out.println(new Character((char)0xa3));
125 //    Gate.init();
126 //
127 //    List classes = Tools.findSubclasses(gate.creole.ir.Search.class);
128 //    if(classes != null) for(int i = 0; i < classes.size(); i++){
129 //      Out.prln(classes.get(i).toString());
130 //    }
131 //    createIndex();
132 //    URL anURL = new URL("file:/z:/a/b/c/d.txt");
133 //    URL anotherURL = new URL("file:/z:/a/b/c/d.txt");
134 //    String relPath = gate.util.persistence.PersistenceManager.
135 //                     getRelativePath(anURL, anotherURL);
136 //    Out.prln("Context: " + anURL);
137 //    Out.prln("Target: " + anotherURL);
138 //    Out.prln("Relative path: " + relPath);
139 //    Out.prln("Result " + new URL(anURL, relPath));
140 //    javax.swing.text.FlowView fv;
141 //    javax.swing.UIManager.setLookAndFeel(javax.swing.UIManager.getSystemLookAndFeelClassName());
142 //    Map uidefaults  = (Map)javax.swing.UIManager.getDefaults();
143 //    List keys = new ArrayList(uidefaults.keySet());
144 //    Collections.sort(keys);
145 //    Iterator keyIter = keys.iterator();
146 //    while(keyIter.hasNext()){
147 //      Object key = keyIter.next();
148 //      System.out.println(key + " : " + uidefaults.get(key));
149 //    }
150 
151     // initialise the thing
152 //    Gate.setNetConnected(false);
153 //    Gate.setLocalWebServer(false);
154 //    Gate.init();
155 
156 //    Scratch oneOfMe = new Scratch();
157 //    try{
158 //      oneOfMe.runNerc();
159 //    } catch (Exception e) {
160 //      e.printStackTrace(Out.getPrintWriter());
161 //    }
162 
163 
164 //    CreoleRegister reg = Gate.getCreoleRegister();
165 //System.out.println("Instances for " + reg.getLrInstances("gate.creole.AnnotationSchema"));
166 //System.out.println("Instances for " + reg.getAllInstances ("gate.creole.AnnotationSchema"));
167 
168 //System.out.println("VRs for " + reg.getAnnotationVRs("Tree"));
169 //System.out.println("VRs for " + reg.getAnnotationVRs());
170 
171 //System.out.println(reg.getLargeVRsForResource("gate.corpora.DocumentImpl"));
172 
173   } // main
174 
175   /** Example of using an exit-time hook. */
176   public static void exitTimeHook() {
177     Runtime.getRuntime().addShutdownHook(new Thread() {
178       public void run() {
179         System.out.println("shutting down");
180         System.out.flush();
181 
182         // create a File to store the state in
183         File stateFile = new File("z:\\tmp", "GateGuiState.gzsr");
184 
185         // dump the state into the new File
186         try {
187           ObjectOutputStream oos = new ObjectOutputStream(
188             new GZIPOutputStream(new FileOutputStream(stateFile))
189           );
190           System.out.println("writing main frame");
191           System.out.flush();
192           oos.writeObject(Main.getMainFrame());
193           oos.close();
194         } catch(Exception e) {
195           System.out.println("Couldn't write to state file: " + e);
196         }
197 
198         System.out.println("done");
199         System.out.flush();
200       }
201     });
202   } // exitTimeHook()
203 
204   /**
205    * ***** <B>Failed</B> *****
206    * attempt to serialise whole gui state - various swing components
207    * don't like to be serialised :-(. might be worth trying again when
208    * jdk1.4 arrives.
209    */
210   public static void dumpGuiState() {
211     System.out.println("dumping gui state...");
212     System.out.flush();
213 
214     // create a File to store the state in
215     File stateFile = new File("z:\\tmp", "GateGuiState.gzsr");
216 
217     // dump the state into the new File
218     try {
219       ObjectOutputStream oos = new ObjectOutputStream(
220         new GZIPOutputStream(new FileOutputStream(stateFile))
221       );
222       MainFrame mf = Main.getMainFrame();
223 
224       // wait for 1 sec
225       long startTime = System.currentTimeMillis();
226       long timeNow = System.currentTimeMillis();
227       while(timeNow - startTime < 3000){
228         try {
229           Thread.sleep(150);
230           timeNow = System.currentTimeMillis();
231         } catch(InterruptedException ie) {}
232       }
233 
234       System.out.println("writing main frame");
235       System.out.flush();
236       oos.writeObject(mf);
237       oos.close();
238     } catch(Exception e) {
239       System.out.println("Couldn't write to state file: " + e);
240     }
241 
242     System.out.println("...done gui dump");
243     System.out.flush();
244   } // dumpGuiState
245 
246   /**
247    * Run NERC and print out the various stages (doesn't actually
248    * use Nerc but the individual bits), and serialise then deserialise
249    * the NERC system.
250    */
251   public void runNerc() throws Exception {
252     long startTime = System.currentTimeMillis();
253 
254     Out.prln("gate init");
255     Gate.setLocalWebServer(false);
256     Gate.setNetConnected(false);
257     Gate.init();
258 
259     Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
260     Out.prln("creating resources");
261 
262     // a controller
263     Controller c1 = (Controller) Factory.createResource(
264       "gate.creole.SerialController",
265       Factory.newFeatureMap()
266     );
267     c1.setName("Scratch controller");
268 
269     //get a document
270     FeatureMap params = Factory.newFeatureMap();
271     params.put(Document.DOCUMENT_URL_PARAMETER_NAME, Gate.getUrl("tests/doc0.html"));
272     params.put(Document.DOCUMENT_MARKUP_AWARE_PARAMETER_NAME, "false");
273     Document doc = (Document)Factory.createResource("gate.corpora.DocumentImpl",
274                                                     params);
275 
276     //create a default tokeniser
277     params = Factory.newFeatureMap();
278     params.put(DefaultTokeniser.DEF_TOK_TOKRULES_URL_PARAMETER_NAME,
279       "gate:/creole/tokeniser/DefaultTokeniser.rules");
280     params.put(DefaultTokeniser.DEF_TOK_ENCODING_PARAMETER_NAME, "UTF-8");
281     params.put(DefaultTokeniser.DEF_TOK_DOCUMENT_PARAMETER_NAME, doc);
282     ProcessingResource tokeniser = (ProcessingResource) Factory.createResource(
283       "gate.creole.tokeniser.DefaultTokeniser", params
284     );
285 
286     //create a default gazetteer
287     params = Factory.newFeatureMap();
288     params.put(DefaultGazetteer.DEF_GAZ_DOCUMENT_PARAMETER_NAME, doc);
289     params.put(DefaultGazetteer.DEF_GAZ_LISTS_URL_PARAMETER_NAME,
290       "gate:/creole/gazeteer/default/lists.def");
291     ProcessingResource gaz = (ProcessingResource) Factory.createResource(
292       "gate.creole.gazetteer.DefaultGazetteer", params
293     );
294 
295     //create a default transducer
296     params = Factory.newFeatureMap();
297     params.put(Transducer.TRANSD_DOCUMENT_PARAMETER_NAME, doc);
298     //params.put("grammarURL", new File("z:\\tmp\\main.jape").toURL());
299     ProcessingResource trans = (ProcessingResource) Factory.createResource(
300       "gate.creole.Transducer", params
301     );
302 
303     // get the controller to encapsulate the tok and gaz
304     c1.getPRs().add(tokeniser);
305     c1.getPRs().add(gaz);
306     c1.getPRs().add(trans);
307 
308     Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
309     Out.prln("dumping state");
310 
311     // create a File to store the state in
312     File stateFile = new File("z:\\tmp", "SerialisedGateState.gzsr");
313 
314     // dump the state into the new File
315     try {
316       ObjectOutputStream oos = new ObjectOutputStream(
317         new GZIPOutputStream(new FileOutputStream(stateFile))
318       );
319       oos.writeObject(new SessionState());
320       oos.close();
321     } catch(IOException e) {
322       throw new GateException("Couldn't write to state file: " + e);
323     }
324 
325     Out.prln(System.getProperty("user.home"));
326 
327     Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
328     Out.prln("reinstating");
329 
330     try {
331       FileInputStream fis = new FileInputStream(stateFile);
332       GZIPInputStream zis = new GZIPInputStream(fis);
333       ObjectInputStream ois = new ObjectInputStream(zis);
334       SessionState state = (SessionState) ois.readObject();
335       ois.close();
336     } catch(IOException e) {
337       throw
338         new GateException("Couldn't read file "+stateFile+": "+e);
339     } catch(ClassNotFoundException ee) {
340       throw
341         new GateException("Couldn't find class: "+ee);
342     }
343 
344     Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
345     Out.prln("done");
346   } // runNerc()
347 
348 
349   /** Inner class for holding CR and DSR for serialisation experiments */
350   class SessionState implements Serializable {
351     SessionState() {
352       cr = Gate.getCreoleRegister();
353       dsr = Gate.getDataStoreRegister();
354     }
355 
356     CreoleRegister cr;
357 
358     DataStoreRegister dsr;
359 
360     // other state from Gate? and elsewhere?
361   } // SessionState
362 
363   /** Generate a random integer for file naming. */
364   protected static int random() {
365     return randomiser.nextInt(9999);
366   } // random
367 
368   /**
369    * Generates an index for a corpus in a datastore on Valy's computer in order
370    * to have some test data.
371    */
372   public static void createIndex() throws Exception{
373     String dsURLString = "file:///d:/temp/ds";
374     String indexLocation = "d:/temp/ds.idx";
375 
376     Gate.init();
377 
378     //open the datastore
379     SerialDataStore sds = (SerialDataStore)Factory.openDataStore(
380                             "gate.persist.SerialDataStore", dsURLString);
381     sds.open();
382     List corporaIds = sds.getLrIds("gate.corpora.SerialCorpusImpl");
383     IndexedCorpus corpus = (IndexedCorpus)
384                            sds.getLr("gate.corpora.SerialCorpusImpl",
385 
386                                      corporaIds.get(0));
387     DefaultIndexDefinition did = new DefaultIndexDefinition();
388     did.setIrEngineClassName(gate.creole.ir.lucene.
389                              LuceneIREngine.class.getName());
390 
391     did.setIndexLocation(indexLocation);
392     did.addIndexField(new IndexField("body", new ContentPropertyReader(), false));
393 
394     corpus.setIndexDefinition(did);
395 
396     Out.prln("removing old index");
397     corpus.getIndexManager().deleteIndex();
398     Out.prln("building new index");
399     corpus.getIndexManager().createIndex();
400     Out.prln("optimising new index");
401     corpus.getIndexManager().optimizeIndex();
402     Out.prln("saving corpus");
403     sds.sync(corpus);
404     Out.prln("done!");
405   }
406 
407   /**
408    *
409    * @param file a TXT file containing the text
410    */
411   public static void tokeniseFile(File file) throws Exception{
412     //initialise GATE (only call it once!!)
413     Gate.init();
414     //create the document
415     Document doc = Factory.newDocument(file.toURL());
416     //create the tokeniser
417     DefaultTokeniser tokeniser = (DefaultTokeniser)Factory.createResource(
418       "gate.creole.tokeniser.DefaultTokeniser");
419 
420     //tokenise the document
421     tokeniser.setParameterValue(DefaultTokeniser.DEF_TOK_DOCUMENT_PARAMETER_NAME, doc);
422     tokeniser.execute();
423 
424     //extract data from document
425     //we need tokens and spaces
426     Set annotationTypes = new HashSet();
427     annotationTypes.add(ANNIEConstants.TOKEN_ANNOTATION_TYPE);
428     annotationTypes.add(ANNIEConstants.SPACE_TOKEN_ANNOTATION_TYPE);
429 
430     List tokenList = new ArrayList(doc.getAnnotations().get(annotationTypes));
431     Collections.sort(tokenList, new OffsetComparator());
432 
433     //iterate through the tokens
434     Iterator tokIter = tokenList.iterator();
435     while(tokIter.hasNext()){
436       Annotation anAnnotation = (Annotation)tokIter.next();
437       System.out.println("Annotation: (" +
438                         anAnnotation.getStartNode().getOffset().toString() +
439                         ", " + anAnnotation.getEndNode().getOffset().toString() +
440                         "[type: " + anAnnotation.getType() +
441                          ", features: " + anAnnotation.getFeatures().toString()+
442                          "]" );
443     }
444   }
445 
446 
447   public static class ContentPropertyReader implements PropertyReader{
448     public String getPropertyValue(gate.Document doc){
449       return doc.getContent().toString();
450     }
451   }
452 
  /**
   * Random number generator. A single instance created when the class is
   * loaded; <code>random()</code> draws its values from it.
   */
  protected static Random randomiser = new Random();
455 
456 } // class Scratch
457 
458 
459