|
Scratch |
|
1 /* 2 * Scratch.java 3 * 4 * Copyright (c) 1998-2001, The University of Sheffield. 5 * 6 * This file is part of GATE (see http://gate.ac.uk/), and is free 7 * software, licenced under the GNU Library General Public License, 8 * Version 2, June 1991 (in the distribution as file licence.html, 9 * and also available at http://gate.ac.uk/gate/licence.html). 10 * 11 * Hamish Cunningham, 22/03/00 12 * 13 * $Id: Scratch.java,v 1.68 2003/07/29 12:46:53 valyt Exp $ 14 */ 15 16 17 package gate.util; 18 19 import java.util.*; 20 import java.net.*; 21 import java.io.*; 22 import java.util.zip.*; 23 24 import gate.*; 25 import gate.creole.*; 26 import gate.creole.ir.*; 27 import gate.creole.tokeniser.*; 28 import gate.creole.gazetteer.*; 29 import gate.persist.*; 30 import gate.gui.*; 31 32 import org.xml.sax.*; 33 import javax.xml.parsers.*; 34 import org.w3c.www.mime.*; 35 36 /** A scratch pad for experimenting. 37 */ 38 public class Scratch 39 { 40 /** Debug flag */ 41 private static final boolean DEBUG = false; 42 43 public static void main(String args[]) throws Exception { 44 45 Gate.init(); 46 Document doc = Factory.newDocument("The quick brown fox jumped over the lazy dog"); 47 AnnotationSet annSet1 = doc.getAnnotations("Set1"); 48 annSet1.add(new Long(1), new Long(5), "Foo", Factory.newFeatureMap()); 49 50 AnnotationSet annSet2 = doc.getAnnotations("Set2"); 51 annSet2.add(new Long(1), new Long(5), "Bar", Factory.newFeatureMap()); 52 annSet2.addAll(annSet1); 53 54 List annotations = new ArrayList(annSet2); 55 Collections.sort(annotations, new OffsetComparator()); 56 Iterator annIter = annotations.iterator(); 57 while(annIter.hasNext()){ 58 Annotation ann =(Annotation)annIter.next(); 59 System.out.print("Start node: ID = " + ann.getStartNode().getId()); 60 System.out.println(" Offset = " + ann.getStartNode().getOffset()); 61 System.out.print("End node: ID = " + ann.getEndNode().getId()); 62 System.out.println(" Offset = " + ann.getEndNode().getOffset()); 63 64 } 65 // File tempFile = File.createTempFile("gaga", ""); 66 // tempFile.delete(); 67 // tempFile.mkdir(); 68 // tempFile.deleteOnExit(); 69 // File tempFile2 = File.createTempFile("fil", ".tmp", tempFile); 70 // tempFile2.deleteOnExit(); 71 //System.out.println(tempFile.getCanonicalPath()); 72 // Thread.sleep(100000); 73 // 74 // Map charsets = java.nio.charset.Charset.availableCharsets(); 75 // Iterator namesIter = charsets.keySet().iterator(); 76 // while(namesIter.hasNext()){ 77 // String name = (String)namesIter.next(); 78 // System.out.println(name + " : " + charsets.get(name)); 79 // } 80 // System.out.println(System.getProperty("file.encoding")); 81 // System.out.println(java.nio.charset.Charset.forName(System.getProperty("file.encoding")).name()); 82 // System.out.println(new Character((char)0xa3)); 83 // Gate.init(); 84 // 85 // List classes = Tools.findSubclasses(gate.creole.ir.Search.class); 86 // if(classes != null) for(int i = 0; i < classes.size(); i++){ 87 // Out.prln(classes.get(i).toString()); 88 // } 89 // createIndex(); 90 // URL anURL = new URL("file:/z:/a/b/c/d.txt"); 91 // URL anotherURL = new URL("file:/z:/a/b/c/d.txt"); 92 // String relPath = gate.util.persistence.PersistenceManager. 93 // getRelativePath(anURL, anotherURL); 94 // Out.prln("Context: " + anURL); 95 // Out.prln("Target: " + anotherURL); 96 // Out.prln("Relative path: " + relPath); 97 // Out.prln("Result " + new URL(anURL, relPath)); 98 // javax.swing.text.FlowView fv; 99 // javax.swing.UIManager.setLookAndFeel(javax.swing.UIManager.getSystemLookAndFeelClassName()); 100 // Map uidefaults = (Map)javax.swing.UIManager.getDefaults(); 101 // List keys = new ArrayList(uidefaults.keySet()); 102 // Collections.sort(keys); 103 // Iterator keyIter = keys.iterator(); 104 // while(keyIter.hasNext()){ 105 // Object key = keyIter.next(); 106 // System.out.println(key + " : " + uidefaults.get(key)); 107 // } 108 109 // initialise the thing 110 // Gate.setNetConnected(false); 111 // Gate.setLocalWebServer(false); 112 // Gate.init(); 113 114 // Scratch oneOfMe = new Scratch(); 115 // try{ 116 // oneOfMe.runNerc(); 117 // } catch (Exception e) { 118 // e.printStackTrace(Out.getPrintWriter()); 119 // } 120 121 122 // CreoleRegister reg = Gate.getCreoleRegister(); 123 //System.out.println("Instances for " + reg.getLrInstances("gate.creole.AnnotationSchema")); 124 //System.out.println("Instances for " + reg.getAllInstances ("gate.creole.AnnotationSchema")); 125 126 //System.out.println("VRs for " + reg.getAnnotationVRs("Tree")); 127 //System.out.println("VRs for " + reg.getAnnotationVRs()); 128 129 //System.out.println(reg.getLargeVRsForResource("gate.corpora.DocumentImpl")); 130 131 } // main 132 133 /** Example of using an exit-time hook. */ 134 public static void exitTimeHook() { 135 Runtime.getRuntime().addShutdownHook(new Thread() { 136 public void run() { 137 System.out.println("shutting down"); 138 System.out.flush(); 139 140 // create a File to store the state in 141 File stateFile = new File("z:\\tmp", "GateGuiState.gzsr"); 142 143 // dump the state into the new File 144 try { 145 ObjectOutputStream oos = new ObjectOutputStream( 146 new GZIPOutputStream(new FileOutputStream(stateFile)) 147 ); 148 System.out.println("writing main frame"); 149 System.out.flush(); 150 oos.writeObject(Main.getMainFrame()); 151 oos.close(); 152 } catch(Exception e) { 153 System.out.println("Couldn't write to state file: " + e); 154 } 155 156 System.out.println("done"); 157 System.out.flush(); 158 } 159 }); 160 } // exitTimeHook() 161 162 /** 163 * ***** <B>Failed</B> ***** 164 * attempt to serialise whole gui state - various swing components 165 * don't like to be serialised :-(. might be worth trying again when 166 * jdk1.4 arrives. 167 */ 168 public static void dumpGuiState() { 169 System.out.println("dumping gui state..."); 170 System.out.flush(); 171 172 // create a File to store the state in 173 File stateFile = new File("z:\\tmp", "GateGuiState.gzsr"); 174 175 // dump the state into the new File 176 try { 177 ObjectOutputStream oos = new ObjectOutputStream( 178 new GZIPOutputStream(new FileOutputStream(stateFile)) 179 ); 180 MainFrame mf = Main.getMainFrame(); 181 182 // wait for 1 sec 183 long startTime = System.currentTimeMillis(); 184 long timeNow = System.currentTimeMillis(); 185 while(timeNow - startTime < 3000){ 186 try { 187 Thread.sleep(150); 188 timeNow = System.currentTimeMillis(); 189 } catch(InterruptedException ie) {} 190 } 191 192 System.out.println("writing main frame"); 193 System.out.flush(); 194 oos.writeObject(mf); 195 oos.close(); 196 } catch(Exception e) { 197 System.out.println("Couldn't write to state file: " + e); 198 } 199 200 System.out.println("...done gui dump"); 201 System.out.flush(); 202 } // dumpGuiState 203 204 /** 205 * Run NERC and print out the various stages (doesn't actually 206 * use Nerc but the individual bits), and serialise then deserialise 207 * the NERC system. 208 */ 209 public void runNerc() throws Exception { 210 long startTime = System.currentTimeMillis(); 211 212 Out.prln("gate init"); 213 Gate.setLocalWebServer(false); 214 Gate.setNetConnected(false); 215 Gate.init(); 216 217 Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); 218 Out.prln("creating resources"); 219 220 // a controller 221 Controller c1 = (Controller) Factory.createResource( 222 "gate.creole.SerialController", 223 Factory.newFeatureMap() 224 ); 225 c1.setName("Scratch controller"); 226 227 //get a document 228 FeatureMap params = Factory.newFeatureMap(); 229 params.put(Document.DOCUMENT_URL_PARAMETER_NAME, Gate.getUrl("tests/doc0.html")); 230 params.put(Document.DOCUMENT_MARKUP_AWARE_PARAMETER_NAME, "false"); 231 Document doc = (Document)Factory.createResource("gate.corpora.DocumentImpl", 232 params); 233 234 //create a default tokeniser 235 params = Factory.newFeatureMap(); 236 params.put(DefaultTokeniser.DEF_TOK_TOKRULES_URL_PARAMETER_NAME, 237 "gate:/creole/tokeniser/DefaultTokeniser.rules"); 238 params.put(DefaultTokeniser.DEF_TOK_ENCODING_PARAMETER_NAME, "UTF-8"); 239 params.put(DefaultTokeniser.DEF_TOK_DOCUMENT_PARAMETER_NAME, doc); 240 ProcessingResource tokeniser = (ProcessingResource) Factory.createResource( 241 "gate.creole.tokeniser.DefaultTokeniser", params 242 ); 243 244 //create a default gazetteer 245 params = Factory.newFeatureMap(); 246 params.put(DefaultGazetteer.DEF_GAZ_DOCUMENT_PARAMETER_NAME, doc); 247 params.put(DefaultGazetteer.DEF_GAZ_LISTS_URL_PARAMETER_NAME, 248 "gate:/creole/gazeteer/default/lists.def"); 249 ProcessingResource gaz = (ProcessingResource) Factory.createResource( 250 "gate.creole.gazetteer.DefaultGazetteer", params 251 ); 252 253 //create a default transducer 254 params = Factory.newFeatureMap(); 255 params.put(Transducer.TRANSD_DOCUMENT_PARAMETER_NAME, doc); 256 //params.put("grammarURL", new File("z:\\tmp\\main.jape").toURL()); 257 ProcessingResource trans = (ProcessingResource) Factory.createResource( 258 "gate.creole.Transducer", params 259 ); 260 261 // get the controller to encapsulate the tok and gaz 262 c1.getPRs().add(tokeniser); 263 c1.getPRs().add(gaz); 264 c1.getPRs().add(trans); 265 266 Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); 267 Out.prln("dumping state"); 268 269 // create a File to store the state in 270 File stateFile = new File("z:\\tmp", "SerialisedGateState.gzsr"); 271 272 // dump the state into the new File 273 try { 274 ObjectOutputStream oos = new ObjectOutputStream( 275 new GZIPOutputStream(new FileOutputStream(stateFile)) 276 ); 277 oos.writeObject(new SessionState()); 278 oos.close(); 279 } catch(IOException e) { 280 throw new GateException("Couldn't write to state file: " + e); 281 } 282 283 Out.prln(System.getProperty("user.home")); 284 285 Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); 286 Out.prln("reinstating"); 287 288 try { 289 FileInputStream fis = new FileInputStream(stateFile); 290 GZIPInputStream zis = new GZIPInputStream(fis); 291 ObjectInputStream ois = new ObjectInputStream(zis); 292 SessionState state = (SessionState) ois.readObject(); 293 ois.close(); 294 } catch(IOException e) { 295 throw 296 new GateException("Couldn't read file "+stateFile+": "+e); 297 } catch(ClassNotFoundException ee) { 298 throw 299 new GateException("Couldn't find class: "+ee); 300 } 301 302 Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); 303 Out.prln("done"); 304 } // runNerc() 305 306 307 /** Inner class for holding CR and DSR for serialisation experiments */ 308 class SessionState implements Serializable { 309 SessionState() { 310 cr = Gate.getCreoleRegister(); 311 dsr = Gate.getDataStoreRegister(); 312 } 313 314 CreoleRegister cr; 315 316 DataStoreRegister dsr; 317 318 // other state from Gate? and elsewhere? 319 } // SessionState 320 321 /** Generate a random integer for file naming. */ 322 protected static int random() { 323 return randomiser.nextInt(9999); 324 } // random 325 326 /** 327 * Generates an index for a corpus in a datastore on Valy's computer in order 328 * to have some test data. 329 */ 330 public static void createIndex() throws Exception{ 331 String dsURLString = "file:///d:/temp/ds"; 332 String indexLocation = "d:/temp/ds.idx"; 333 334 Gate.init(); 335 336 //open the datastore 337 SerialDataStore sds = (SerialDataStore)Factory.openDataStore( 338 "gate.persist.SerialDataStore", dsURLString); 339 sds.open(); 340 List corporaIds = sds.getLrIds("gate.corpora.SerialCorpusImpl"); 341 IndexedCorpus corpus = (IndexedCorpus) 342 sds.getLr("gate.corpora.SerialCorpusImpl", 343 344 corporaIds.get(0)); 345 DefaultIndexDefinition did = new DefaultIndexDefinition(); 346 did.setIrEngineClassName(gate.creole.ir.lucene. 347 LuceneIREngine.class.getName()); 348 349 did.setIndexLocation(indexLocation); 350 did.addIndexField(new IndexField("body", new ContentPropertyReader(), false)); 351 352 corpus.setIndexDefinition(did); 353 354 Out.prln("removing old index"); 355 corpus.getIndexManager().deleteIndex(); 356 Out.prln("building new index"); 357 corpus.getIndexManager().createIndex(); 358 Out.prln("optimising new index"); 359 corpus.getIndexManager().optimizeIndex(); 360 Out.prln("saving corpus"); 361 sds.sync(corpus); 362 Out.prln("done!"); 363 } 364 365 /** 366 * 367 * @param file a TXT file containing the text 368 */ 369 public static void tokeniseFile(File file) throws Exception{ 370 //initialise GATE (only call it once!!) 371 Gate.init(); 372 //create the document 373 Document doc = Factory.newDocument(file.toURL()); 374 //create the tokeniser 375 DefaultTokeniser tokeniser = (DefaultTokeniser)Factory.createResource( 376 "gate.creole.tokeniser.DefaultTokeniser"); 377 378 //tokenise the document 379 tokeniser.setParameterValue(DefaultTokeniser.DEF_TOK_DOCUMENT_PARAMETER_NAME, doc); 380 tokeniser.execute(); 381 382 //extract data from document 383 //we need tokens and spaces 384 Set annotationTypes = new HashSet(); 385 annotationTypes.add(ANNIEConstants.TOKEN_ANNOTATION_TYPE); 386 annotationTypes.add(ANNIEConstants.SPACE_TOKEN_ANNOTATION_TYPE); 387 388 List tokenList = new ArrayList(doc.getAnnotations().get(annotationTypes)); 389 Collections.sort(tokenList, new OffsetComparator()); 390 391 //iterate through the tokens 392 Iterator tokIter = tokenList.iterator(); 393 while(tokIter.hasNext()){ 394 Annotation anAnnotation = (Annotation)tokIter.next(); 395 System.out.println("Annotation: (" + 396 anAnnotation.getStartNode().getOffset().toString() + 397 ", " + anAnnotation.getEndNode().getOffset().toString() + 398 "[type: " + anAnnotation.getType() + 399 ", features: " + anAnnotation.getFeatures().toString()+ 400 "]" ); 401 } 402 } 403 404 405 public static class ContentPropertyReader implements PropertyReader{ 406 public String getPropertyValue(gate.Document doc){ 407 return doc.getContent().toString(); 408 } 409 } 410 411 /** Random number generator */ 412 protected static Random randomiser = new Random(); 413 414 } // class Scratch 415 416 417
|
Scratch |
|