|
TestJape |
|
/*
 *  TestJape.java
 *
 *  Copyright (c) 1998-2001, The University of Sheffield.
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Hamish Cunningham, 23/Feb/00
 *
 *  $Id: TestJape.java,v 1.46 2002/02/26 13:49:09 valyt Exp $
 */

package gate.jape;

import java.util.*;
import java.io.*;
import java.text.*;
import junit.framework.*;

import gate.*;
import gate.util.*;
import gate.annotation.*;
import gate.creole.tokeniser.*;
import gate.creole.gazetteer.*;
import gate.creole.*;



/** Tests for the JAPE classes: grammars are loaded through {@link Batch}
  * and run over small corpora / documents.
  */
public class TestJape extends TestCase
{
  /** Debug flag */
  private static final boolean DEBUG = false;

  /** Construction */
  public TestJape(String name) { super(name); }

  /** Fixture set up */
  public void setUp() {
    //Out.println("TestJape.setUp()");
  } // setUp

  /** Test using the large "combined" grammar from the gate/resources
    * tree. The leading underscore in the name keeps this method out of the
    * suite built by {@link #suite()}, which only collects methods whose
    * names start with "test".
    */
  public void _testCombined() throws IOException, GateException, Exception {
    DoTestBigGrammar("AveShort");

    /*
    Corpus c = Factory.newCorpus("TestJape corpus");
    c.add(
      Factory.newDocument(Files.getResourceAsString("texts/doc0.html"))
    );

    //add some annotations on the first (only) document in corpus c
    Document doc = (Document) c.first();
    AnnotationSet defaultAS = doc.getAnnotations();
    FeatureMap feat = Factory.newFeatureMap();
    defaultAS.add(new Long( 2), new Long( 4), "A",feat);
    defaultAS.add(new Long( 4), new Long(6), "B",feat);
    defaultAS.add(new Long(6), new Long(8), "C",feat);
    defaultAS.add(new Long(8), new Long(10), "C",feat);

    // run the parser test
    Gate.init();
    Batch batch = null;
    batch = new Batch("jape/combined/", "main.jape");

    // test the transducers
    batch.transduce(c);
    //Out.println(batch.getTransducer());

    // check the results
    doc = (Document)c.first();
    */
  } // testCombined()

  /** Batch run: builds a one-document corpus, adds a few "A", "B" and "C"
    * annotations to it and runs the TestABC.jape grammar over the corpus.
    * The test makes no assertions on the resulting annotations; it passes
    * as long as nothing throws.
    *
    * @throws Exception if the fixture cannot be built (including an
    *   invalid annotation offset) or the grammar cannot be loaded or run
    */
  public void testBatch() throws Exception{
    Corpus c = Factory.newCorpus("TestJape corpus");
    c.add(
      Factory.newDocument(Files.getGateResourceAsString("texts/doc0.html"))
    );
    //add some annotations on the first (only) document in corpus c
    Document doc = (Document)c.get(0);
    AnnotationSet defaultAS = doc.getAnnotations();

    // An InvalidOffsetException here means the fixture is broken, so it is
    // left to propagate and fail the test instead of being printed and
    // ignored (which would let the grammar run over an unannotated document).
    FeatureMap feat = Factory.newFeatureMap();
    // defaultAS.add(new Long( 0), new Long( 2), "A",feat);
    defaultAS.add(new Long( 2), new Long( 4), "A",feat);
    // defaultAS.add(new Long( 4), new Long( 6), "A",feat);
    // defaultAS.add(new Long( 6), new Long( 8), "A",feat);
    defaultAS.add(new Long( 4), new Long(6), "B",feat);
    // defaultAS.add(new Long(10), new Long(12), "B",feat);
    // defaultAS.add(new Long(12), new Long(14), "B",feat);
    // defaultAS.add(new Long(14), new Long(16), "B",feat);
    // defaultAS.add(new Long(16), new Long(18), "B",feat);
    defaultAS.add(new Long(6), new Long(8), "C",feat);
    defaultAS.add(new Long(8), new Long(10), "C",feat);
    // defaultAS.add(new Long(22), new Long(24), "C",feat);
    // defaultAS.add(new Long(24), new Long(26), "C",feat);

    /*
    // run the parser test
    Batch batch = null;
    // String japeFileName = "/gate/jape/Test11.jape";
    String japeFileName = Files.getResourcePath() + "/jape/TestABC.jape";
    // String japeFileName = "/gate/jape/Country.jape";
    InputStream japeFileStream = Files.getResourceAsStream(japeFileName);
    if(japeFileStream == null)
      throw new JapeException("couldn't open " + japeFileName);
    */
    Batch batch = new Batch(TestJape.class.getResource(
      Files.getResourcePath() + "/jape/TestABC.jape"), "UTF-8");
    // test code: print the first line of the jape stream
    // Out.println(
    //   new BufferedReader(new InputStreamReader(japeFileStream)).readLine()
    // );

    // test the transducers
    batch.transduce(c);
    // check the results
    doc = (Document)c.get(0);
    // defaultAS = doc.getAnnotations();
    // Out.println(defaultAS);
  } // testBatch()

  /** Runs the tokeniser, the gazetteer and the "combined" JAPE grammar over
    * a single text, printing the elapsed time of each stage through
    * {@link Out}.
    *
    * @param textName name of the input text, resolved as the GATE resource
    *   "jape/InputTexts/" + textName
    * @throws GateException if the tokeniser cannot be created, or if any
    *   other GATE call not handled below fails
    * @throws Exception declared by the original signature; kept for
    *   compatibility with existing callers
    */
  public void DoTestBigGrammar(String textName) throws GateException, Exception{
    long startCorpusLoad = 0, startCorpusTokenization = 0,
         startGazeteerLoad = 0, startLookup = 0,
         startJapeFileOpen = 0, startCorpusTransduce = 0,
         endProcess = 0;
    Out.print("Procesing " + textName + "...\n" +
              "Started at: " + (new Date()) + "\n");
    startCorpusLoad = System.currentTimeMillis();
    Out.print("Loading corpus... ");
    Corpus corpus = Factory.newCorpus("Jape Corpus");
    try {
      corpus.add(Factory.newDocument(
        Files.getGateResourceAsString("jape/InputTexts/" + textName)));
    } catch(IOException ioe) {
      // best effort: the empty-corpus check below reports the problem
      ioe.printStackTrace(Err.getPrintWriter());
    }

    if(corpus.isEmpty()) {
      Err.println("Missing corpus !");
      return;
    }

    //tokenize all documents
    //create a default tokeniser; a ResourceInstantiationException is left to
    //propagate, because carrying on without a tokeniser could only end in a
    //NullPointerException that hides the real cause
    FeatureMap tokeniserParams = Factory.newFeatureMap();
    DefaultTokeniser tokeniser = (DefaultTokeniser) Factory.createResource(
      "gate.creole.tokeniser.DefaultTokeniser", tokeniserParams);
    /*Files.getResourceAsStream("creole/tokeniser/DefaultTokeniser.rules"));*/

    startCorpusTokenization = System.currentTimeMillis();
    Out.print(": " +
              (startCorpusTokenization - startCorpusLoad) +
              "ms\n");

    Out.print("Tokenizing the corpus... ");
    Iterator docIter = corpus.iterator();
    Document currentDoc;
    while(docIter.hasNext()){
      currentDoc = (Document)docIter.next();
      tokeniser.setDocument(currentDoc);
      //use the default annotation set
      tokeniser.setAnnotationSetName(null);
      tokeniser.execute();
      // Verify that all annotations from the default annotation set are
      // consistent
      gate.corpora.TestDocument.verifyNodeIdConsistency(currentDoc);
    }

    // startJapeFileOpen doubles as the "tokenisation finished" timestamp
    // here; it is overwritten again once the gazetteer stage has finished
    startJapeFileOpen = System.currentTimeMillis();
    Out.print(": " + (startJapeFileOpen - startCorpusTokenization) +
              "ms\n");

    //Do gazetteer lookup
    gate.creole.gazetteer.DefaultGazetteer gazeteer = null;
    startGazeteerLoad = startLookup = System.currentTimeMillis();
    Out.print("Loading gazeteer lists...");
    try {
      //create a default gazetteer
      FeatureMap params = Factory.newFeatureMap();
      gazeteer = (DefaultGazetteer) Factory.createResource(
        "gate.creole.gazetteer.DefaultGazetteer", params);
      // NOTE(review): Factory.createResource may already initialise the
      // resource, in which case this second init() reloads the lists and
      // inflates the reported load time - confirm before removing.
      gazeteer.init();
      startLookup = System.currentTimeMillis();
      Out.print(": " +
                (startLookup - startGazeteerLoad) +
                "ms\n");

      Out.print("Doing gazeteer lookup... ");
      docIter = corpus.iterator();
      while(docIter.hasNext()){
        currentDoc = (Document)docIter.next();
        gazeteer.setDocument(currentDoc);
        gazeteer.execute();
        // Verify that all annotations from the default annotation set are
        // consistent
        gate.corpora.TestDocument.verifyNodeIdConsistency(currentDoc);
      }
    } catch(ResourceInstantiationException re) {
      // deliberate best effort: the JAPE stage still runs without lookups
      Err.println("Cannot read the gazeteer lists!" +
                  "\nAre the Gate resources in place?\n" + re);
    }

    startJapeFileOpen = System.currentTimeMillis();
    Out.print(": " + (startJapeFileOpen - startLookup) +
              "ms\n");


    //do the jape stuff
    // NOTE(review): Gate.init() is only called here, after Factory has
    // already been used above - confirm whether it should run first.
    Gate.init();


    try {
      Out.print("Opening Jape grammar... ");
      Batch batch = new Batch(TestJape.class.getResource(
        Files.getResourcePath() + "/jape/combined/main.jape"), "UTF-8");
      /*
      Batch batch = new Batch("jape/combined/", "brian-soc-loc1.jape");
      Batch batch =
        new Batch("z:/gate/src/gate/resources/jape/combined/main.jape");
      Batch batch = new Batch("jape/", "Country.jape");
      */
      startCorpusTransduce = System.currentTimeMillis();
      Out.print(": " + (startCorpusTransduce - startJapeFileOpen) +
                "ms\n");
      Out.print("Transducing the corpus... ");
      batch.transduce(corpus);
      endProcess = System.currentTimeMillis();
      Out.print(": " + (endProcess - startCorpusTransduce) + "ms\n");
    } catch(JapeException je) {
      je.printStackTrace(Err.getPrintWriter());
    }
  } // DoTestBigGrammar

  /**
   * This test sets up a JAPE transducer based on a grammar
   * (RhsError.jape) that will throw a null pointer exception.
   * The test succeeds so long as an exception (of any type) is thrown while
   * the grammar is being loaded or run.
   */
  public void testRhsErrorMessages() {
    boolean gotException = false;

    try {
      if(DEBUG) {
        Out.print(
          "Opening Jape grammar... " + Gate.getUrl("tests/RhsError.jape")
        );
      }
      // a JAPE batcher
      Batch batch = new Batch(Gate.getUrl("tests/RhsError.jape"), "UTF-8");

      // a document with an annotation
      Document doc = Factory.newDocument("This is a Small Document.");
      FeatureMap features = Factory.newFeatureMap();
      features.put("orth", "upperInitial");
      doc.getAnnotations().add(new Long(0), new Long(8), "Token", features);

      // run jape on the document
      batch.transduce(doc);
    } catch(Exception e) {
      if(DEBUG) Out.prln(e);
      gotException = true;
    }

    assertTrue("Bad JAPE grammar didn't throw an exception", gotException);

  } // testRhsErrorMessages

//  /**
//   * This test sets up a JAPE transducer based on a grammar
//   * (RhsError2.jape) that will throw a compiler error.
//   * The test succeeds so long as we get that exception.
//   */
//  public void testRhsErrorMessages2() {
//    boolean gotException = false;
//
//    // disable System.out so that the compiler can't splash its error on screen
//    if(DEBUG) System.out.println("hello 1");
//    PrintStream sysout = System.out;
//    System.setOut(new PrintStream(new ByteArrayOutputStream()));
//    if(DEBUG) System.out.println("hello 2");
//
//    // run a JAPE batch on the faulty grammar
//    try {
//      if(DEBUG) {
//        Out.print(
//          "Opening Jape grammar... " + Gate.getUrl("tests/RhsError2.jape")
//        );
//      }
//      // a JAPE batcher
//      Batch batch = new Batch(Gate.getUrl("tests/RhsError2.jape"), "UTF-8");
//    } catch(Exception e) {
//      if(DEBUG) Out.prln(e);
//      gotException = true;
//    } finally {
//
//      // re-enable System.out
//      System.setOut(sysout);
//      if(DEBUG) System.out.println("hello 3");
//    }
//
//    assertTrue("Bad JAPE grammar (2) didn't throw an exception", gotException);
//
//  } // testRhsErrorMessages2
//

  /** Test suite routine for the test runner */
  public static Test suite() {
    return new TestSuite(TestJape.class);
  } // suite

  /** Main method for running this test as a standalone test. Runs the big
    * grammar six times, requesting a garbage collection before each run.
    *
    * @param args optional; args[0] is the name of the input text to use,
    *   "AveShort" is used when no argument is given
    */
  public static void main(String[] args) {
    for(int i = 0; i < 6; i++){
      System.gc();
      Out.println("Run " + i + " ==============");
      try{
        TestJape testJape = new TestJape("Test Jape");
        testJape.setUp();
        if(args.length < 1) testJape.DoTestBigGrammar("AveShort");
        else testJape.DoTestBigGrammar(args[0]);
      } catch(Exception e) {
        e.printStackTrace(Err.getPrintWriter());
      }
    }
  }
} // class TestJape
|
TestJape |
|