/*
 *  TestJape.java
 *
 *  Copyright (c) 1998-2001, The University of Sheffield.
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Hamish Cunningham, 23/Feb/00
 *
 *  Minor modifications made by Luc Plamondon, Université de Montréal, 27/11/03:
 *  - migrated original file from gate.jape to
 *    ca.umontreal.iro.rali.gate.jape package.
 *
 *  $Id$
 */

package ca.umontreal.iro.rali.gate.jape;

import java.util.*;
import java.io.*;
import java.text.*;
import junit.framework.*;

import gate.*;
import gate.util.*;
import gate.annotation.*;
import gate.creole.tokeniser.*;
import gate.creole.gazetteer.*;
import gate.creole.*;

/**
 * Tests for the JAPE classes: runs {@link Batch} transducers over small
 * hand-annotated documents and over a tokenised, gazetteer-annotated corpus.
 */
public class TestJape extends TestCase
{
  /** Debug flag */
  private static final boolean DEBUG = false;

  /** Construction */
  public TestJape(String name) { super(name); }

  /** Fixture set up (nothing to prepare at the moment) */
  public void setUp() {
    //Out.println("TestJape.setUp()");
  } // setUp

  /**
   * Test using the large "combined" grammar from the gate/resources tree.
   * The leading underscore in the name keeps the JUnit 3 runner from
   * picking this method up automatically (it only collects methods whose
   * names start with "test").
   */
  public void _testCombined() throws IOException, GateException, Exception {
    DoTestBigGrammar("AveShort");
    /*
    Corpus c = Factory.newCorpus("TestJape corpus");
    c.add(
      Factory.newDocument(Files.getResourceAsString("texts/doc0.html"))
    );

    //add some annotations on the first (only) document in corpus c
    Document doc = (Document) c.first();
    AnnotationSet defaultAS = doc.getAnnotations();
    FeatureMap feat = Factory.newFeatureMap();
    defaultAS.add(new Long( 2), new Long( 4), "A",feat);
    defaultAS.add(new Long( 4), new Long(6), "B",feat);
    defaultAS.add(new Long(6), new Long(8), "C",feat);
    defaultAS.add(new Long(8), new Long(10), "C",feat);

    // run the parser test
    Gate.init();
    Batch batch = null;
    batch = new Batch("jape/combined/", "main.jape");

    // test the transducers
    batch.transduce(c);
    //Out.println(batch.getTransducer());

    // check the results
    doc = (Document)c.first();
    */
  } // testCombined()

  /**
   * Batch run: loads texts/doc0.html into a one-document corpus, adds a few
   * "A", "B" and "C" annotations to its default annotation set and runs the
   * TestABC.jape grammar over the corpus.
   * No assertions are made on the result; the test passes when nothing throws.
   *
   * @throws Exception if the document cannot be loaded, an annotation offset
   *         is rejected, or the grammar cannot be loaded or run
   */
  public void testBatch() throws Exception {
    Corpus c = Factory.newCorpus("TestJape corpus");
    c.add(
      Factory.newDocument(Files.getGateResourceAsString("texts/doc0.html"))
    );

    //add some annotations on the first (only) document in corpus c
    Document doc = (Document)c.get(0);
    AnnotationSet defaultAS = doc.getAnnotations();

    // An InvalidOffsetException raised here means the fixture itself is
    // broken, so it is allowed to propagate and fail the test rather than
    // being printed and ignored.
    FeatureMap feat = Factory.newFeatureMap();
    // defaultAS.add(new Long( 0), new Long( 2), "A",feat);
    defaultAS.add(new Long( 2), new Long( 4), "A",feat);
    // defaultAS.add(new Long( 4), new Long( 6), "A",feat);
    // defaultAS.add(new Long( 6), new Long( 8), "A",feat);
    defaultAS.add(new Long( 4), new Long(6), "B",feat);
    // defaultAS.add(new Long(10), new Long(12), "B",feat);
    // defaultAS.add(new Long(12), new Long(14), "B",feat);
    // defaultAS.add(new Long(14), new Long(16), "B",feat);
    // defaultAS.add(new Long(16), new Long(18), "B",feat);
    defaultAS.add(new Long(6), new Long(8), "C",feat);
    defaultAS.add(new Long(8), new Long(10), "C",feat);
    // defaultAS.add(new Long(22), new Long(24), "C",feat);
    // defaultAS.add(new Long(24), new Long(26), "C",feat);

/*
    // run the parser test
    Batch batch = null;
    // String japeFileName = "/gate/jape/Test11.jape";
    String japeFileName = Files.getResourcePath() + "/jape/TestABC.jape";
    // String japeFileName = "/gate/jape/Country.jape";
    InputStream japeFileStream = Files.getResourceAsStream(japeFileName);
    if(japeFileStream == null)
      throw new JapeException("couldn't open " + japeFileName);
*/
    Batch batch = new Batch(
      Files.getGateResource("/jape/TestABC.jape"), "UTF-8");
    // test code: print the first line of the jape stream
    // Out.println(
    //   new BufferedReader(new InputStreamReader(japeFileStream)).readLine()
    // );

    // test the transducers
    batch.transduce(c);

    // check the results
    doc = (Document)c.get(0);
    // defaultAS = doc.getAnnotations();
    // Out.println(defaultAS);
  } // testBatch()

  /**
   * Runs the "combined" JAPE grammar over a single input text and prints the
   * time taken by each stage (corpus load, tokenisation, gazetteer load,
   * gazetteer lookup, grammar load, transduction) to {@link Out}.
   * Failures while loading the text, creating the tokeniser, running the
   * gazetteer or running the grammar are reported on {@link Err}; when the
   * corpus or the tokeniser is unavailable the method returns early.
   *
   * @param textName name of the text, resolved under the GATE resource
   *        directory "jape/InputTexts/"
   * @throws GateException declared for GATE failures not handled locally
   * @throws Exception declared for any other failure not handled locally
   */
  public void DoTestBigGrammar(String textName) throws GateException, Exception{
    long startCorpusLoad = 0, startCorpusTokenization = 0,
         startGazeteerLoad = 0, startLookup = 0,
         startJapeFileOpen = 0, startCorpusTransduce = 0,
         endProcess = 0;
    Out.print("Procesing " + textName + "...\n" +
              "Started at: " + (new Date()) + "\n");
    startCorpusLoad = System.currentTimeMillis();
    Out.print("Loading corpus... ");
    Corpus corpus = Factory.newCorpus("Jape Corpus");
    try {
      corpus.add(Factory.newDocument(
        Files.getGateResourceAsString("jape/InputTexts/" + textName)));
    } catch(IOException ioe) {
      ioe.printStackTrace(Err.getPrintWriter());
    }

    if(corpus.isEmpty()) {
      Err.println("Missing corpus !");
      return;
    }

    //tokenize all documents
    DefaultTokeniser tokeniser = null;
    try {
      //create a default tokeniser
      FeatureMap params = Factory.newFeatureMap();
      tokeniser = (DefaultTokeniser) Factory.createResource(
                            "gate.creole.tokeniser.DefaultTokeniser", params);
      /*Files.getResourceAsStream("creole/tokeniser/DefaultTokeniser.rules"));*/
    } catch(ResourceInstantiationException re) {
      re.printStackTrace(Err.getPrintWriter());
    }

    // Without a tokeniser the loop below would only fail with a
    // NullPointerException, so stop here the same way as for a missing corpus.
    if(tokeniser == null) {
      Err.println("Missing tokeniser !");
      return;
    }

    startCorpusTokenization = System.currentTimeMillis();
    Out.print(": " +
              (startCorpusTokenization - startCorpusLoad) +
              "ms\n");

    Out.print("Tokenizing the corpus... ");
    Iterator docIter = corpus.iterator();
    Document currentDoc;
    while(docIter.hasNext()){
      currentDoc = (Document)docIter.next();
      tokeniser.setDocument(currentDoc);
      //use the default annotation set
      tokeniser.setAnnotationSetName(null);
      tokeniser.execute();
      // Verify that all annotations from the default annotation set are
      // consistent
      gate.corpora.TestDocument.verifyNodeIdConsistency(currentDoc);
    }

    startJapeFileOpen = System.currentTimeMillis();
    Out.print(": " + (startJapeFileOpen - startCorpusTokenization) +
              "ms\n");

    //Do gazetteer lookup
    DefaultGazetteer gazeteer = null;
    startGazeteerLoad = startLookup = System.currentTimeMillis();
    Out.print("Loading gazeteer lists...");
    try {
      //create a default gazetteer
      FeatureMap params = Factory.newFeatureMap();
      gazeteer = (DefaultGazetteer) Factory.createResource(
                            "gate.creole.gazetteer.DefaultGazetteer", params);
      // NOTE(review): Factory.createResource presumably initialises the
      // resource already, which would make this a second init() -- confirm
      // before removing, since it is included in the reported load time.
      gazeteer.init();
      startLookup = System.currentTimeMillis();
      Out.print(": " +
                (startLookup - startGazeteerLoad) +
                "ms\n");

      Out.print("Doing gazeteer lookup... ");
      docIter = corpus.iterator();
      while(docIter.hasNext()){
        currentDoc = (Document)docIter.next();
        gazeteer.setDocument(currentDoc);
        gazeteer.execute();
        // Verify that all annotations from the default annotation set are
        // consistent
        gate.corpora.TestDocument.verifyNodeIdConsistency(currentDoc);
      }
    } catch(ResourceInstantiationException re) {
      Err.println("Cannot read the gazeteer lists!" +
                  "\nAre the Gate resources in place?\n" + re);
    }

    startJapeFileOpen = System.currentTimeMillis();
    Out.print(": " + (startJapeFileOpen - startLookup) +
              "ms\n");

    //do the jape stuff
    // NOTE(review): Gate.init() is only called at this point, after Factory
    // has already been used above -- presumably GATE is initialised earlier
    // when run from the test suite; confirm for the standalone main() path.
    Gate.init();

    try {
      Out.print("Opening Jape grammar... ");
      Batch batch = new Batch(
        Files.getGateResource("/jape/combined/main.jape"), "UTF-8");
      /*
      Batch batch = new Batch("jape/combined/", "brian-soc-loc1.jape");
      Batch batch =
        new Batch("z:/gate/src/gate/resources/jape/combined/main.jape");
      Batch batch = new Batch("jape/", "Country.jape");
      */
      // same clock source as every other timestamp in this method
      startCorpusTransduce = System.currentTimeMillis();
      Out.print(": " + (startCorpusTransduce - startJapeFileOpen) +
                "ms\n");
      Out.print("Transducing the corpus... ");
      batch.transduce(corpus);
      endProcess = System.currentTimeMillis();
      Out.print(": " + (endProcess - startCorpusTransduce) + "ms\n");
    } catch(JapeException je) {
      je.printStackTrace(Err.getPrintWriter());
    }
  } // DoTestBigGrammar

  /**
   * This test sets up a JAPE transducer based on a grammar
   * (RhsError.jape) that, according to the original author, will throw a
   * null pointer exception when run.
   * The test succeeds so long as loading or running the grammar throws
   * some exception (any {@link Exception} is accepted).
   */
  public void testRhsErrorMessages() {
    boolean gotException = false;

    try {
      if(DEBUG) {
        Out.print(
          "Opening Jape grammar... " + Gate.getUrl("tests/RhsError.jape")
        );
      }
      // a JAPE batcher
      Batch batch = new Batch(Gate.getUrl("tests/RhsError.jape"), "UTF-8");

      // a document with an annotation
      Document doc = Factory.newDocument("This is a Small Document.");
      FeatureMap features = Factory.newFeatureMap();
      features.put("orth", "upperInitial");
      doc.getAnnotations().add(new Long(0), new Long(8), "Token", features);

      // run jape on the document
      batch.transduce(doc);
    } catch(Exception e) {
      if(DEBUG) Out.prln(e);
      gotException = true;
    }

    assertTrue("Bad JAPE grammar didn't throw an exception", gotException);

  }  // testRhsErrorMessages

//  /**
//   * This test sets up a JAPE transducer based on a grammar
//   * (RhsError2.jape) that will throw a compiler error.
//   * The test succeeds so long as we get that exception.
//   */
//  public void testRhsErrorMessages2() {
//    boolean gotException = false;
//
//    // disable System.out so that the compiler can't splash its error on screen
//    if(DEBUG) System.out.println("hello 1");
//    PrintStream sysout = System.out;
//    System.setOut(new PrintStream(new ByteArrayOutputStream()));
//    if(DEBUG) System.out.println("hello 2");
//
//    // run a JAPE batch on the faulty grammar
//    try {
//      if(DEBUG) {
//        Out.print(
//          "Opening Jape grammar... " + Gate.getUrl("tests/RhsError2.jape")
//        );
//      }
//      // a JAPE batcher
//      Batch batch = new Batch(Gate.getUrl("tests/RhsError2.jape"), "UTF-8");
//    } catch(Exception e) {
//      if(DEBUG) Out.prln(e);
//      gotException = true;
//    } finally {
//
//      // re-enable System.out
//      System.setOut(sysout);
//      if(DEBUG) System.out.println("hello 3");
//    }
//
//    assertTrue("Bad JAPE grammar (2) didn't throw an exception", gotException);
//
//  }  // testRhsErrorMessages2
//

  /** Test suite routine for the test runner */
  public static Test suite() {
    return new TestSuite(TestJape.class);
  } // suite

  /**
   * Main method for running this test standalone: performs six timed runs of
   * {@link #DoTestBigGrammar(String)}, using the first command-line argument
   * as the text name, or "AveShort" when none is given.
   */
  public static void main(String[] args) {
    for(int i = 0; i < 6; i++){
      System.gc();
      Out.println("Run " + i + "   ==============");
      try{
        TestJape testJape = new TestJape("Test Jape");
        testJape.setUp();
        if(args.length < 1) testJape.DoTestBigGrammar("AveShort");
        else testJape.DoTestBigGrammar(args[0]);
      } catch(Exception e) {
        e.printStackTrace(Err.getPrintWriter());
      }
    }
  }
} // class TestJape