/* * TestJape2.java (Java Annotation Patterns Engine) * * Copyright (c) 1998-2001, The University of Sheffield. * * This file is part of GATE (see http://gate.ac.uk/), and is free * software, licenced under the GNU Library General Public License, * Version 2, June 1991 (in the distribution as file licence.html, * and also available at http://gate.ac.uk/gate/licence.html). * * Hamish Cunningham, 23/02/2000 * * Minor modifications made by Luc Plamondon, Universit� de Montr�al, 27/11/03: * - migrated original file from gate.jape to * ca.umontreal.iro.rali.gate.jape package. * * $Id$ * * Description: Test class for JAPE. */ package ca.umontreal.iro.rali.gate.jape; import java.util.*; import java.io.*; import ca.umontreal.iro.rali.gate.jape.parser.*; import gate.*; import gate.annotation.*; import gate.util.*; import gate.creole.*; /** * Second test harness for JAPE. * Uses the Sheffield Tokeniser and Gazetteer, and must be run * from the gate directory. * @author Hamish Cunningham */ public class TestJape2 { /** Debug flag */ private static final boolean DEBUG = false; /** How much noise to make. */ static private boolean verbose = false; /** Take a list of text files and a collection name, and * call tokeniser/gazetteer/jape on them, creating the * collection. */ static public void main(String[] args) { // turn debug output on/off //Debug.setDebug(true); //Debug.setDebug(AnnotationSet.class, true); //Debug.setDebug(BasicPatternElement.class, true); //Debug.setDebug(ComplexPatternElement.class, true); //Debug.setDebug(ConstraintGroup.class, true); //Debug.setDebug(SinglePhaseTransducer.class, true); // variables to parse the command line options into String collName = null; String japeName = null; ArrayList fileNames = null; // process options for(int i=0; i<args.length; i++) { if(args[i].equals("-c") && ++i < args.length) // -c = coll name collName = args[i]; else if(args[i].equals("-j") && ++i < args.length) // -j: .jape name japeName = args[i]; else if(args[i].equals("-v")) // -v = verbose verbose = true; else { // a list of files fileNames = new ArrayList(); do { fileNames.add(args[i++]); } while(i < args.length); } } // for each arg // did they give valid options? message("checking options"); if(collName == null || japeName == null || fileNames == null) usage("you must supply collection, transducer and file names"); // create a collection and run the tokeniser message("creating coll, tokenising and gazetteering"); Corpus coll = null; try { coll = tokAndGaz(collName, fileNames); } catch(ResourceInstantiationException e) { usage("couldn't open collection: " + e); } /* // run the parser test message("parsing the .jape file (or deserialising the .ser file)"); Batch batch = null; try { batch = new Batch(japeName); } catch(JapeException e) { usage("can't create transducer " + e.getMessage()); } */ /*Transducer transducer = parseJape(japeName); //Out.println(transducer); if(transducer == null) System.exit(1);*/ // test the transducers from the parser /* message("running the transducer"); try { batch.transduce(coll); } catch(JapeException e) { usage("couldn't run transducer " + e.getMessage()); } //runTransducer(transducer, coll); //Out.println(transducer); message("done\n\r"); //System.exit(0); */ } // main /** * Create a collection and put tokenised and gazetteered docs in it. */ static public Corpus tokAndGaz(String collName, ArrayList fileNames) throws ResourceInstantiationException { // create or overwrite the collection Corpus collection = null; File collDir = new File(collName); collection = Factory.newCorpus( collDir.getAbsolutePath() ); // add all the documents for(Iterator i = fileNames.iterator(); i.hasNext(); ) { String fname = (String) i.next(); File f = new File(fname); FeatureMap attrs = Factory.newFeatureMap(); Document doc = null; try { AnnotationSet annots = new AnnotationSetImpl(doc); collection.add( Factory.newDocument(f.getAbsolutePath()) ); } catch(ResourceInstantiationException e) { e.printStackTrace(); } /* // Tokenise the document Tokeniser tokeniser = new Tokeniser(doc, Tokeniser.HMM); try { tokeniser.hmmTokenSequence(); } catch(sheffield.creole.tokeniser.ParseException ex) { ex.printStackTrace(); return null; } catch (CreoleException ex) { ex.printStackTrace(); return null; } // Gazetteer the document gate.creole.Annotator gazetteer = new GazetteerAnnotator(); gazetteer.annotate(doc, null); */ } // for each doc name // return the annotated collection return collection; } //tokAndGaz /** * Must be run from the gate directory. * Parse the .jape file. */ /* static public Transducer parseJape(String japeName) { Transducer transducer = null; if(japeName.endsWith(".ser")) { // it's compiled already message("deserialising " + japeName); File f = new File(japeName); if(! f.exists()) Out.println(japeName + " not found"); try { FileInputStream fis = new FileInputStream(f.getPath()); ObjectInputStream ois = new ObjectInputStream(fis); transducer = (Transducer) ois.readObject(); ois.close(); } catch (Exception ex) { Err.println( "Can't read from " + f.getName() + ": " + ex.toString() ); } } else { // parse it message("parsing " + japeName); try { ParseCpsl cpslParser = new ParseCpsl(japeName); transducer = cpslParser.MultiPhaseTransducer(); } catch(IOException e) { e.printStackTrace(); } catch(ca.umontreal.iro.rali.gate.jape.parser.ParseException ee) { Err.println("Error parsing transducer: " + ee.getMessage()); } } return transducer; } // parseJape static public void runTransducer( Transducer transducer, Corpus coll ) { try { Document doc = coll.firstDocument(); do { message("doing document " + doc.getId()); transducer.transduce(doc); // Out.println(transducer.toString()); } while( (doc = coll.nextDocument()) != null ); } catch(JdmException e) { e.printStackTrace(); } catch(JapeException e) { e.printStackTrace(); } } // runTransducer */ /** You got something wrong, dumbo. */ public static void usage(String errorMessage) { String usageMessage = "usage: java ca.umontreal.iro.rali.gate.jape.TestJape2.main [-v] " + "-j JapePatternFile -c CollectionName FileName(s)"; Err.println(errorMessage); Err.println(usageMessage); //System.exit(1); } // usage /** Hello? Anybody there?? */ public static void message(String mess) { if(verbose) Out.println("TestJape2: " + mess); } // message } // class TestJape2