/*
* Batch.java - transducer class
*
* Copyright (c) 1998-2001, The University of Sheffield.
*
* This file is part of GATE (see http://gate.ac.uk/), and is free
* software, licenced under the GNU Library General Public License,
* Version 2, June 1991 (in the distribution as file licence.html,
* and also available at http://gate.ac.uk/gate/licence.html).
*
* Hamish Cunningham, 10/08/98
*
* Minor modifications by Luc Plamondon, Université de Montréal, 20/11/03:
* - migrated original file to the ca.umontreal.iro.rali.gate.jape package.
*
* $Id$
*
* DEVELOPER NOTES:
*
* This is one that got away; the relation between constructors,
* initTransducer and parseTransducer are totally screwy and get worse
* every time I add something (e.g. support for resource loading).
* We should probably junk this whole thing and start again....
*/
package ca.umontreal.iro.rali.gate.jape;
import java.util.*;
import java.util.jar.*;
import java.io.*;
import java.net.*;
import gate.annotation.*;
import gate.util.*;
import gate.*;
import gate.event.*;
import gate.creole.*;
/** Batch processing of JAPE transducers against documents or collections.
* Construction will parse or deserialise a transducer as required.
*/
public class Batch implements JapeConstants {
/** Debug flag; not read anywhere in this class. */
private static final boolean DEBUG = false;
// Disabled field: the name of the transducer file, a .jape or .ser.
// private String japeFileName;
/** The URL that points to the .jape grammar file; handed to the parser by parseJape(). */
private URL japeURL;
/** The encoding used for reading the grammar file(s); handed to the parser together with japeURL. */
private String encoding;
/**
 * The JAPE transducer, assigned by parseJape(). The public constructors check
 * it for null before attaching listeners, so it is treated as possibly null.
 */
private Transducer transducer;
// Disabled field: a stream connected to the JAPE file (often null).
// private InputStream japeStream = null;
/**
 * Creates a non-initialised instance: no grammar is parsed and all fields
 * keep their default values. Private; it was used by the command-line
 * driver in main, which is currently disabled.
 */
private Batch() {
  // nothing to initialise
}
/**
 * Creates a fully initialised instance by parsing the grammar found at the
 * given URL.
 * <P>
 * Once parsing has finished, status and progress events produced by the
 * resulting transducer are forwarded to the listeners registered on this
 * batch.
 *
 * @param url the location of the .jape grammar file
 * @param encoding the encoding used for reading the grammar file(s)
 * @throws JapeException if the grammar cannot be opened or parsed
 */
public Batch(URL url, String encoding) throws JapeException {
  this.japeURL = url;
  this.encoding = encoding;
  parseJape();
  forwardTransducerEvents();
} // full init constructor

/**
 * Creates a fully initialised instance by parsing the grammar found at the
 * given URL, reporting status to the given listener.
 * <P>
 * The listener is registered before parsing starts, so it also receives the
 * status messages fired while the grammar is being parsed.
 *
 * @param url the location of the .jape grammar file
 * @param encoding the encoding used for reading the grammar file(s)
 * @param sListener the status listener to register; may be null, in which
 *        case no listener is registered
 * @throws JapeException if the grammar cannot be opened or parsed
 */
public Batch(URL url, String encoding, StatusListener sListener)
    throws JapeException {
  // A null listener stored in the listener list would make every later
  // status notification fail with a NullPointerException.
  if (sListener != null) {
    this.addStatusListener(sListener);
  }
  this.japeURL = url;
  this.encoding = encoding;
  parseJape();
  forwardTransducerEvents();
} // full init constructor with status listener

/**
 * Relays status and progress events of the transducer to the listeners
 * registered on this batch. Does nothing when no transducer is available.
 */
private void forwardTransducerEvents() {
  if (transducer == null) {
    return;
  }
  transducer.addStatusListener(new StatusListener() {
    public void statusChanged(String text) {
      fireStatusChanged(text);
    }
  });
  transducer.addProgressListener(new ProgressListener() {
    public void progressChanged(int value) {
      fireProgressChanged(value);
    }
    public void processFinished() {
      fireProcessFinished();
    }
  });
}
/**
 * Asks the underlying transducer to stop its execution as soon as possible.
 * Does nothing when no transducer is available; the constructors of this
 * class already treat a null transducer as a possible state.
 */
public synchronized void interrupt() {
  if (transducer != null) {
    transducer.interrupt();
  }
}
/** Create a fully initialised instance.
* <P><CODE>japeFileName</CODE>: the name of a .jape or .ser transducer
* file. This may be an absolute path, or may a .jar
* that lives somewhere on the classpath.
*/
/*
public Batch(String japeFileName) throws JapeException {
this.japeFileName = japeFileName;
initTransducer();
} // full init constructor
*/
/*
public Batch(String japeFileName, StatusListener sListener)
throws JapeException {
this.japeFileName = japeFileName;
this.addStatusListener(sListener);
initTransducer();
} // full init constructor
*/
/** Create a fully initialised instance from an InputStream connected
* to the JAPE file.
*/
/*
public Batch(InputStream japeStream) throws JapeException {
if(japeStream == null)
throw new JapeException(
"attempt to create a batch parser with null input stream"
);
this.japeFileName = "stream";
this.japeStream = japeStream;
initTransducer();
} // full init constructor
*/
/** Create a fully initialised instance from a resource path and resource
* name.
*/
/*
public Batch(String resPath, String resName) throws JapeException {
fromResource = true;
this.japeFileName = resName;
this.resPath = resPath;
initTransducer();
} // full init constructor
*/
/**
 * Returns the transducer held by this batch.
 *
 * @return the current value of the transducer field
 */
public Transducer getTransducer() {
  return this.transducer;
}
/** Instantiate transducer member as necessary. */
/*
private void initTransducer()
throws JapeException {
if(fromResource) {
parseJape(resPath, japeFileName);
} else if(japeFileName.endsWith(".ser") || japeFileName.endsWith(".SER"))
deserialiseJape(new File(japeFileName));
else if(japeFileName.endsWith(".jape") || japeFileName.endsWith(".JAPE"))
parseJape();
else if(japeFileName.endsWith(".jar") || japeFileName.endsWith(".JAR"))
deserialiseJape();
else if(japeFileName.equals("stream"))
parseJape(japeStream);
else
throw new JapeException(
"unknown file type (not .jape, .ser or .jar):" + japeFileName
);
if(transducer != null) transducer.addStatusListener(new StatusListener() {
public void statusChanged(String text){
fireStatusChangedEvent(text);
}
});
}
*/
/**
 * Parses the grammar at {@link #japeURL}, read with {@link #encoding}, and
 * stores the resulting transducer.
 * <P>
 * While parsing, status messages fired by the parser are relayed to the
 * status listeners of this batch. The relay is detached from the parser
 * again whether or not parsing succeeds.
 *
 * @throws JapeException if the grammar cannot be parsed or the JAPE file
 *         cannot be opened; the original exception is attached as the cause
 */
private void parseJape() throws JapeException {
  try {
    ca.umontreal.iro.rali.gate.jape.parser.ParseCpsl parser =
      new ca.umontreal.iro.rali.gate.jape.parser.ParseCpsl(japeURL, encoding);
    StatusListener listener = new StatusListener() {
      public void statusChanged(String text) {
        fireStatusChanged(text);
      }
    };
    parser.addStatusListener(listener);
    try {
      transducer = parser.MultiPhaseTransducer();
    } finally {
      // detach the relay even when parsing fails
      parser.removeStatusListener(listener);
    }
  } catch (ca.umontreal.iro.rali.gate.jape.parser.ParseException e) {
    JapeException failure =
      new JapeException("Batch: error parsing transducer: " + e.getMessage());
    // keep the original stack trace available to callers
    failure.initCause(e);
    throw failure;
  } catch (java.io.IOException e) {
    JapeException failure =
      new JapeException("Batch: couldn't open JAPE file: " + e.getMessage());
    // keep the original stack trace available to callers
    failure.initCause(e);
    throw failure;
  }
} // parseJape
/** Parse a jape file from an InputStream and store the transducer. */
/*
private void parseJape(InputStream japeStream) throws JapeException {
try {
ca.umontreal.iro.rali.gate.jape.parser.ParseCpsl parser =
new ca.umontreal.iro.rali.gate.jape.parser.ParseCpsl(japeFileName, japeStream);
transducer = parser.MultiPhaseTransducer();
} catch (ca.umontreal.iro.rali.gate.jape.parser.ParseException e) {
throw new
JapeException("Batch: error parsing transducer: " + e.getMessage());
} catch (java.io.IOException e) {
throw new
JapeException("Batch: couldn't read JAPE stream: " + e.getMessage());
}
} // parseJape(InputStream)
*/
/** Parse a jape file from a resource and store the transducer. */
/*
private void parseJape(String resPath, String resName) throws JapeException {
try {
ca.umontreal.iro.rali.gate.jape.parser.ParseCpsl parser =
new ca.umontreal.iro.rali.gate.jape.parser.ParseCpsl(resPath, resName);
transducer = parser.MultiPhaseTransducer();
} catch (ca.umontreal.iro.rali.gate.jape.parser.ParseException e) {
throw new
JapeException("Batch: error parsing transducer: " + e.getMessage());
} catch (java.io.IOException e) {
throw new
JapeException("Batch: couldn't read JAPE resource: " + e.getMessage());
}
} // parseJape(resPath, resName)
*/
/** Deserialise from a .ser file. */
/*
private void deserialiseJape(File japeFile) throws JapeException {
// set up a file input stream
FileInputStream japeInputStream = null;
try {
japeInputStream = new FileInputStream(japeFile.getPath());
} catch (IOException e) {
throw new JapeException(
"Can't read from " + japeFile.getPath() + ": " + e.getMessage()
);
}
// call the input stream deserialise method
deserialiseJape(japeInputStream);
} // deserialiseJape(File)
*/
/** Deserialise from a JAR file. */
/*
private void deserialiseJape() throws JapeException {
// find the jar from CLASSPATH
//SearchPath classPath =
// new SearchPath(System.getProperty("java.class.path"), ".");
File jarFile = new File(japeFileName); //classPath.getFile(japeFileName);
if(jarFile == null)
throw new JapeException("Batch: can't find " + japeFileName);
// get a byte array input stream with the .ser in out of the jar file
JarFile jar = null;
BufferedInputStream japeInputStream = null;
try {
jar = new JarFile(jarFile.getPath());
japeInputStream = new BufferedInputStream(
jar.getInputStream(jar.getJarEntry(jarNameToSerName(japeFileName)))
);
} catch(IOException e) {
throw new JapeException("couldn't read jar file " + japeFileName);
}
// call the input stream deserialise method
deserialiseJape(japeInputStream);
} // deserialiseJape()
*/
/** Create a transducer from an object input stream (deserialisation). */
/*
private void deserialiseJape(InputStream japeInputStream)
throws JapeException {
try {
ObjectInputStream ois = new ObjectInputStream(japeInputStream);
transducer = (Transducer) ois.readObject();
ois.close();
japeInputStream.close(); // redundant?
} catch (IOException e) {
throw new JapeException(
"Batch: can't deserialise InputStream (1): " + e.getMessage()
);
} catch (ClassNotFoundException e) {
throw new JapeException(
"Batch: can't deserialise InputStream (2): " + e.getMessage()
);
}
} // deserialise(OIS)
*/
/** Create a .ser name from a .jar name. */
/*
private String jarNameToSerName(String jarName) {
return jarName.substring(0, jarName.length() - 4) + ".ser";
} // jarNameToSerName
*/
/**
 * Runs the transducer over every document of the given collection, using
 * each document's default annotation set as both input and output.
 *
 * @param coll the corpus whose documents are processed in iteration order
 * @throws JapeException propagated from the per-document call
 * @throws ExecutionException propagated from the per-document call
 */
public void transduce(Corpus coll) throws JapeException, ExecutionException {
  for (Iterator docs = coll.iterator(); docs.hasNext(); ) {
    Document current = (Document) docs.next();
    // delegate to the three-argument overload, which skips empty input sets
    transduce(current, current.getAnnotations(), current.getAnnotations());
  }
} // transduce(coll)
/**
 * Runs the transducer over a single document, using the document's default
 * annotation set as both input and output. The transducer is invoked
 * directly, without the empty-input check made by the three-argument
 * overload.
 *
 * @param doc the document to process
 * @throws JapeException propagated from the transducer
 * @throws ExecutionException propagated from the transducer
 */
public void transduce(Document doc) throws JapeException, ExecutionException {
  this.transducer.transduce(
    doc,
    doc.getAnnotations(),
    doc.getAnnotations());
} // transduce(doc)
/**
 * Runs the transducer over a single document with explicit input and output
 * annotation sets. Nothing is done when the input set is null or empty.
 *
 * @param doc the document to process
 * @param inputAS the annotation set passed to the transducer as input
 * @param outputAS the annotation set passed to the transducer as output
 * @throws JapeException propagated from the transducer
 * @throws ExecutionException propagated from the transducer
 */
public void transduce(Document doc, AnnotationSet inputAS,
                      AnnotationSet outputAS) throws JapeException,
                                                     ExecutionException {
  // an absent or empty input set means there is nothing to transduce
  boolean hasInput = inputAS != null && !inputAS.isEmpty();
  if (hasInput) {
    transducer.transduce(doc, inputAS, outputAS);
  }
} // transduce(doc, inputAS, outputAS)
/** Process a single text. */
/*
public Document transduce(String text) throws JapeException {
Document doc = null;
try {
doc = Factory.newDocument(text);
} catch (ResourceInstantiationException e) {
throw new JapeException(e.toString());
}
transducer.transduce(doc, doc.getAnnotations());
return doc;
} // transduce(text)
*/
/** Process a single file. */
/*
public Document transduce(File textFile) throws JapeException {
String text = null;
try {
text = gate.util.Files.getString(textFile);
} catch(IOException e) { throw new JapeException(e.toString()); }
return transduce(text);
} // transduce(textFile)
*/
/** Process a set of files. */
/*
public Corpus transduce(String[] textFileNames) throws JapeException {
Corpus coll = null;
try {
coll = Factory.newCorpus("JAPE batch corpus");
Document doc = null;
for(int i = 0; i < textFileNames.length; i++) {
doc = Factory.newDocument(textFileNames[i]);
doc.setFeatures(Factory.newFeatureMap());
/*coll.createDocument(
textFileNames[i],
null, // the text - should get read from disk
new AnnotationSetImpl(doc),
Factory.newFeatureMap(),
Document.COPIED
);*/
/*
transducer.transduce(doc, doc.getAnnotations());
}
} catch(ResourceInstantiationException e) {
throw new JapeException(e.toString());
}
return coll;
} // transduce(textFileNames)
*/
/**
 * Command-line entry point; it currently does nothing.
 * <P>
 * The former driver took a transducer file (<CODE>-j</CODE> option), a
 * collection name (<CODE>-c</CODE> option) or a list of file names, and a
 * verbosity switch (<CODE>-v</CODE>). It has been disabled because it
 * relied on members of this class that are themselves commented out
 * (<CODE>japeFileName</CODE>, <CODE>initTransducer()</CODE> and
 * <CODE>transduce(String[])</CODE>).
 *
 * @param args the command-line arguments; ignored
 */
public static void main(String[] args) {
  // Intentionally empty: the command-line driver is disabled.
} // main
/** Whether progress messages are printed by message(String). */
private boolean verbose = false;

/**
 * Switches the printing of progress messages on or off.
 *
 * @param turtleSoup true to print progress messages, false to suppress them
 */
public void setVerbose(boolean turtleSoup) {
  this.verbose = turtleSoup;
}
/**
 * Prints the given error message followed by the command-line usage
 * summary to the error output. The process is not terminated.
 *
 * @param errorMessage the problem to report before the usage summary
 */
public void usage(String errorMessage) {
  final String usageMessage =
    "usage: java ca.umontreal.iro.rali.gate.jape.Batch.main [-v] "
    + "-j japefile(.ser|.jape|.jar) "
    + "(-c CollectionName | filenames)";

  Err.println(errorMessage);
  Err.println(usageMessage);
} // usage
/**
 * Prints a progress message, prefixed with the class name, when verbose
 * mode is on; otherwise does nothing.
 *
 * @param mess the text to print
 */
public void message(String mess) {
  if (!verbose) {
    return;
  }
  Out.println("Batch: " + mess);
} // message
/**
 * Replaces the feature map held by this batch.
 *
 * @param newFeatures the feature map to store; stored as given, not copied
 */
public void setFeatures(gate.FeatureMap newFeatures) {
  this.features = newFeatures;
}
/**
 * Returns the feature map held by this batch.
 *
 * @return the stored feature map, or null if none has been set
 */
public gate.FeatureMap getFeatures() {
  return this.features;
}
/**
 * Unregisters a progress listener. The listener list is replaced by a
 * modified copy rather than changed in place. Does nothing when the
 * listener is not registered.
 *
 * @param l the listener to remove
 */
public synchronized void removeProgressListener(ProgressListener l) {
  if (progressListeners == null || !progressListeners.contains(l)) {
    return;
  }
  Vector updated = (Vector) progressListeners.clone();
  updated.removeElement(l);
  progressListeners = updated;
}
/**
 * Registers a progress listener. The listener list is replaced by a
 * modified copy rather than changed in place. Does nothing when the
 * listener is null or already registered.
 *
 * @param l the listener to add; null is ignored
 */
public synchronized void addProgressListener(ProgressListener l) {
  // A stored null would make every later progress notification fail with
  // a NullPointerException, so it is rejected here.
  if (l == null) {
    return;
  }
  if (progressListeners != null && progressListeners.contains(l)) {
    // already registered: no need to copy the list
    return;
  }
  Vector v = progressListeners == null
    ? new Vector(2)
    : (Vector) progressListeners.clone();
  v.addElement(l);
  progressListeners = v;
}
//ProcessProgressReporter implementation ends here
// Disabled field: are we initialising from a resource?
// private boolean fromResource = false;
// Disabled field: path to the resources tree.
// private String resPath = null;
/** The feature map stored by setFeatures and returned by getFeatures. */
private gate.FeatureMap features;
/**
 * The registered progress listeners; null until the first one is added.
 * addProgressListener and removeProgressListener assign a modified copy
 * instead of changing the current vector.
 */
private transient Vector progressListeners;
/**
 * The registered status listeners; null until the first one is added.
 * addStatusListener and removeStatusListener assign a modified copy
 * instead of changing the current vector.
 */
private transient Vector statusListeners;
/**
 * Notifies every registered progress listener, in registration order, of a
 * new progress value.
 *
 * @param e the progress value passed on to the listeners
 */
protected void fireProgressChanged(int e) {
  Vector snapshot = progressListeners;
  if (snapshot == null) {
    return;
  }
  for (int idx = 0, total = snapshot.size(); idx < total; idx++) {
    ProgressListener target = (ProgressListener) snapshot.elementAt(idx);
    target.progressChanged(e);
  }
}
/**
 * Notifies every registered progress listener, in registration order, that
 * processing has finished.
 */
protected void fireProcessFinished() {
  Vector snapshot = progressListeners;
  if (snapshot == null) {
    return;
  }
  for (int idx = 0, total = snapshot.size(); idx < total; idx++) {
    ProgressListener target = (ProgressListener) snapshot.elementAt(idx);
    target.processFinished();
  }
}
/**
 * Unregisters a status listener. The listener list is replaced by a
 * modified copy rather than changed in place. Does nothing when the
 * listener is not registered.
 *
 * @param l the listener to remove
 */
public synchronized void removeStatusListener(StatusListener l) {
  if (statusListeners == null || !statusListeners.contains(l)) {
    return;
  }
  Vector updated = (Vector) statusListeners.clone();
  updated.removeElement(l);
  statusListeners = updated;
}
/**
 * Registers a status listener. The listener list is replaced by a modified
 * copy rather than changed in place. Does nothing when the listener is
 * null or already registered.
 *
 * @param l the listener to add; null is ignored
 */
public synchronized void addStatusListener(StatusListener l) {
  // A stored null would make every later status notification fail with a
  // NullPointerException, so it is rejected here.
  if (l == null) {
    return;
  }
  if (statusListeners != null && statusListeners.contains(l)) {
    // already registered: no need to copy the list
    return;
  }
  Vector v = statusListeners == null
    ? new Vector(2)
    : (Vector) statusListeners.clone();
  v.addElement(l);
  statusListeners = v;
}
/**
 * Notifies every registered status listener, in registration order, of a
 * new status text.
 *
 * @param e the status text passed on to the listeners
 */
protected void fireStatusChanged(String e) {
  Vector snapshot = statusListeners;
  if (snapshot == null) {
    return;
  }
  for (int idx = 0, total = snapshot.size(); idx < total; idx++) {
    StatusListener target = (StatusListener) snapshot.elementAt(idx);
    target.statusChanged(e);
  }
}
/**
 * Passes the given ontology on to the underlying transducer.
 *
 * @param ontology the ontology handed to the transducer
 */
public void setOntology(gate.creole.ontology.Ontology ontology) {
  this.transducer.setOntology(ontology);
}
/*
private void writeObject(ObjectOutputStream oos) throws IOException {
Out.prln("writing batch");
oos.defaultWriteObject();
Out.prln("finished writing batch");
} // writeObject
*/
} // class Batch