// Source path: releases/gate-5.1-beta2-build3402-ALL/plugins/Obsolete/Montreal_Transducer/src/ca/umontreal/iro/rali/gate/jape/Batch.java
 
/*
 *  Batch.java - transducer class
 *
 *  Copyright (c) 1998-2001, The University of Sheffield.
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Hamish Cunningham, 10/08/98
 *
 *  Minor modifications by Luc Plamondon, Université de Montréal, 20/11/03:
 *  - migrated original file to the ca.umontreal.iro.rali.gate.jape package.
 *
 *  $Id$
 *
 *  DEVELOPER NOTES:
 *
 *  This is one that got away; the relation between constructors,
 *  initTransducer and parseTransducer are totally screwy and get worse
 *  every time I add something (e.g. support for resource loading).
 *  We should probably junk this whole thing and start again....
 */

package ca.umontreal.iro.rali.gate.jape;

import java.util.*;
import java.util.jar.*;
import java.io.*;
import java.net.*;

import gate.annotation.*;
import gate.util.*;
import gate.*;
import gate.event.*;
import gate.creole.*;

/** Batch processing of JAPE transducers against documents or collections.
  * Construction will parse or deserialise a transducer as required.
  */
public class Batch implements JapeConstants {
  /** Debug flag */
  private static final boolean DEBUG = false;

  /** The name of the transducer file, a .jape or .ser. */
//  private String japeFileName;

  /** The URL that points to a .jape file */
  private URL japeURL;

  /**The encoding used for reading the grammar file(s)*/
  private String encoding;

  /** The JAPE transducer. */
  private Transducer transducer;

  /** A stream connected to the JAPE file (often null). */
//  private InputStream japeStream = null;

  /** Create non-initialised instance (private, used in main). */
  private Batch() { }

  /** Create a fully initialised instance.
    * <P><CODE>japeFileName</CODE>: the name of a .jape or .ser transducer
    * file. This may be an absolute path, or may a .jar
    * that lives somewhere on the classpath.
    */
  public Batch(URL url, String encoding) throws JapeException {
    this.japeURL = url;
    this.encoding =  encoding;
    parseJape();
    if(transducer != null){
      transducer.addStatusListener(new StatusListener(){
        public void statusChanged(String text){
          fireStatusChanged(text);
        }
      });

      transducer.addProgressListener(new ProgressListener(){
        public void progressChanged(int value){
          fireProgressChanged(value);
        }

        public void processFinished(){
          fireProcessFinished();
        }
      });
    }

  } // full init constructor

  public Batch(URL url, String encoding, StatusListener sListener)
         throws JapeException {

    this.addStatusListener(sListener);
    this.japeURL = url;
    this.encoding =  encoding;
    parseJape();
    if(transducer != null){
      transducer.addStatusListener(new StatusListener(){
        public void statusChanged(String text){
          fireStatusChanged(text);
        }
      });

      transducer.addProgressListener(new ProgressListener(){
        public void progressChanged(int value){
          fireProgressChanged(value);
        }

        public void processFinished(){
          fireProcessFinished();
        }
      });
    }
  } // full init constructor

  /**
   * Notifies this PR that it should stop its execution as soon as possible.
   */
  public synchronized void interrupt(){
    transducer.interrupt();
  }
  /** Create a fully initialised instance.
    * <P><CODE>japeFileName</CODE>: the name of a .jape or .ser transducer
    * file. This may be an absolute path, or may a .jar
    * that lives somewhere on the classpath.
    */
/*
  public Batch(String japeFileName) throws JapeException {
    this.japeFileName = japeFileName;
    initTransducer();
  } // full init constructor
*/
/*
  public Batch(String japeFileName, StatusListener sListener)
                                                        throws JapeException {
    this.japeFileName = japeFileName;
    this.addStatusListener(sListener);
    initTransducer();
  } // full init constructor
*/

  /** Create a fully initialised instance from an InputStream connected
    * to the JAPE file.
    */
/*
  public Batch(InputStream japeStream) throws JapeException {
    if(japeStream == null)
      throw new JapeException(
        "attempt to create a batch parser with null input stream"
      );
    this.japeFileName = "stream";
    this.japeStream = japeStream;
    initTransducer();
  } // full init constructor
*/
  /** Create a fully initialised instance from a resource path and resource
    * name.
    */
/*
  public Batch(String resPath, String resName) throws JapeException {
    fromResource = true;
    this.japeFileName = resName;
    this.resPath = resPath;
    initTransducer();
  } // full init constructor
*/

  /** Get the transducer. */
  public Transducer getTransducer() { return transducer; }

  /** Instantiate transducer member as necessary. */
/*
  private void initTransducer()
  throws JapeException {
    if(fromResource) {
      parseJape(resPath, japeFileName);
    } else if(japeFileName.endsWith(".ser") || japeFileName.endsWith(".SER"))
      deserialiseJape(new File(japeFileName));
    else if(japeFileName.endsWith(".jape") || japeFileName.endsWith(".JAPE"))
      parseJape();
    else if(japeFileName.endsWith(".jar") || japeFileName.endsWith(".JAR"))
      deserialiseJape();
    else if(japeFileName.equals("stream"))
      parseJape(japeStream);
    else
      throw new JapeException(
        "unknown file type (not .jape, .ser or .jar):" + japeFileName
      );
    if(transducer != null) transducer.addStatusListener(new StatusListener() {
      public void statusChanged(String text){
        fireStatusChangedEvent(text);
      }
    });
  }
*/
  /** Parse a jape file from {@link #japeURL} and store the transducer. */
  private void parseJape() throws JapeException {
    try {
      ca.umontreal.iro.rali.gate.jape.parser.ParseCpsl parser =
        new ca.umontreal.iro.rali.gate.jape.parser.ParseCpsl(japeURL, encoding);

      StatusListener listener = null;
      listener = new StatusListener(){
        public void statusChanged(String text){
          fireStatusChanged(text);
        }
      };
      parser.addStatusListener(listener);
      transducer = parser.MultiPhaseTransducer();
      parser.removeStatusListener(listener);
    } catch (ca.umontreal.iro.rali.gate.jape.parser.ParseException e) {
      throw new
        JapeException("Batch: error parsing transducer: " + e.getMessage());
    } catch (java.io.IOException e) {
      throw new
        JapeException("Batch: couldn't open JAPE file: " + e.getMessage());
    }
  } // parseJape

  /** Parse a jape file from an InputStream and store the transducer. */
/*
  private void parseJape(InputStream japeStream) throws JapeException {
    try {
      ca.umontreal.iro.rali.gate.jape.parser.ParseCpsl parser =
        new ca.umontreal.iro.rali.gate.jape.parser.ParseCpsl(japeFileName, japeStream);
      transducer = parser.MultiPhaseTransducer();
    } catch (ca.umontreal.iro.rali.gate.jape.parser.ParseException e) {
      throw new
        JapeException("Batch: error parsing transducer: " + e.getMessage());
    } catch (java.io.IOException e) {
      throw new
        JapeException("Batch: couldn't read JAPE stream: " + e.getMessage());
    }
  } // parseJape(InputStream)
*/
  /** Parse a jape file from a resource and store the transducer. */
/*
  private void parseJape(String resPath, String resName) throws JapeException {
    try {
      ca.umontreal.iro.rali.gate.jape.parser.ParseCpsl parser =
        new ca.umontreal.iro.rali.gate.jape.parser.ParseCpsl(resPath, resName);
      transducer = parser.MultiPhaseTransducer();
    } catch (ca.umontreal.iro.rali.gate.jape.parser.ParseException e) {
      throw new
        JapeException("Batch: error parsing transducer: " + e.getMessage());
    } catch (java.io.IOException e) {
      throw new
        JapeException("Batch: couldn't read JAPE resource: " + e.getMessage());
    }
  } // parseJape(resPath, resName)
*/

  /** Deserialise from a .ser file. */
/*
  private void deserialiseJape(File japeFile) throws JapeException {

    // set up a file input stream
    FileInputStream japeInputStream = null;
    try {
      japeInputStream = new FileInputStream(japeFile.getPath());
    } catch (IOException e) {
      throw new JapeException(
        "Can't read from " + japeFile.getPath() + ": " + e.getMessage()
      );
    }

    // call the input stream deserialise method
    deserialiseJape(japeInputStream);
  } // deserialiseJape(File)
*/
  /** Deserialise from a JAR file. */
/*
  private void deserialiseJape() throws JapeException {
    // find the jar from CLASSPATH
    //SearchPath classPath =
    //  new SearchPath(System.getProperty("java.class.path"), ".");
    File jarFile = new File(japeFileName); //classPath.getFile(japeFileName);
    if(jarFile == null)
      throw new JapeException("Batch: can't find " + japeFileName);

    // get a byte array input stream with the .ser in out of the jar file
    JarFile jar = null;
    BufferedInputStream japeInputStream = null;
    try {
      jar = new JarFile(jarFile.getPath());
      japeInputStream = new BufferedInputStream(
        jar.getInputStream(jar.getJarEntry(jarNameToSerName(japeFileName)))
      );
    } catch(IOException e) {
      throw new JapeException("couldn't read jar file " + japeFileName);
    }


    // call the input stream deserialise method
    deserialiseJape(japeInputStream);
  } // deserialiseJape()
*/
  /** Create a transducer from an object input stream (deserialisation). */
/*
  private void deserialiseJape(InputStream japeInputStream)
  throws JapeException {
    try {
      ObjectInputStream ois = new ObjectInputStream(japeInputStream);
      transducer = (Transducer) ois.readObject();
      ois.close();
      japeInputStream.close(); // redundant?
    } catch (IOException e) {
      throw new JapeException(
        "Batch: can't deserialise InputStream (1): " + e.getMessage()
      );
    } catch (ClassNotFoundException e) {
      throw new JapeException(
        "Batch: can't deserialise InputStream (2): " + e.getMessage()
      );
    }
  } // deserialise(OIS)
*/
  /** Create a .ser name from a .jar name. */
/*
  private String jarNameToSerName(String jarName) {
    return jarName.substring(0, jarName.length() - 4) + ".ser";
  } // jarNameToSerName
*/

  /** Process the given collection. */
  public void transduce(Corpus coll) throws JapeException, ExecutionException {
    // for each doc run the transducer
    Iterator iter = coll.iterator();
    while(iter.hasNext()) {
      Document doc = (Document) iter.next();
      // transducer.transduce(doc);
      transduce(doc, doc.getAnnotations(), doc.getAnnotations());
    }
  } // transduce(coll)

  /** Process a single document. */
  public void transduce(Document doc) throws JapeException, ExecutionException {
    transducer.transduce(doc, doc.getAnnotations(), doc.getAnnotations());
  } // transduce(doc)

  /** Process a single document. */
  public void transduce(Document doc, AnnotationSet inputAS,
                        AnnotationSet outputAS) throws JapeException,
                                                       ExecutionException {
    //no need to transduce empty document
    if (inputAS == null || inputAS.isEmpty())
      return;
    transducer.transduce(doc, inputAS, outputAS);

  } // transduce(doc)

  /** Process a single text. */
/*
  public Document transduce(String text) throws JapeException {
    Document doc = null;
    try {
      doc = Factory.newDocument(text);
    } catch (ResourceInstantiationException e) {
      throw new JapeException(e.toString());
    }
    transducer.transduce(doc, doc.getAnnotations());
    return doc;
  } // transduce(text)
*/
  /** Process a single file. */
/*
  public Document transduce(File textFile) throws JapeException {
    String text = null;
    try {
      text = gate.util.Files.getString(textFile);
    } catch(IOException e) { throw new JapeException(e.toString()); }
    return transduce(text);
  } // transduce(textFile)
*/
  /** Process a set of files. */
/*
  public Corpus transduce(String[] textFileNames) throws JapeException {
    Corpus coll = null;
    try {
      coll = Factory.newCorpus("JAPE batch corpus");
      Document doc = null;
      for(int i = 0; i < textFileNames.length; i++) {
          doc = Factory.newDocument(textFileNames[i]);
          doc.setFeatures(Factory.newFeatureMap());
          /*coll.createDocument(
            textFileNames[i],
            null, // the text - should get read from disk
            new AnnotationSetImpl(doc),
            Factory.newFeatureMap(),
            Document.COPIED
          );*/
/*
        transducer.transduce(doc, doc.getAnnotations());
      }
    } catch(ResourceInstantiationException e) {
      throw new JapeException(e.toString());
    }
    return coll;
  } // transduce(textFileNames)
*/
  /** This is where it all happens. This is <I>the</I> place to be. Take
    * your summer holidays here. Visit on Saturday nights. Buy a season
    * ticket from <CODE>www.programmer.gone.insane.com</CODE>.
    * <P>
    * Takes a .jape/.jar/.ser
    *  file name (-j option) which is assumed to hold a pattern
    * grammar for a multi-phase transducer, and a collection
    * name (-c option) or a list of files. As needed it then parses and
    * compiles the transducer, then transduces all the documents in the
    * collection and saves it to disk.
    */
  public static void main(String args[]) {
/*
    // oh great bug in the sky give us this day our daily fuckup
    //gate.util.Debug.setDebug(true);
    //gate.util.Debug.setDebug(Rule.class, true);
    //gate.util.Debug.setDebug(LeftHandSide.class, true);
    //gate.util.Debug.setDebug(BasicPatternElement.class, true);
    //gate.util.Debug.setDebug(AnnotationSet.class, true);

    // The persistent name of the collection.
    String persCollName = null;;

    // The collection to process.
    Corpus collection = null;

    // create one of us
    Batch batch = new Batch();

    // process the options
    int i = 0;
    for( ; i<args.length; i++) {
      if(args[i].equals("-c") && ++i < args.length) // -c = coll name
        persCollName = args[i];
      else if(args[i].equals("-j") && ++i < args.length)// -j = transducer name
        batch.japeFileName = args[i];
      else if(args[i].equals("-v")) // -v = verbose
        batch.setVerbose(true);
      else if(args[i].startsWith("-"))
        batch.usage("unknown option " + args[i]);
      else
        break;
    } // for each arg

    // file name list
    String[] fileNames = null;
    if(args.length > i) {
      fileNames = new String[args.length - i];
      for(int j = 0; i<args.length; j++, i++)
        fileNames[j] = args[i];
    }

    // did they give valid options?
    if(batch.japeFileName == null)
      batch.usage("you must supply a transducer name");
    if(fileNames != null && persCollName != null)
      batch.usage("can't read a collection AND process a file list");

    // parse the transducer or bomb
    batch.message("parsing the transducer");
    try { batch.initTransducer(); }
    catch(JapeException e) {
      batch.usage("oops: " + e.toString());
    }

    Corpus coll = null;
    if(persCollName != null) { // we got a collection name, not a list of files

      // open the collection or bomb
      coll = null;
      batch.message("opening the collection");
      try {
        coll = Factory.newCorpus(persCollName);
      } catch(ResourceInstantiationException e) {
        batch.usage("oops (x): " + e);
      }

      // transduce
      batch.message("calling transducer");
      try { batch.transduce(coll); }
      catch(JapeException e) {
        batch.usage("oops (1): " + e.toString());
      }

      // save to disk
      batch.message("saving the collection");
      batch.usage("couldn't sync coll ");

    // we got a list of files, not a collection
    } else {
      batch.message("transducing transient collection");
      try {
        coll = batch.transduce(fileNames);
      } catch(JapeException e) {
        batch.usage("oops (2): " + e.toString());
      }
    }

    // we won! we won! we can smash up all the computers now!
    batch.message("done");
    //System.exit(0);
*/
  } // main


  /** Whether to print progress messages or not. */
  private boolean verbose = false;

  /** Set verbosity. */
  public void setVerbose(boolean turtleSoup) { verbose = turtleSoup; }

  /** You got something wrong, dumbo. */
  public void usage(String errorMessage) {
    String usageMessage =
      "usage: java ca.umontreal.iro.rali.gate.jape.Batch.main [-v] " +
        "-j japefile(.ser|.jape|.jar) " +
        "(-c CollectionName | filenames)";

    Err.println(errorMessage);
    Err.println(usageMessage);
    // System.exit(1);

  } // usage

  /** Hello? Anybody there?? */
  public void message(String mess) {
    if(verbose) Out.println("Batch: " + mess);
  } // message

  public void setFeatures(gate.FeatureMap newFeatures) {
    features = newFeatures;
  }
  public gate.FeatureMap getFeatures() {
    return features;
  }
  public synchronized void removeProgressListener(ProgressListener l) {
    if (progressListeners != null && progressListeners.contains(l)) {
      Vector v = (Vector) progressListeners.clone();
      v.removeElement(l);
      progressListeners = v;
    }
  }
  public synchronized void addProgressListener(ProgressListener l) {
    Vector v = progressListeners == null ? new Vector(2) : (Vector) progressListeners.clone();
    if (!v.contains(l)) {
      v.addElement(l);
      progressListeners = v;
    }
  }

  //ProcessProgressReporter implementation ends here

  /** Are we initialising from a resource? */
//  private boolean fromResource = false;

  /** Path to the resources tree */
//  private String resPath = null;


  private gate.FeatureMap features;
  private transient Vector progressListeners;
  private transient Vector statusListeners;

  protected void fireProgressChanged(int e) {
    if (progressListeners != null) {
      Vector listeners = progressListeners;
      int count = listeners.size();
      for (int i = 0; i < count; i++) {
        ((ProgressListener) listeners.elementAt(i)).progressChanged(e);
      }
    }
  }
  protected void fireProcessFinished() {
    if (progressListeners != null) {
      Vector listeners = progressListeners;
      int count = listeners.size();
      for (int i = 0; i < count; i++) {
        ((ProgressListener) listeners.elementAt(i)).processFinished();
      }
    }
  }
  public synchronized void removeStatusListener(StatusListener l) {
    if (statusListeners != null && statusListeners.contains(l)) {
      Vector v = (Vector) statusListeners.clone();
      v.removeElement(l);
      statusListeners = v;
    }
  }
  public synchronized void addStatusListener(StatusListener l) {
    Vector v = statusListeners == null ? new Vector(2) : (Vector) statusListeners.clone();
    if (!v.contains(l)) {
      v.addElement(l);
      statusListeners = v;
    }
  }
  protected void fireStatusChanged(String e) {
    if (statusListeners != null) {
      Vector listeners = statusListeners;
      int count = listeners.size();
      for (int i = 0; i < count; i++) {
        ((StatusListener) listeners.elementAt(i)).statusChanged(e);
      }
    }
  }

  /**
   * Sets the ontology to be used by the transducers
   * @param ontology
   */
  public void setOntology(gate.creole.ontology.Ontology ontology) {
    transducer.setOntology(ontology);
  }


  /*
  private void writeObject(ObjectOutputStream oos) throws IOException {
    Out.prln("writing batch");
    oos.defaultWriteObject();
    Out.prln("finished writing batch");
  } // writeObject
  */

} // class Batch