GATE.ac.uk - releases/gate-8.4-build5748-ALL/plugins/Learning/src/gate/learning/LearningAPIMain.java

/*
 * LearningAPIMain.java
 *
 * Yaoyong Li 22/03/2007
 *
 * $Id: LearningAPIMain.java, v 1.0 2007-03-22 12:58:16 +0000 yaoyong $
 */
package gate.learning;

import gate.Document;
import gate.Factory;
import gate.ProcessingResource;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.util.Benchmark;
import gate.util.Benchmarkable;
import gate.util.BomStrippingInputStreamReader;
import gate.util.Files;
import gate.util.GateException;
import gate.util.GateRuntimeException;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.URISyntaxException;
import java.net.URL;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;

import org.apache.log4j.Logger;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;

/**
 * The main object of the ML Api. It does initialiation, read parameter values
 * from GUI, and run the selected learning mode. It can also be called by java
 * code, as an API (an GATE class), for using this learning api.
 */
@SuppressWarnings("serial")
public class LearningAPIMain extends AbstractLanguageAnalyser
                                                             implements
                                                             ProcessingResource,
                                                             Benchmarkable {
  /** This is where the model(s) should be saved */
  private URL configFileURL;

  /**
   * Name of the AnnotationSet contains annotations specified in the DATASET
   * element of configuration file.
   */
  private String inputASName;

  /**
   * The annotationSet for the resulting annotations by application of models.
   */
  private String outputASName;

  /**
   * Run-time parameter learningMode, having three modes: training, application,
   * and evaluation.
   */
  private RunMode learningMode;

  private RunMode learningModeAppl;

  private RunMode learningModeMiTraining;

  private RunMode learningModeVIEWSVMMODEL;

  private RunMode learningModeSelectingDocs;

  /** Learning settings specified in the configuration file. */
  private LearningEngineSettings learningSettings;

  /**
   * The lightweight learning object for getting the features, training and
   * application.
   */
  LightWeightLearningApi lightWeightApi = null;

  /** The File for NLP learning Log. */
  private File logFile;

  /** Used by lightWeightApi, specifying training or application. */
  private boolean isTraining;

  /** Subdirectory for storing the data file produced by learning api. */
  private File wdResults = null;

  /** Subdirectory used to store temporary files used by APPLICATION mode. */
  private File applicationTempDir;

  /** Doing evaluation. */
  private EvaluationBasedOnDocs evaluation;

  /** The MI learning information object. */
  MiLearningInformation miLearningInfor = null;

  /** The three counters for batch application. */
  int startDocIdApp;

  int endDocIdApp;

  int maxNumApp;

  String date_time_loaded = "";
  static final DateFormat dateTimeStampFormat = new SimpleDateFormat("yyyyMMdd_HHmmss");
  
  /** Trivial constructor. */
  public LearningAPIMain() {
    // do nothing
  }

  // featureMap that is used for exporting log messages
  protected Map<Object,Object> benchmarkingFeatures = new HashMap<Object,Object>();

  /** Initialise this resource, and return it. */
  public gate.Resource init() throws ResourceInstantiationException {
    fireStatusChanged("Checking and reading learning settings!");
    // here all parameters are needs to be checked
    // check for the model storage directory
    if(configFileURL == null)
      throw new ResourceInstantiationException(
              "WorkingDirectory is required to store the learned model and cannot be null");
    // it is not null, check it is a file: URL
    if(!"file".equals(configFileURL.getProtocol())) { throw new ResourceInstantiationException(
            "WorkingDirectory must be a file: URL"); }
    // Get the working directory which the configuration
    // file reside in.
    File wd = null;
    try {
      wd = new File(configFileURL.toURI()).getParentFile();
    } catch(URISyntaxException use) {
      wd = Files.fileFromURL(configFileURL).getParentFile();
    }
    // it must be a directory
    if(!wd.isDirectory()) { throw new ResourceInstantiationException(wd
            + " must be a reference to directory"); }
    if(LogService.minVerbosityLevel > 0)
      System.out.println("Configuration File=" + configFileURL.toString());
    try {
      if(!new File(configFileURL.toURI()).exists()) { throw new ResourceInstantiationException(
              "Error: the configuration file specified does not exist!!"); }
    } catch(URISyntaxException e1) {
      e1.printStackTrace();
      throw new ResourceInstantiationException(e1);
    }
    miLearningInfor = new MiLearningInformation();
    try {
      // Load the learning setting file
      // by reading the configuration file
      learningSettings =
              LearningEngineSettings
                      .loadLearningSettingsFromFile(configFileURL);
    } catch(Exception e) {
      e.printStackTrace();
      throw new ResourceInstantiationException(e);
    }
    try {
      // Creat the sub-directory of the workingdirectroy where the data
      // files will be stored in
      long now = System.currentTimeMillis();
      date_time_loaded = dateTimeStampFormat.format(new Date(now));
      String startmsg = "A new session for NLP learning is starting: "+
                date_time_loaded+
                (learningSettings.experimentId.isEmpty() ? 
                  "" : " experiment id "+learningSettings.experimentId);
      String configMsg = configFileURL.toString();
      if(learningSettings.configFile != null) {
        long lastMod = learningSettings.configFile.lastModified();
        configMsg += " (saved "+dateTimeStampFormat.format(new Date(lastMod))+
                ", "+((now-lastMod)/1000)+"secs ago)";
      }

      if(LogService.minVerbosityLevel > 0) {
        System.out.println("\n\n*************************");
        System.out.println(startmsg);
        System.out.println(configMsg);
        System.out.println();
      }
      wdResults =
              new File(wd, gate.learning.ConstantParameters.SUBDIRFORRESULTS);
      wdResults.mkdir();
      logFile =
              new File(new File(wd, ConstantParameters.SUBDIRFORRESULTS),
                      ConstantParameters.FILENAMEOFLOGFILE);
      LogService.init(logFile, true, learningSettings.verbosityLogService);
      StringBuffer logMessage = new StringBuffer();
      logMessage.append("\n\n*************************\n");
      logMessage.append(startmsg);
      logMessage.append(configMsg);
      // adding WorkingDirectory parameter in the benchmarkingFeatures
      benchmarkingFeatures.put("workingDirectory", wd.getAbsolutePath());
      logMessage.append("The initiliased time of NLP learning: "
              + new Date().toString() + "\n");
      logMessage.append("Working directory: " + wd.getAbsolutePath() + "\n");
      logMessage.append("The feature files and models are saved at: "
              + wdResults.getAbsolutePath() + "\n");
      // Call the lightWeightLearningApi
      lightWeightApi = new LightWeightLearningApi(wd);
      // more initialisation
      lightWeightApi.furtherInit(wdResults, learningSettings);
      // adding WorkingDirectory parameter in the benchmarkingFeatures
      // benchmarkingFeatures.put("LearnerName",
      // learningSettings.learnerSettings.getLearnerName());
      // benchmarkingFeatures.put("LearnerNickName",
      // learningSettings.learnerSettings.getLearnerNickName());
      // benchmarkingFeatures.put("SurroundMode", learningSettings.surround);
      logMessage.append("Learner name: "
              + learningSettings.learnerSettings.getLearnerName() + "\n");
      logMessage.append("Learner nick name: "
              + learningSettings.learnerSettings.getLearnerNickName() + "\n");
      logMessage.append("Learner parameter settings: "
              + learningSettings.learnerSettings.learnerName + "\n");
      logMessage.append("Surroud mode (or chunk learning): "
              + learningSettings.surround);
      LogService.logMessage(logMessage.toString(), 1);
      // LogService.close();
    } catch(Exception e) {
      throw new ResourceInstantiationException(e);
    }
    learningModeAppl = RunMode.APPLICATION;
    maxNumApp = learningSettings.docNumIntevalApp;
    learningModeMiTraining = RunMode.MITRAINING;
    learningModeVIEWSVMMODEL = RunMode.VIEWPRIMALFORMMODELS;
    learningModeSelectingDocs = RunMode.RankingDocsForAL;
    fireProcessFinished();
    return this;
  } // init()

  /**
   * Run the resource.
   *
   * @throws ExecutionException
   */
  public void execute() throws ExecutionException {
    // mode in which the PR is executed
    benchmarkingFeatures.put("learningMode", learningMode);
    if(learningMode.equals(learningModeVIEWSVMMODEL)) {
      if(corpus == null || corpus.size() == 0 || corpus.indexOf(document) == 0)
        lightWeightApi.viewSVMmodelsInNLPFeatures(new File(wdResults,
                ConstantParameters.FILENAMEOFModels), learningSettings);
      return;
    }
    if(learningMode.equals(learningModeSelectingDocs)) {
      // for ordering and selecting the documents for ative learning
      if(corpus == null || corpus.size() == 0 || corpus.indexOf(document) == 0) {
        // ranking the documents
        lightWeightApi.orderDocsWithModels(wdResults, learningSettings);
        // selecting the document
        // lightWeightApi.selectDocForAL()
      }
      return;
    }
    // now we need to see if the corpus is provided
    if(corpus == null)
      throw new ExecutionException("Provided corpus is null!");
    if(corpus.size() == 0)
      throw new ExecutionException("No Document found in corpus!");
    // set benchmark ID on the lightWeightApi
    String oldLightWeightApiParentId = null;
    if(lightWeightApi != null) {
      oldLightWeightApiParentId = lightWeightApi.getParentBenchmarkId();
      lightWeightApi.createBenchmarkId(getBenchmarkId());
    }
    // first, get the NLP features from the documents, according to the
    // feature types specified in DataSetDefinition file
    int positionDoc = corpus.indexOf(document);
    // first document in the corpus
    if(positionDoc == 0) {
      lightWeightApi.inputASName = inputASName;
      lightWeightApi.outputASName = outputASName;
      /** Obtain the MI learning information of the last time learning. */
      if(learningMode.equals(this.learningModeMiTraining)) {
        miLearningInfor = new MiLearningInformation();
        File miLeFile =
                new File(wdResults,
                        ConstantParameters.FILENAMEOFMILearningInfor);
        long startTime = Benchmark.startPoint();
        benchmarkingFeatures.put("miLearningInformationFile", miLeFile
                .getAbsolutePath());
        miLearningInfor.readDataFromFile(miLeFile);
        Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                + Benchmark.READING_LEARNING_INFO, this, benchmarkingFeatures);
        benchmarkingFeatures.remove("miLearningInformationFile");
      }
      /** Set the information for batch application. */
      startDocIdApp = 0;
      endDocIdApp = 0;
      if(LogService.minVerbosityLevel > 0)
        System.out.println("Pre-processing the " + corpus.size()
                + " documents...");
      try {
        // PrintWriter logFileIn = new PrintWriter(new FileWriter(logFile,
        // true));
        LogService.init(logFile, true, learningSettings.verbosityLogService);
        LogService.logMessage("\n*** A new run starts.", 1);
        LogService.logMessage(
                "\nThe execution time (pre-processing the first document): "
                        + new Date().toString(), 1);
        if(LogService.minVerbosityLevel > 0) {
          System.out.println("Learning starts.");
          System.out
                  .println("For the information about this learning see the log file "
                          + wdResults.getAbsolutePath()
                          + File.separator
                          + ConstantParameters.FILENAMEOFLOGFILE);
          System.out.println("The number of threads used is "
                  + learningSettings.numThreadUsed);
        }
        // LogService.close();
        // logFileIn.println("EvaluationMode: " + evaluationMode);
        // logFileIn.println("TrainingMode: " + trainingMode);
        // logFileIn.println("InputAS: " + inputASName);
      } catch(IOException e) {
        e.printStackTrace();
      }
    }
    // Apply the model to a bunch of documents
    if(learningMode.equals(learningModeAppl)) {
      ++endDocIdApp;
      if(endDocIdApp - startDocIdApp == maxNumApp) {
        try {
          // first checking if the model file is available or not
          String modelFileName =
                  wdResults.toString() + File.separator
                          + ConstantParameters.FILENAMEOFModels;
          if(!new File(modelFileName).exists()) {
            throw new ExecutionException("The model is not available at " + 
                modelFileName + "!");
          }
          BufferedWriter outNLPFeatures = null;
          BufferedReader inNLPFeatures = null;
          BufferedWriter outFeatureVectors = null;
          // EvaluationBasedOnDocs.emptyDatafile(wdResults, false);
          if(LogService.minVerbosityLevel > 0)
            System.out.println("** " + "Application mode for document from "
                    + startDocIdApp + " to " + endDocIdApp + "(not included):");
          LogService
                  .logMessage("** Application mode for document from "
                          + startDocIdApp + " to " + endDocIdApp
                          + "(not included):", 1);
          isTraining = false;
          String classTypeOriginal =
                  learningSettings.datasetDefinition.getClassAttribute()
                          .getType();
          outNLPFeatures =
                  new BufferedWriter(
                          new OutputStreamWriter(
                                  new FileOutputStream(
                                          new File(
                                                  getApplicationTempDir(),
                                                  ConstantParameters.FILENAMEOFNLPFeaturesData)),
                                  "UTF-8"));
          int numDoc;
          numDoc = endDocIdApp - startDocIdApp;
          long startTime = Benchmark.startPoint();
          benchmarkingFeatures.put("numDocs", "" + numDoc);
          for(int i = startDocIdApp; i < endDocIdApp; ++i) {
            boolean wasLoaded = corpus.isDocumentLoaded(i);
            Document toProcess = corpus.get(i);
            lightWeightApi.annotations2NLPFeatures(toProcess,
                    i - startDocIdApp, outNLPFeatures, isTraining,
                    learningSettings);
            if(toProcess.getDataStore() != null
                    && corpus.getDataStore() != null) {// (isDatastore)
              corpus.getDataStore().sync(corpus);
            }
            if(!wasLoaded) {
              corpus.unloadDocument(toProcess);
              Factory.deleteResource(toProcess);
            }
          }
          outNLPFeatures.flush();
          outNLPFeatures.close();
          lightWeightApi.finishFVs(getApplicationTempDir(), numDoc, isTraining,
                  learningSettings);
          Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                  + Benchmark.ANNOTS_TO_NLP_FEATURES, this,
                  benchmarkingFeatures);
          startTime = Benchmark.startPoint();
          /** Open the normal NLP feature file. */
          inNLPFeatures =
                  new BomStrippingInputStreamReader(
                                  new FileInputStream(
                                          new File(
                                                  getApplicationTempDir(),
                                                  ConstantParameters.FILENAMEOFNLPFeaturesData)),
                                  "UTF-8");
          outFeatureVectors =
                  new BufferedWriter(
                          new OutputStreamWriter(
                                  new FileOutputStream(
                                          new File(
                                                  getApplicationTempDir(),
                                                  ConstantParameters.FILENAMEOFFeatureVectorDataApp)),
                                  "UTF-8"));
          lightWeightApi.nlpfeatures2FVs(getApplicationTempDir(),
                  inNLPFeatures, outFeatureVectors, numDoc, isTraining,
                  learningSettings);
          inNLPFeatures.close();
          outFeatureVectors.flush();
          outFeatureVectors.close();
          Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                  + Benchmark.NLP_FEATURES_TO_FVS, this, benchmarkingFeatures);
          // Applying th model
          startTime = Benchmark.startPoint();
          lightWeightApi.applyModelInJava(corpus, startDocIdApp, endDocIdApp,
                  classTypeOriginal, learningSettings, getApplicationTempDir());
          Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                  + Benchmark.MODEL_APPLICATION, this, benchmarkingFeatures);
          benchmarkingFeatures.remove("numDocs");
          startDocIdApp = endDocIdApp;
        } catch(IOException e) {
          e.printStackTrace();
        } catch(GateException e) {
          e.printStackTrace();
        }
      }
    }
    // we've reached the last document
    if(positionDoc == corpus.size() - 1) {
      // first select the training data and test data according to the
      // learning setting
      // set the inputASName in here, because it is a runtime parameter
      int numDoc = corpus.size();
      try {
        LogService.init(logFile, true, learningSettings.verbosityLogService);
        LogService.logMessage("The learning start at " + new Date().toString(),
                1);
        LogService.logMessage("The number of documents in dataset: " + numDoc,
                1);
        // Open the NLP feature file for storing the NLP feature vectors
        BufferedWriter outNLPFeatures = null;
        BufferedReader inNLPFeatures = null;
        BufferedWriter outFeatureVectors = null;
        // if only need the feature data
        switch(learningMode){
          case ProduceFeatureFilesOnly:
            // if only want feature data
            EvaluationBasedOnDocs.emptyDatafile(wdResults, true);
            if(LogService.minVerbosityLevel > 0)
              System.out.println("** Producing the feature files only!");
            LogService.logMessage("** Producing the feature files only!", 1);
            long startTime = Benchmark.startPoint();
            benchmarkingFeatures.put("numDocs", numDoc);
            isTraining = true;
            outNLPFeatures =
                    new BufferedWriter(
                            new OutputStreamWriter(
                                    new FileOutputStream(
                                            new File(
                                                    wdResults,
                                                    ConstantParameters.FILENAMEOFNLPFeaturesData)),
                                    "UTF-8"));
            
            //In the meantime, also write the name of documents and total number of them into a file
            BufferedWriter outDocsName =
                    new BufferedWriter(new OutputStreamWriter(
                            new FileOutputStream(new File(wdResults,
                                    ConstantParameters.FILENAMEOFDocsName)),
                            "UTF-8"));
            outDocsName.append("##totalDocs=" + numDoc);
            outDocsName.newLine();
            for(int i = 0; i < numDoc; ++i) {
              Document toProcess = corpus.get(i);
              lightWeightApi.annotations2NLPFeatures(toProcess, i,
                      outNLPFeatures, isTraining, learningSettings);
              String docN = toProcess.getName();
              if(docN.contains("_"))
                docN = docN.substring(0, docN.lastIndexOf("_"));
              outDocsName.append(docN);
              outDocsName.newLine();
              if(toProcess.getDataStore() != null
                      && corpus.getDataStore() != null)
                Factory.deleteResource(toProcess);
            }
            outNLPFeatures.flush();
            outNLPFeatures.close();
            outDocsName.flush();
            outDocsName.close();
            lightWeightApi.finishFVs(wdResults, numDoc, isTraining,
                    learningSettings);
            Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                    + Benchmark.ANNOTS_TO_NLP_FEATURES, this,
                    benchmarkingFeatures);
            /** Open the normal NLP feature file. */
            inNLPFeatures =
                    new BomStrippingInputStreamReader(
                                    new FileInputStream(
                                            new File(
                                                    wdResults,
                                                    ConstantParameters.FILENAMEOFNLPFeaturesData)),
                                    "UTF-8");
            outFeatureVectors =
                    new BufferedWriter(
                            new OutputStreamWriter(
                                    new FileOutputStream(
                                            new File(
                                                    wdResults,
                                                    ConstantParameters.FILENAMEOFFeatureVectorData)),
                                    "UTF-8"));
            startTime = Benchmark.startPoint();
            lightWeightApi.nlpfeatures2FVs(wdResults, inNLPFeatures,
                    outFeatureVectors, numDoc, isTraining, learningSettings);
            inNLPFeatures.close();
            outFeatureVectors.flush();
            outFeatureVectors.close();
            Benchmark
                    .checkPoint(startTime, getBenchmarkId() + "."
                            + Benchmark.NLP_FEATURES_TO_FVS, this,
                            benchmarkingFeatures);
            // produce the ngram language model from feature list
            if(learningSettings.datasetDefinition.getNgrams() != null) {
              if(LogService.minVerbosityLevel > 0)
                System.out
                        .println("Write the language model in N-grams into the file "
                                + ConstantParameters.FILENAMEOFNgramLM + "!");
              LogService.logMessage(
                      "Write the language model in N-grams into the file "
                              + ConstantParameters.FILENAMEOFNgramLM + "!", 1);
              if(learningSettings.datasetDefinition.getNgrams().size() >= 1) {
                startTime = Benchmark.startPoint();
                lightWeightApi.featureList2LM(wdResults,
                        learningSettings.datasetDefinition.getNgrams()
                                .get(0).getNumber());
                Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                        + Benchmark.WRITING_NGRAM_MODEL, this,
                        benchmarkingFeatures);
                // produce the term-frequency matrix
                if(LogService.minVerbosityLevel > 0)
                  System.out
                          .println("Write the term-document statistics into the file "
                                  + ConstantParameters.FILENAMEOFTermFreqMatrix
                                  + "!");
                LogService.logMessage(
                        "Write the term-document statistics into the file "
                                + ConstantParameters.FILENAMEOFTermFreqMatrix
                                + "!", 1);
                startTime = Benchmark.startPoint();
                lightWeightApi.termfrequenceMatrix(wdResults, numDoc);
                Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                        + Benchmark.TERM_DOC_STATS, this, benchmarkingFeatures);
              } else {
                System.out
                        .println("!! Warning: cannot produce N-gram data because there is no N-gram "
                                + "defintion in the configuration file!");
              }
            }
            benchmarkingFeatures.remove("numDocs");
            // Create the document for storing the names of selected documents
            // if it doesn't exist.
            File selectedFile =
                    new File(wdResults,
                            ConstantParameters.FILENAMEOFSelectedDOCForAL);
            if(!selectedFile.exists()) selectedFile.createNewFile();
            if(LogService.minVerbosityLevel > 0) displayDataFilesInformation();
            break;
          case TRAINING:
            // empty the data file
            Long tm1 = null,
            tm2 = null,
            tm3 = null;
            if(LogService.minVerbosityLevel >= LogService.DEBUG) {
              tm1 = new Date().getTime();
            }
            EvaluationBasedOnDocs.emptyDatafile(wdResults, true);
            if(LogService.minVerbosityLevel > 0)
              System.out.println("** Training mode:");
            LogService.logMessage("** Training mode:", 1);
            startTime = Benchmark.startPoint();
            benchmarkingFeatures.put("numDocs", "" + numDoc);
            isTraining = true;
            outNLPFeatures =
                    new BufferedWriter(
                            new OutputStreamWriter(
                                    new FileOutputStream(
                                            new File(
                                                    wdResults,
                                                    ConstantParameters.FILENAMEOFNLPFeaturesData)),
                                    "UTF-8"));
            for(int i = 0; i < numDoc; ++i) {
              Document toProcess = corpus.get(i);
              lightWeightApi.annotations2NLPFeatures(toProcess, i,
                      outNLPFeatures, isTraining, learningSettings);
              if(toProcess.getDataStore() != null
                      && corpus.getDataStore() != null)
                Factory.deleteResource(toProcess);
            }
            outNLPFeatures.flush();
            outNLPFeatures.close();
            lightWeightApi.finishFVs(wdResults, numDoc, isTraining,
                    learningSettings);
            Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                    + Benchmark.ANNOTS_TO_NLP_FEATURES, this,
                    benchmarkingFeatures);
            if(LogService.minVerbosityLevel >= LogService.DEBUG) {
              tm2 = new Date().getTime();
              tm3 = tm2 - tm1;
              tm3 /= 1000;
              System.out.println("time for NLP features: " + tm3);
            }
            /** Open the normal NLP feature file. */
            inNLPFeatures =
                    new BomStrippingInputStreamReader(
                                    new FileInputStream(
                                            new File(
                                                    wdResults,
                                                    ConstantParameters.FILENAMEOFNLPFeaturesData)),
                                    "UTF-8");
            outFeatureVectors =
                    new BufferedWriter(
                            new OutputStreamWriter(
                                    new FileOutputStream(
                                            new File(
                                                    wdResults,
                                                    ConstantParameters.FILENAMEOFFeatureVectorData)),
                                    "UTF-8"));
            startTime = Benchmark.startPoint();
            lightWeightApi.nlpfeatures2FVs(wdResults, inNLPFeatures,
                    outFeatureVectors, numDoc, isTraining, learningSettings);
            inNLPFeatures.close();
            outFeatureVectors.flush();
            outFeatureVectors.close();
            Benchmark
                    .checkPoint(startTime, getBenchmarkId() + "."
                            + Benchmark.NLP_FEATURES_TO_FVS, this,
                            benchmarkingFeatures);
            if(LogService.minVerbosityLevel >= LogService.DEBUG) {
              tm1 = new Date().getTime();
              tm3 = tm1 - tm2;
              tm3 /= 1000;
              System.out.println("time for fv: " + tm3);
            }
            // if fitering the training data
            if(learningSettings.fiteringTrainingData
                    && learningSettings.filteringRatio > 0.0) {
              startTime = Benchmark.startPoint();
              lightWeightApi.FilteringNegativeInstsInJava(corpus.size(),
                      learningSettings);
              Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                      + Benchmark.FILTERING, this, benchmarkingFeatures);
            }
            if(LogService.minVerbosityLevel >= LogService.DEBUG) {
              tm2 = new Date().getTime();
              tm3 = tm2 - tm1;
              tm3 /= 1000;
              System.out.println("time for filtering: " + tm3);
            }
            startTime = Benchmark.startPoint();
            // using the java code for training
            lightWeightApi.trainingJava(corpus.size(), learningSettings);
            Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                    + Benchmark.MODEL_TRAINING, this, benchmarkingFeatures);
            benchmarkingFeatures.remove("numDocs");
            if(LogService.minVerbosityLevel >= LogService.DEBUG) {
              tm1 = new Date().getTime();
              tm3 = tm1 - tm2;
              tm3 /= 1000;
              System.out.println("time for NLP training: " + tm3);
            }
            break;
          case APPLICATION:
            // first checking if the model file is available or not
            String modelFileName =
                    wdResults.toString() + File.separator
                            + ConstantParameters.FILENAMEOFModels;
            if(!new File(modelFileName).exists()) {
              throw new ExecutionException("The model is not available at " + 
                  modelFileName + "!");
            }
            if(endDocIdApp > startDocIdApp) {
              if(LogService.minVerbosityLevel > LogService.MINIMUM)
                System.out.println("** "
                        + "Application mode for document from " + startDocIdApp
                        + " to " + endDocIdApp + "(not included):");
              LogService.logMessage("** Application mode for document from "
                      + startDocIdApp + " to " + endDocIdApp
                      + "(not included):", 1);
              isTraining = false;
              String classTypeOriginal =
                      learningSettings.datasetDefinition.getClassAttribute()
                              .getType();
              outNLPFeatures =
                      new BufferedWriter(
                              new OutputStreamWriter(
                                      new FileOutputStream(
                                              new File(
                                                      getApplicationTempDir(),
                                                      ConstantParameters.FILENAMEOFNLPFeaturesData)),
                                      "UTF-8"));
              numDoc = endDocIdApp - startDocIdApp;
              benchmarkingFeatures.put("numDocs", "" + numDoc);
              startTime = Benchmark.startPoint();
              for(int i = startDocIdApp; i < endDocIdApp; ++i) {
                boolean wasLoaded = corpus.isDocumentLoaded(i);
                Document toProcess = corpus.get(i);
                lightWeightApi.annotations2NLPFeatures(toProcess, i
                        - startDocIdApp, outNLPFeatures, isTraining,
                        learningSettings);
                if(toProcess.getDataStore() != null
                        && corpus.getDataStore() != null) {// (isDatastore)
                  corpus.getDataStore().sync(corpus);
                }
                if(!wasLoaded) {
                  corpus.unloadDocument(toProcess);
                  Factory.deleteResource(toProcess);
                }
              }
              outNLPFeatures.flush();
              outNLPFeatures.close();
              lightWeightApi.finishFVs(getApplicationTempDir(), numDoc,
                      isTraining, learningSettings);
              Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                      + Benchmark.ANNOTS_TO_NLP_FEATURES, this,
                      benchmarkingFeatures);
              /** Open the normal NLP feature file. */
              inNLPFeatures =
                      new BomStrippingInputStreamReader(
                                      new FileInputStream(
                                              new File(
                                                      getApplicationTempDir(),
                                                      ConstantParameters.FILENAMEOFNLPFeaturesData)),
                                      "UTF-8");
              outFeatureVectors =
                      new BufferedWriter(
                              new OutputStreamWriter(
                                      new FileOutputStream(
                                              new File(
                                                      getApplicationTempDir(),
                                                      ConstantParameters.FILENAMEOFFeatureVectorDataApp)),
                                      "UTF-8"));
              startTime = Benchmark.startPoint();
              lightWeightApi.nlpfeatures2FVs(getApplicationTempDir(),
                      inNLPFeatures, outFeatureVectors, numDoc, isTraining,
                      learningSettings);
              inNLPFeatures.close();
              outFeatureVectors.flush();
              outFeatureVectors.close();
              Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                      + Benchmark.NLP_FEATURES_TO_FVS, this,
                      benchmarkingFeatures);
              // Applying th model
              startTime = Benchmark.startPoint();
              lightWeightApi.applyModelInJava(corpus, startDocIdApp,
                      endDocIdApp, classTypeOriginal, learningSettings,
                      getApplicationTempDir());
              Benchmark
                      .checkPoint(startTime, getBenchmarkId() + "."
                              + Benchmark.MODEL_APPLICATION, this,
                              benchmarkingFeatures);
              benchmarkingFeatures.remove("numDocs");
              // Update the datastore for the added annotations
            }
            break;
          case EVALUATION:
            if(LogService.minVerbosityLevel > LogService.MINIMUM) {
              System.out.println("** Evaluation mode started:");
            }
            LogService.logMessage("** Evaluation mode:", 1);
            evaluation =
                    new EvaluationBasedOnDocs(corpus, wdResults, inputASName);
            benchmarkingFeatures.put("numDocs", corpus.size());
            startTime = Benchmark.startPoint();
            String date_time_started = dateTimeStampFormat.format(startTime);
            evaluation.evaluation(learningSettings, lightWeightApi);
            // Save the current learning settings and the evaluation result
            // to an XML file in the same directory as the configuration file.
            // The name is made up from the name of the config file, the
            // value of EXPERIMENT-ID in the config file and the date and 
            // time of the evaluation run.
            if(getRunProtocolDir() != null) {
              //File configDir = learningSettings.configFile.getParentFile();
              String fileNameBase = 
                      learningSettings.configFile.getName().replaceAll("\\.xml$","");
              String date_time_ended = dateTimeStampFormat.format(new Date());
              String fileName = fileNameBase + 
                ( learningSettings.experimentId.isEmpty() ? 
                  "" : 
                  "_" + learningSettings.experimentId) + "_" +
                "evaluation_" + date_time_ended + ".xml";
              File outFile = new File(Files.fileFromURL(getRunProtocolDir()),fileName);
              FileWriter fow;
              PrintWriter log = null;
              try {
                fow = new FileWriter(outFile);
                log = new PrintWriter(fow);
              } catch (Exception ex) {
                System.err.println("Got an exception creating the writers: "+ex);
                ex.printStackTrace(System.err);
              } 
              if(log != null) {
                // TODO: we probably should do this with jdom methods instead ...
                log.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
                log.println("<ML-RUN>");
                log.println("<learningMode>EVALUATION</learningMode>");
                log.println("<corpus><![CDATA["+corpus.getName()+"]]></corpus>");
                log.println("<date-loaded>"+date_time_loaded.substring(0,8)+"</date-loaded>"+
                            "<time-loaded>"+date_time_loaded.substring(9)+"</time-loaded>");
                log.println("<date-started>"+date_time_started.substring(0,8)+"</date-started>"+
                            "<time-started>"+date_time_started.substring(9)+"</time-started>");
                log.println("<date-ended>"+date_time_ended.substring(0,8)+"</date-ended>"+
                            "<time-ended>"+date_time_ended.substring(9)+"</time-ended>");
                log.println("<overall>");
                log.println("  <correct>"+evaluation.macroMeasuresOfResults.correct+"</correct>");
                log.println("  <partialCorrect>"+evaluation.macroMeasuresOfResults.partialCor+"</partialCorrect>");
                log.println("  <spurious>"+evaluation.macroMeasuresOfResults.spurious+"</spurious>");
                log.println("  <missing>"+evaluation.macroMeasuresOfResults.missing+"</missing>");
                log.println("  <precision>"+evaluation.macroMeasuresOfResults.precision+"</precision>");
                log.println("  <recall>"+evaluation.macroMeasuresOfResults.recall+"</recall>");
                log.println("  <f1>"+evaluation.macroMeasuresOfResults.f1+"</f1>");
                log.println("  <precisionLenient>"+evaluation.macroMeasuresOfResults.precisionLenient+"</precisionLenient>");
                log.println("  <recallLenient>"+evaluation.macroMeasuresOfResults.recallLenient+"</recallLenient>");
                log.println("  <f1Lenient>"+evaluation.macroMeasuresOfResults.f1Lenient+"</f1Lenient>");
                log.println("</overall>");
                log.println("<per-run>");
                for(int i=0;i<learningSettings.evaluationconfig.kk;i++) {
                  log.println("  <run>");                  
                  log.println("    <run-nr>"+i+"</run-nr>");
                  EvaluationMeasuresComputation measuresPerRun = 
                          evaluation.macroMeasuresOfResultsPerRun.get(i);
                  log.println("    <correct>"+measuresPerRun.correct+"</correct>");
                  log.println("    <partialCorrect>"+measuresPerRun.partialCor+"</partialCorrect>");
                  log.println("    <spurious>"+measuresPerRun.spurious+"</spurious>");
                  log.println("    <missing>"+measuresPerRun.missing+"</missing>");
                  log.println("    <precision>"+measuresPerRun.precision+"</precision>");
                  log.println("    <recall>"+measuresPerRun.recall+"</recall>");
                  log.println("    <f1>"+measuresPerRun.f1+"</f1>");
                  log.println("    <precisionLenient>"+measuresPerRun.precisionLenient+"</precisionLenient>");
                  log.println("    <recallLenient>"+measuresPerRun.recallLenient+"</recallLenient>");
                  log.println("    <f1Lenient>"+measuresPerRun.f1Lenient+"</f1Lenient>");                  
                  log.println("  </run>");
                }
                log.println("</per-run>");
                log.println("<per-class>");
                Map<String,EvaluationMeasuresComputation> perLabel = evaluation.getMeasuresForEachLabel();
                for(String label : perLabel.keySet()) {
                  EvaluationMeasuresComputation oneLabel = perLabel.get(label);
                  log.println("  <class>");
                  log.println("    <class-label><![CDATA["+label+"]]></class-label>");                  
                  log.println("    <correct>"+oneLabel.correct+"</correct>");
                  log.println("    <partialCorrect>"+oneLabel.partialCor+"</partialCorrect>");
                  log.println("    <spurious>"+oneLabel.spurious+"</spurious>");
                  log.println("    <missing>"+oneLabel.missing+"</missing>");
                  log.println("    <precision>"+oneLabel.precision+"</precision>");
                  log.println("    <recall>"+oneLabel.recall+"</recall>");
                  log.println("    <f1>"+oneLabel.f1+"</f1>");
                  log.println("    <precisionLenient>"+oneLabel.precisionLenient+"</precisionLenient>");
                  log.println("    <recallLenient>"+oneLabel.recallLenient+"</recallLenient>");
                  log.println("    <f1Lenient>"+oneLabel.f1Lenient+"</f1Lenient>");
                  log.println("  </class>");
                }
                log.println("</per-class>");
                Format outputFormat = Format.getPrettyFormat();
                outputFormat.setLineSeparator("\n");
                outputFormat.setEncoding("UTF-8");
                String jdomDocString = new XMLOutputter().outputString(learningSettings.jdomDocSaved.getRootElement());
                log.println(jdomDocString);
                log.println("</ML-RUN>");
              } else {
                System.err.println("No writer to write to!");
              }
              log.close();
            }
            Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                    + Benchmark.EVALUATION, this, benchmarkingFeatures);
            benchmarkingFeatures.remove("numDocs");
            break;
          case MITRAINING:
            if(LogService.minVerbosityLevel > LogService.MINIMUM)
              System.out.println("** MITRAINING mode:");
            LogService.logMessage("** MITRAINING mode:", 1);
            isTraining = true;
            benchmarkingFeatures.put("numDocs", "" + numDoc);
            startTime = Benchmark.startPoint();
            /**
             * Need to write the NLP features into a temporary file, then copy
             * it into the NLP file.
             */
            BufferedWriter outNLPFeaturesTemp =
                    new BufferedWriter(
                            new OutputStreamWriter(
                                    new FileOutputStream(
                                            new File(
                                                    wdResults,
                                                    ConstantParameters.FILENAMEOFNLPFeaturesDataTemp)),
                                    "UTF-8"));
            for(int i = 0; i < numDoc; ++i) {
              lightWeightApi.annotations2NLPFeatures(corpus.get(i),
                      i, outNLPFeaturesTemp, isTraining, learningSettings);
            }
            outNLPFeaturesTemp.flush();
            outNLPFeaturesTemp.close();
            lightWeightApi.finishFVs(wdResults, numDoc, isTraining,
                    learningSettings);
            Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                    + Benchmark.ANNOTS_TO_NLP_FEATURES, this,
                    benchmarkingFeatures);
            lightWeightApi.copyNLPFeat2NormalFile(wdResults,
                    miLearningInfor.miNumDocsTraining);
            /**
             * Use the temp NLP feature file instead of the normal one for
             * MI-training.
             */
            inNLPFeatures =
                    new BomStrippingInputStreamReader(
                                    new FileInputStream(
                                            new File(
                                                    wdResults,
                                                    ConstantParameters.FILENAMEOFNLPFeaturesDataTemp)),
                                    "UTF-8");
            outFeatureVectors =
                    new BufferedWriter(
                            new OutputStreamWriter(
                                    new FileOutputStream(
                                            new File(
                                                    wdResults,
                                                    ConstantParameters.FILENAMEOFFeatureVectorData),
                                            true), "UTF-8"));
            startTime = Benchmark.startPoint();
            lightWeightApi.nlpfeatures2FVs(wdResults, inNLPFeatures,
                    outFeatureVectors, numDoc, isTraining, learningSettings);
            inNLPFeatures.close();
            outFeatureVectors.flush();
            outFeatureVectors.close();
            Benchmark
                    .checkPoint(startTime, getBenchmarkId() + "."
                            + Benchmark.NLP_FEATURES_TO_FVS, this,
                            benchmarkingFeatures);
            System.gc(); // to make effort to delete the files.
            miLearningInfor.miNumDocsTraining += numDoc;
            miLearningInfor.miNumDocsFromLast += numDoc;
            if(miLearningInfor.miNumDocsFromLast >= learningSettings.miDocInterval) {
              // Start learning
              // if fitering the training data
              if(learningSettings.fiteringTrainingData
                      && learningSettings.filteringRatio > 0.0) {
                benchmarkingFeatures.put("numDocs",
                        miLearningInfor.miNumDocsTraining + "");
                startTime = Benchmark.startPoint();
                lightWeightApi.FilteringNegativeInstsInJava(
                        miLearningInfor.miNumDocsTraining, learningSettings);
                Benchmark.checkPoint(startTime, getBenchmarkId() + "."
                        + Benchmark.FILTERING, this, benchmarkingFeatures);
              }
              startTime = Benchmark.startPoint();
              // using the java code for training
              lightWeightApi.trainingJava(miLearningInfor.miNumDocsTraining,
                      learningSettings);
              Benchmark.checkPoint(startTime, getBenchmarkId() + "." + "."
                      + Benchmark.MODEL_TRAINING, this, benchmarkingFeatures);
              benchmarkingFeatures.remove("numDocs");
              // Reset the num from last training as 0
              miLearningInfor.miNumDocsFromLast = 0;
            }
            File miLeFile =
                    new File(wdResults,
                            ConstantParameters.FILENAMEOFMILearningInfor);
            miLearningInfor.writeDataIntoFile(miLeFile);
            break;
          default:
            throw new GateException("The learning mode is not defined!");
        }
        LogService.logMessage("This learning session finished!.", 1);
        // LogService.close();
      } catch(IOException e) {
        e.printStackTrace();
      } catch(GateException e) {
        e.printStackTrace();
      }
      // reset the parentBenchmarkID
      if(oldLightWeightApiParentId != null) {
        lightWeightApi.setParentBenchmarkId(oldLightWeightApiParentId);
      }
      if(LogService.minVerbosityLevel > 0)
        System.out.println("This learning session finished!");
    } // end of learning (position=corpus.size()-1)
  }

  /** Print out the information for featureData only option. */
  private void displayDataFilesInformation() {
    StringBuffer logMessage = new StringBuffer();
    logMessage.append("NLP features for all the documents are in the file"
            + wdResults.getAbsolutePath() + File.separator
            + ConstantParameters.FILENAMEOFNLPFeaturesData + "\n");
    logMessage.append("Feature vectors in sparse format are in the file"
            + wdResults.getAbsolutePath() + File.separator
            + ConstantParameters.FILENAMEOFFeatureVectorData + "\n");
    logMessage.append("Label list is in the file" + wdResults.getAbsolutePath()
            + File.separator + ConstantParameters.FILENAMEOFLabelList + "\n");
    logMessage.append("NLP features list is in the file"
            + wdResults.getAbsolutePath() + File.separator
            + ConstantParameters.FILENAMEOFNLPFeatureList + "\n");
    logMessage
            .append("The statistics of entity length for each class is in the file"
                    + wdResults.getAbsolutePath()
                    + File.separator
                    + ConstantParameters.FILENAMEOFChunkLenStats + "\n");
    System.out.println(logMessage.toString());
    LogService.logMessage(logMessage.toString(), 1);
  }

  /**
   * Determine the directory used to store temporary files when running in
   * APPLICATION mode.
   */
  protected File getApplicationTempDir() {
    if(applicationTempDir == null) {
      LogService.logMessage(
              "Creating temp directory for application-mode files", 1);
      try {
        applicationTempDir = File.createTempFile("appl", ".tmp", wdResults);
        applicationTempDir.delete();
        if(!applicationTempDir.mkdir()) { throw new IOException(
                "Error creating directory " + applicationTempDir); }
      } catch(IOException ioe) {
        LogService.logMessage("Could not create temporary directory for "
                + "application-mode temp files, using " + wdResults, 1);
        applicationTempDir = wdResults;
      }
    }
    return applicationTempDir;
  }

  /**
   * Delete the temporary directory for application-mode temp files when this
   * resource is deleted.
   */
  public void cleanup() {
    if(applicationTempDir != null && !applicationTempDir.equals(wdResults)) {
      deleteRecursively(applicationTempDir);
      applicationTempDir = null;
    }
  }

  /**
   * Delete a file or directory. If the argument is a directory, delete its
   * contents first, then remove the directory itself.
   */
  private void deleteRecursively(File fileOrDir) {
    if(fileOrDir.isDirectory()) {
      for(File f : fileOrDir.listFiles()) {
        deleteRecursively(f);
      }
    }
    if(!fileOrDir.delete()) {
      LogService.logMessage("Couldn't delete "
              + (fileOrDir.isDirectory() ? "directory " : "file ") + fileOrDir,
              1);
    }
  }

  public void setConfigFileURL(URL workingDirectory) {
    this.configFileURL = workingDirectory;
  }

  public URL getConfigFileURL() {
    return this.configFileURL;
  }
  
  public void setInputASName(String iasn) {
    this.inputASName = iasn;
  }

  public String getInputASName() {
    return this.inputASName;
  }

  public void setOutputASName(String iasn) {
    this.outputASName = iasn;
  }

  public String getOutputASName() {
    return this.outputASName;
  }

  public RunMode getLearningMode() {
    return this.learningMode;
  }

  public void setLearningMode(RunMode learningM) {
    this.learningMode = learningM;
  }

  public EvaluationBasedOnDocs getEvaluation() {
    return evaluation;
  }

  public EvaluationBasedOnDocs setEvaluation(EvaluationBasedOnDocs eval) {
    return this.evaluation = eval;
  }

  public void setRunProtocolDir(URL dir) {    
    if(dir != null && !dir.getProtocol().startsWith("file")) {
      throw new GateRuntimeException("runProtocolDir mut be a file URL");
    }
    runProtocolDir = dir;
  }
  public URL getRunProtocolDir() {
    return runProtocolDir;
  }
  private URL runProtocolDir = null;
  
  // /////// Benchmarkable ////////////////
  private String parentBenchmarkID;

  private String benchmarkID;

  /**
   * Returns the benchmark ID of the parent of this resource.
   *
   * @return
   */
  public String getParentBenchmarkId() {
    return this.parentBenchmarkID;
  }

  /**
   * Returns the benchmark ID of this resource.
   *
   * @return
   */
  public String getBenchmarkId() {
    if(this.benchmarkID == null) {
      benchmarkID = getName().replaceAll("[ ]+", "_");
      ;
    }
    return this.benchmarkID;
  }

  /**
   * Given an ID of the parent resource, this method is responsible for
   * producing the Benchmark ID, unique to this resource.
   *
   * @param parentID
   */
  public void createBenchmarkId(String parentID) {
    parentBenchmarkID = parentID;
    benchmarkID = Benchmark.createBenchmarkId(getName(), parentID);
  }

  /**
   * This method sets the benchmarkID for this resource.
   *
   * @param benchmarkID
   */
  public void setParentBenchmarkId(String benchmarkID) {
    parentBenchmarkID = benchmarkID;
  }

  /**
   * Returns the logger object being used by this resource.
   *
   * @return
   */
  public Logger getLogger() {
    return Benchmark.logger;
  }

  public void setBenchmarkId(String arg0) {
    // stub
  }
}