/*
* LearningAPIMain.java
*
* Yaoyong Li 22/03/2007
*
* $Id: LearningAPIMain.java, v 1.0 2007-03-22 12:58:16 +0000 yaoyong $
*/
package gate.learning;
import gate.Document;
import gate.Factory;
import gate.ProcessingResource;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.util.Benchmark;
import gate.util.Benchmarkable;
import gate.util.GateException;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Date;
import java.util.HashMap;
import org.apache.log4j.Logger;
/**
 * The main object of the ML API. It does initialisation, reads parameter
 * values from the GUI, and runs the selected learning mode. It can also be
 * called from Java code, as an API (a GATE class), to use this learning API.
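 * <p>
 * A minimal usage sketch from Java code (the configuration file path is
 * purely illustrative, and the controller wiring is elided):
 *
 * <pre>
 * FeatureMap params = Factory.newFeatureMap();
 * params.put("configFileURL", new URL("file:/path/to/config.xml"));
 * LearningAPIMain learningPR = (LearningAPIMain)Factory
 *     .createResource("gate.learning.LearningAPIMain", params);
 * learningPR.setLearningMode(RunMode.TRAINING);
 * // add the PR to a SerialAnalyserController and run it over a corpus
 * </pre>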
*/
@SuppressWarnings("serial")
public class LearningAPIMain extends AbstractLanguageAnalyser
implements
ProcessingResource,
Benchmarkable {
  /** The URL of the configuration file. The model(s) are saved in a
   *  subdirectory of its parent directory. */
private URL configFileURL;
/**
 * Name of the AnnotationSet that contains the annotations specified in the
 * DATASET element of the configuration file.
*/
private String inputASName;
/**
 * Name of the AnnotationSet for the annotations resulting from applying the
 * models.
*/
private String outputASName;
/**
 * Run-time parameter learningMode, selecting the mode to run, for example
 * training, application or evaluation.
*/
private RunMode learningMode;
private RunMode learningModeAppl;
private RunMode learningModeMiTraining;
private RunMode learningModeVIEWSVMMODEL;
private RunMode learningModeSelectingDocs;
/** Learning settings specified in the configuration file. */
private LearningEngineSettings learningSettings;
/**
* The lightweight learning object for getting the features, training and
* application.
*/
LightWeightLearningApi lightWeightApi = null;
  /** The file for the NLP learning log. */
private File logFile;
/** Used by lightWeightApi, specifying training or application. */
private boolean isTraining;
  /** Subdirectory for storing the data files produced by the learning API. */
private File wdResults = null;
/** Doing evaluation. */
private EvaluationBasedOnDocs evaluation;
/** The MI learning information object. */
MiLearningInformation miLearningInfor = null;
/** The three counters for batch application. */
int startDocIdApp;
int endDocIdApp;
int maxNumApp;
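  // Batch application semantics (see execute()): endDocIdApp is incremented
  // once per document; when endDocIdApp - startDocIdApp reaches maxNumApp,
  // the documents [startDocIdApp, endDocIdApp) are processed as one batch
  // and startDocIdApp is set to endDocIdApp. E.g. with maxNumApp = 3,
  // documents 0, 1 and 2 are applied together while the third document is
  // being processed.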
/** Trivial constructor. */
public LearningAPIMain() {
// do nothing
}
// featureMap that is used for exporting log messages
protected java.util.Map benchmarkingFeatures = new HashMap();
/** Initialise this resource, and return it. */
public gate.Resource init() throws ResourceInstantiationException {
fireStatusChanged("Checking and reading learning settings!");
    // here all parameters need to be checked
// check for the model storage directory
    if(configFileURL == null)
      throw new ResourceInstantiationException(
        "configFileURL is required to locate the configuration file and the learned model, and cannot be null");
    // it is not null, check it is a file: URL
    if(!"file".equals(configFileURL.getProtocol())) { throw new ResourceInstantiationException(
      "configFileURL must be a file: URL"); }
    // Get the working directory in which the configuration file resides.
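    // e.g. configFileURL = file:/home/user/learning/config.xml gives
    // wd = /home/user/learning (path purely illustrative)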
File wd = null;
try {
wd = new File(configFileURL.toURI()).getParentFile();
}
catch(URISyntaxException use) {
wd = new File(configFileURL.getFile()).getParentFile();
}
// it must be a directory
    if(!wd.isDirectory()) { throw new ResourceInstantiationException(wd
      + " must be a directory"); }
if(LogService.minVerbosityLevel > 0)
System.out.println("Configuration File=" + configFileURL.toString());
try {
      if(!new File(configFileURL.toURI()).exists()) {
        throw new ResourceInstantiationException(
          "Error: the specified configuration file does not exist!"); }
}
catch(URISyntaxException e1) {
e1.printStackTrace();
throw new ResourceInstantiationException(e1);
}
miLearningInfor = new MiLearningInformation();
try {
      // Load the learning settings by reading the configuration file
learningSettings =
LearningEngineSettings.loadLearningSettingsFromFile(configFileURL);
}
catch(Exception e) {
throw new ResourceInstantiationException(e);
}
try {
      // Create the sub-directory of the working directory where the data
      // files will be stored
if(LogService.minVerbosityLevel > 0) {
System.out.println("\n\n*************************");
System.out.println("A new session for NLP learning is starting.\n");
}
wdResults =
new File(wd, gate.learning.ConstantParameters.SUBDIRFORRESULTS);
wdResults.mkdir();
logFile =
new File(new File(wd, ConstantParameters.SUBDIRFORRESULTS),
ConstantParameters.FILENAMEOFLOGFILE);
LogService.init(logFile, true, learningSettings.verbosityLogService);
StringBuffer logMessage = new StringBuffer();
logMessage.append("\n\n*************************\n");
logMessage.append("A new session for NLP learning is starting.\n");
// adding WorkingDirectory parameter in the benchmarkingFeatures
benchmarkingFeatures.put("workingDirectory", wd.getAbsolutePath());
logMessage.append("The initiliased time of NLP learning: "
+ new Date().toString() + "\n");
logMessage.append("Working directory: " + wd.getAbsolutePath() + "\n");
logMessage.append("The feature files and models are saved at: "
+ wdResults.getAbsolutePath() + "\n");
// Call the lightWeightLearningApi
lightWeightApi = new LightWeightLearningApi(wd);
// more initialisation
lightWeightApi.furtherInit(wdResults, learningSettings);
      // optionally add learner details to the benchmarkingFeatures:
// benchmarkingFeatures.put("LearnerName",
// learningSettings.learnerSettings.getLearnerName());
// benchmarkingFeatures.put("LearnerNickName",
// learningSettings.learnerSettings.getLearnerNickName());
// benchmarkingFeatures.put("SurroundMode", learningSettings.surround);
logMessage.append("Learner name: "
+ learningSettings.learnerSettings.getLearnerName() + "\n");
logMessage.append("Learner nick name: "
+ learningSettings.learnerSettings.getLearnerNickName() + "\n");
logMessage.append("Learner parameter settings: "
+ learningSettings.learnerSettings.learnerName + "\n");
logMessage.append("Surroud mode (or chunk learning): "
+ learningSettings.surround);
LogService.logMessage(logMessage.toString(), 1);
//LogService.close();
}
catch(Exception e) {
throw new ResourceInstantiationException(e);
}
learningModeAppl = RunMode.APPLICATION;
maxNumApp = learningSettings.docNumIntevalApp;
learningModeMiTraining = RunMode.MITRAINING;
learningModeVIEWSVMMODEL = RunMode.VIEWPRIMALFORMMODELS;
learningModeSelectingDocs = RunMode.RankingDocsForAL;
fireProcessFinished();
return this;
} // init()
  /**
   * Run the resource. The controller calls this method once per document;
   * the document's position in the corpus determines when batch application
   * and the end-of-corpus processing below are triggered.
   *
   * @throws ExecutionException
   */
public void execute() throws ExecutionException {
// mode in which the PR is executed
benchmarkingFeatures.put("learningMode", learningMode);
if(learningMode.equals(learningModeVIEWSVMMODEL)) {
if(corpus == null || corpus.size() == 0 || corpus.indexOf(document) == 0)
lightWeightApi.viewSVMmodelsInNLPFeatures(new File(wdResults,
ConstantParameters.FILENAMEOFModels), learningSettings);
return;
}
if(learningMode.equals(learningModeSelectingDocs)) {
      // for ordering and selecting the documents for active learning
if(corpus == null || corpus.size() == 0 || corpus.indexOf(document) == 0) {
// ranking the documents
lightWeightApi.orderDocsWithModels(wdResults, learningSettings);
// selecting the document
// lightWeightApi.selectDocForAL()
}
return;
}
// now we need to see if the corpus is provided
if(corpus == null)
throw new ExecutionException("Provided corpus is null!");
if(corpus.size() == 0)
throw new ExecutionException("No Document found in corpus!");
// set benchmark ID on the lightWeightApi
String oldLightWeightApiParentId = null;
if(lightWeightApi instanceof Benchmarkable) {
oldLightWeightApiParentId = lightWeightApi.getParentBenchmarkId();
lightWeightApi.createBenchmarkId(getBenchmarkId());
}
// first, get the NLP features from the documents, according to the
// feature types specified in DataSetDefinition file
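    // The processing below follows one pipeline in every mode:
    //   1. annotations2NLPFeatures: write each document's annotations as NLP
    //      features to a data file;
    //   2. finishFVs: finalise the NLP feature data;
    //   3. nlpfeatures2FVs: convert the NLP features into sparse feature
    //      vectors;
    //   4. trainingJava/applyModelInJava: learn from, or apply the models
    //      to, those vectors.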
int positionDoc = corpus.indexOf(document);
// first document in the corpus
if(positionDoc == 0) {
lightWeightApi.inputASName = inputASName;
lightWeightApi.outputASName = outputASName;
      // Obtain the MI learning information from the previous learning session.
if(learningMode.equals(this.learningModeMiTraining)) {
miLearningInfor = new MiLearningInformation();
File miLeFile =
new File(wdResults, ConstantParameters.FILENAMEOFMILearningInfor);
long startTime = Benchmark.startPoint();
benchmarkingFeatures.put("miLearningInformationFile", miLeFile
.getAbsolutePath());
miLearningInfor.readDataFromFile(miLeFile);
Benchmark.checkPoint(startTime, getBenchmarkId() + "."
+ Benchmark.READING_LEARNING_INFO, this, benchmarkingFeatures);
benchmarkingFeatures.remove("miLearningInformationFile");
}
      // Reset the counters for batch application.
startDocIdApp = 0;
endDocIdApp = 0;
if(LogService.minVerbosityLevel > 0)
System.out.println("Pre-processing the " + corpus.size()
+ " documents...");
try {
// PrintWriter logFileIn = new PrintWriter(new FileWriter(logFile,
// true));
LogService.init(logFile, true, learningSettings.verbosityLogService);
LogService.logMessage("\n*** A new run starts.", 1);
LogService.logMessage(
"\nThe execution time (pre-processing the first document): "
+ new Date().toString(), 1);
if(LogService.minVerbosityLevel > 0) {
System.out.println("Learning starts.");
System.out
.println("For the information about this learning see the log file "
+ wdResults.getAbsolutePath()
+ File.separator
+ ConstantParameters.FILENAMEOFLOGFILE);
System.out.println("The number of threads used is "+learningSettings.numThreadUsed);
}
//LogService.close();
// logFileIn.println("EvaluationMode: " + evaluationMode);
// logFileIn.println("TrainingMode: " + trainingMode);
// logFileIn.println("InputAS: " + inputASName);
}
catch(IOException e) {
e.printStackTrace();
}
}
// Apply the model to a bunch of documents
if(learningMode.equals(learningModeAppl)) {
++endDocIdApp;
if(endDocIdApp - startDocIdApp == maxNumApp) {
try {
// first checking if the model file is available or not
String modelFileName =
wdResults.toString() + File.separator
+ ConstantParameters.FILENAMEOFModels;
if(!new File(modelFileName).exists()) {
            System.out
              .println("Warning: the model is not available at the moment!");
return;
}
BufferedWriter outNLPFeatures = null;
BufferedReader inNLPFeatures = null;
BufferedWriter outFeatureVectors = null;
// EvaluationBasedOnDocs.emptyDatafile(wdResults, false);
          if(LogService.minVerbosityLevel > 0)
            System.out.println("** " + "Application mode for documents from "
              + startDocIdApp + " to " + endDocIdApp + " (exclusive):");
          LogService.logMessage("** Application mode for documents from "
            + startDocIdApp + " to " + endDocIdApp + " (exclusive):", 1);
isTraining = false;
String classTypeOriginal =
learningSettings.datasetDefinition.getClassAttribute().getType();
outNLPFeatures =
new BufferedWriter(new OutputStreamWriter(
new FileOutputStream(new File(wdResults,
ConstantParameters.FILENAMEOFNLPFeaturesData)), "UTF-8"));
int numDoc;
numDoc = endDocIdApp - startDocIdApp;
long startTime = Benchmark.startPoint();
benchmarkingFeatures.put("numDocs", "" + numDoc);
for(int i = startDocIdApp; i < endDocIdApp; ++i) {
Document toProcess = (Document)corpus.get(i);
lightWeightApi.annotations2NLPFeatures(toProcess,
i - startDocIdApp, outNLPFeatures, isTraining, learningSettings);
if(toProcess.getDataStore() != null
&& corpus.getDataStore() != null) {// (isDatastore)
corpus.getDataStore().sync(corpus);
Factory.deleteResource(toProcess);
}
}
outNLPFeatures.flush();
outNLPFeatures.close();
lightWeightApi.finishFVs(wdResults, numDoc, isTraining,
learningSettings);
Benchmark.checkPoint(startTime, getBenchmarkId() + "."
+ Benchmark.ANNOTS_TO_NLP_FEATURES, this, benchmarkingFeatures);
startTime = Benchmark.startPoint();
          // Open the normal NLP feature file.
inNLPFeatures =
new BufferedReader(new InputStreamReader(
new FileInputStream(new File(wdResults,
ConstantParameters.FILENAMEOFNLPFeaturesData)), "UTF-8"));
outFeatureVectors =
new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
new File(wdResults,
ConstantParameters.FILENAMEOFFeatureVectorDataApp)), "UTF-8"));
lightWeightApi.nlpfeatures2FVs(wdResults, inNLPFeatures,
outFeatureVectors, numDoc, isTraining, learningSettings);
inNLPFeatures.close();
outFeatureVectors.flush();
outFeatureVectors.close();
Benchmark.checkPoint(startTime, getBenchmarkId() + "."
+ Benchmark.NLP_FEATURES_TO_FVS, this, benchmarkingFeatures);
          // Apply the model
String fvFileName =
wdResults.toString() + File.separator
+ ConstantParameters.FILENAMEOFFeatureVectorDataApp;
startTime = Benchmark.startPoint();
lightWeightApi.applyModelInJava(corpus, startDocIdApp, endDocIdApp,
classTypeOriginal, learningSettings, fvFileName);
Benchmark.checkPoint(startTime, getBenchmarkId() + "."
+ Benchmark.MODEL_APPLICATION, this, benchmarkingFeatures);
benchmarkingFeatures.remove("numDocs");
startDocIdApp = endDocIdApp;
}
catch(IOException e) {
e.printStackTrace();
}
catch(GateException e) {
e.printStackTrace();
}
}
}
// we've reached the last document
if(positionDoc == corpus.size() - 1) {
// first select the training data and test data according to the
// learning setting
// set the inputASName in here, because it is a runtime parameter
int numDoc = corpus.size();
try {
LogService.init(logFile, true, learningSettings.verbosityLogService);
LogService.logMessage("The learning start at " + new Date().toString(),
1);
LogService.logMessage("The number of documents in dataset: " + numDoc,
1);
// Open the NLP feature file for storing the NLP feature vectors
BufferedWriter outNLPFeatures = null;
BufferedReader inNLPFeatures = null;
BufferedWriter outFeatureVectors = null;
        // if only the feature data are needed
        switch(learningMode){
          case ProduceFeatureFilesOnly:
            // only produce the feature files, without training a model
EvaluationBasedOnDocs.emptyDatafile(wdResults, true);
if(LogService.minVerbosityLevel > 0)
System.out.println("** Producing the feature files only!");
LogService.logMessage("** Producing the feature files only!", 1);
long startTime = Benchmark.startPoint();
benchmarkingFeatures.put("numDocs", numDoc);
isTraining = true;
outNLPFeatures =
new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
new File(wdResults,
ConstantParameters.FILENAMEOFNLPFeaturesData)), "UTF-8"));
for(int i = 0; i < numDoc; ++i) {
Document toProcess = (Document)corpus.get(i);
lightWeightApi.annotations2NLPFeatures(toProcess, i,
outNLPFeatures, isTraining, learningSettings);
if(toProcess.getDataStore() != null && corpus.getDataStore() != null)
Factory.deleteResource(toProcess);
}
outNLPFeatures.flush();
outNLPFeatures.close();
lightWeightApi.finishFVs(wdResults, numDoc, isTraining,
learningSettings);
Benchmark.checkPoint(startTime, getBenchmarkId() + "."
+ Benchmark.ANNOTS_TO_NLP_FEATURES, this, benchmarkingFeatures);
            // Open the normal NLP feature file.
inNLPFeatures =
new BufferedReader(new InputStreamReader(new FileInputStream(
new File(wdResults,
ConstantParameters.FILENAMEOFNLPFeaturesData)), "UTF-8"));
outFeatureVectors =
new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
new File(wdResults,
ConstantParameters.FILENAMEOFFeatureVectorData)), "UTF-8"));
startTime = Benchmark.startPoint();
lightWeightApi.nlpfeatures2FVs(wdResults, inNLPFeatures,
outFeatureVectors, numDoc, isTraining, learningSettings);
inNLPFeatures.close();
outFeatureVectors.flush();
outFeatureVectors.close();
Benchmark.checkPoint(startTime, getBenchmarkId() + "."
+ Benchmark.NLP_FEATURES_TO_FVS, this, benchmarkingFeatures);
            // produce the n-gram language model from the feature list
if(LogService.minVerbosityLevel > 0)
System.out
.println("Write the language model in N-grams into the file "
+ ConstantParameters.FILENAMEOFNgramLM + "!");
LogService.logMessage(
"Write the language model in N-grams into the file "
+ ConstantParameters.FILENAMEOFNgramLM + "!", 1);
if(learningSettings.datasetDefinition.getNgrams().size() >= 1) {
startTime = Benchmark.startPoint();
lightWeightApi.featureList2LM(wdResults,
((Ngram)learningSettings.datasetDefinition.getNgrams().get(0))
.getNumber());
Benchmark.checkPoint(startTime, getBenchmarkId() + "."
+ Benchmark.WRITING_NGRAM_MODEL, this, benchmarkingFeatures);
// produce the term-frequency matrix
if(LogService.minVerbosityLevel > 0)
System.out
.println("Write the term-document statistics into the file "
+ ConstantParameters.FILENAMEOFTermFreqMatrix + "!");
LogService.logMessage(
"Write the term-document statistics into the file "
+ ConstantParameters.FILENAMEOFTermFreqMatrix + "!", 1);
startTime = Benchmark.startPoint();
lightWeightApi.termfrequenceMatrix(wdResults, numDoc);
Benchmark.checkPoint(startTime, getBenchmarkId() + "."
+ Benchmark.TERM_DOC_STATS, this, benchmarkingFeatures);
}
else {
              System.out
                .println("!! Warning: cannot produce N-gram data because there is no Ngram "
                  + "definition in the configuration file!");
}
benchmarkingFeatures.remove("numDocs");
// Write the name of documents and total number of them into a file
BufferedWriter outDocsName =
new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
new File(wdResults, ConstantParameters.FILENAMEOFDocsName)),
"UTF-8"));
outDocsName.append("##totalDocs=" + numDoc);
outDocsName.newLine();
for(int i = 0; i < numDoc; ++i) {
String docN = ((Document)corpus.get(i)).getName();
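              // strip the trailing "_..." suffix (e.g. the unique suffix
              // that GATE appends to document names)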
if(docN.contains("_"))
docN = docN.substring(0, docN.lastIndexOf("_"));
outDocsName.append(docN);
outDocsName.newLine();
}
outDocsName.flush();
outDocsName.close();
            // Create the file for storing the names of selected documents,
            // if it doesn't exist.
File selectedFile =
new File(wdResults, ConstantParameters.FILENAMEOFSelectedDOCForAL);
if(!selectedFile.exists()) selectedFile.createNewFile();
if(LogService.minVerbosityLevel > 0) displayDataFilesInformation();
break;
case TRAINING:
            // timing variables, only used when LogService.DEBUG > 1
            Long tm1 = 0L, tm2 = 0L, tm3 = 0L;
if(LogService.DEBUG > 1) {
tm1 = new Date().getTime();
}
            // empty the data files
            EvaluationBasedOnDocs.emptyDatafile(wdResults, true);
if(LogService.minVerbosityLevel > 0)
System.out.println("** Training mode:");
LogService.logMessage("** Training mode:", 1);
startTime = Benchmark.startPoint();
benchmarkingFeatures.put("numDocs", "" + numDoc);
isTraining = true;
outNLPFeatures =
new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
new File(wdResults,
ConstantParameters.FILENAMEOFNLPFeaturesData)), "UTF-8"));
for(int i = 0; i < numDoc; ++i) {
Document toProcess = (Document)corpus.get(i);
lightWeightApi.annotations2NLPFeatures(toProcess, i,
outNLPFeatures, isTraining, learningSettings);
if(toProcess.getDataStore() != null && corpus.getDataStore() != null)
Factory.deleteResource(toProcess);
}
outNLPFeatures.flush();
outNLPFeatures.close();
lightWeightApi.finishFVs(wdResults, numDoc, isTraining,
learningSettings);
Benchmark.checkPoint(startTime, getBenchmarkId() + "."
+ Benchmark.ANNOTS_TO_NLP_FEATURES, this, benchmarkingFeatures);
if(LogService.DEBUG > 1) {
tm2 = new Date().getTime();
tm3 = tm2 - tm1;
tm3 /= 1000;
System.out.println("time for NLP features: " + tm3);
}
            // Open the normal NLP feature file.
inNLPFeatures =
new BufferedReader(new InputStreamReader(new FileInputStream(
new File(wdResults,
ConstantParameters.FILENAMEOFNLPFeaturesData)), "UTF-8"));
outFeatureVectors =
new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
new File(wdResults,
ConstantParameters.FILENAMEOFFeatureVectorData)), "UTF-8"));
startTime = Benchmark.startPoint();
lightWeightApi.nlpfeatures2FVs(wdResults, inNLPFeatures,
outFeatureVectors, numDoc, isTraining, learningSettings);
inNLPFeatures.close();
outFeatureVectors.flush();
outFeatureVectors.close();
Benchmark.checkPoint(startTime, getBenchmarkId() + "."
+ Benchmark.NLP_FEATURES_TO_FVS, this, benchmarkingFeatures);
if(LogService.DEBUG > 1) {
tm1 = new Date().getTime();
tm3 = tm1 - tm2;
tm3 /= 1000;
System.out.println("time for fv: " + tm3);
}
            // if filtering the training data
if(learningSettings.fiteringTrainingData
&& learningSettings.filteringRatio > 0.0) {
startTime = Benchmark.startPoint();
lightWeightApi.FilteringNegativeInstsInJava(corpus.size(),
learningSettings);
Benchmark.checkPoint(startTime, getBenchmarkId() + "."
+ Benchmark.FILTERING, this, benchmarkingFeatures);
}
if(LogService.DEBUG > 1) {
tm2 = new Date().getTime();
tm3 = tm2 - tm1;
tm3 /= 1000;
System.out.println("time for filtering: " + tm3);
}
startTime = Benchmark.startPoint();
// using the java code for training
lightWeightApi.trainingJava(corpus.size(), learningSettings);
Benchmark.checkPoint(startTime, getBenchmarkId() + "."
+ Benchmark.MODEL_TRAINING, this, benchmarkingFeatures);
benchmarkingFeatures.remove("numDocs");
if(LogService.DEBUG > 1) {
tm1 = new Date().getTime();
tm3 = tm1 - tm2;
tm3 /= 1000;
System.out.println("time for NLP training: " + tm3);
}
break;
case APPLICATION:
// first checking if the model file is available or not
String modelFileName =
wdResults.toString() + File.separator
+ ConstantParameters.FILENAMEOFModels;
if(!new File(modelFileName).exists()) {
              System.out
                .println("Warning: the model is not available at the moment!");
return;
}
if(endDocIdApp > startDocIdApp) {
              if(LogService.minVerbosityLevel > 0)
                System.out.println("** "
                  + "Application mode for documents from " + startDocIdApp
                  + " to " + endDocIdApp + " (exclusive):");
              LogService.logMessage("** Application mode for documents from "
                + startDocIdApp + " to " + endDocIdApp + " (exclusive):", 1);
isTraining = false;
String classTypeOriginal =
learningSettings.datasetDefinition.getClassAttribute()
.getType();
outNLPFeatures =
new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
new File(wdResults,
ConstantParameters.FILENAMEOFNLPFeaturesData)), "UTF-8"));
numDoc = endDocIdApp - startDocIdApp;
benchmarkingFeatures.put("numDocs", "" + numDoc);
startTime = Benchmark.startPoint();
for(int i = startDocIdApp; i < endDocIdApp; ++i) {
Document toProcess = (Document)corpus.get(i);
lightWeightApi
.annotations2NLPFeatures(toProcess, i - startDocIdApp,
outNLPFeatures, isTraining, learningSettings);
if(toProcess.getDataStore() != null
&& corpus.getDataStore() != null) {// (isDatastore)
Factory.deleteResource(toProcess);
corpus.getDataStore().sync(corpus);
}
}
outNLPFeatures.flush();
outNLPFeatures.close();
lightWeightApi.finishFVs(wdResults, numDoc, isTraining,
learningSettings);
Benchmark.checkPoint(startTime, getBenchmarkId() + "."
+ Benchmark.ANNOTS_TO_NLP_FEATURES, this, benchmarkingFeatures);
              // Open the normal NLP feature file.
inNLPFeatures =
new BufferedReader(new InputStreamReader(new FileInputStream(
new File(wdResults,
ConstantParameters.FILENAMEOFNLPFeaturesData)), "UTF-8"));
outFeatureVectors =
new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
new File(wdResults,
ConstantParameters.FILENAMEOFFeatureVectorDataApp)),
"UTF-8"));
startTime = Benchmark.startPoint();
lightWeightApi.nlpfeatures2FVs(wdResults, inNLPFeatures,
outFeatureVectors, numDoc, isTraining, learningSettings);
inNLPFeatures.close();
outFeatureVectors.flush();
outFeatureVectors.close();
Benchmark.checkPoint(startTime, getBenchmarkId() + "."
+ Benchmark.NLP_FEATURES_TO_FVS, this, benchmarkingFeatures);
              // Apply the model
String fvFileName =
wdResults.toString() + File.separator
+ ConstantParameters.FILENAMEOFFeatureVectorDataApp;
startTime = Benchmark.startPoint();
lightWeightApi.applyModelInJava(corpus, startDocIdApp,
endDocIdApp, classTypeOriginal, learningSettings, fvFileName);
Benchmark.checkPoint(startTime, getBenchmarkId() + "."
+ Benchmark.MODEL_APPLICATION, this, benchmarkingFeatures);
benchmarkingFeatures.remove("numDocs");
// Update the datastore for the added annotations
}
break;
case EVALUATION:
if(LogService.minVerbosityLevel > 0)
System.out.println("** Evaluation mode:");
LogService.logMessage("** Evaluation mode:", 1);
evaluation =
new EvaluationBasedOnDocs(corpus, wdResults, inputASName);
benchmarkingFeatures.put("numDocs", corpus.size());
startTime = Benchmark.startPoint();
evaluation.evaluation(learningSettings, lightWeightApi);
Benchmark.checkPoint(startTime, getBenchmarkId() + "."
+ Benchmark.EVALUATION, this, benchmarkingFeatures);
benchmarkingFeatures.remove("numDocs");
break;
case MITRAINING:
if(LogService.minVerbosityLevel > 0)
System.out.println("** MITRAINING mode:");
LogService.logMessage("** MITRAINING mode:", 1);
isTraining = true;
benchmarkingFeatures.put("numDocs", "" + numDoc);
startTime = Benchmark.startPoint();
            // Need to write the NLP features into a temporary file first,
            // then copy them into the normal NLP feature file.
BufferedWriter outNLPFeaturesTemp =
new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
new File(wdResults,
ConstantParameters.FILENAMEOFNLPFeaturesDataTemp)), "UTF-8"));
for(int i = 0; i < numDoc; ++i) {
lightWeightApi.annotations2NLPFeatures((Document)corpus.get(i),
i, outNLPFeaturesTemp, isTraining, learningSettings);
}
outNLPFeaturesTemp.flush();
outNLPFeaturesTemp.close();
lightWeightApi.finishFVs(wdResults, numDoc, isTraining,
learningSettings);
Benchmark.checkPoint(startTime, getBenchmarkId() + "."
+ Benchmark.ANNOTS_TO_NLP_FEATURES, this, benchmarkingFeatures);
lightWeightApi.copyNLPFeat2NormalFile(wdResults,
miLearningInfor.miNumDocsTraining);
            // Use the temporary NLP feature file, instead of the normal
            // one, for MI-training.
inNLPFeatures =
new BufferedReader(new InputStreamReader(new FileInputStream(
new File(wdResults,
ConstantParameters.FILENAMEOFNLPFeaturesDataTemp)), "UTF-8"));
outFeatureVectors =
new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
new File(wdResults,
ConstantParameters.FILENAMEOFFeatureVectorData), true),
"UTF-8"));
startTime = Benchmark.startPoint();
lightWeightApi.nlpfeatures2FVs(wdResults, inNLPFeatures,
outFeatureVectors, numDoc, isTraining, learningSettings);
inNLPFeatures.close();
outFeatureVectors.flush();
outFeatureVectors.close();
Benchmark.checkPoint(startTime, getBenchmarkId() + "."
+ Benchmark.NLP_FEATURES_TO_FVS, this, benchmarkingFeatures);
            System.gc(); // encourage the JVM to release file handles so the files can be deleted
miLearningInfor.miNumDocsTraining += numDoc;
miLearningInfor.miNumDocsFromLast += numDoc;
if(miLearningInfor.miNumDocsFromLast >= learningSettings.miDocInterval) {
// Start learning
              // if filtering the training data
if(learningSettings.fiteringTrainingData
&& learningSettings.filteringRatio > 0.0) {
benchmarkingFeatures.put("numDocs",
miLearningInfor.miNumDocsTraining + "");
startTime = Benchmark.startPoint();
lightWeightApi.FilteringNegativeInstsInJava(
miLearningInfor.miNumDocsTraining, learningSettings);
Benchmark.checkPoint(startTime, getBenchmarkId() + "."
+ Benchmark.FILTERING, this, benchmarkingFeatures);
}
startTime = Benchmark.startPoint();
// using the java code for training
lightWeightApi.trainingJava(miLearningInfor.miNumDocsTraining,
learningSettings);
Benchmark.checkPoint(startTime, getBenchmarkId() + "." + "."
+ Benchmark.MODEL_TRAINING, this, benchmarkingFeatures);
benchmarkingFeatures.remove("numDocs");
              // Reset the number of documents since the last training to 0
miLearningInfor.miNumDocsFromLast = 0;
}
File miLeFile =
new File(wdResults, ConstantParameters.FILENAMEOFMILearningInfor);
miLearningInfor.writeDataIntoFile(miLeFile);
break;
default:
throw new GateException("The learning mode is not defined!");
}
LogService.logMessage("This learning session finished!.", 1);
//LogService.close();
}
catch(IOException e) {
e.printStackTrace();
}
catch(GateException e) {
e.printStackTrace();
}
// reset the parentBenchmarkID
if(oldLightWeightApiParentId != null) {
lightWeightApi.setParentBenchmarkId(oldLightWeightApiParentId);
}
if(LogService.minVerbosityLevel > 0)
System.out.println("This learning session finished!.");
} // end of learning (position=corpus.size()-1)
}
/** Print out the information for featureData only option. */
private void displayDataFilesInformation() {
StringBuffer logMessage = new StringBuffer();
logMessage.append("NLP features for all the documents are in the file"
+ wdResults.getAbsolutePath() + File.separator
+ ConstantParameters.FILENAMEOFNLPFeaturesData + "\n");
logMessage.append("Feature vectors in sparse format are in the file"
+ wdResults.getAbsolutePath() + File.separator
+ ConstantParameters.FILENAMEOFFeatureVectorData + "\n");
logMessage.append("Label list is in the file" + wdResults.getAbsolutePath()
+ File.separator + ConstantParameters.FILENAMEOFLabelList + "\n");
logMessage.append("NLP features list is in the file"
+ wdResults.getAbsolutePath() + File.separator
+ ConstantParameters.FILENAMEOFNLPFeatureList + "\n");
logMessage
.append("The statistics of entity length for each class is in the file"
+ wdResults.getAbsolutePath() + File.separator
+ ConstantParameters.FILENAMEOFChunkLenStats + "\n");
System.out.println(logMessage.toString());
LogService.logMessage(logMessage.toString(), 1);
}
  public void setConfigFileURL(URL configFileURL) {
    this.configFileURL = configFileURL;
  }
public URL getConfigFileURL() {
return this.configFileURL;
}
public void setInputASName(String iasn) {
this.inputASName = iasn;
}
public String getInputASName() {
return this.inputASName;
}
public void setOutputASName(String iasn) {
this.outputASName = iasn;
}
public String getOutputASName() {
return this.outputASName;
}
public RunMode getLearningMode() {
return this.learningMode;
}
public void setLearningMode(RunMode learningM) {
this.learningMode = learningM;
}
public EvaluationBasedOnDocs getEvaluation() {
return evaluation;
}
public EvaluationBasedOnDocs setEvaluation(EvaluationBasedOnDocs eval) {
return this.evaluation = eval;
}
// /////// Benchmarkable ////////////////
private String parentBenchmarkID;
private String benchmarkID;
/**
* Returns the benchmark ID of the parent of this resource.
*
   * @return the benchmark ID of the parent of this resource
*/
public String getParentBenchmarkId() {
return this.parentBenchmarkID;
}
/**
* Returns the benchmark ID of this resource.
*
   * @return the benchmark ID of this resource
*/
public String getBenchmarkId() {
    if(this.benchmarkID == null) {
      benchmarkID = getName().replaceAll("[ ]+", "_");
    }
return this.benchmarkID;
}
/**
* Given an ID of the parent resource, this method is responsible for
* producing the Benchmark ID, unique to this resource.
*
   * @param parentID the benchmark ID of the parent resource
*/
public void createBenchmarkId(String parentID) {
parentBenchmarkID = parentID;
benchmarkID = Benchmark.createBenchmarkId(getName(), parentID);
}
  /**
   * This method sets the parent benchmark ID for this resource.
   *
   * @param benchmarkID the parent benchmark ID
   */
public void setParentBenchmarkId(String benchmarkID) {
parentBenchmarkID = benchmarkID;
}
/**
* Returns the logger object being used by this resource.
*
   * @return the logger object used by this resource
*/
public Logger getLogger() {
return Benchmark.logger;
}
public void setBenchmarkId(String arg0) {
// stub
}
}