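// Page chrome from the web source viewer (not part of the Java source): Log in | Help | Print
// Path: Home / releases / gate-5.1-beta2-build3402-ALL / plugins / Learning / src / gate / learning / test / TestLearningAPI.java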
 
/*
 *  TestLearningAPI.java
 * 
 *  Yaoyong Li 22/03/2007
 *
 *  $Id: TestLearningAPI.java, v 1.0 2007-03-22 12:58:16 +0000 yaoyong $
 */
package gate.learning.test;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.Comparator;
import gate.Corpus;
import gate.Document;
import gate.Factory;
import gate.FeatureMap;
import gate.Gate;
import gate.learning.ConstantParameters;
import gate.learning.EvaluationBasedOnDocs;
import gate.learning.LearningAPIMain;
import gate.learning.LogService;
import gate.learning.RunMode;
import gate.util.ExtensionFileFilter;
import gate.util.GateException;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
/**
 * Test the three types of NLP learning implemented in
 *  ML Api by using the test methods and small datasets.
 */
public class TestLearningAPI extends TestCase {
  /** Use it to do initialisation only once. */
  private static boolean initialized = false;
  /** Learning home for reading the data and configuration file. */
  private static File learningHome;
  /**
   * Creates the test case and, the first time an instance is created,
   * initialises GATE and registers the Learning plugin directory.
   * <p>
   * The initialisation is guarded by the static {@code initialized} flag so
   * that it runs once, no matter how many test instances are constructed.
   *
   * @param arg0 the name of the test, passed on to the superclass
   * @throws GateException if GATE cannot be initialised or the plugin
   *           directory cannot be registered
   * @throws MalformedURLException if the plugin directory cannot be
   *           converted to a URL
   */
  public TestLearningAPI(String arg0) throws GateException,
    MalformedURLException {
    super(arg0);
    if(!initialized) {
      Gate.init();
      // The plugin lives in <GATE home>/plugins/Learning.
      learningHome = new File(new File(Gate.getGateHome(), "plugins"),
        "Learning");
      // Go through the URI: File.toURL() is deprecated because it does not
      // escape characters that are illegal in URLs (e.g. spaces in the path).
      Gate.getCreoleRegister().addDirectory(learningHome.toURI().toURL());
      initialized = true;
    }
  }
  /** The ML Api object to be tested. */
  LearningAPIMain learningApi;
  /** Corpus used for testing. */
  Corpus corpus;
  /** The controller that includes the ML Api as one of its PRs. */
  gate.creole.SerialAnalyserController controller;
  /**
   * Set-up hook. It only delegates to the superclass, because the set-up may
   * have to behave differently in different environments; each test loads
   * its own settings through {@code loadSettings} instead.
   *
   * @throws Exception if the superclass set-up fails
   */
  @Override
  protected void setUp() throws Exception {
    super.setUp();
  }
  /**
   * Tear-down hook. It only delegates to the superclass; the resources of a
   * test are released by {@code clearOneTest}, which the test methods call
   * themselves.
   *
   * @throws Exception if the superclass tear-down fails
   */
  @Override
  protected void tearDown() throws Exception {
    super.tearDown();
  }
  /**
   * Loads the configuration file and the corpus for a test, and makes the
   * same settings as one would make in the GATE GUI.
   * <p>
   * On return the fields {@code learningApi}, {@code corpus} and
   * {@code controller} are set: the corpus holds one document per "xml" file
   * of the corpus directory, and the controller runs the learning PR over
   * that corpus.
   *
   * @param configFileName path of the learning configuration file
   * @param corpusDirName path of the directory holding the corpus documents
   * @param inputasN name of the input annotation set for the learning PR
   * @param outputasN name of the output annotation set for the learning PR
   * @throws GateException if a GATE resource cannot be created
   * @throws IOException if the corpus directory cannot be listed or a path
   *           cannot be converted to a URL
   */
  void loadSettings(String configFileName, String corpusDirName, String inputasN, String outputasN)
    throws GateException, IOException {
    // Debugging switch: raise the level to get the learning home printed.
    LogService.minVerbosityLevel = 0;
    if(LogService.minVerbosityLevel>0) {
      System.out.println("Learning Home : " + learningHome.getAbsolutePath());
    }
    // Create the learning PR from its configuration file. The URL is built
    // through the URI because File.toURL() does not escape characters that
    // are illegal in URLs.
    FeatureMap parameters = Factory.newFeatureMap();
    URL configFileURL = new File(configFileName).toURI().toURL();
    parameters.put("configFileURL", configFileURL);
    learningApi = (LearningAPIMain)Factory.createResource(
      "gate.learning.LearningAPIMain", parameters);
    // Load the corpus
    corpus = Factory.newCorpus("DataSet");
    ExtensionFileFilter fileFilter = new ExtensionFileFilter();
    fileFilter.addExtension("xml");
    File corpusDir = new File(corpusDirName);
    File[] xmlFiles = corpusDir.listFiles(fileFilter);
    // listFiles returns null when the path is not a directory or cannot be
    // read; report that instead of failing with a NullPointerException.
    if(xmlFiles == null) {
      throw new IOException("Cannot list the corpus directory: "
        + corpusDir.getAbsolutePath());
    }
    // The order returned by listFiles is unspecified, so sort by file name
    // to add the documents to the corpus in the same order on every run.
    Arrays.sort(xmlFiles, new Comparator<File>() {
      public int compare(File a, File b) {
        return a.getName().compareTo(b.getName());
      }
    });
    for(File f : xmlFiles) {
      if(!f.isDirectory()) {
        Document doc = Factory.newDocument(f.toURI().toURL(), "UTF-8");
        doc.setName(f.getName());
        corpus.add(doc);
      }
    }
    // Set the input and output annotation sets
    learningApi.setInputASName(inputasN);
    learningApi.setOutputASName(outputasN);
    // Wrap the learning PR into a controller running over the corpus
    controller = (gate.creole.SerialAnalyserController)Factory
      .createResource("gate.creole.SerialAnalyserController");
    controller.setCorpus(corpus);
    controller.add(learningApi);
  }
  /**
   * Clears up the resources used by one test: the corpus, the learning PR
   * and the controller that were set up by {@code loadSettings}.
   */
  private void clearOneTest() {
    // Empty the corpus before deleting it.
    corpus.clear();
    Factory.deleteResource(corpus);
    Factory.deleteResource(learningApi);
    // NOTE(review): the PR is taken out of the controller before the
    // controller is cleaned up and deleted, presumably so that the controller
    // does not touch the already deleted PR again -- confirm against the GATE
    // controller implementation before reordering these calls.
    controller.remove(learningApi);
    controller.cleanup();
    Factory.deleteResource(controller);
  }
  /**
   * Tests the chunk learning by using the SVM with linear kernel and
   * a small part of the OntoNews corpus.
   * <p>
   * Runs the learning PR in evaluation mode with the configuration
   * {@code engines-svm.xml} over the documents in {@code data-ontonews}
   * and compares the overall results with the expected numbers.
   *
   * @throws IOException propagated from {@code loadSettings}
   * @throws GateException if a GATE resource cannot be created or the
   *           controller fails to execute
   */
  public void testSVMChunkLearnng() throws IOException, GateException {
    // Initialisation
    System.out.print("Testing the SVM with liner kernenl on chunk learning...");
    File chunklearningHome = new File(new File(learningHome, "test"),
      "chunklearning");
    String configFileURL = new File(chunklearningHome, "engines-svm.xml")
      .getAbsolutePath();
    String corpusDirName = new File(chunklearningHome, "data-ontonews")
      .getAbsolutePath();
    // Remove the files saved by a previous run so the results are consistent.
    String wdResults = new File(chunklearningHome,
      ConstantParameters.SUBDIRFORRESULTS).getAbsolutePath();
    emptySavedFiles(wdResults);
    String inputASN = "Key";
    loadSettings(configFileURL, corpusDirName, inputASN, inputASN);
    try {
      // Use the evaluation mode for testing
      learningApi.setLearningMode(RunMode.EVALUATION);
      controller.execute();
      EvaluationBasedOnDocs evaluation = learningApi.getEvaluation();
      // Compare the overall results with the correct numbers
      assertEquals("Wrong value for correct: ", 44, (int)Math.floor(evaluation.macroMeasuresOfResults.correct));
      assertEquals("Wrong value for partial: ", 10, (int)Math.floor(evaluation.macroMeasuresOfResults.partialCor));
      assertEquals("Wrong value for spurious: ", 11, (int)Math.floor(evaluation.macroMeasuresOfResults.spurious));
      assertEquals("Wrong value for missing: ", 40, (int)Math.floor(evaluation.macroMeasuresOfResults.missing));
      System.out.println("completed");
    } finally {
      // Always remove the resources, also when the execution or an
      // assertion fails, so they do not stay around for the next test.
      clearOneTest();
    }
  }
  /**
   * Tests the chunk learning by using the Naive Bayes method and
   * a small part of the OntoNews corpus.
   * <p>
   * Runs the learning PR in evaluation mode with the configuration
   * {@code engines-naivebayesweka.xml} over the documents in
   * {@code data-ontonews} and compares the overall results with the
   * expected numbers.
   *
   * @throws IOException propagated from {@code loadSettings}
   * @throws GateException if a GATE resource cannot be created or the
   *           controller fails to execute
   */
  public void testNBChunkLearnng() throws IOException, GateException {
    // Initialisation
    System.out.print("Testing the Naive Bayes method on chunk learning...");
    File chunklearningHome = new File(new File(learningHome, "test"),
      "chunklearning");
    String configFileURL = new File(chunklearningHome,
      "engines-naivebayesweka.xml").getAbsolutePath();
    String corpusDirName = new File(chunklearningHome, "data-ontonews")
      .getAbsolutePath();
    // Remove the files saved by a previous run so the results are consistent.
    String wdResults = new File(chunklearningHome,
      ConstantParameters.SUBDIRFORRESULTS).getAbsolutePath();
    emptySavedFiles(wdResults);
    String inputASN = "Key";
    loadSettings(configFileURL, corpusDirName, inputASN, inputASN);
    try {
      // Use the evaluation mode for testing
      learningApi.setLearningMode(RunMode.EVALUATION);
      controller.execute();
      EvaluationBasedOnDocs evaluation = learningApi.getEvaluation();
      // Compare the overall results with the correct numbers
      assertEquals("Wrong value for correct: ", 27, (int)Math.floor(evaluation.macroMeasuresOfResults.correct));
      assertEquals("Wrong value for partial: ", 3, (int)Math.floor(evaluation.macroMeasuresOfResults.partialCor));
      assertEquals("Wrong value for spurious: ", 27, (int)Math.floor(evaluation.macroMeasuresOfResults.spurious));
      assertEquals("Wrong value for missing: ", 42, (int)Math.floor(evaluation.macroMeasuresOfResults.missing));
    } finally {
      // Always remove the resources, also when the execution or an
      // assertion fails, so they do not stay around for the next test.
      clearOneTest();
    }
    System.out.println("completed");
  }
  /**
   * Tests the chunk learning by using the PAUM and
   * a small part of the OntoNews corpus.
   * <p>
   * Runs the learning PR in evaluation mode with the configuration
   * {@code engines-paum.xml} over the documents in {@code data-ontonews}
   * and compares the overall results with the expected numbers.
   *
   * @throws IOException propagated from {@code loadSettings}
   * @throws GateException if a GATE resource cannot be created or the
   *           controller fails to execute
   */
  public void testPAUMChunkLearnng() throws IOException, GateException {
    // Initialisation
    System.out.print("Testing the PAUM method on chunk learning...");
    File chunklearningHome = new File(new File(learningHome, "test"),
      "chunklearning");
    String configFileURL = new File(chunklearningHome,
      "engines-paum.xml").getAbsolutePath();
    String corpusDirName = new File(chunklearningHome, "data-ontonews")
      .getAbsolutePath();
    // Remove the files saved by a previous run so the results are consistent.
    String wdResults = new File(chunklearningHome,
      ConstantParameters.SUBDIRFORRESULTS).getAbsolutePath();
    emptySavedFiles(wdResults);
    String inputASN = "Key";
    loadSettings(configFileURL, corpusDirName, inputASN, inputASN);
    try {
      // Use the evaluation mode for testing
      learningApi.setLearningMode(RunMode.EVALUATION);
      controller.execute();
      EvaluationBasedOnDocs evaluation = learningApi.getEvaluation();
      // Compare the overall results with the correct numbers
      assertEquals("Wrong value for correct: ", 52, (int)Math.floor(evaluation.macroMeasuresOfResults.correct));
      assertEquals("Wrong value for partial: ", 12, (int)Math.floor(evaluation.macroMeasuresOfResults.partialCor));
      assertEquals("Wrong value for spurious: ", 27, (int)Math.floor(evaluation.macroMeasuresOfResults.spurious));
      assertEquals("Wrong value for missing: ", 30, (int)Math.floor(evaluation.macroMeasuresOfResults.missing));
    } finally {
      // Always remove the resources, also when the execution or an
      // assertion fails, so they do not stay around for the next test.
      clearOneTest();
    }
    System.out.println("completed");
  }
  /** Test the text classification by using the SVM with linear kernel
   * and the data for sentence classification. 
   */
  /*public void testSVMClassification() throws GateException, IOException {
    // Initialisation
    System.out.print("Testing the SVM with linear kernel on text classification...");
    File scHome = new File(new File(learningHome, "test"),
      "sentence-classification");
    String configFileURL = new File(scHome, "engines-svm.xml")
      .getAbsolutePath();
    String corpusDirName = new File(scHome, "data-h").getAbsolutePath();
    //Remove the label list file, feature list file and chunk length files. 
    String wdResults = new File(scHome, 
      ConstantParameters.SUBDIRFORRESULTS).getAbsolutePath();
    emptySavedFiles(wdResults);
    String inputASN = null;
    loadSettings(configFileURL, corpusDirName, inputASN, inputASN);
    // Set the evaluation mode
    RunMode runM=RunMode.EVALUATION;
    learningApi.setLearningMode(runM);
    controller.execute();
    // Using the evaluation mode for testing
    EvaluationBasedOnDocs evaluation = learningApi.getEvaluation();
    // Compare the overall results with the correct numbers
    assertEquals(evaluation.macroMeasuresOfResults.correct, 27);
    assertEquals(evaluation.macroMeasuresOfResults.partialCor, 0);
    assertEquals(evaluation.macroMeasuresOfResults.spurious, 45);
    assertEquals(evaluation.macroMeasuresOfResults.missing, 39);
    // Remove the resources
    clearOneTest();
    System.out.println("completed");
  }*/
  /** Test the text classification by using the SVM with quadratic kernel
   * and the data for sentence classification. 
   */
  /*public void testSVMKernelClassification() throws GateException, IOException {
    System.out.print("Testing the SVM with quadratic kernel on text classification...");
    // Initialisation
    File scHome = new File(new File(learningHome, "test"),
      "sentence-classification");
    String configFileURL = new File(scHome, "engines-svm-quadratickernel.xml")
      .getAbsolutePath();
    String corpusDirName = new File(scHome, "data-h").getAbsolutePath();
    //Remove the label list file, feature list file and chunk length files. 
    String wdResults = new File(scHome, 
      ConstantParameters.SUBDIRFORRESULTS).getAbsolutePath();
    emptySavedFiles(wdResults);
    String inputASN = null;
    loadSettings(configFileURL, corpusDirName, inputASN, inputASN);
    // Set the evaluation mode
    RunMode runM=RunMode.EVALUATION;
    learningApi.setLearningMode(runM);
    controller.execute();
    // Using the evaluation mode for testing
    EvaluationBasedOnDocs evaluation = learningApi.getEvaluation();
    // Compare the overall results with the correct numbers
    assertEquals(evaluation.macroMeasuresOfResults.correct, 27);
    assertEquals(evaluation.macroMeasuresOfResults.partialCor, 0);
    assertEquals(evaluation.macroMeasuresOfResults.spurious, 44);
    assertEquals(evaluation.macroMeasuresOfResults.missing, 38);
    // Remove the resources
    clearOneTest();
    System.out.println("completed");
  }*/
  /** Test the text classification by using the KNN
   * and the data for sentence classification. 
   */
  /*public void testKNNClassification() throws GateException, IOException {
    System.out.print("Testing the KNN on text classification...");
    // Initialisation
    File scHome = new File(new File(learningHome, "test"),
      "sentence-classification");
    String configFileURL = new File(scHome, "engines-knnweka.xml")
      .getAbsolutePath();
    String corpusDirName = new File(scHome, "data-h").getAbsolutePath();
    //Remove the label list file, feature list file and chunk length files. 
    String wdResults = new File(scHome, 
      ConstantParameters.SUBDIRFORRESULTS).getAbsolutePath();
    emptySavedFiles(wdResults);
    String inputASN = null;
    loadSettings(configFileURL, corpusDirName, inputASN, inputASN);
    // Set the evaluation mode
    RunMode runM=RunMode.EVALUATION;
    learningApi.setLearningMode(runM);
    controller.execute();
    // Using the evaluation mode for testing
    EvaluationBasedOnDocs evaluation = learningApi.getEvaluation();
    // Compare the overall results with the correct numbers
    assertEquals(evaluation.macroMeasuresOfResults.correct, 13);
    assertEquals(evaluation.macroMeasuresOfResults.partialCor, 0);
    assertEquals(evaluation.macroMeasuresOfResults.spurious, 60);
    assertEquals(evaluation.macroMeasuresOfResults.missing, 52);
    // Remove the resources
    clearOneTest();
    System.out.println("completed");
  }*/
  /** Test the text classification by using the C4.5 algorithm
   * and the data for sentence classification. 
   */
  /*public void testC45Classification() throws GateException, IOException {
    System.out.print("Testing the C4.5 on text classification...");
    // Initialisation
    File scHome = new File(new File(learningHome, "test"),
      "sentence-classification");
    String configFileURL = new File(scHome, "engines-c45weka.xml")
      .getAbsolutePath();
    String corpusDirName = new File(scHome, "data-h").getAbsolutePath();
    //Remove the label list file, feature list file and chunk length files. 
    String wdResults = new File(scHome, 
      ConstantParameters.SUBDIRFORRESULTS).getAbsolutePath();
    emptySavedFiles(wdResults);
    String inputASN = null;
    loadSettings(configFileURL, corpusDirName, inputASN, inputASN);
    // Set the evaluation mode
    RunMode runM=RunMode.EVALUATION;
    learningApi.setLearningMode(runM);
    controller.execute();
    // Using the evaluation mode for testing
    EvaluationBasedOnDocs evaluation = learningApi.getEvaluation();
    // Compare the overall results with the correct numbers
    assertEquals(evaluation.macroMeasuresOfResults.correct, 25);
    assertEquals(evaluation.macroMeasuresOfResults.partialCor, 0);
    assertEquals(evaluation.macroMeasuresOfResults.spurious, 63);
    assertEquals(evaluation.macroMeasuresOfResults.missing, 40);
    // Remove the resources
    clearOneTest();
    System.out.println("completed");
  }*/
  /** Test the relation extraction by using the SVM with linear kernel
   * and a small part of data from ACE-04 relation extraction. 
   */
  /*public void testSVMRelationLearning() throws GateException, IOException {
    System.out.print("Testing the SVM with linear kernel on relation extraction...");
    // Initialisation relation-learning
    File scHome = new File(new File(learningHome, "test"), "relation-learning");
    String configFileURL = new File(scHome, "engines-svm.xml")
      .getAbsolutePath();
    String corpusDirName = new File(scHome, "data-acerelation")
      .getAbsolutePath();
    //Remove the label list file, feature list file and chunk length files. 
    String wdResults = new File(scHome, 
      ConstantParameters.SUBDIRFORRESULTS).getAbsolutePath();
    emptySavedFiles(wdResults);
    String inputASN = "Key";
    loadSettings(configFileURL, corpusDirName, inputASN, inputASN);
    // Set the evaluation mode
    RunMode runM=RunMode.EVALUATION;
    learningApi.setLearningMode(runM);
    controller.execute();
    // Using the evaluation mode for testing
    EvaluationBasedOnDocs evaluation = learningApi.getEvaluation();
    // Compare the overall results with the correct numbers
    //assertEquals(evaluation.macroMeasuresOfResults.correct, 4);
    //assertEquals(evaluation.macroMeasuresOfResults.partialCor, 0);
    //assertEquals(evaluation.macroMeasuresOfResults.spurious, 27);
    //assertEquals(evaluation.macroMeasuresOfResults.missing, 110);
    assertEquals(evaluation.macroMeasuresOfResults.correct, 1);
    assertEquals(evaluation.macroMeasuresOfResults.partialCor, 0);
    assertEquals(evaluation.macroMeasuresOfResults.spurious, 5);
    assertEquals(evaluation.macroMeasuresOfResults.missing, 113);
    // Remove the resources
    clearOneTest();
    System.out.println("completed");
  }*/
  /** Test the relation extraction by using the SVM with n-gram features
   * and a small part of data from ACE-04 relation extraction. 
   */
  /*public void testSVMRelationLearningWithNgramFeatures() throws GateException, IOException {
    System.out.print("Testing the SVM with Ngram features on relation extraction ...");
    // Initialisation relation-learning
    File scHome = new File(new File(learningHome, "test"), "relation-learning");
    String configFileURL = new File(scHome, "engines-svm-ngram.xml")
      .getAbsolutePath();
    String corpusDirName = new File(scHome, "data-acerelation")
      .getAbsolutePath();
    //Remove the label list file, feature list file and chunk length files. 
    String wdResults = new File(scHome, 
      ConstantParameters.SUBDIRFORRESULTS).getAbsolutePath();
    emptySavedFiles(wdResults);
    String inputASN = "Key";
    loadSettings(configFileURL, corpusDirName, inputASN, inputASN);
    // Set the evaluation mode
    RunMode runM=RunMode.EVALUATION;
    learningApi.setLearningMode(runM);
    controller.execute();
    // Using the evaluation mode for testing
    EvaluationBasedOnDocs evaluation = learningApi.getEvaluation();
    // Compare the overall results with the correct numbers
    //assertEquals(evaluation.macroMeasuresOfResults.correct, 4);
    //assertEquals(evaluation.macroMeasuresOfResults.partialCor, 0);
    //assertEquals(evaluation.macroMeasuresOfResults.spurious, 27);
    //assertEquals(evaluation.macroMeasuresOfResults.missing, 110);
    assertEquals(evaluation.macroMeasuresOfResults.correct, 3);
    assertEquals(evaluation.macroMeasuresOfResults.partialCor, 0);
    assertEquals(evaluation.macroMeasuresOfResults.spurious, 6);
    assertEquals(evaluation.macroMeasuresOfResults.missing, 111);
    // Remove the resources
    clearOneTest();
    System.out.println("completed");
  }*/
  /**
   * Deletes the files saved by a previous run (the NLP feature list, the
   * label list, the chunk length statistics, the feature vector data and
   * the NLP features data) before a test, in order to obtain consistent
   * results from each test.
   * <p>
   * The outcome of each deletion is not checked.
   *
   * @param savedFilesDir path of the directory holding the saved files
   */
  private void emptySavedFiles(String savedFilesDir) {
    final String[] savedFileNames = {
      ConstantParameters.FILENAMEOFNLPFeatureList,
      ConstantParameters.FILENAMEOFLabelList,
      ConstantParameters.FILENAMEOFChunkLenStats,
      ConstantParameters.FILENAMEOFFeatureVectorData,
      ConstantParameters.FILENAMEOFNLPFeaturesData
    };
    for(String savedFileName : savedFileNames) {
      new File(savedFilesDir, savedFileName).delete();
    }
  }

  /**
   * Test suite routine for the test runner.
   *
   * @return a suite built from this test class
   */
  public static Test suite() {
    TestSuite learningSuite = new TestSuite(TestLearningAPI.class);
    return learningSuite;
  } // suite
}