/*
* TestLearningAPI.java
*
* Yaoyong Li 22/03/2007
*
* $Id: TestLearningAPI.java, v 1.0 2007-03-22 12:58:16 +0000 yaoyong $
*/
package gate.learning.test;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.Comparator;
import gate.Corpus;
import gate.Document;
import gate.Factory;
import gate.FeatureMap;
import gate.Gate;
import gate.learning.ConstantParameters;
import gate.learning.EvaluationBasedOnDocs;
import gate.learning.LearningAPIMain;
import gate.learning.LogService;
import gate.learning.RunMode;
import gate.util.ExtensionFileFilter;
import gate.util.GateException;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
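/**
* Tests the three types of NLP learning implemented in the ML API
* (chunk learning, text classification and relation extraction) by
* running the learning PR on small datasets. Only the chunk learning
* tests are currently enabled; the text classification and relation
* extraction tests further down are commented out.
*/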
public class TestLearningAPI extends TestCase {
/** Guards the one-off initialisation: set to true by the constructor once
* GATE has been initialised and the Learning plugin directory registered. */
private static boolean initialized = false;
/** The Learning plugin directory ("plugins/Learning" under the GATE home),
* assigned by the constructor. The tests resolve their configuration files
* and data directories relative to it. */
private static File learningHome;
/**
* Creates the named test case and, on the first construction only,
* initialises GATE and registers the Learning plugin directory
* ("plugins/Learning" under the GATE home) with the CREOLE register.
*
* @param arg0 the test name, passed on to the TestCase constructor
* @throws GateException propagated from GATE initialisation or from
*         registering the plugin directory
* @throws MalformedURLException if the plugin directory cannot be
*         converted to a URL
*/
public TestLearningAPI(String arg0) throws GateException,
    MalformedURLException {
  super(arg0);
  if(!initialized) {
    Gate.init();
    learningHome = new File(new File(Gate.getGateHome(), "plugins"),
        "Learning");
    // Convert via toURI() so that characters which are illegal in URLs
    // (e.g. spaces in the GATE home path) get escaped; the deprecated
    // File.toURL() does not escape them.
    Gate.getCreoleRegister().addDirectory(learningHome.toURI().toURL());
    initialized = true;
  }
}
/** The ML API processing resource under test; created by loadSettings. */
LearningAPIMain learningApi;
/** Corpus used for testing; filled by loadSettings with the XML documents
* found in the test data directory. */
Corpus corpus;
/** The controller that runs the ML API over the corpus; loadSettings adds
* the ML API to it as its only PR. */
gate.creole.SerialAnalyserController controller;
/**
* Per-test set-up hook. It intentionally adds nothing to the superclass
* behaviour, because the set-up may need to behave differently in
* different environments.
*/
@Override
protected void setUp() throws Exception {
  super.setUp();
}
/**
* Per-test tear-down hook. It only delegates to the superclass; the
* resources created by a test are released by the test itself.
*/
@Override
protected void tearDown() throws Exception {
  super.tearDown();
}
/**
* Loads the configuration file and the test corpus, and wires the ML API
* into a controller in the same way as would be done in the GATE GUI.
* On success the fields learningApi, corpus and controller are set.
*
* @param configFileName path of the learning configuration file
* @param corpusDirName path of the directory holding the corpus; every
*        non-directory entry accepted by the "xml" extension filter is
*        loaded as a UTF-8 document
* @param inputasN name passed to the ML API as its input annotation set
* @param outputasN name passed to the ML API as its output annotation set
* @throws GateException propagated from the GATE factory calls
* @throws IOException if the corpus directory cannot be listed, or a file
*         path cannot be converted to a URL
*/
void loadSettings(String configFileName, String corpusDirName, String inputasN, String outputasN)
    throws GateException, IOException {
  // Verbosity is forced to 0 here; raise the value to get the debug line below.
  LogService.minVerbosityLevel = 0;
  if(LogService.minVerbosityLevel > 0)
    System.out.println("Learning Home : " + learningHome.getAbsolutePath());
  // List the corpus files before creating any GATE resource, so that a
  // bad directory fails fast without leaving resources behind.
  ExtensionFileFilter fileFilter = new ExtensionFileFilter();
  fileFilter.addExtension("xml");
  File[] xmlFiles = new File(corpusDirName).listFiles(fileFilter);
  if(xmlFiles == null) {
    // File.listFiles returns null when the path is not a directory or an
    // I/O error occurs; report that instead of failing with an NPE below.
    throw new IOException("Cannot list the corpus directory: " + corpusDirName);
  }
  // listFiles gives no ordering guarantee, so sort by name to load the
  // documents in the same order on every run.
  Arrays.sort(xmlFiles, new Comparator<File>() {
    public int compare(File a, File b) {
      return a.getName().compareTo(b.getName());
    }
  });
  // Create the ML API resource from the configuration file. The URL is
  // built via toURI() so that illegal URL characters in the path are escaped.
  FeatureMap parameters = Factory.newFeatureMap();
  URL configFileURL = new File(configFileName).toURI().toURL();
  parameters.put("configFileURL", configFileURL);
  learningApi = (LearningAPIMain)Factory.createResource(
      "gate.learning.LearningAPIMain", parameters);
  // Load the corpus
  corpus = Factory.newCorpus("DataSet");
  for(File f : xmlFiles) {
    if(!f.isDirectory()) {
      Document doc = Factory.newDocument(f.toURI().toURL(), "UTF-8");
      doc.setName(f.getName());
      corpus.add(doc);
    }
  }
  // Set the input and output annotation sets
  learningApi.setInputASName(inputasN);
  learningApi.setOutputASName(outputasN);
  // Put the ML API into a controller that runs over the corpus
  controller = (gate.creole.SerialAnalyserController)Factory
      .createResource("gate.creole.SerialAnalyserController");
  controller.setCorpus(corpus);
  controller.add(learningApi);
}
/**
* Releases the resources used by one test: the documents in the corpus,
* the corpus itself, the ML API and the controller.
*/
private void clearOneTest() {
  // Take a snapshot of the documents before emptying the corpus:
  // clear() only removes them from the list, it does not unload them.
  Object[] loadedDocs = corpus.toArray();
  corpus.clear();
  // Each document was created through the Factory, so it has to be
  // deleted through the Factory as well for GATE to drop its reference.
  for(int i = 0; i < loadedDocs.length; i++) {
    Factory.deleteResource((Document)loadedDocs[i]);
  }
  Factory.deleteResource(corpus);
  Factory.deleteResource(learningApi);
  controller.remove(learningApi);
  controller.cleanup();
  Factory.deleteResource(controller);
}
/**
* Tests chunk learning with the SVM configuration ("engines-svm.xml") on
* a small part of the OntoNews corpus. The ML API is run in evaluation
* mode and the floored macro-averaged counts are compared with the
* expected numbers.
*
* @throws IOException propagated from loading the settings
* @throws GateException propagated from loading the settings or from
*         running the controller
*/
public void testSVMChunkLearnng() throws IOException, GateException {
  // Initialisation
  System.out.print("Testing the SVM with liner kernenl on chunk learning...");
  File chunklearningHome = new File(new File(learningHome, "test"),
      "chunklearning");
  String configFilePath = new File(chunklearningHome, "engines-svm.xml")
      .getAbsolutePath();
  String corpusDirName = new File(chunklearningHome, "data-ontonews")
      .getAbsolutePath();
  // Remove the files saved by a previous run so the results are reproducible.
  String wdResults = new File(chunklearningHome,
      ConstantParameters.SUBDIRFORRESULTS).getAbsolutePath();
  emptySavedFiles(wdResults);
  String inputASN = "Key";
  loadSettings(configFilePath, corpusDirName, inputASN, inputASN);
  try {
    // Use the evaluation mode for testing
    learningApi.setLearningMode(RunMode.EVALUATION);
    controller.execute();
    EvaluationBasedOnDocs evaluation = learningApi.getEvaluation();
    // Compare the overall results with the correct numbers
    assertEquals("Wrong value for correct: ", 44, (int)Math.floor(evaluation.macroMeasuresOfResults.correct));
    assertEquals("Wrong value for partial: ", 10, (int)Math.floor(evaluation.macroMeasuresOfResults.partialCor));
    assertEquals("Wrong value for spurious: ", 11, (int)Math.floor(evaluation.macroMeasuresOfResults.spurious));
    assertEquals("Wrong value for missing: ", 40, (int)Math.floor(evaluation.macroMeasuresOfResults.missing));
    System.out.println("completed");
  } finally {
    // Release the resources even when the run or an assertion fails, so
    // a failing test does not leave them loaded for the following tests.
    clearOneTest();
  }
}
/**
* Tests chunk learning with the Naive Bayes configuration
* ("engines-naivebayesweka.xml") on a small part of the OntoNews corpus.
* The ML API is run in evaluation mode and the floored macro-averaged
* counts are compared with the expected numbers.
*
* @throws IOException propagated from loading the settings
* @throws GateException propagated from loading the settings or from
*         running the controller
*/
public void testNBChunkLearnng() throws IOException, GateException {
  // Initialisation
  System.out.print("Testing the Naive Bayes method on chunk learning...");
  File chunklearningHome = new File(new File(learningHome, "test"),
      "chunklearning");
  String configFilePath = new File(chunklearningHome,
      "engines-naivebayesweka.xml").getAbsolutePath();
  String corpusDirName = new File(chunklearningHome, "data-ontonews")
      .getAbsolutePath();
  // Remove the files saved by a previous run so the results are reproducible.
  String wdResults = new File(chunklearningHome,
      ConstantParameters.SUBDIRFORRESULTS).getAbsolutePath();
  emptySavedFiles(wdResults);
  String inputASN = "Key";
  loadSettings(configFilePath, corpusDirName, inputASN, inputASN);
  try {
    // Use the evaluation mode for testing
    learningApi.setLearningMode(RunMode.EVALUATION);
    controller.execute();
    EvaluationBasedOnDocs evaluation = learningApi.getEvaluation();
    // Compare the overall results with the correct numbers
    assertEquals("Wrong value for correct: ", 27, (int)Math.floor(evaluation.macroMeasuresOfResults.correct));
    assertEquals("Wrong value for partial: ", 3, (int)Math.floor(evaluation.macroMeasuresOfResults.partialCor));
    assertEquals("Wrong value for spurious: ", 27, (int)Math.floor(evaluation.macroMeasuresOfResults.spurious));
    assertEquals("Wrong value for missing: ", 42, (int)Math.floor(evaluation.macroMeasuresOfResults.missing));
  } finally {
    // Release the resources even when the run or an assertion fails, so
    // a failing test does not leave them loaded for the following tests.
    clearOneTest();
  }
  System.out.println("completed");
}
/**
* Tests chunk learning with the PAUM configuration ("engines-paum.xml")
* on a small part of the OntoNews corpus. The ML API is run in
* evaluation mode and the floored macro-averaged counts are compared
* with the expected numbers.
*
* @throws IOException propagated from loading the settings
* @throws GateException propagated from loading the settings or from
*         running the controller
*/
public void testPAUMChunkLearnng() throws IOException, GateException {
  // Initialisation
  System.out.print("Testing the PAUM method on chunk learning...");
  File chunklearningHome = new File(new File(learningHome, "test"),
      "chunklearning");
  String configFilePath = new File(chunklearningHome,
      "engines-paum.xml").getAbsolutePath();
  String corpusDirName = new File(chunklearningHome, "data-ontonews")
      .getAbsolutePath();
  // Remove the files saved by a previous run so the results are reproducible.
  String wdResults = new File(chunklearningHome,
      ConstantParameters.SUBDIRFORRESULTS).getAbsolutePath();
  emptySavedFiles(wdResults);
  String inputASN = "Key";
  loadSettings(configFilePath, corpusDirName, inputASN, inputASN);
  try {
    // Use the evaluation mode for testing
    learningApi.setLearningMode(RunMode.EVALUATION);
    controller.execute();
    EvaluationBasedOnDocs evaluation = learningApi.getEvaluation();
    // Compare the overall results with the correct numbers
    assertEquals("Wrong value for correct: ", 52, (int)Math.floor(evaluation.macroMeasuresOfResults.correct));
    assertEquals("Wrong value for partial: ", 12, (int)Math.floor(evaluation.macroMeasuresOfResults.partialCor));
    assertEquals("Wrong value for spurious: ", 27, (int)Math.floor(evaluation.macroMeasuresOfResults.spurious));
    assertEquals("Wrong value for missing: ", 30, (int)Math.floor(evaluation.macroMeasuresOfResults.missing));
  } finally {
    // Release the resources even when the run or an assertion fails, so
    // a failing test does not leave them loaded for the following tests.
    clearOneTest();
  }
  System.out.println("completed");
}
/** Test the text classification by using the SVM with linear kernel
* and the data for sentence classification.
*/
/*public void testSVMClassification() throws GateException, IOException {
// Initialisation
System.out.print("Testing the SVM with linear kernel on text classification...");
File scHome = new File(new File(learningHome, "test"),
"sentence-classification");
String configFileURL = new File(scHome, "engines-svm.xml")
.getAbsolutePath();
String corpusDirName = new File(scHome, "data-h").getAbsolutePath();
//Remove the label list file, feature list file and chunk length files.
String wdResults = new File(scHome,
ConstantParameters.SUBDIRFORRESULTS).getAbsolutePath();
emptySavedFiles(wdResults);
String inputASN = null;
loadSettings(configFileURL, corpusDirName, inputASN, inputASN);
// Set the evaluation mode
RunMode runM=RunMode.EVALUATION;
learningApi.setLearningMode(runM);
controller.execute();
// Using the evaluation mode for testing
EvaluationBasedOnDocs evaluation = learningApi.getEvaluation();
// Compare the overall results with the correct numbers
assertEquals(evaluation.macroMeasuresOfResults.correct, 27);
assertEquals(evaluation.macroMeasuresOfResults.partialCor, 0);
assertEquals(evaluation.macroMeasuresOfResults.spurious, 45);
assertEquals(evaluation.macroMeasuresOfResults.missing, 39);
// Remove the resources
clearOneTest();
System.out.println("completed");
}*/
/** Test the text classification by using the SVM with quadratic kernel
* and the data for sentence classification.
*/
/*public void testSVMKernelClassification() throws GateException, IOException {
System.out.print("Testing the SVM with quadratic kernel on text classification...");
// Initialisation
File scHome = new File(new File(learningHome, "test"),
"sentence-classification");
String configFileURL = new File(scHome, "engines-svm-quadratickernel.xml")
.getAbsolutePath();
String corpusDirName = new File(scHome, "data-h").getAbsolutePath();
//Remove the label list file, feature list file and chunk length files.
String wdResults = new File(scHome,
ConstantParameters.SUBDIRFORRESULTS).getAbsolutePath();
emptySavedFiles(wdResults);
String inputASN = null;
loadSettings(configFileURL, corpusDirName, inputASN, inputASN);
// Set the evaluation mode
RunMode runM=RunMode.EVALUATION;
learningApi.setLearningMode(runM);
controller.execute();
// Using the evaluation mode for testing
EvaluationBasedOnDocs evaluation = learningApi.getEvaluation();
// Compare the overall results with the correct numbers
assertEquals(evaluation.macroMeasuresOfResults.correct, 27);
assertEquals(evaluation.macroMeasuresOfResults.partialCor, 0);
assertEquals(evaluation.macroMeasuresOfResults.spurious, 44);
assertEquals(evaluation.macroMeasuresOfResults.missing, 38);
// Remove the resources
clearOneTest();
System.out.println("completed");
}*/
/** Test the text classification by using the KNN
* and the data for sentence classification.
*/
/*public void testKNNClassification() throws GateException, IOException {
System.out.print("Testing the KNN on text classification...");
// Initialisation
File scHome = new File(new File(learningHome, "test"),
"sentence-classification");
String configFileURL = new File(scHome, "engines-knnweka.xml")
.getAbsolutePath();
String corpusDirName = new File(scHome, "data-h").getAbsolutePath();
//Remove the label list file, feature list file and chunk length files.
String wdResults = new File(scHome,
ConstantParameters.SUBDIRFORRESULTS).getAbsolutePath();
emptySavedFiles(wdResults);
String inputASN = null;
loadSettings(configFileURL, corpusDirName, inputASN, inputASN);
// Set the evaluation mode
RunMode runM=RunMode.EVALUATION;
learningApi.setLearningMode(runM);
controller.execute();
// Using the evaluation mode for testing
EvaluationBasedOnDocs evaluation = learningApi.getEvaluation();
// Compare the overall results with the correct numbers
assertEquals(evaluation.macroMeasuresOfResults.correct, 13);
assertEquals(evaluation.macroMeasuresOfResults.partialCor, 0);
assertEquals(evaluation.macroMeasuresOfResults.spurious, 60);
assertEquals(evaluation.macroMeasuresOfResults.missing, 52);
// Remove the resources
clearOneTest();
System.out.println("completed");
}*/
/** Test the text classification by using the C4.5 algorithm
* and the data for sentence classification.
*/
/*public void testC45Classification() throws GateException, IOException {
System.out.print("Testing the C4.5 on text classification...");
// Initialisation
File scHome = new File(new File(learningHome, "test"),
"sentence-classification");
String configFileURL = new File(scHome, "engines-c45weka.xml")
.getAbsolutePath();
String corpusDirName = new File(scHome, "data-h").getAbsolutePath();
//Remove the label list file, feature list file and chunk length files.
String wdResults = new File(scHome,
ConstantParameters.SUBDIRFORRESULTS).getAbsolutePath();
emptySavedFiles(wdResults);
String inputASN = null;
loadSettings(configFileURL, corpusDirName, inputASN, inputASN);
// Set the evaluation mode
RunMode runM=RunMode.EVALUATION;
learningApi.setLearningMode(runM);
controller.execute();
// Using the evaluation mode for testing
EvaluationBasedOnDocs evaluation = learningApi.getEvaluation();
// Compare the overall results with the correct numbers
assertEquals(evaluation.macroMeasuresOfResults.correct, 25);
assertEquals(evaluation.macroMeasuresOfResults.partialCor, 0);
assertEquals(evaluation.macroMeasuresOfResults.spurious, 63);
assertEquals(evaluation.macroMeasuresOfResults.missing, 40);
// Remove the resources
clearOneTest();
System.out.println("completed");
}*/
/** Test the relation extraction by using the SVM with linear kernel
* and a small part of data from ACE-04 relation extraction.
*/
/*public void testSVMRelationLearning() throws GateException, IOException {
System.out.print("Testing the SVM with linear kernel on relation extraction...");
// Initialisation relation-learning
File scHome = new File(new File(learningHome, "test"), "relation-learning");
String configFileURL = new File(scHome, "engines-svm.xml")
.getAbsolutePath();
String corpusDirName = new File(scHome, "data-acerelation")
.getAbsolutePath();
//Remove the label list file, feature list file and chunk length files.
String wdResults = new File(scHome,
ConstantParameters.SUBDIRFORRESULTS).getAbsolutePath();
emptySavedFiles(wdResults);
String inputASN = "Key";
loadSettings(configFileURL, corpusDirName, inputASN, inputASN);
// Set the evaluation mode
RunMode runM=RunMode.EVALUATION;
learningApi.setLearningMode(runM);
controller.execute();
// Using the evaluation mode for testing
EvaluationBasedOnDocs evaluation = learningApi.getEvaluation();
// Compare the overall results with the correct numbers
//assertEquals(evaluation.macroMeasuresOfResults.correct, 4);
//assertEquals(evaluation.macroMeasuresOfResults.partialCor, 0);
//assertEquals(evaluation.macroMeasuresOfResults.spurious, 27);
//assertEquals(evaluation.macroMeasuresOfResults.missing, 110);
assertEquals(evaluation.macroMeasuresOfResults.correct, 1);
assertEquals(evaluation.macroMeasuresOfResults.partialCor, 0);
assertEquals(evaluation.macroMeasuresOfResults.spurious, 5);
assertEquals(evaluation.macroMeasuresOfResults.missing, 113);
// Remove the resources
clearOneTest();
System.out.println("completed");
}*/
/** Test the relation extraction by using the SVM with n-gram features
* and a small part of data from ACE-04 relation extraction.
*/
/*public void testSVMRelationLearningWithNgramFeatures() throws GateException, IOException {
System.out.print("Testing the SVM with Ngram features on relation extraction ...");
// Initialisation relation-learning
File scHome = new File(new File(learningHome, "test"), "relation-learning");
String configFileURL = new File(scHome, "engines-svm-ngram.xml")
.getAbsolutePath();
String corpusDirName = new File(scHome, "data-acerelation")
.getAbsolutePath();
//Remove the label list file, feature list file and chunk length files.
String wdResults = new File(scHome,
ConstantParameters.SUBDIRFORRESULTS).getAbsolutePath();
emptySavedFiles(wdResults);
String inputASN = "Key";
loadSettings(configFileURL, corpusDirName, inputASN, inputASN);
// Set the evaluation mode
RunMode runM=RunMode.EVALUATION;
learningApi.setLearningMode(runM);
controller.execute();
// Using the evaluation mode for testing
EvaluationBasedOnDocs evaluation = learningApi.getEvaluation();
// Compare the overall results with the correct numbers
//assertEquals(evaluation.macroMeasuresOfResults.correct, 4);
//assertEquals(evaluation.macroMeasuresOfResults.partialCor, 0);
//assertEquals(evaluation.macroMeasuresOfResults.spurious, 27);
//assertEquals(evaluation.macroMeasuresOfResults.missing, 110);
assertEquals(evaluation.macroMeasuresOfResults.correct, 3);
assertEquals(evaluation.macroMeasuresOfResults.partialCor, 0);
assertEquals(evaluation.macroMeasuresOfResults.spurious, 6);
assertEquals(evaluation.macroMeasuresOfResults.missing, 111);
// Remove the resources
clearOneTest();
System.out.println("completed");
}*/
/**
* Deletes the files a previous run may have saved in the given directory
* (NLP feature list, label list, chunk length statistics, feature vector
* data and NLP features data), so that every test starts from the same
* state and produces consistent results. Deletion is best effort: the
* outcome of each delete is not checked.
*
* @param savedFilesDir path of the directory holding the saved files
*/
private void emptySavedFiles(String savedFilesDir) {
  final String[] staleFileNames = {
    ConstantParameters.FILENAMEOFNLPFeatureList,
    ConstantParameters.FILENAMEOFLabelList,
    ConstantParameters.FILENAMEOFChunkLenStats,
    ConstantParameters.FILENAMEOFFeatureVectorData,
    ConstantParameters.FILENAMEOFNLPFeaturesData
  };
  for(String staleFileName : staleFileNames) {
    File staleFile = new File(savedFilesDir, staleFileName);
    staleFile.delete();
  }
}
/**
* Builds the suite handed to the test runner.
*
* @return a suite created from this test class
*/
public static Test suite() {
  final TestSuite allTests = new TestSuite(TestLearningAPI.class);
  return allTests;
} // suite
}