/*
* LightWeightLearningApi.java
*
* Yaoyong Li 22/03/2007
*
* $Id: LightWeightLearningApi.java, v 1.0 2007-03-22 12:58:16 +0000 yaoyong $
*/
package gate.learning;
import gate.AnnotationSet;
import gate.Annotation;
import gate.Corpus;
import gate.Factory;
import gate.FeatureMap;
import gate.Node;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import org.apache.log4j.Logger;
import gate.Document;
import gate.learning.learners.ChunkOrEntity;
import gate.learning.learners.MultiClassLearning;
import gate.learning.learners.PostProcessing;
import gate.learning.learners.SupervisedLearner;
import gate.learning.learners.SvmLibSVM;
import gate.learning.learners.weka.WekaLearner;
import gate.learning.learners.weka.WekaLearning;
import gate.util.Benchmark;
import gate.util.Benchmarkable;
import gate.util.GateException;
import gate.util.InvalidOffsetException;
import gate.util.OffsetComparator;
/**
 * Does all the main learning tasks, such as obtaining the feature vectors from
 * the GATE annotations, training and application. It can also filter out some
 * negative examples if required.
 */
public class LightWeightLearningApi extends Object implements Benchmarkable {
/** This is where the model(s) should be saved */
private File wd;
  /**
   * Name of the annotation set containing the annotations referred to in the
   * DATASET element of the configuration file.
   */
public String inputASName;
  /**
   * Name of the annotation set receiving the annotations produced by applying
   * the models.
   */
public String outputASName;
/** Object of the NLP feature list. */
public NLPFeaturesList featuresList;
/** Object of the label list. */
public Label2Id labelsAndId;
/** Position of all features specified in the configuration file. */
int[] featurePositionTotal;
  /** The maximal left (negative) position among all features. */
  int maxNegPositionTotal = 0;
  /** The maximal right (positive) position among all features. */
  int maxPosPositionTotal = 0;
/** The weight for the Ngram features */
float ngramWeight = 1.0f;
/**
* HashMap for the chunkLenStats, for post-processing of chunk learning.
*/
HashMap chunkLenHash;
/** Constructor, with working directory setting. */
public LightWeightLearningApi(File wd) {
this.wd = wd;
}
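  /*
   * A minimal initialisation sketch (illustrative, not part of the API): the
   * working directory passed to the constructor is where the models are kept,
   * and furtherInit() should be called with the results sub-directory and the
   * parsed engine settings before any conversion or learning method is used.
   * The path and the "engineSettings" object below are assumptions made for
   * this example only.
   *
   *   File wd = new File("/some/working/dir");   // hypothetical location
   *   File wdResults = new File(wd, ConstantParameters.SUBDIRFORRESULTS);
   *   LightWeightLearningApi api = new LightWeightLearningApi(wd);
   *   api.furtherInit(wdResults, engineSettings); // engineSettings obtained elsewhere
   */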
/**
* Further initialisation for the main object LearningAPIMain().
*/
public void furtherInit(File wdResults, LearningEngineSettings engineSettings) {
// read the NLP feature list
featuresList = new NLPFeaturesList();
featuresList.loadFromFile(wdResults,
ConstantParameters.FILENAMEOFNLPFeatureList, "UTF-8");
labelsAndId = new Label2Id();
labelsAndId.loadLabelAndIdFromFile(wdResults,
ConstantParameters.FILENAMEOFLabelList);
chunkLenHash = ChunkLengthStats.loadChunkLenStats(wdResults,
ConstantParameters.FILENAMEOFChunkLenStats);
    // Get the positions of all features.
    // Keep the same order of the three types of features as in
    // NLPFeaturesOfDoc.obtainDocNLPFeatures().
int num;
num = engineSettings.datasetDefinition.arrs.featurePosition.length;
if(engineSettings.datasetDefinition.dataType == DataSetDefinition.RelationData) {
num += engineSettings.datasetDefinition.arg1.arrs.featurePosition.length
+ engineSettings.datasetDefinition.arg2.arrs.featurePosition.length;
}
this.featurePositionTotal = new int[num];
num = 0;
if(engineSettings.datasetDefinition.dataType == DataSetDefinition.RelationData) {
for(int i = 0; i < engineSettings.datasetDefinition.arg1.arrs.featurePosition.length; ++i)
this.featurePositionTotal[num++] = engineSettings.datasetDefinition.arg1.arrs.featurePosition[i];
for(int i = 0; i < engineSettings.datasetDefinition.arg2.arrs.featurePosition.length; ++i)
this.featurePositionTotal[num++] = engineSettings.datasetDefinition.arg2.arrs.featurePosition[i];
}
for(int i = 0; i < engineSettings.datasetDefinition.arrs.featurePosition.length; ++i)
this.featurePositionTotal[num++] = engineSettings.datasetDefinition.arrs.featurePosition[i];
maxNegPositionTotal = 0;
if(maxNegPositionTotal < engineSettings.datasetDefinition.arrs.maxNegPosition)
maxNegPositionTotal = engineSettings.datasetDefinition.arrs.maxNegPosition;
if(engineSettings.datasetDefinition.dataType == DataSetDefinition.RelationData) {
if(maxNegPositionTotal < engineSettings.datasetDefinition.arg1.arrs.maxNegPosition)
maxNegPositionTotal = engineSettings.datasetDefinition.arg1.arrs.maxNegPosition;
if(maxNegPositionTotal < engineSettings.datasetDefinition.arg2.arrs.maxNegPosition
+ engineSettings.datasetDefinition.arg1.maxTotalPosition + 2)
maxNegPositionTotal = engineSettings.datasetDefinition.arg2.arrs.maxNegPosition
+ engineSettings.datasetDefinition.arg1.maxTotalPosition + 2;
}
    // Get the n-gram weight from the dataset definition.
ngramWeight = 1.0f;
if(engineSettings.datasetDefinition.ngrams != null
&& engineSettings.datasetDefinition.ngrams.size() > 0
&& ((Ngram)engineSettings.datasetDefinition.ngrams.get(0)).weight != 1.0)
ngramWeight = ((Ngram)engineSettings.datasetDefinition.ngrams.get(0)).weight;
if(engineSettings.datasetDefinition.dataType == DataSetDefinition.RelationData) {
if(engineSettings.datasetDefinition.arg1.ngrams != null
&& engineSettings.datasetDefinition.arg1.ngrams.size() > 0
&& ((Ngram)engineSettings.datasetDefinition.arg1.ngrams.get(0)).weight != 1.0)
ngramWeight = ((Ngram)engineSettings.datasetDefinition.arg1.ngrams
.get(0)).weight;
if(engineSettings.datasetDefinition.arg2.ngrams != null
&& engineSettings.datasetDefinition.arg2.ngrams.size() > 0
&& ((Ngram)engineSettings.datasetDefinition.arg2.ngrams.get(0)).weight != 1.0)
ngramWeight = ((Ngram)engineSettings.datasetDefinition.arg2.ngrams
.get(0)).weight;
}
}
/**
* Obtain the features and labels and form feature vectors from the GATE
* annotation of each document.
*/
public void annotations2NLPFeatures(Document doc, int numDocs,
BufferedWriter outNLPFeatures, boolean isTraining,
LearningEngineSettings engineSettings) {
AnnotationSet annotations = null;
if(inputASName == null || inputASName.trim().length() == 0) {
annotations = doc.getAnnotations();
} else {
annotations = doc.getAnnotations(inputASName);
}
/*
* if(numDocs == 0) { try { outNLPFeatures = new BufferedWriter(new
* OutputStreamWriter(new FileOutputStream(new File(wdResults,
* ConstantParameters.FILENAMEOFNLPFeaturesData)), "UTF-8")); }
* catch(IOException e) {
* e.printStackTrace(); } }
*/
// obtain the NLP features for the document
String docName = doc.getName().replaceAll(ConstantParameters.ITEMSEPARATOR,
"_");
if(docName.contains("_"))
docName = docName.substring(0, docName.lastIndexOf("_"));
if(LogService.minVerbosityLevel > 1)
System.out.println(numDocs + " docname=" + docName + ".");
NLPFeaturesOfDoc nlpFeaturesDoc = new NLPFeaturesOfDoc(annotations,
engineSettings.datasetDefinition.getInstanceType(), docName);
nlpFeaturesDoc.obtainDocNLPFeatures(annotations,
engineSettings.datasetDefinition);
// update the NLP features list
if(isTraining && engineSettings.isNLPFeatListUpdatable) {
featuresList.addFeaturesFromDoc(nlpFeaturesDoc);
}
if(isTraining && engineSettings.isLabelListUpdatable) {
// update the class name list
labelsAndId.updateMultiLabelFromDoc(nlpFeaturesDoc.classNames);
}
    // Only after the label list has been updated, update the chunk length
    // statistics for each label
if(isTraining && engineSettings.surround)
ChunkLengthStats.updateChunkLensStats(annotations,
engineSettings.datasetDefinition, chunkLenHash, labelsAndId);
nlpFeaturesDoc.writeNLPFeaturesToFile(outNLPFeatures, docName, numDocs,
featurePositionTotal);
return;
}
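  /*
   * Sketch of the intended call pattern for the first pass (illustrative
   * only): each document is converted in turn, with the second argument used
   * as the index of the document being processed, as suggested by how it is
   * written to the feature file above. "docs", "outNLPFeatures", "wdResults"
   * and "engineSettings" are assumed to be set up by the caller.
   *
   *   for(int i = 0; i < docs.size(); ++i)
   *     api.annotations2NLPFeatures(docs.get(i), i, outNLPFeatures, true, engineSettings);
   *   api.finishFVs(wdResults, docs.size(), true, engineSettings);
   */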
  /** Normalise each feature vector to unit length (L2 norm). */
static void normaliseFVs(DocFeatureVectors docFV) {
for(int i = 0; i < docFV.numInstances; ++i) {
double sum = 0;
for(int j = 0; j < docFV.fvs[i].len; ++j)
sum += docFV.fvs[i].nodes[j].value * docFV.fvs[i].nodes[j].value;
sum = Math.sqrt(sum);
for(int j = 0; j < docFV.fvs[i].len; ++j)
docFV.fvs[i].nodes[j].value /= sum;
}
}
/**
* Finishing the conversion from annotations to feature vectors by writing
* back the label and nlp feature list into files, and closing the java
* writers.
*/
public void finishFVs(File wdResults, int numDocs, boolean isTraining,
LearningEngineSettings engineSettings) {
if(isTraining && engineSettings.isNLPFeatListUpdatable)
featuresList.writeListIntoFile(wdResults,
ConstantParameters.FILENAMEOFNLPFeatureList, "UTF-8");
if(isTraining && engineSettings.isLabelListUpdatable)
labelsAndId.writeLabelAndIdToFile(wdResults,
ConstantParameters.FILENAMEOFLabelList);
    if(isTraining && engineSettings.surround)
ChunkLengthStats.writeChunkLensStatsIntoFile(wdResults,
ConstantParameters.FILENAMEOFChunkLenStats, chunkLenHash);
}
  /** Transfer the feature list to an n-gram language model. */
public void featureList2LM(File wdResults, int nGram) {
featuresList.writeToLM(wdResults, ConstantParameters.FILENAMEOFNgramLM,
nGram);
}
  /** Convert the NLP features into feature vectors and write them into a file. */
public void nlpfeatures2FVs(File wdResults, BufferedReader inNLPFeatures,
BufferedWriter outFeatureVectors, int numDocs, boolean isTraining,
LearningEngineSettings engineSettings) {
try {
// BufferedWriter outFeatureVectors = new BufferedWriter(new
// OutputStreamWriter(new FileOutputStream(
// new File(wdResults,ConstantParameters.FILENAMEOFFeatureVectorData),
// true), "UTF-8"));
      // Read out the first line, which lists the feature names
inNLPFeatures.readLine();
for(int i = 0; i < numDocs; ++i) {
NLPFeaturesOfDoc nlpFeatDoc = new NLPFeaturesOfDoc();
nlpFeatDoc.readNLPFeaturesFromFile(inNLPFeatures);
DocFeatureVectors docFV = new DocFeatureVectors();
docFV.obtainFVsFromNLPFeatures(nlpFeatDoc, featuresList,
featurePositionTotal, maxNegPositionTotal, featuresList.totalNumDocs,
ngramWeight, engineSettings.datasetDefinition.valueTypeNgram);
/*if(engineSettings.datasetDefinition.isSameWinSize) {
// expand the feature vector to include the context words
docFV.expandFV(engineSettings.datasetDefinition.windowSizeLeft,
engineSettings.datasetDefinition.windowSizeRight);
}*/
if(isTraining) {
LabelsOfFeatureVectorDoc labelsDoc = new LabelsOfFeatureVectorDoc();
labelsDoc.obtainMultiLabelsFromNLPDocSurround(nlpFeatDoc,
labelsAndId, engineSettings.surround);
//addDocFVsMultiLabelToFile(i, outFeatureVectors,
//labelsDoc.multiLabels, docFV);
docFV.addDocFVsMultiLabelToFile(i, outFeatureVectors,
labelsDoc.multiLabels);
} else {
int[] labels = new int[nlpFeatDoc.numInstances];
//addDocFVsToFile(i, outFeatureVectors, labels, docFV);
docFV.addDocFVsToFile(i, outFeatureVectors, labels);
}
}
// outFeatureVectors.flush();
// outFeatureVectors.close();
} catch(IOException e) {
System.out
.println("Error occured in reading the NLP data from file for converting to FVs"
+ "or writing the FVs data into file!");
}
}
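  /*
   * Second pass of the conversion, sketched under the same assumptions as the
   * example above: the NLP feature file produced by annotations2NLPFeatures()
   * is read back and turned into sparse feature vectors.
   *
   *   BufferedReader inNLP = new BufferedReader(new InputStreamReader(
   *     new FileInputStream(new File(wdResults,
   *       ConstantParameters.FILENAMEOFNLPFeaturesData)), "UTF-8"));
   *   BufferedWriter outFVs = new BufferedWriter(new OutputStreamWriter(
   *     new FileOutputStream(new File(wdResults,
   *       ConstantParameters.FILENAMEOFFeatureVectorData)), "UTF-8"));
   *   api.nlpfeatures2FVs(wdResults, inNLP, outFVs, numDocs, true, engineSettings);
   *   outFVs.close();
   *   inNLP.close();
   */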
/** Write the FVs of one document into file. */
/*void addDocFVsToFile(int index, BufferedWriter out, int[] labels,
DocFeatureVectors docFV) {
try {
out.write(new Integer(index) + ConstantParameters.ITEMSEPARATOR
+ new Integer(docFV.numInstances) + ConstantParameters.ITEMSEPARATOR
+ docFV.docId);
out.newLine();
for(int i = 0; i < docFV.numInstances; ++i) {
StringBuffer line = new StringBuffer();
line.append(new Integer(i + 1) + ConstantParameters.ITEMSEPARATOR
+ new Integer(labels[i]));
for(int j = 0; j < docFV.fvs[i].len; ++j)
line.append(ConstantParameters.ITEMSEPARATOR
+ docFV.fvs[i].nodes[j].index + ConstantParameters.INDEXVALUESEPARATOR
+ docFV.fvs[i].nodes[j].value);
out.write(line.toString());
out.newLine();
}
} catch(IOException e) {
}
}*/
/** Write the FVs with labels of one document into file. */
/*void addDocFVsMultiLabelToFile(int index, BufferedWriter out,
LabelsOfFV[] multiLabels, DocFeatureVectors docFV) {
try {
out.write(new Integer(index) + ConstantParameters.ITEMSEPARATOR
+ new Integer(docFV.numInstances) + ConstantParameters.ITEMSEPARATOR
+ docFV.docId);
out.newLine();
for(int i = 0; i < docFV.numInstances; ++i) {
StringBuffer line = new StringBuffer();
line.append(new Integer(i + 1) + ConstantParameters.ITEMSEPARATOR
+ multiLabels[i].num);
for(int j = 0; j < multiLabels[i].num; ++j)
line.append(ConstantParameters.ITEMSEPARATOR
+ multiLabels[i].labels[j]);
for(int j = 0; j < docFV.fvs[i].len; ++j)
line.append(ConstantParameters.ITEMSEPARATOR
+ docFV.fvs[i].nodes[j].index + ConstantParameters.INDEXVALUESEPARATOR
+ docFV.fvs[i].nodes[j].value);
out.write(line.toString());
out.newLine();
}
} catch(IOException e) {
}
}*/
/** Order and select unlabelled documents for active learning. */
public void orderDocsWithModels(File wdResults,
LearningEngineSettings engineSettings) {
try {
      // Read the names of all documents and their total number
int numDocs = 0;
// Read the names of documents from a file
BufferedReader inDocsName = new BufferedReader(new InputStreamReader(
new FileInputStream(new File(wdResults,
ConstantParameters.FILENAMEOFDocsName)), "UTF-8"));
String str = inDocsName.readLine();
numDocs = Integer.parseInt(str.substring(str.indexOf("=") + 1));
String[] docsName = new String[numDocs];
for(int i = 0; i < numDocs; ++i)
docsName[i] = inDocsName.readLine();
inDocsName.close();
// Read the selected document
int numDocsSelected;
Vector selectedDocs = new Vector();
File docsFile = new File(wdResults,
ConstantParameters.FILENAMEOFSelectedDOCForAL);
if(docsFile.exists())
numDocsSelected = obtainDocsForAL(docsFile, selectedDocs);
else {
System.out.println("!!! Warning: Cannot get the information about the "
+ "number of documents for selecting!!");
return;
}
boolean[] countedDocs = new boolean[numDocs];
int[] indexSortedDocs = new int[numDocs];
// first apply the current models to the data
File dataFile = new File(wdResults,
ConstantParameters.FILENAMEOFFVDataSelecting);
if(!dataFile.exists()) {
System.out
.println("!!! Warning: The data file named "
+ ConstantParameters.FILENAMEOFFVDataSelecting
+ " doesn't exist. The file should be used to store the fv data for selecting doc!");
return;
}
File modelFile = new File(wdResults, ConstantParameters.FILENAMEOFModels);
int learnerType;
learnerType = obtainLearnerType(engineSettings.learnerSettings.learnerName);
int numClasses = 0;
switch(learnerType){
case 1: // for weka learner
LogService.logMessage("Use weka learner.", 1);
        System.out
          .println("!! Warning: Currently there is no implementation for Weka learners "
            + "to select documents for active learning.");
        LogService.logMessage(
          "!! Warning: Currently there is no implementation for Weka learners "
            + "to select documents for active learning.", 1);
break;
case 2: // for learner of multi to binary conversion
LogService.logMessage("Multi to binary conversion.", 1);
// System.out.println("** multi to binary:");
String dataSetFile = null;
//get a learner
String learningCommand = engineSettings.learnerSettings.paramsOfLearning;
learningCommand = learningCommand.trim();
learningCommand = learningCommand.replaceAll("[ \t]+", " ");
SupervisedLearner paumLearner = MultiClassLearning
.obtainLearnerFromName(engineSettings.learnerSettings.learnerName,
learningCommand, dataSetFile);
paumLearner
.setLearnerExecutable(engineSettings.learnerSettings.executableTraining);
paumLearner
.setLearnerParams(engineSettings.learnerSettings.paramsOfLearning);
LogService.logMessage(
"The learners: " + paumLearner.getLearnerName(), 1);
MultiClassLearning chunkLearning = new MultiClassLearning(
engineSettings.multi2BinaryMode);
// read data
File tempDataFile= new File(wdResults,
ConstantParameters.TempFILENAMEofFVData);
boolean isUsingTempDataFile = false;
//if(paumLearner.getLearnerName().equals("SVMExec"))
//isUsingTempDataFile = true;
chunkLearning.getDataFromFile(numDocs, dataFile, isUsingTempDataFile, tempDataFile);
// Apply the model to the data which was read already.
chunkLearning.apply(paumLearner, modelFile);
// labelsFVDoc = chunkLearning.dataFVinDoc.labelsFVDoc;
numClasses = chunkLearning.numClasses;
if(engineSettings.multi2BinaryMode == 2) // for one vs another mode
if(chunkLearning.numNull > 0)
numClasses = (numClasses + 1) * numClasses / 2;
else numClasses = (numClasses - 1) * numClasses / 2;
// Compute the margins for the instances in the document
// first set which documents were selected, which were not
for(int i = 0; i < numDocs; ++i) {
if(selectedDocs.contains(docsName[i]))
countedDocs[i] = false;
else countedDocs[i] = true;
}
// then compute the margins for documents
float[] marginsD = new float[numDocs];
float optB = (1 - ((SvmLibSVM)paumLearner).tau)
/ (1 + ((SvmLibSVM)paumLearner).tau);
computeDocBasedMargins(chunkLearning.dataFVinDoc.labelsFVDoc,
numClasses, engineSettings.multi2BinaryMode, optB,
engineSettings.alSetting, marginsD);
        // get the biggest ones;
        // assign a very large value to the documents already selected, so that
        // they are excluded from the ranking
for(int i = 0; i < numDocs; ++i) {
if(!countedDocs[i]) marginsD[i] = 99999;
}
LightWeightLearningApi.sortFloatAscIndex(marginsD, indexSortedDocs,
numDocs, numDocs);
// write the ranked documents into a file
BufferedWriter outDocs = new BufferedWriter(new OutputStreamWriter(
new FileOutputStream(new File(wdResults,
ConstantParameters.FILENAMEOFRankedDOCForAL)), "UTF-8"));
// String [] str =
// inDocs.readLine().split(ConstantParameters.ITEMSEPARATOR);
int numRanked = numDocs - numDocsSelected;
outDocs.append("##numDocsRanked=" + numRanked);
outDocs.newLine();
for(int i = 0; i < numDocs; ++i) {
int kk = numDocs - i - 1;
if(countedDocs[indexSortedDocs[kk]]) {
outDocs.append(docsName[indexSortedDocs[kk]]
+ ConstantParameters.ITEMSEPARATOR
+ marginsD[indexSortedDocs[kk]]);
outDocs.newLine();
}
}
outDocs.flush();
outDocs.close();
break;
default:
System.out.println("Error! Wrong learner type.");
LogService.logMessage("Error! Wrong learner type.", 0);
}
} catch(GateException e) {
e.printStackTrace();
} catch(IOException e) {
e.printStackTrace();
}
}
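  /*
   * For reference, the ranked-documents file written above
   * (ConstantParameters.FILENAMEOFRankedDOCForAL) consists of one header line
   * followed by one line per not-yet-selected document, each holding the
   * document name and its margin-based score separated by
   * ConstantParameters.ITEMSEPARATOR. With illustrative names, values and
   * separator:
   *
   *   ##numDocsRanked=2
   *   doc-17.xml 0.374
   *   doc-03.xml 0.519
   */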
/**
   * Read the documents already selected for AL from file and return their
   * number.
*
* @throws IOException
*/
private int obtainDocsForAL(File docsFile, List<String> selectedDocs)
throws IOException {
int numDocsSelected = 0;
BufferedReader inDocs = new BufferedReader(new InputStreamReader(
new FileInputStream(docsFile), "UTF-8"));
String str = inDocs.readLine();
    while(str != null && !str.equals("")) {
selectedDocs.add(str);
str = inDocs.readLine();
++numDocsSelected;
}
inDocs.close();
return numDocsSelected;
}
/**
   * Compute the margin of each token to the class hyperplane, averaged over
   * the tokens in a document.
*/
private void computeDocBasedMargins(LabelsOfFeatureVectorDoc[] labelsFVDoc,
int numClasses, int multi2BinaryMode, float optB,
ActiveLearningSetting als, float[] marginsD) {
int numDoc = labelsFVDoc.length;
for(int nd = 0; nd < numDoc; ++nd) {
int numInstances = labelsFVDoc[nd].multiLabels.length;
float[] valueInst = new float[numInstances];
for(int i = 0; i < numClasses; ++i) {
for(int ni = 0; ni < numInstances; ++ni) {
valueInst[ni] = (float)UsefulFunctions
.inversesigmoid((double)labelsFVDoc[nd].multiLabels[ni].probs[i]);
if(multi2BinaryMode == 1) valueInst[ni] -= optB;
          // negate positive values so they can be used with the sorting method
          if(valueInst[ni] > 0) valueInst[ni] = -valueInst[ni];
}// end of loop over all instances
// select the biggest
if(als.numTokensSelect > 0) {
int numExamples = als.numTokensSelect;
if(numExamples > numInstances) numExamples = numInstances;
int[] indexSort = new int[numExamples];
LightWeightLearningApi.sortFloatAscIndex(valueInst, indexSort,
numInstances, numExamples);
float sum = 0;
for(int j = 0; j < numExamples; ++j)
sum += valueInst[indexSort[j]]; // still use the negative values
sum /= numExamples;
marginsD[nd] -= sum; // because values are negative.
} else {
float sum = 0;
int k = 0;
for(int j = 0; j < numInstances; ++j)
if(valueInst[j] > -0.5) {
sum += valueInst[j];
++k;
}
if(k > 0)
sum /= k;
else sum = -1.0f;
marginsD[nd] -= sum; // because values are negative.
}
}// end of loop over all classes
}// end of loop over all documents
}
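  /*
   * Note on the computation above: each per-instance probability p is mapped
   * back to a margin via the inverse sigmoid, m = inversesigmoid(p), shifted
   * by optB = (1 - tau) / (1 + tau) when multi2BinaryMode == 1. The selected
   * per-instance margins are averaged (using their negated values, see the
   * sorting trick above) and subtracted from marginsD[nd], so each document
   * ends up with a non-negative score reflecting how confident the current
   * models are on its instances.
   */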
/**
   * Obtain the term-frequency matrix for each document from the feature vector
   * file.
*/
public void termfrequenceMatrix(File wdResults, int numDocs) {
try {
BufferedWriter outTFs = new BufferedWriter(new OutputStreamWriter(
new FileOutputStream(new File(wdResults,
ConstantParameters.FILENAMEOFTermFreqMatrix), true), "UTF-8"));
BufferedReader inFVs = new BufferedReader(new InputStreamReader(
new FileInputStream(new File(wdResults,
ConstantParameters.FILENAMEOFFeatureVectorData)), "UTF-8"));
HashMap<Integer, String> indexTerm = new HashMap<Integer, String>();
for(Object obj : featuresList.featuresList.keySet()) {
indexTerm.put(
new Integer(featuresList.featuresList.get(obj).toString()), obj
.toString());
}
for(int nd = 0; nd < numDocs; ++nd) {
String[] ts = inFVs.readLine().split(ConstantParameters.ITEMSEPARATOR);
outTFs.append(ts[0] + " Documentname=\"" + ts[2] + "\", has " + ts[1]
+ " parts:");
outTFs.newLine();
int npart = Integer.parseInt(ts[1]);
for(int i = 0; i < npart; ++i) {
String[] ts1 = inFVs.readLine().split(
ConstantParameters.ITEMSEPARATOR);
HashMap<String, Integer> termFreq = new HashMap<String, Integer>();
int bindex = 2 + Integer.parseInt(ts1[1]);
for(int j = bindex; j < ts1.length; ++j) {
int isep = ts1[j].indexOf(ConstantParameters.INDEXVALUESEPARATOR);
Integer index = new Integer(ts1[j].substring(0, isep));
Integer valI = new Integer((int)(Float.parseFloat((ts1[j]
.substring(isep + 1)))));
termFreq.put(indexTerm.get(index), valI);
}
List<String> keys = new ArrayList<String>(termFreq.keySet());
Collections.sort(keys);
StringBuffer sb = new StringBuffer();
for(int j = 0; j < keys.size(); ++j) {
String ks = keys.get(j);
String str = ks;
          if(str.contains("<>")) { // if it is an n-gram feature
str = str.substring(str.indexOf("_", 1) + 1, str
.lastIndexOf("<>"));
sb.append(str + ConstantParameters.INDEXVALUESEPARATOR
+ termFreq.get(ks) + " ");
}
}
outTFs.append(sb.toString().trim());
outTFs.newLine();
}
}
inFVs.close();
outTFs.flush();
outTFs.close();
} catch(UnsupportedEncodingException e) {
e.printStackTrace();
} catch(FileNotFoundException e) {
e.printStackTrace();
} catch(IOException e) {
e.printStackTrace();
}
}
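  /*
   * For reference, the term-frequency file written above contains, for each
   * document, a header line followed by one line per part; each line lists
   * the n-gram terms of that part with their frequencies, the term and the
   * count being joined by ConstantParameters.INDEXVALUESEPARATOR. With
   * illustrative names, counts and separator:
   *
   *   0 Documentname="doc-01.xml", has 2 parts:
   *   protein:3 binding:1 kinase:2
   *   cell:1 receptor:4
   */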
  /** Copy the NLP features from the temporary file to the permanent file. */
public void copyNLPFeat2NormalFile(File wdResults, int miNumDocsTraining) {
try {
BufferedWriter outNLPFeatures = new BufferedWriter(
new OutputStreamWriter(new FileOutputStream(new File(wdResults,
ConstantParameters.FILENAMEOFNLPFeaturesData), true), "UTF-8"));
BufferedReader inNLPFeaturesTemp = new BufferedReader(
new InputStreamReader(new FileInputStream(new File(wdResults,
ConstantParameters.FILENAMEOFNLPFeaturesDataTemp)), "UTF-8"));
String line = inNLPFeaturesTemp.readLine();
if(miNumDocsTraining == 0) {
outNLPFeatures.append(line);
outNLPFeatures.newLine();
}
line = inNLPFeaturesTemp.readLine();
while(line != null) {
outNLPFeatures.append(line);
outNLPFeatures.newLine();
line = inNLPFeaturesTemp.readLine();
}
inNLPFeaturesTemp.close();
outNLPFeatures.flush();
outNLPFeatures.close();
} catch(UnsupportedEncodingException e) {
e.printStackTrace();
} catch(FileNotFoundException e) {
e.printStackTrace();
} catch(IOException e) {
e.printStackTrace();
}
}
/**
   * Training using the Java implementation of the learning algorithms.
*/
public void trainingJava(int numDocs, LearningEngineSettings engineSettings)
throws GateException {
LogService.logMessage("\nTraining starts.\n", 1);
// The files for training data and model
File wdResults = new File(wd, ConstantParameters.SUBDIRFORRESULTS);
String fvFileName = wdResults.toString() + File.separator
+ ConstantParameters.FILENAMEOFFeatureVectorData;
String nlpFileName = wdResults.toString() + File.separator
+ ConstantParameters.FILENAMEOFNLPFeaturesData;
String modelFileName = wdResults.toString() + File.separator
+ ConstantParameters.FILENAMEOFModels;
String labelInDataFileName = wdResults.toString() + File.separator
+ ConstantParameters.FILENAMEOFLabelsInData;
String nlpDataLabelFileName = wdResults.toString() + File.separator
+ ConstantParameters.FILENAMEOFNLPDataLabel;
File dataFile = new File(fvFileName);
File nlpDataFile = new File(nlpFileName);
File modelFile = new File(modelFileName);
File labelInData = new File(labelInDataFileName);
File nlpDataLabelFile = new File(nlpDataLabelFileName);
int learnerType = obtainLearnerType(engineSettings.learnerSettings.learnerName);
// benchmarking features
Map benchmarkingFeatures = new HashMap();
switch(learnerType){
case 1: // for weka learner
LogService.logMessage("Use weka learner.", 1);
WekaLearning wekaL = new WekaLearning();
short featureType = WekaLearning
.obtainWekaLeanerDataType(engineSettings.learnerSettings.learnerName);
// Convert and read training data
switch(featureType){
case WekaLearning.NLPFEATUREFVDATA:
          // Transfer the labels in nlpDataFile into the labels used in the
          // sparse data, collect the labels and write them into a file
long startTime = Benchmark.startPoint();
convertNLPLabelsTDataLabel(nlpDataFile, dataFile, labelInData,
nlpDataLabelFile, numDocs, engineSettings.surround);
Benchmark.checkPoint(startTime, benchmarkID + "."
+ "nlpLabelsToDataLabels", this, benchmarkingFeatures);
startTime = Benchmark.startPoint();
benchmarkingFeatures.put("nlpFeaturesFile", nlpDataLabelFile
.getAbsolutePath());
wekaL.readNLPFeaturesFromFile(nlpDataLabelFile, numDocs,
this.featuresList, true, labelsAndId.label2Id.size(),
engineSettings.surround);
Benchmark.checkPoint(startTime, benchmarkID + "."
+ "readingNlpFeatures", this, benchmarkingFeatures);
benchmarkingFeatures.remove("nlpFeaturesFile");
break;
case WekaLearning.SPARSEFVDATA:
startTime = Benchmark.startPoint();
benchmarkingFeatures.put("featureVectorFile", dataFile
.getAbsolutePath());
wekaL.readSparseFVsFromFile(dataFile, numDocs, true,
labelsAndId.label2Id.size(), engineSettings.surround);
Benchmark.checkPoint(startTime, benchmarkID + "." + "readingFVs",
this, benchmarkingFeatures);
benchmarkingFeatures.remove("featureVectorFile");
break;
}
        // Get the wekaLearner from the learner name
WekaLearner wekaLearner = WekaLearning.obtainWekaLearner(
engineSettings.learnerSettings.learnerName,
engineSettings.learnerSettings.paramsOfLearning);
LogService.logMessage("Weka learner name: "
+ wekaLearner.getLearnerName(), 1);
long startTime = Benchmark.startPoint();
benchmarkingFeatures.put("modelFile", modelFile.getAbsolutePath());
// Training.
wekaL.train(wekaLearner, modelFile);
Benchmark.checkPoint(startTime,
benchmarkID + "." + "wekaModelTraining", this, benchmarkingFeatures);
benchmarkingFeatures.remove("modelFile");
break;
case 2: // for learner of multi to binary conversion
LogService.logMessage("Multi to binary conversion.", 1);
// get a learner
String learningCommand = "";
if(engineSettings.learnerSettings.executableTraining != null)
learningCommand = engineSettings.learnerSettings.executableTraining+ " ";
learningCommand += engineSettings.learnerSettings.paramsOfLearning;
learningCommand = learningCommand.trim();
learningCommand = learningCommand.replaceAll("[ \t]+", " ");
String dataSetFile = null;
SupervisedLearner paumLearner = MultiClassLearning
.obtainLearnerFromName(engineSettings.learnerSettings.learnerName,
learningCommand, dataSetFile);
paumLearner
.setLearnerExecutable(engineSettings.learnerSettings.executableTraining);
paumLearner
.setLearnerParams(engineSettings.learnerSettings.paramsOfLearning);
LogService.logMessage("The learners: " + paumLearner.getLearnerName(),
1);
if(LogService.minVerbosityLevel > 1)
System.out.println("Using the "+ paumLearner.getLearnerName());
MultiClassLearning chunkLearning = new MultiClassLearning(
engineSettings.multi2BinaryMode);
if(engineSettings.multiBinaryExecutor != null) {
chunkLearning.setExecutor(engineSettings.multiBinaryExecutor);
}
startTime = Benchmark.startPoint();
benchmarkingFeatures.put("dataFile", dataFile.getAbsolutePath());
// read data
File tempDataFile= new File(wdResults,
ConstantParameters.TempFILENAMEofFVData);
boolean isUsingTempDataFile = false;
if(paumLearner.getLearnerName().equals("SVMExec") ||
paumLearner.getLearnerName().equals("PAUMExec") )
isUsingTempDataFile = true; //using the temp data file
chunkLearning.getDataFromFile(numDocs, dataFile, isUsingTempDataFile, tempDataFile);
Benchmark.checkPoint(startTime, benchmarkID + "."
+ "readingChunkLearningData", this, benchmarkingFeatures);
benchmarkingFeatures.remove("dataFile");
LogService.logMessage("The number of classes in dataset: "
+ chunkLearning.numClasses, 1);
// training
startTime = Benchmark.startPoint();
benchmarkingFeatures.put("modelFile", modelFile.getAbsolutePath());
        // use different methods for single-threaded and multi-threaded execution
if(engineSettings.numThreadUsed >1 )//for using thread
chunkLearning.training(paumLearner, modelFile);
else //for not using thread
chunkLearning.trainingNoThread(paumLearner, modelFile, isUsingTempDataFile, tempDataFile);
Benchmark.checkPoint(startTime,
benchmarkID + "." + "paumModelTraining", this, benchmarkingFeatures);
benchmarkingFeatures.remove("modelFile");
break;
default:
System.out.println("Error! Wrong learner type.");
LogService.logMessage("Error! Wrong learner type.", 0);
}
}
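  /*
   * End-to-end training sketch (illustrative, not part of the API). It
   * assumes "api" was initialised as in the earlier example and that the
   * per-document conversion has produced the NLP feature and feature vector
   * files for all the documents:
   *
   *   for(int i = 0; i < docs.size(); ++i)
   *     api.annotations2NLPFeatures(docs.get(i), i, outNLPFeatures, true, engineSettings);
   *   api.finishFVs(wdResults, docs.size(), true, engineSettings);
   *   api.nlpfeatures2FVs(wdResults, inNLP, outFVs, docs.size(), true, engineSettings);
   *   api.trainingJava(docs.size(), engineSettings);  // trains and saves the model(s)
   */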
/**
* Apply the model to data, also using the learning algorithm implemented in
* Java.
*/
public void applyModelInJava(Corpus corpus, int startDocId, int endDocId,
String labelName, LearningEngineSettings engineSettings, String fvFileName)
throws GateException {
int numDocs = endDocId - startDocId;
LogService.logMessage("\nApplication starts.", 1);
// The files for training data and model
File wdResults = new File(wd, ConstantParameters.SUBDIRFORRESULTS);
// String fvFileName = wdResults.toString() + File.separator
// + ConstantParameters.FILENAMEOFFeatureVectorData;
String nlpFileName = wdResults.toString() + File.separator
+ ConstantParameters.FILENAMEOFNLPFeaturesData;
String modelFileName = wdResults.toString() + File.separator
+ ConstantParameters.FILENAMEOFModels;
// String labelInDataFileName = wdResults.toString() + File.separator
// + ConstantParameters.FILENAMEOFLabelsInData;
File dataFile = new File(fvFileName);
File nlpDataFile = new File(nlpFileName);
File modelFile = new File(modelFileName);
int learnerType;
learnerType = obtainLearnerType(engineSettings.learnerSettings.learnerName);
int numClasses = 0;
// Store the label information from the model application
LabelsOfFeatureVectorDoc[] labelsFVDoc = null;
short featureType = WekaLearning.SPARSEFVDATA;
Map benchmarkingFeatures = new HashMap();
switch(learnerType){
case 1: // for weka learner
LogService.logMessage("Use weka learner.", 1);
WekaLearning wekaL = new WekaLearning();
        // Check if the learner uses the sparse feature vectors or NLP
// features
featureType = WekaLearning
.obtainWekaLeanerDataType(engineSettings.learnerSettings.learnerName);
long startTime = Benchmark.startPoint();
switch(featureType){
case WekaLearning.NLPFEATUREFVDATA:
wekaL.readNLPFeaturesFromFile(nlpDataFile, numDocs,
this.featuresList, false, labelsAndId.label2Id.size(),
engineSettings.surround);
break;
case WekaLearning.SPARSEFVDATA:
wekaL.readSparseFVsFromFile(dataFile, numDocs, false,
labelsAndId.label2Id.size(), engineSettings.surround);
break;
}
Benchmark.checkPoint(startTime, benchmarkID + "."
+ "readingNlpFeatures", this, benchmarkingFeatures);
        // Check if the Weka learner produces a distribution output for classification
boolean distributionOutput = WekaLearning
.obtainWekaLearnerOutputType(engineSettings.learnerSettings.learnerName);
        // Get the wekaLearner from the learner name
WekaLearner wekaLearner = WekaLearning.obtainWekaLearner(
engineSettings.learnerSettings.learnerName,
engineSettings.learnerSettings.paramsOfLearning);
LogService.logMessage("Weka learner name: "
+ wekaLearner.getLearnerName(), 1);
// Application
startTime = Benchmark.startPoint();
benchmarkingFeatures.put("modelFile", modelFile.getAbsolutePath());
wekaL.apply(wekaLearner, modelFile, distributionOutput);
Benchmark.checkPoint(startTime, benchmarkID + "."
+ "wekaModelApplication", this, benchmarkingFeatures);
benchmarkingFeatures.remove("modelFile");
labelsFVDoc = wekaL.labelsFVDoc;
numClasses = labelsAndId.label2Id.size() * 2; // subtract the
// null class
break;
case 2: // for learner of multi to binary conversion
LogService.logMessage("Multi to binary conversion.", 1);
// System.out.println("** multi to binary:");
// get a learner
String learningCommand = engineSettings.learnerSettings.paramsOfLearning;
learningCommand = learningCommand.trim();
learningCommand = learningCommand.replaceAll("[ \t]+", " ");
String dataSetFile = null;
SupervisedLearner paumLearner = MultiClassLearning
.obtainLearnerFromName(engineSettings.learnerSettings.learnerName,
learningCommand, dataSetFile);
paumLearner
.setLearnerExecutable(engineSettings.learnerSettings.executableTraining);
paumLearner
.setLearnerParams(engineSettings.learnerSettings.paramsOfLearning);
LogService.logMessage("The learners: " + paumLearner.getLearnerName(),
1);
MultiClassLearning chunkLearning = new MultiClassLearning(
engineSettings.multi2BinaryMode);
// read data
startTime = Benchmark.startPoint();
//get the fv data
File tempDataFile= new File(wdResults,
ConstantParameters.TempFILENAMEofFVData);
boolean isUsingTempDataFile = false;
//if(paumLearner.getLearnerName().equals("SVMExec"))
//isUsingTempDataFile = true;
chunkLearning.getDataFromFile(numDocs, dataFile, isUsingTempDataFile, tempDataFile);
Benchmark.checkPoint(startTime, benchmarkID + "."
+ "readingChunkLearningData", this, benchmarkingFeatures);
// apply
startTime = Benchmark.startPoint();
benchmarkingFeatures.put("modelFile", modelFile.getAbsolutePath());
        // use different methods for single-threaded and multi-threaded execution
if(engineSettings.numThreadUsed>1) //for using thread
chunkLearning.apply(paumLearner, modelFile);
else //for not using thread
chunkLearning.applyNoThread(paumLearner, modelFile);
Benchmark.checkPoint(startTime, benchmarkID + "."
+ "paumModelApplication", this, benchmarkingFeatures);
benchmarkingFeatures.remove("modelFile");
labelsFVDoc = chunkLearning.dataFVinDoc.labelsFVDoc;
numClasses = chunkLearning.numClasses;
break;
default:
System.out.println("Error! Wrong learner type.");
LogService.logMessage("Error! Wrong learner type.", 0);
}
if(engineSettings.surround) {
String featName = engineSettings.datasetDefinition.arrs.classFeature;
String instanceType = engineSettings.datasetDefinition.getInstanceType();
labelsAndId = new Label2Id();
labelsAndId.loadLabelAndIdFromFile(wdResults,
ConstantParameters.FILENAMEOFLabelList);
// post-processing and add new annotation to the text
PostProcessing postPr = new PostProcessing(
engineSettings.thrBoundaryProb, engineSettings.thrEntityProb,
engineSettings.thrClassificationProb);
// System.out.println("** Application mode:");
long startTime = Benchmark.startPoint();
for(int i = 0; i < numDocs; ++i) {
HashSet chunks = new HashSet();
postPr.postProcessingChunk((short)3, labelsFVDoc[i].multiLabels,
numClasses, chunks, chunkLenHash);
// System.out.println("**
// documentName="+((Document)corpus.get(i)).getName());
Document toProcess = (Document)corpus.get(i + startDocId);
addAnnsInDoc(toProcess, chunks, instanceType, featName, labelName,
labelsAndId);
if(toProcess.getDataStore() != null && corpus.getDataStore() != null) {
corpus.getDataStore().sync(corpus);
Factory.deleteResource(toProcess);
}
}
Benchmark.checkPoint(startTime, benchmarkID + "." + "postProcessing",
this, benchmarkingFeatures);
} else {
String featName = engineSettings.datasetDefinition.arrs.classFeature;
String instanceType = engineSettings.datasetDefinition.getInstanceType();
labelsAndId = new Label2Id();
labelsAndId.loadLabelAndIdFromFile(wdResults,
ConstantParameters.FILENAMEOFLabelList);
// post-processing and add new annotation to the text
// PostProcessing postPr = new PostProcessing(0.42, 0.2);
PostProcessing postPr = new PostProcessing(
engineSettings.thrBoundaryProb, engineSettings.thrEntityProb,
engineSettings.thrClassificationProb);
for(int i = 0; i < numDocs; ++i) {
int[] selectedLabels = new int[labelsFVDoc[i].multiLabels.length];
float[] valuesLabels = new float[labelsFVDoc[i].multiLabels.length];
postPr.postProcessingClassification((short)3,
labelsFVDoc[i].multiLabels, selectedLabels, valuesLabels);
Document toProcess = (Document)corpus.get(i + startDocId);
// Add the ranked label list and their scores, not just a single label
// addLabelListInDocClassification(toProcess,
// labelsFVDoc[i].multiLabels,
// instanceType, featName, labelName, labelsAndId, engineSettings);
addAnnsInDocClassification(toProcess, selectedLabels, valuesLabels,
instanceType, featName, labelName, labelsAndId, engineSettings);
if(toProcess.getDataStore() != null && corpus.getDataStore() != null) {
corpus.getDataStore().sync(corpus);
Factory.deleteResource(toProcess);
}
}
}
}
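  /*
   * Application sketch (illustrative): once a model has been trained, it can
   * be applied to the documents corpus.get(startDocId) .. corpus.get(endDocId - 1),
   * adding annotations of type labelName to the output annotation set. The
   * label name "Mention" and the variable names are assumptions for the
   * example only; fvFileName must point at the feature vector file produced
   * for exactly these documents.
   *
   *   api.applyModelInJava(corpus, 0, corpus.size(), "Mention",
   *     engineSettings, fvFileName);
   */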
/**
* Apply the model to one document, also using the learning algorithm
* implemented in Java.
*/
public void applyModelInJavaPerDoc(Document doc, String labelName,
LearningEngineSettings engineSettings) throws GateException {
LogService.logMessage("\nApplication starts.", 1);
// The files for training data and model
File wdResults = new File(wd, ConstantParameters.SUBDIRFORRESULTS);
String fvFileName = wdResults.toString() + File.separator
+ ConstantParameters.FILENAMEOFFeatureVectorData;
String nlpFileName = wdResults.toString() + File.separator
+ ConstantParameters.FILENAMEOFNLPFeaturesData;
String modelFileName = wdResults.toString() + File.separator
+ ConstantParameters.FILENAMEOFModels;
// String labelInDataFileName = wdResults.toString() + File.separator
// + ConstantParameters.FILENAMEOFLabelsInData;
File dataFile = new File(fvFileName);
File nlpDataFile = new File(nlpFileName);
File modelFile = new File(modelFileName);
int learnerType;
learnerType = obtainLearnerType(engineSettings.learnerSettings.learnerName);
int numClasses = 0;
int numDocs = 1; // corpus.size();
// Store the label information from the model application
LabelsOfFeatureVectorDoc[] labelsFVDoc = null;
short featureType = WekaLearning.SPARSEFVDATA;
switch(learnerType){
case 1: // for weka learner
LogService.logMessage("Use weka learner.", 1);
WekaLearning wekaL = new WekaLearning();
        // Check if the learner uses the sparse feature vectors or NLP
// features
featureType = WekaLearning
.obtainWekaLeanerDataType(engineSettings.learnerSettings.learnerName);
switch(featureType){
case WekaLearning.NLPFEATUREFVDATA:
wekaL.readNLPFeaturesFromFile(nlpDataFile, numDocs,
this.featuresList, false, labelsAndId.label2Id.size(),
engineSettings.surround);
break;
case WekaLearning.SPARSEFVDATA:
wekaL.readSparseFVsFromFile(dataFile, numDocs, false,
labelsAndId.label2Id.size(), engineSettings.surround);
break;
}
        // Check if the Weka learner produces a distribution output for classification
boolean distributionOutput = WekaLearning
.obtainWekaLearnerOutputType(engineSettings.learnerSettings.learnerName);
        // Get the wekaLearner from the learner name
WekaLearner wekaLearner = WekaLearning.obtainWekaLearner(
engineSettings.learnerSettings.learnerName,
engineSettings.learnerSettings.paramsOfLearning);
LogService.logMessage("Weka learner name: "
+ wekaLearner.getLearnerName(), 1);
        // Application.
wekaL.apply(wekaLearner, modelFile, distributionOutput);
labelsFVDoc = wekaL.labelsFVDoc;
numClasses = labelsAndId.label2Id.size() * 2; // subtract the
// null class
break;
case 2: // for learner of multi to binary conversion
LogService.logMessage("Multi to binary conversion.", 1);
// get a learner
String learningCommand = engineSettings.learnerSettings.paramsOfLearning;
learningCommand = learningCommand.trim();
learningCommand = learningCommand.replaceAll("[ \t]+", " ");
String dataSetFile = null;
SupervisedLearner paumLearner = MultiClassLearning
.obtainLearnerFromName(engineSettings.learnerSettings.learnerName,
learningCommand, dataSetFile);
paumLearner
.setLearnerExecutable(engineSettings.learnerSettings.executableTraining);
paumLearner
.setLearnerParams(engineSettings.learnerSettings.paramsOfLearning);
LogService.logMessage("The learners: " + paumLearner.getLearnerName(),
1);
// System.out.println("** multi to binary:");
MultiClassLearning chunkLearning = new MultiClassLearning(
engineSettings.multi2BinaryMode);
// read data
// get the fv data
File tempDataFile= new File(wdResults,
ConstantParameters.TempFILENAMEofFVData);
boolean isUsingTempDataFile = false;
//if(paumLearner.getLearnerName().equals("SVMExec"))
//isUsingTempDataFile = true;
chunkLearning.getDataFromFile(numDocs, dataFile, isUsingTempDataFile, tempDataFile);
// dataFile);
// apply
        // use different methods for single-threaded and multi-threaded execution
if(engineSettings.numThreadUsed>1) //for using thread
chunkLearning.apply(paumLearner, modelFile);
else //for not using thread
chunkLearning.applyNoThread(paumLearner, modelFile);
labelsFVDoc = chunkLearning.dataFVinDoc.labelsFVDoc;
numClasses = chunkLearning.numClasses;
break;
default:
System.out.println("Error! Wrong learner type.");
LogService.logMessage("Error! Wrong learner type.", 0);
}
if(engineSettings.surround) {
String featName = engineSettings.datasetDefinition.arrs.classFeature;
String instanceType = engineSettings.datasetDefinition.getInstanceType();
labelsAndId = new Label2Id();
labelsAndId.loadLabelAndIdFromFile(wdResults,
ConstantParameters.FILENAMEOFLabelList);
// post-processing and add new annotation to the text
PostProcessing postPr = new PostProcessing(
engineSettings.thrBoundaryProb, engineSettings.thrEntityProb,
engineSettings.thrClassificationProb);
// System.out.println("** Application mode:");
for(int i = 0; i < numDocs; ++i) {
HashSet chunks = new HashSet();
postPr.postProcessingChunk((short)3, labelsFVDoc[i].multiLabels,
numClasses, chunks, chunkLenHash);
// System.out.println("**
// documentName="+((Document)corpus.get(i)).getName());
addAnnsInDoc(doc, chunks, instanceType, featName, labelName,
labelsAndId);
}
} else {
String featName = engineSettings.datasetDefinition.arrs.classFeature;
String instanceType = engineSettings.datasetDefinition.getInstanceType();
labelsAndId = new Label2Id();
labelsAndId.loadLabelAndIdFromFile(wdResults,
ConstantParameters.FILENAMEOFLabelList);
// post-processing and add new annotation to the text
// PostProcessing postPr = new PostProcessing(0.42, 0.2);
PostProcessing postPr = new PostProcessing(
engineSettings.thrBoundaryProb, engineSettings.thrEntityProb,
engineSettings.thrClassificationProb);
for(int i = 0; i < numDocs; ++i) { // numDocs is always 1
int[] selectedLabels = new int[labelsFVDoc[i].multiLabels.length];
float[] valuesLabels = new float[labelsFVDoc[i].multiLabels.length];
postPr.postProcessingClassification((short)3,
labelsFVDoc[i].multiLabels, selectedLabels, valuesLabels);
addAnnsInDocClassification(doc, selectedLabels, valuesLabels,
instanceType, featName, labelName, labelsAndId, engineSettings);
}
}
}
/**
* Add the annotation into documents for chunk learning.
*
* @throws InvalidOffsetException
*/
private void addAnnsInDoc(Document doc, HashSet chunks, String instanceType,
String featName, String labelName, Label2Id labelsAndId)
throws InvalidOffsetException {
AnnotationSet annsDoc = null;
if(inputASName == null || inputASName.trim().length() == 0) {
annsDoc = doc.getAnnotations();
} else {
annsDoc = doc.getAnnotations(inputASName);
}
AnnotationSet annsDocResults = null;
if(outputASName == null || outputASName.trim().length() == 0) {
annsDocResults = doc.getAnnotations();
} else {
annsDocResults = doc.getAnnotations(outputASName);
}
AnnotationSet anns = annsDoc.get(instanceType);
ArrayList annotationArray = (anns == null || anns.isEmpty())
? new ArrayList()
: new ArrayList(anns);
Collections.sort(annotationArray, new OffsetComparator());
for(Object obj : chunks) {
ChunkOrEntity entity = (ChunkOrEntity)obj;
FeatureMap features = Factory.newFeatureMap();
features.put(featName, labelsAndId.id2Label.get(
new Integer(entity.name).toString()).toString());
features.put("prob", entity.prob);
Annotation token1 = (Annotation)annotationArray.get(entity.start);
Annotation token2 = (Annotation)annotationArray.get(entity.end);
Node entityS = token1.getStartNode();
Node entityE = token2.getEndNode();
if(entityS != null && entityE != null)
annsDocResults.add(entityS.getOffset(), entityE.getOffset(), labelName,
features);
}
}
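  /*
   * For each recognised chunk the method above adds one annotation of type
   * labelName to the result annotation set, spanning from the start node of
   * the token at entity.start to the end node of the token at entity.end, and
   * carrying two features: the class feature (featName) holding the label
   * string and "prob" holding the entity probability.
   */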
/**
* Add the annotation into documents for classification.
*
* @throws InvalidOffsetException
*/
private void addAnnsInDocClassification(Document doc, int[] selectedLabels,
float[] valuesLabels, String instanceType, String featName,
String labelName, Label2Id labelsAndId,
LearningEngineSettings engineSettings) throws InvalidOffsetException {
AnnotationSet annsDoc = null;
if(inputASName == null || inputASName.trim().length() == 0) {
annsDoc = doc.getAnnotations();
} else {
annsDoc = doc.getAnnotations(inputASName);
}
AnnotationSet annsDocResults = null;
if(outputASName == null || outputASName.trim().length() == 0) {
annsDocResults = doc.getAnnotations();
} else {
annsDocResults = doc.getAnnotations(outputASName);
}
AnnotationSet annsLabel = annsDoc.get(labelName);
AnnotationSet anns = annsDoc.get(instanceType);
ArrayList annotationArray = (anns == null || anns.isEmpty())
? new ArrayList()
: new ArrayList(anns);
Collections.sort(annotationArray, new OffsetComparator());
// For the relation extraction
String arg1F = null;
String arg2F = null;
if(engineSettings.datasetDefinition.dataType == DataSetDefinition.RelationData) {
AttributeRelation relAtt = (AttributeRelation)engineSettings.datasetDefinition.classAttribute;
arg1F = relAtt.getArg1();
arg2F = relAtt.getArg2();
}
for(int i = 0; i < annotationArray.size(); ++i) {
if(selectedLabels[i] < 0) continue;
FeatureMap features = Factory.newFeatureMap();
features.put(featName, labelsAndId.id2Label.get(
new Integer(selectedLabels[i] + 1).toString()).toString());
features.put("prob", valuesLabels[i]);
Annotation ann = (Annotation)annotationArray.get(i);
// For relation data, need the argument features
if(engineSettings.datasetDefinition.dataType == DataSetDefinition.RelationData) {
String arg1V = ann.getFeatures().get(
engineSettings.datasetDefinition.arg1Feat).toString();
String arg2V = ann.getFeatures().get(
engineSettings.datasetDefinition.arg2Feat).toString();
features.put(arg1F, arg1V);
features.put(arg2F, arg2V);
}
// FeatureMap featO = ann.getFeatures();
// for(Object obj:features.keySet()) {
// if(featO.containsKey(obj))
// featO.put(obj.toString()+"_results", features.get(obj));
// else featO.put(obj, features.get(obj));
// }
annsDocResults.add(ann.getStartNode().getOffset(), ann.getEndNode()
.getOffset(), labelName, features);
}
}
/**
   * Add a ranked list of labels for each example in the documents for
   * classification, not just a single label.
*
* @throws InvalidOffsetException
*/
private void addLabelListInDocClassification(Document doc,
LabelsOfFV[] multiLabels, String instanceType, String featName,
String labelName, Label2Id labelsAndId,
LearningEngineSettings engineSettings) throws InvalidOffsetException {
AnnotationSet annsDoc = null;
if(inputASName == null || inputASName.trim().length() == 0) {
annsDoc = doc.getAnnotations();
} else {
annsDoc = doc.getAnnotations(inputASName);
}
AnnotationSet annsDocResults = null;
if(outputASName == null || outputASName.trim().length() == 0) {
annsDocResults = doc.getAnnotations();
} else {
annsDocResults = doc.getAnnotations(outputASName);
}
AnnotationSet anns = annsDoc.get(instanceType);
ArrayList annotationArray = (anns == null || anns.isEmpty())
? new ArrayList()
: new ArrayList(anns);
Collections.sort(annotationArray, new OffsetComparator());
// For the relation extraction
String arg1F = null;
String arg2F = null;
if(engineSettings.datasetDefinition.dataType == DataSetDefinition.RelationData) {
AttributeRelation relAtt = (AttributeRelation)engineSettings.datasetDefinition.classAttribute;
arg1F = relAtt.getArg1();
arg2F = relAtt.getArg2();
}
for(int i = 0; i < annotationArray.size(); ++i) {
int len = multiLabels[i].num;
int[] indexSort = new int[len];
sortFloatAscIndex(multiLabels[i].probs, indexSort, len, len);
// get the labels and their scores
StringBuffer strB = new StringBuffer();
for(int j = 0; j < len; ++j) {
String label = labelsAndId.id2Label.get(
new Integer(indexSort[j] + 1).toString()).toString();
strB.append(label + ":" + multiLabels[i].probs[indexSort[j]] + " ");
}
FeatureMap features = Factory.newFeatureMap();
features.put(featName, strB.toString().trim());
// features.put("prob", valuesLabels[i]);
Annotation ann = (Annotation)annotationArray.get(i);
// For relation data, need the argument features
if(engineSettings.datasetDefinition.dataType == DataSetDefinition.RelationData) {
String arg1V = ann.getFeatures().get(
engineSettings.datasetDefinition.arg1Feat).toString();
String arg2V = ann.getFeatures().get(
engineSettings.datasetDefinition.arg2Feat).toString();
features.put(arg1F, arg1V);
features.put(arg2F, arg2V);
}
FeatureMap featO = ann.getFeatures();
for(Object obj : features.keySet()) {
// if(featO.containsKey(obj))
featO.put(obj.toString() + "_resultsList", features.get(obj));
// else featO.put(obj, features.get(obj));
}
// FeatureMap featAdd = ann.
// for(Object obj:featO.keySet()) {
// annsDocResults.add(ann.).getFeatures().put(obj, featO.get(obj));
// }
annsDocResults.add(ann.getStartNode().getOffset(), ann.getEndNode()
.getOffset(), labelName, featO);
// annsDoc.add(ann.getStartNode(), ann.getEndNode(), labelName, features);
}
}
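  /*
   * The ranked label list produced above is stored on the instance annotation
   * as a feature named after the class feature with the suffix "_resultsList",
   * whose value is a space-separated list of "label:score" pairs, e.g. (with
   * illustrative labels and scores):
   *
   *   Person:0.91 Organization:0.06 Location:0.03
   */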
  /** Convert the string labels in the NLP data file into the index labels. */
public void convertNLPLabelsTDataLabel(File nlpDataFile, File dataFile,
File labelInDataFile, File nlpDataLabelFile, int numDocs,
boolean surroundingMode) {
try {
BufferedReader inData = new BufferedReader(new InputStreamReader(
new FileInputStream(dataFile), "UTF-8"));
BufferedReader inNlpData = new BufferedReader(new InputStreamReader(
new FileInputStream(nlpDataFile), "UTF-8"));
BufferedWriter outNlpDataLabel = new BufferedWriter(
new OutputStreamWriter(new FileOutputStream(nlpDataLabelFile), "UTF-8"));
HashSet uniqueLabels = new HashSet();
// The head line of NLP feature file
String line = inNlpData.readLine();
outNlpDataLabel.append(line);
outNlpDataLabel.newLine();
String[] items;
// For each document
for(int iDoc = 0; iDoc < numDocs; ++iDoc) {
int numLabels;
line = inData.readLine();
items = line.split(ConstantParameters.ITEMSEPARATOR);
int num = Integer.parseInt(items[1]);
line = inNlpData.readLine();
items = line.split(ConstantParameters.ITEMSEPARATOR);
        line = items[0] + ConstantParameters.ITEMSEPARATOR + items[1]
          + ConstantParameters.ITEMSEPARATOR + num;
outNlpDataLabel.append(line);
outNlpDataLabel.newLine();
// For each instance
for(int i = 0; i < num; ++i) {
// Read the line from data file and get the data label
line = inData.readLine();
items = line.split(ConstantParameters.ITEMSEPARATOR);
numLabels = Integer.parseInt(items[1]);
StringBuffer labels = new StringBuffer();
labels.append(items[1]);
//System.out.println("line="+line+", items[1]="+items[1]);
for(int j = 0; j < numLabels; ++j) {
labels.append(ConstantParameters.ITEMSEPARATOR);
labels.append(items[j + 2]);
if(!uniqueLabels.contains(items[j + 2]))
uniqueLabels.add(items[j + 2]);
}
outNlpDataLabel.append(labels.toString());
// Read the line from NLP feature and get the features
line = inNlpData.readLine();
items = line.split(ConstantParameters.ITEMSEPARATOR);
numLabels = Integer.parseInt(items[0]);
StringBuffer nlpFeats = new StringBuffer();
for(int j = numLabels + 1; j < items.length; ++j) {
nlpFeats.append(ConstantParameters.ITEMSEPARATOR);
nlpFeats.append(items[j]);
}
outNlpDataLabel.append(nlpFeats);
outNlpDataLabel.newLine();
}
}
outNlpDataLabel.flush();
outNlpDataLabel.close();
inData.close();
inNlpData.close();
BufferedWriter labelInData = new BufferedWriter(new OutputStreamWriter(
new FileOutputStream(labelInDataFile), "UTF-8"));
labelInData.append(uniqueLabels.size() + " #total_labels");
labelInData.newLine();
for(Object obj : uniqueLabels) {
labelInData.append(obj.toString());
labelInData.newLine();
}
labelInData.flush();
labelInData.close();
} catch(FileNotFoundException e) {
e.printStackTrace();
} catch(IOException e) {
e.printStackTrace();
}
}
/**
   * Filtering out some negative examples from the training data using the SVM.
*
* @throws GateException
*/
public void FilteringNegativeInstsInJava(int numDocs,
LearningEngineSettings engineSettings) throws GateException {
LogService.logMessage("\nFiltering starts.", 1);
// The files for training data and model
File wdResults = new File(wd, ConstantParameters.SUBDIRFORRESULTS);
String fvFileName = wdResults.toString() + File.separator
+ ConstantParameters.FILENAMEOFFeatureVectorData;
String modelFileName = wdResults.toString() + File.separator
+ ConstantParameters.FILENAMEOFModels;
File dataFile = new File(fvFileName);
File modelFile = new File(modelFileName);
// for learner of multi to binary conversion
LogService.logMessage("Multi to binary conversion.", 1);
MultiClassLearning chunkLearning = new MultiClassLearning(
engineSettings.multi2BinaryMode);
// read data
File tempDataFile= new File(wdResults,
ConstantParameters.TempFILENAMEofFVData);
boolean isUsingTempDataFile = false;
//if(paumLearner.getLearnerName().equals("SVMExec"))
//isUsingTempDataFile = true; //using the temp data file
chunkLearning.getDataFromFile(numDocs, dataFile, isUsingTempDataFile, tempDataFile);
// Back up the label data before it is reset.
LabelsOfFeatureVectorDoc[] labelsFVDocB = new LabelsOfFeatureVectorDoc[numDocs];
for(int i = 0; i < numDocs; ++i) {
labelsFVDocB[i] = new LabelsOfFeatureVectorDoc();
labelsFVDocB[i].multiLabels = new LabelsOfFV[chunkLearning.dataFVinDoc.labelsFVDoc[i].multiLabels.length];
for(int j = 0; j < chunkLearning.dataFVinDoc.labelsFVDoc[i].multiLabels.length; ++j)
labelsFVDocB[i].multiLabels[j] = chunkLearning.dataFVinDoc.labelsFVDoc[i].multiLabels[j];
}
// Reset the class label of data for binary class for filtering
// purpose
    int numNeg; // number of negative examples in the training data
numNeg = chunkLearning.resetClassInData();
    if(numNeg == 0) {// No negative example (null label) at all in the training
      // data
      LogService.logMessage(
        "!Cannot do the filtering, because there are no negative examples "
          + "in the training data for filtering!", 1);
      if(LogService.minVerbosityLevel > 0)
        System.out
          .println("!Cannot do the filtering, because there are no negative examples "
            + "in the training data for filtering!");
return;
}
LogService.logMessage("The number of classes in dataset: "
+ chunkLearning.numClasses, 1);
// Use the SVM only for filtering
String dataSetFile = null;
SupervisedLearner paumLearner = MultiClassLearning.obtainLearnerFromName(
"SVMLibSvmJava", "-c 1.0 -t 0 -m 100 -tau 1.0 ", dataSetFile);
paumLearner
.setLearnerExecutable(engineSettings.learnerSettings.executableTraining);
paumLearner
.setLearnerParams(engineSettings.learnerSettings.paramsOfLearning);
LogService.logMessage("The learners: " + paumLearner.getLearnerName(), 1);
// training
    // if the number of classes is zero, do not filter at all
if(chunkLearning.numClasses == 0) {
      LogService.logMessage(
        "!Cannot do the filtering, because there are no positive examples "
          + "in the training data!", 1);
      if(LogService.minVerbosityLevel > 0)
        System.out
          .println("!Cannot do the filtering, because there are no positive examples "
            + "in the training data!");
return;
}
    // use different methods for single-threaded and multi-threaded execution
if(engineSettings.numThreadUsed>1) {//using thread
chunkLearning.training(paumLearner, modelFile);
      // apply the learned model to the training examples and get the
      // confidence score for each example
chunkLearning.apply(paumLearner, modelFile);
} else { //not using thread
chunkLearning.trainingNoThread(paumLearner, modelFile, isUsingTempDataFile, tempDataFile);
chunkLearning.applyNoThread(paumLearner, modelFile);
}
// Store the scores of negative examples.
float[] scoresNegB = new float[numNeg];
float[] scoresNeg = new float[numNeg];
int kk = 0;
for(int i = 0; i < labelsFVDocB.length; ++i) {
for(int j = 0; j < labelsFVDocB[i].multiLabels.length; ++j)
if(labelsFVDocB[i].multiLabels[j].num == 0) {
// System.out.println("(i, j, kk)="+i+","+j+","+kk+"*");
scoresNeg[kk++] = chunkLearning.dataFVinDoc.labelsFVDoc[i].multiLabels[j].probs[0];
}
}
    // If we want to remove the negative examples that are close to the
    // positive ones, reverse the scores.
if(engineSettings.filteringNear) for(int i = 0; i < numNeg; ++i)
scoresNeg[i] = -scoresNeg[i];
// Back up the score before sorting
for(int i = 0; i < numNeg; ++i)
scoresNegB[i] = scoresNeg[i];
// Sort those scores
Arrays.sort(scoresNeg);
// int index = numNeg -
// (int)Math.floor(numNeg*engineSettings.filteringRatio);
int index = (int)Math.floor(numNeg * engineSettings.filteringRatio);
if(index >= numNeg) index = numNeg - 1;
if(index < 0) index = 0;
float thrFiltering = scoresNeg[index];
boolean[] isFiltered = new boolean[numNeg];
for(int i = 0; i < numNeg; ++i)
if(scoresNegB[i] < thrFiltering)
isFiltered[i] = true;
else isFiltered[i] = false;
// Write the filtered data into the data file
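// The loop below rewrites the data file with one block per document: a
// header line "docIndex<sep>numKeptInstances<sep>docId", followed by one
// line per kept instance of the form
// "instanceIndex<sep>numLabels<sep>label...<sep>featureIndex:featureValue...",
// where <sep> stands for ConstantParameters.ITEMSEPARATOR and ':' for
// ConstantParameters.INDEXVALUESEPARATOR. Note that numNeg is reused below
// as a running offset into isFiltered across documents.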
BufferedWriter out;
try {
out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
dataFile), "UTF-8"));
numNeg = 0;
for(int i = 0; i < labelsFVDocB.length; ++i) {
int kk1 = 0;
int numK = 0; // num of instances in the doc to be kept
for(int j = 0; j < labelsFVDocB[i].multiLabels.length; ++j)
if(labelsFVDocB[i].multiLabels[j].num == 0) {
if(!isFiltered[numNeg + kk1]) ++numK;
++kk1;
} else {
++numK;
}
out.write(i + ConstantParameters.ITEMSEPARATOR + numK
+ ConstantParameters.ITEMSEPARATOR
+ chunkLearning.dataFVinDoc.trainingFVinDoc[i].docId);
out.newLine();
kk1 = 0;
for(int j = 0; j < labelsFVDocB[i].multiLabels.length; ++j) {
if(labelsFVDocB[i].multiLabels[j].num > 0
|| !isFiltered[numNeg + kk1]) {
StringBuffer line = new StringBuffer();
line.append(j + ConstantParameters.ITEMSEPARATOR
+ labelsFVDocB[i].multiLabels[j].num);
for(int j1 = 0; j1 < labelsFVDocB[i].multiLabels[j].num; ++j1) {
line.append(ConstantParameters.ITEMSEPARATOR
+ labelsFVDocB[i].multiLabels[j].labels[j1]);
}
SparseFeatureVector fv = chunkLearning.dataFVinDoc.trainingFVinDoc[i].fvs[j];
for(int j1 = 0; j1 < fv.len; ++j1)
line.append(ConstantParameters.ITEMSEPARATOR + fv.nodes[j1].index
+ ConstantParameters.INDEXVALUESEPARATOR + fv.nodes[j1].value);
out.write(line.toString());
out.newLine();
}
if(labelsFVDocB[i].multiLabels[j].num == 0) ++kk1;
}
numNeg += kk1;
}
out.flush();
out.close();
} catch(IOException e) {
e.printStackTrace();
}
}
/**
* Determine the type of the learner: returns 1 if the learner is a
* Weka-based learner and 2 otherwise; throws a GateException if the learner
* name is not recognised.
*/
public static int obtainLearnerType(String learnerName) throws GateException {
if(learnerName.equals("SVMLibSvmJava") || learnerName.equals("C4.5Weka")
|| learnerName.equals("KNNWeka") || learnerName.equals("NaiveBayesWeka")
|| learnerName.equals("PAUM") || learnerName.equals("SVMExec") ||
learnerName.equals("PAUMExec")) {
if(learnerName.endsWith("Weka")) {
return 1;
} else {
return 2;
}
} else {
throw new GateException("The learning engine named as \"" + learnerName
+ "\" is not defined!");
}
}
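// A minimal usage sketch (the learner names below are just two of the names
// accepted above); the return value 1 denotes a Weka-based learner and 2 any
// other supported learner:
//
//   int type1 = LightWeightLearningApi.obtainLearnerType("KNNWeka"); // 1
//   int type2 = LightWeightLearningApi.obtainLearnerType("PAUM");    // 2
//   // obtainLearnerType("SomeOtherLearner") would throw a GateException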
/** Convert and display the linear SVM models in terms of NLP features. */
public void viewSVMmodelsInNLPFeatures(File modelFile,
LearningEngineSettings engineSettings) {
try {
// First decide whether the learning model is a linear one or not
if(!(engineSettings.learnerSettings.learnerName
.equalsIgnoreCase("SVMLibSvmJava") && (!engineSettings.learnerSettings.paramsOfLearning
.contains("-t ") || engineSettings.learnerSettings.paramsOfLearning
.contains("-t 0")))) {
System.out
.println("According to the configuration file, "
+ "the model file is not linear svm model. Hence it cannot be displayed in the "
+ "current implementation!");
return;
}
int numP0, numN0;
boolean surroundMode;
numP0 = engineSettings.numPosSVMModel;
numN0 = engineSettings.numNegSVMModel;
surroundMode = engineSettings.surround;
// Build a map from feature id back to the NLP feature form
HashMap featId2Form = new HashMap();
for(Object obj : featuresList.featuresList.keySet()) {
int k = Integer.parseInt(featuresList.featuresList.get(obj).toString());
featId2Form.put(k, obj);
}
// Need some methods from MultiClassLearning
MultiClassLearning mulL = new MultiClassLearning();
// Open the model file and read the model
if(modelFile.exists() && !modelFile.isDirectory()) {
// see whether we're trying to apply an old-style model
// stored all in one file
BufferedReader buff = new BufferedReader(new InputStreamReader(
new FileInputStream(modelFile), "UTF-8"));
String firstLine = buff.readLine();
buff.close();
if(firstLine != null && firstLine.endsWith("#numTrainingDocs")) {
// this is an old-style model, so try and transparently upgrade it to
// the new format
mulL.upgradeSingleFileModelToDirectory(modelFile);
} else {
throw new IOException("Unrecognised model file format for file "
+ modelFile);
}
}
if(!modelFile.exists()) { throw new IllegalStateException(
"Model directory " + modelFile + " does not exist");
}
// Read the training meta information from the meta data file,
// including the total number of features and the number of classes (numClasses)
File metaDataFile = new File(modelFile, ConstantParameters.FILENAMEOFModelMetaData);
BufferedReader metaDataBuff = new BufferedReader(new InputStreamReader(
new FileInputStream(metaDataFile), "UTF-8"));
int totalNumFeatures;
String learnerNameFromModel = null;
// note that reading the training meta data also reads the number of classes
// in the model, i.e. it sets numClasses.
totalNumFeatures = mulL.ReadTrainingMetaData(metaDataBuff,
learnerNameFromModel);
metaDataBuff.close();
// for each class
int classIndex = 1;
for(int iClass = 0; iClass < mulL.numClasses; ++iClass) {
float b;
float[] w = new float[totalNumFeatures];
//Read the model file
final File thisClassModelFile = new File(modelFile, String.format(
ConstantParameters.FILENAMEOFPerClassModel, Integer
.valueOf(classIndex++)));
BufferedReader modelBuff = new BufferedReader(
new InputStreamReader(
new FileInputStream(thisClassModelFile), "UTF-8"));
// Read the header line (its contents are not used here)
String[] items = modelBuff.readLine().split(" ");
// Get the weight vector
b = SvmLibSVM.readWeightVectorFromFile(modelBuff, w);
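// The returned value b is presumably the bias term of the linear model;
// only the weight vector w is used for the display below.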
modelBuff.close();
int numT;
int numP;
if(numP0 > 0) {
numT = 0;
for(int i = 1; i < totalNumFeatures; ++i)
if(w[i] >= 0) ++numT;
float[] wP = new float[numT];
int[] indexW = new int[numT];
numT = 0;
for(int i = 1; i < totalNumFeatures; ++i)
if(w[i] >= 0) {
wP[numT] = w[i];
indexW[numT] = i;
++numT;
}
if(numP0 > numT)
numP = numT;
else numP = numP0;
int[] indexSort = new int[numP];
sortFloatAscIndex(wP, indexSort, numT, numP);
String st1 = null;
if(surroundMode) {
st1 = labelsAndId.id2Label.get(
new Integer(iClass / 2 + 1).toString()).toString();
if(iClass % 2 == 0)
st1 += "-StartToken";
else st1 += "-LastToken";
} else {
st1 = labelsAndId.id2Label.get(new Integer(iClass + 1).toString())
.toString();
}
System.out.println("The " + numP
+ " most significiant positive NLP feature for class: " + st1
+ ":");
for(int i = 0; i < numP; ++i) {
int k1 = indexW[indexSort[i]]
/ (int)ConstantParameters.MAXIMUMFEATURES;
int k2 = indexW[indexSort[i]]
% (int)ConstantParameters.MAXIMUMFEATURES;
String st = null;
if(k1 == 0) {
st = "0";
} else {
if(k1 <= maxNegPositionTotal)
st = "-" + k1;
else {
k1 -= maxNegPositionTotal;
st = "+" + k1;
}
}
st += " " + featId2Form.get(k2).toString();
System.out.println(i + ": " + st + " -- " + wP[indexSort[i]]);
}
}
// For the negative weights
if(numN0 > 0) {
System.out.println("Negative weights:");
numT = 0;
for(int i = 0; i < totalNumFeatures; ++i)
if(w[i] < 0) ++numT;
float[] wN = new float[numT];
int[] indexWN = new int[numT];
numT = 0;
for(int i = 0; i < totalNumFeatures; ++i)
if(w[i] < 0) {
wN[numT] = -w[i];
indexWN[numT] = i;
++numT;
}
if(numN0 > numT)
numP = numT;
else numP = numN0;
int[] indexSortN = new int[numP];
sortFloatAscIndex(wN, indexSortN, numT, numP);
String st1 = null;
if(surroundMode) {
st1 = labelsAndId.id2Label.get(
new Integer(iClass / 2 + 1).toString()).toString();
if(iClass % 2 == 0)
st1 += "-StartToken";
else st1 += "-LastToken";
} else {
st1 = labelsAndId.id2Label.get(new Integer(iClass + 1).toString())
.toString();
}
System.out.println("The " + numP
+ " most significiant negative NLP feature for class: " + st1
+ ":");
for(int i = 0; i < numP; ++i) {
int k1 = indexWN[indexSortN[i]]
/ (int)ConstantParameters.MAXIMUMFEATURES;
int k2 = indexWN[indexSortN[i]]
% (int)ConstantParameters.MAXIMUMFEATURES;
String st = null;
if(k1 == 0) {
st = "0";
} else {
if(k1 <= maxNegPositionTotal)
st = "-" + k1;
else {
k1 -= maxNegPositionTotal;
st = "+" + k1;
}
}
st += " " + featId2Form.get(k2).toString();
float wN0 = -wN[indexSortN[i]];
System.out.println(i + ": " + st + " -- " + wN0);
}
}
}
} catch(IOException e) {
e.printStackTrace();
}
}
/**
* Select the numK largest components from an array. wP is the array holding
* the numbers to sort over, numT is the number of entries of wP to consider,
* and indexSort receives the indices of the numK largest numbers, ordered
* from largest to smallest value.
*/
public static void sortFloatAscIndex(float[] wP, int[] indexSort, int numT,
int numK) {
int i, j, k, j1;
float[] rp1 = new float[numK];
float one;
if(numT <= 0) return;
rp1[0] = wP[0];
indexSort[0] = 0;
for(i = 1; i < numT; ++i) {
one = wP[i];
if(i >= numK)
j1 = numK - 1;
else j1 = i - 1;
k = j1;
for(j = j1; j > -1; --j) {
if(one > rp1[j])
--k;
else break;
}
if(i < numK) {
rp1[i] = rp1[i - 1];
indexSort[i] = indexSort[i - 1];
}
++k;
for(j = j1; j > k; --j) {
rp1[j] = rp1[j - 1];
indexSort[j] = indexSort[j - 1];
}
if(k < numK) {
rp1[k] = one;
indexSort[k] = i;
}
}
}
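// A small illustrative example of sortFloatAscIndex above (values chosen
// arbitrarily): with
//   float[] wP = {0.3f, 0.9f, 0.1f, 0.7f};
//   int[] indexSort = new int[2];
//   sortFloatAscIndex(wP, indexSort, 4, 2);
// indexSort ends up as {1, 3}, i.e. the indices of the two largest values,
// ordered from largest to smallest.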
// /////// Benchmarkable ////////////////
private String parentBenchmarkID;
private String benchmarkID;
/**
* Returns the benchmark ID of the parent of this resource.
*
* @return the parent benchmark ID
*/
public String getParentBenchmarkId() {
return this.parentBenchmarkID;
}
/**
* Returns the benchmark ID of this resource.
*
* @return the benchmark ID, initialised to "LightWeightLearningApi" if not
* yet set
*/
public String getBenchmarkId() {
if(this.benchmarkID == null) {
benchmarkID = "LightWeightLearningApi";
}
return this.benchmarkID;
}
/**
* Given the ID of the parent resource, this method produces the benchmark
* ID unique to this resource.
*
* @param parentID the benchmark ID of the parent resource
*/
public void createBenchmarkId(String parentID) {
parentBenchmarkID = parentID;
benchmarkID = Benchmark.createBenchmarkId("LightWeightLearningApi",
parentID);
}
/**
* This method sets the parent benchmark ID for this resource.
*
* @param benchmarkID the benchmark ID of the parent resource
*/
public void setParentBenchmarkId(String benchmarkID) {
parentBenchmarkID = benchmarkID;
}
/**
* Returns the logger object being used by this resource.
*
* @return the shared benchmark logger
*/
public Logger getLogger() {
return Benchmark.logger;
}
public void setBenchmarkId(String arg0) {
// stub
}
}