/*
* LabelsOfFeatureVectorDoc.java
*
* Yaoyong Li 22/03/2007
*
* $Id: LabelsOfFeatureVectorDoc.java, v 1.0 2007-03-22 12:58:16 +0000 yaoyong $
*/
package gate.learning;
import gate.learning.DocFeatureVectors.LongCompactor;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
* Labels (indexes) of feature vectors in one document. It represents
* multi-label via the LabelsOfFV object.
*/
public class LabelsOfFeatureVectorDoc {
  /** Array of multi-labels for all instances in a document. */
  public LabelsOfFV[] multiLabels = null;

  /** Constructor, trivial case. */
  public LabelsOfFeatureVectorDoc() {
  }

  /**
   * Get the labels from the NLP label features of the document and store
   * them, one {@link LabelsOfFV} per instance, in {@link #multiLabels}.
   * <p>
   * Each entry of <code>nlpDoc.classNames</code> is split on
   * <code>ConstantParameters.ITEMSEPARATOR</code>; names that are not keys of
   * <code>label2Id.label2Id</code> are ignored. A <code>null</code> entry
   * yields an instance with no labels.
   * <ul>
   * <li>Not surround mode: every known name (with a trailing
   * <code>ConstantParameters.SUFFIXSTARTTOKEN</code> removed) contributes its
   * id unchanged.</li>
   * <li>Surround mode: a name carrying the start suffix contributes
   * <code>2 * id - 1</code> (start token); a name contributes
   * <code>2 * id</code> (end token) when the instance is the last one or the
   * next instance does not carry the same label. An entity with a single token
   * therefore gets both labels.</li>
   * </ul>
   * The ids of each instance are stored sorted with {@link LongCompactor}.
   *
   * @param nlpDoc the document; <code>numInstances</code> and
   *          <code>classNames</code> are read.
   * @param label2Id the mapping from label name to label id.
   * @param surroundMode <code>true</code> to produce start/end token labels.
   */
  public void obtainMultiLabelsFromNLPDocSurround(NLPFeaturesOfDoc nlpDoc,
    Label2Id label2Id, boolean surroundMode) {
    int num = nlpDoc.numInstances;
    multiLabels = new LabelsOfFV[num];
    for(int i = 0; i < num; ++i) {
      Set<Integer> ids;
      if(surroundMode) {
        // The last instance has no successor; passing null makes every known
        // label on it count as an end token.
        String nextClassNames = (i + 1 == num)
          ? null
          : nlpDoc.classNames[i + 1];
        ids = surroundLabelIds(nlpDoc.classNames[i], nextClassNames, label2Id);
      } else {
        ids = plainLabelIds(nlpDoc.classNames[i], label2Id);
      }
      multiLabels[i] = toLabelsOfFV(ids);
    }
  }

  /** Collect the ids of the known labels of one instance (not surround mode). */
  private Set<Integer> plainLabelIds(String classNames, Label2Id label2Id) {
    Set<Integer> ids = new HashSet<Integer>();
    if(classNames == null) {
      return ids;
    }
    for(String item : classNames.split(ConstantParameters.ITEMSEPARATOR)) {
      String name = stripStartSuffix(item);
      // just use the labels in the LabelsList.save file
      if(label2Id.label2Id.containsKey(name)) {
        ids.add(idOf(label2Id, name));
      }
    }
    return ids;
  }

  /**
   * Collect the start (2 * id - 1) and end (2 * id) token ids of one instance
   * (surround mode). <code>nextClassNames</code> is the class-name string of
   * the following instance, or <code>null</code> if there is none.
   */
  private Set<Integer> surroundLabelIds(String classNames,
    String nextClassNames, Label2Id label2Id) {
    Set<Integer> ids = new HashSet<Integer>();
    if(classNames == null) {
      return ids;
    }
    for(String item : classNames.split(ConstantParameters.ITEMSEPARATOR)) {
      boolean isStartToken = item
        .endsWith(ConstantParameters.SUFFIXSTARTTOKEN);
      String name = stripStartSuffix(item);
      if(!label2Id.label2Id.containsKey(name)) {
        continue;
      }
      int id = idOf(label2Id, name);
      if(isStartToken) {
        ids.add(id * 2 - 1);
      }
      // End token: the label does not continue on the next instance. For a
      // start token this is the single-token entity case.
      // NOTE(review): hasTheSameLabel ignores the start suffix, so when the
      // next instance starts a new entity with the same label this instance
      // is not marked as an end token -- confirm that this is intended.
      if(!hasTheSameLabel(name, nextClassNames)) {
        ids.add(id * 2);
      }
    }
    return ids;
  }

  /**
   * Build the LabelsOfFV for one instance: constructed with the number of ids,
   * and, if there is at least one id, with its labels array filled with the
   * ids in LongCompactor order.
   */
  private static LabelsOfFV toLabelsOfFV(Set<Integer> ids) {
    LabelsOfFV result = new LabelsOfFV(ids.size());
    if(!ids.isEmpty()) {
      List<Integer> sorted = new ArrayList<Integer>(ids);
      Collections.sort(sorted, new LongCompactor());
      int[] labels = new int[sorted.size()];
      for(int j = 0; j < labels.length; ++j) {
        labels[j] = sorted.get(j);
      }
      result.labels = labels;
    }
    return result;
  }

  /** Look up a label name (which must be a key of the map) and parse its id. */
  private static int idOf(Label2Id label2Id, String name) {
    return Integer.parseInt(label2Id.label2Id.get(name).toString());
  }

  /** Remove a trailing start-token suffix from a class name, if present. */
  private static String stripStartSuffix(String name) {
    if(name.endsWith(ConstantParameters.SUFFIXSTARTTOKEN)) {
      return name.substring(0, name
        .lastIndexOf(ConstantParameters.SUFFIXSTARTTOKEN));
    }
    return name;
  }

  /**
   * Does a sequence of class names contain one particular label? Start-token
   * suffixes in the sequence are ignored for the comparison.
   *
   * @param label the label name, without start-token suffix.
   * @param classNames the class names joined by the item separator; may be
   *          <code>null</code>, in which case the result is <code>false</code>.
   */
  private boolean hasTheSameLabel(String label, String classNames) {
    if(classNames == null) {
      return false;
    }
    for(String item : classNames.split(ConstantParameters.ITEMSEPARATOR)) {
      if(stripStartSuffix(item).equals(label)) {
        return true;
      }
    }
    return false;
  }
}