Log in Help
Print
Home 〉 releases 〉 gate-5.1-beta2-build3402-ALL 〉 plugins 〉 Learning 〉 src 〉 gate 〉 learning 〉 LabelsOfFeatureVectorDoc.java
 
/*
 *  LabelsOfFeatureVectorDoc.java
 * 
 *  Yaoyong Li 22/03/2007
 *
 *  $Id: LabelsOfFeatureVectorDoc.java, v 1.0 2007-03-22 12:58:16 +0000 yaoyong $
 */
package gate.learning;

import gate.learning.DocFeatureVectors.LongCompactor;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;

/**
 * Labels (indexes) of the feature vectors in one document. Multiple labels
 * per instance are represented via one LabelsOfFV object per instance.
 */
public class LabelsOfFeatureVectorDoc {
  /** Array of multi-labels for all instances in a document. */
  public LabelsOfFV[] multiLabels = null;

  /** Constructor, trivial case. */
  public LabelsOfFeatureVectorDoc() {
  }

  /**
   * Get the labels from the NLP label features of the document and store
   * them, one LabelsOfFV per instance, in {@link #multiLabels}.
   * <p>
   * Each entry of nlpDoc.classNames is split on
   * ConstantParameters.ITEMSEPARATOR; a null entry yields an instance with no
   * labels. Class names that are not keys of label2Id.label2Id are ignored.
   * The resulting label indexes of each instance are de-duplicated and sorted
   * with LongCompactor.
   * <ul>
   * <li>Not surround mode: every known class name (with a trailing
   * ConstantParameters.SUFFIXSTARTTOKEN removed) contributes its id.</li>
   * <li>Surround mode: for a class with id <code>k</code>, a start token gets
   * the label <code>2k - 1</code> and an end token gets the label
   * <code>2k</code>. A token is an end token when it is the last instance or
   * the next instance does not carry the same class name; an entity with a
   * single token therefore gets both labels.</li>
   * </ul>
   *
   * @param nlpDoc the document; numInstances and classNames are read
   * @param label2Id mapping from class name to numeric id
   * @param surroundMode true to produce start/end token labels, false to
   *          produce the plain class ids
   * @throws NumberFormatException if an id stored in label2Id.label2Id does
   *           not print as an integer
   */
  public void obtainMultiLabelsFromNLPDocSurround(NLPFeaturesOfDoc nlpDoc,
    Label2Id label2Id, boolean surroundMode) {
    int num = nlpDoc.numInstances;
    multiLabels = new LabelsOfFV[num];
    for(int i = 0; i < num; ++i) {
      HashSet<Integer> ids = new HashSet<Integer>();
      String names = nlpDoc.classNames[i];
      if(names != null) {
        for(String item : names.split(ConstantParameters.ITEMSEPARATOR)) {
          if(surroundMode) {
            addSurroundLabels(item, i, num, nlpDoc, label2Id, ids);
          } else {
            addPlainLabel(item, label2Id, ids);
          }
        }
      }
      multiLabels[i] = toLabelsOfFV(ids);
    }
  }

  /** Add the id of one class name, if it is a known label, to the set. */
  private static void addPlainLabel(String item, Label2Id label2Id,
    HashSet<Integer> ids) {
    String label = stripStartSuffix(item);
    // just use the labels in the LabelsList.save file
    if(label2Id.label2Id.containsKey(label)) {
      ids.add(idOf(label2Id, label));
    }
  }

  /**
   * Add the start (2 * id - 1) and/or end (2 * id) label of one class name of
   * instance i, if it is a known label, to the set.
   */
  private static void addSurroundLabels(String item, int i, int num,
    NLPFeaturesOfDoc nlpDoc, Label2Id label2Id, HashSet<Integer> ids) {
    boolean isStartToken = item.endsWith(ConstantParameters.SUFFIXSTARTTOKEN);
    String label = stripStartSuffix(item);
    if(!label2Id.label2Id.containsKey(label)) {
      return;
    }
    int id = idOf(label2Id, label);
    if(isStartToken) {
      ids.add(id * 2 - 1);
    }
    // The token closes the entity when nothing follows it or the next
    // instance does not carry the same label. For a start token this is the
    // single-token case, so it receives both labels.
    // NOTE(review): hasTheSameLabel ignores the start suffix of the next
    // instance, so a following token that starts a new entity of the same
    // label is treated as a continuation -- confirm this is intended.
    if(i + 1 == num || !hasTheSameLabel(label, nlpDoc.classNames[i + 1])) {
      ids.add(id * 2);
    }
  }

  /** Numeric id stored for a label that is known to be in label2Id. */
  private static int idOf(Label2Id label2Id, String label) {
    // The value type of the map is not visible here, so go via its string
    // form as the original code did.
    return Integer.parseInt(label2Id.label2Id.get(label).toString());
  }

  /** Remove a trailing start-token suffix from a class name, if present. */
  private static String stripStartSuffix(String name) {
    if(name.endsWith(ConstantParameters.SUFFIXSTARTTOKEN)) {
      return name.substring(0, name
        .lastIndexOf(ConstantParameters.SUFFIXSTARTTOKEN));
    }
    return name;
  }

  /**
   * Build the LabelsOfFV of one instance from its label indexes; the labels
   * array is only allocated when there is at least one label.
   */
  // LongCompactor is declared elsewhere and may be a raw Comparator.
  @SuppressWarnings("unchecked")
  private static LabelsOfFV toLabelsOfFV(HashSet<Integer> ids) {
    LabelsOfFV result = new LabelsOfFV(ids.size());
    if(!ids.isEmpty()) {
      List<Integer> sorted = new ArrayList<Integer>(ids);
      Collections.sort(sorted, new LongCompactor());
      result.labels = new int[sorted.size()];
      for(int j = 0; j < sorted.size(); ++j) {
        result.labels[j] = sorted.get(j);
      }
    }
    return result;
  }

  /**
   * Does a sequence of class names contain one particular label? A start-token
   * suffix on the class names is ignored; a null sequence contains nothing.
   */
  private static boolean hasTheSameLabel(String label, String classNames) {
    if(classNames == null) {
      return false;
    }
    for(String item : classNames.split(ConstantParameters.ITEMSEPARATOR)) {
      if(stripStartSuffix(item).equals(label)) {
        return true;
      }
    }
    return false;
  }
}