Log in Help
Print
Home 〉 releases 〉 gate-5.1-beta2-build3402-ALL 〉 plugins 〉 Learning 〉 src 〉 gate 〉 learning 〉 Ngram.java
 
/*
 *  Ngram.java
 * 
 *  Yaoyong Li 22/03/2007
 *
 *  $Id: Ngram.java, v 1.0 2007-03-22 12:58:16 +0000 yaoyong $
 */
package gate.learning;

import gate.util.GateException;
import org.jdom.Element;

/**
 * Describes one N-gram feature defined in the DATASET element of the
 * configuration file.
 * <p>
 * An instance is either loaded from a JDOM element (see
 * {@link #Ngram(Element)}) or created empty and filled in through the
 * setters.
 */
public class Ngram {
  /** Name of the Ngram feature. */
  private String name;
  /** The N in the N-gram. */
  private short number;
  /** How many GATE features used for the N-gram. */
  private short consnum;
  /** The GATE types of the features used in the N-gram. */
  private String[] typesGate = null;
  /** The GATE features used in the N-gram. */
  private String[] featuresGate = null;
  /**
   * The position of the annotation considered relative to the current instance
   * annotation. Normally it should be 0.
   */
  int position;
  /** The weight of ngram. */
  float weight = 1.0f;

  /**
   * Load the N-gram definition from an XML element of configuration file.
   * <p>
   * The children NAME, NUMBER and CONSNUM are required; POSITION (default 0)
   * and WEIGHT (default 1.0) are optional. For every constituent
   * {@code i = 1..CONSNUM} a child named {@code CONS-i} is required, and it
   * must itself contain a TYPE and a FEATURE child.
   *
   * @param jdomElement the element holding the N-gram definition.
   * @throws GateException if any required element is missing.
   * @throws NumberFormatException if NUMBER, CONSNUM, POSITION or WEIGHT
   *           does not contain a parsable number.
   */
  public Ngram(Element jdomElement) throws GateException {
    // find the name
    name = replaceItemSeparator(requiredChild(jdomElement, "NAME")
      .getTextTrim());
    // find how many tokens (N) are used for the Ngram
    number = Short.parseShort(requiredChild(jdomElement, "NUMBER")
      .getTextTrim());
    // find how many constituents are used for the each token
    consnum = Short.parseShort(requiredChild(jdomElement, "CONSNUM")
      .getTextTrim());
    // find the position if present
    Element anElement = jdomElement.getChild("POSITION");
    position = (anElement == null) ? 0 : Integer.parseInt(anElement
      .getTextTrim());
    // find the weight if present
    anElement = jdomElement.getChild("WEIGHT");
    weight = (anElement == null) ? 1.0f : Float.parseFloat(anElement
      .getTextTrim());
    // allocate memory for the types and features for all the
    // constituents
    typesGate = new String[consnum];
    featuresGate = new String[consnum];
    for(int i = 0; i < consnum; ++i) {
      // constituents are numbered from 1 in the configuration file
      Element consElement = requiredChild(jdomElement, "CONS-" + (i + 1));
      obtainTypeAndFeat(consElement, typesGate, featuresGate, i);
    }
  }

  /**
   * Return the named child of an element, failing if it is absent.
   *
   * @param parent the element to search in.
   * @param childName the name of the required child element.
   * @return the child element, never null.
   * @throws GateException if the parent has no child with that name.
   */
  private static Element requiredChild(Element parent, String childName)
    throws GateException {
    Element child = parent.getChild(childName);
    if(child == null) {
      throw new GateException("Required element \"" + childName
        + "\" not present in attribute:\n" + parent.toString() + "!");
    }
    return child;
  }

  /**
   * Replace every match of {@code ConstantParameters.ITEMSEPARATOR} in the
   * text with {@code ConstantParameters.ITEMSEPREPLACEMENT}. The separator is
   * passed to {@link String#replaceAll(String, String)}, so it is applied as a
   * regular expression.
   */
  private static String replaceItemSeparator(String text) {
    return text.replaceAll(ConstantParameters.ITEMSEPARATOR,
      ConstantParameters.ITEMSEPREPLACEMENT);
  }

  /**
   * Obtain the types and features of one N-gram definition.
   *
   * @param anElement the CONS-i element holding a TYPE and a FEATURE child.
   * @param typesGate array receiving the type at index {@code i}.
   * @param featuresGate array receiving the feature at index {@code i}.
   * @param i index of the constituent (0-based).
   * @throws GateException if the TYPE or the FEATURE child is missing.
   */
  private void obtainTypeAndFeat(Element anElement, String[] typesGate,
    String[] featuresGate, int i) throws GateException {
    // The looked-up child is what may be missing, so that is what is tested.
    Element lowerElement = anElement.getChild("TYPE");
    if(lowerElement == null) {
      throw new GateException(
        "Required element \"TYPE\" not present in attribute!");
    }
    typesGate[i] = replaceItemSeparator(lowerElement.getTextTrim());

    lowerElement = anElement.getChild("FEATURE");
    if(lowerElement == null) {
      throw new GateException(
        "Required element \"FEATURE\" not present in attribute!");
    }
    featuresGate[i] = replaceItemSeparator(lowerElement.getTextTrim());
  }

  /**
   * Create an empty N-gram definition: no name, no types or features, and
   * both the number and the constituent count set to 0.
   */
  public Ngram() {
    name = null;
    typesGate = null;
    featuresGate = null;
    number = 0;
    consnum = 0;
  }

  /** Set the name of the N-gram feature. */
  public void setName(String name) {
    this.name = name;
  }

  /** Get the name of the N-gram feature. */
  public String getName() {
    return name;
  }

  /** Set the N of the N-gram. */
  public void setNumber(short number) {
    this.number = number;
  }

  /** Get the N of the N-gram. */
  public short getNumber() {
    return number;
  }

  /** Set the number of constituents used for the N-gram. */
  public void setConsnum(short consnum) {
    this.consnum = consnum;
  }

  /** Get the number of constituents used for the N-gram. */
  public short getConsnum() {
    return consnum;
  }

  /** Set the GATE types of the constituents (the array is not copied). */
  public void setTypesGate(String[] typesGate) {
    this.typesGate = typesGate;
  }

  /**
   * Get the GATE types of the constituents. The misspelled name is kept for
   * existing callers; {@link #getTypesGate()} returns the same array.
   */
  public String[] getTypessGate() {
    return typesGate;
  }

  /** Get the GATE types of the constituents (the array is not copied). */
  public String[] getTypesGate() {
    return typesGate;
  }

  /**
   * Set the GATE features of the constituents (the array is not copied).
   *
   * @return the array that was just stored.
   */
  public String[] setFeaturesGate(String[] featuresGate) {
    return this.featuresGate = featuresGate;
  }

  /** Get the GATE features of the constituents (the array is not copied). */
  public String[] getFeaturesGate() {
    return featuresGate;
  }

  /**
   * Build a multi-line description: name, number, consnum, then one
   * "cons-i" entry with its type and feature for each element of the types
   * array. Safe to call on an instance whose arrays have not been set.
   */
  @Override
  public String toString() {
    StringBuilder res = new StringBuilder();
    res.append("Name: ").append(name).append("\n");
    res.append("Number: ").append(this.number).append("\n");
    res.append("Consnum: ").append(this.consnum).append("\n");
    // The arrays are null after the no-arg constructor and can be set
    // independently, so neither may be assumed present or equally long.
    if(typesGate != null) {
      for(int i = 0; i < typesGate.length; ++i) {
        String feature = (featuresGate != null && i < featuresGate.length)
          ? featuresGate[i]
          : null;
        res.append("cons-").append(i + 1).append("\n");
        res.append("Types: ").append(typesGate[i]).append("\n");
        res.append("Features: ").append(feature).append("\n");
      }
    }
    return res.toString();
  }
}