/*
* Ngram.java
*
* Yaoyong Li 22/03/2007
*
* $Id: Ngram.java, v 1.0 2007-03-22 12:58:16 +0000 yaoyong $
*/
package gate.learning;
import gate.util.GateException;
import org.jdom.Element;
/**
* Desribing the NGAM features defined in the DATASET element of the
* configuration file.
*/
public class Ngram {
/** Name of the Ngram feature. */
private String name;
/** The N in the N-gram. */
private short number;
/** How many GATE features used for the N-gram. */
private short consnum;
/** The GATE types of the features used in the N-gram. */
private String[] typesGate = null;
/** The GATE features used in the N-gram. */
private String[] featuresGate = null;
/**
* The posistion of the annotation considered relative to the current instance
* annotation. Normally it should be 0.
*/
int position;
/** The weight of ngram. */
float weight=1.0f;
/**
* Load the N-gram definition from an XML element of configuration file.
*/
public Ngram(Element jdomElement) throws GateException {
// find the name
Element anElement = jdomElement.getChild("NAME");
if(anElement == null)
throw new GateException(
"Required element \"NAME\" not present in attribute:\n"
+ jdomElement.toString() + "!");
else name = anElement.getTextTrim();
name = name.replaceAll(ConstantParameters.ITEMSEPARATOR,
ConstantParameters.ITEMSEPREPLACEMENT);
// find how many tokens (N) are used for the Ngram
anElement = jdomElement.getChild("NUMBER");
if(anElement == null)
throw new GateException(
"Required element \"NUMBER\" not present in attribute:\n"
+ jdomElement.toString() + "!");
else number = (new Short(anElement.getTextTrim())).shortValue();
// find how many constituents are used for the each token
anElement = jdomElement.getChild("CONSNUM");
if(anElement == null)
throw new GateException(
"Required element \"CONSNUM\" not present in attribute:\n"
+ jdomElement.toString() + "!");
else consnum = (new Short(anElement.getTextTrim())).shortValue();
// find the position if present
anElement = jdomElement.getChild("POSITION");
if(anElement == null)
position = 0;
else position = Integer.parseInt(anElement.getTextTrim());
//Find the weight if present
anElement = jdomElement.getChild("WEIGHT");
if(anElement == null)
weight = 1.0f;
else weight = Float.parseFloat(anElement.getTextTrim());
// allocate memory for the types and features for all the
// constituents
typesGate = new String[consnum];
featuresGate = new String[consnum];
for(int i = 0; i < consnum; ++i) {
// find the type
anElement = jdomElement.getChild("CONS-" + new Integer(i + 1));
if(anElement == null)
throw new GateException(
"Required element \"TYPE\" not present in attribute:\n"
+ jdomElement.toString() + "!");
else {
obtainTypeAndFeat(anElement, typesGate, featuresGate, i);
}
}
}
/** Obtain the types and features of one N-gram definition. */
private void obtainTypeAndFeat(Element anElement, String[] typesGate,
String[] featuresGate, int i) throws GateException {
Element lowerElement = anElement.getChild("TYPE");
if(anElement != null) {
typesGate[i] = lowerElement.getTextTrim();
typesGate[i] = typesGate[i].replaceAll(ConstantParameters.ITEMSEPARATOR,
ConstantParameters.ITEMSEPREPLACEMENT);
} else throw new GateException(
"Required element \"TYPE\" not present in attribute!");
lowerElement = anElement.getChild("FEATURE");
if(anElement != null) {
featuresGate[i] = lowerElement.getTextTrim();
featuresGate[i] = featuresGate[i]
.replaceAll(ConstantParameters.ITEMSEPARATOR,
ConstantParameters.ITEMSEPREPLACEMENT);
} else throw new GateException(
"Required element \"FEATURE\" not present in attribute!");
}
public Ngram() {
name = null;
typesGate = null;
featuresGate = null;
number = 0;
consnum = 0;
}
public void setName(String name) {
this.name = name;
}
public String getName() {
return name;
}
public void setNumber(short number) {
this.number = number;
}
public short getNumber() {
return number;
}
public void setConsnum(short consnum) {
this.consnum = consnum;
}
public short getConsnum() {
return consnum;
}
public void setTypesGate(String[] typesGate) {
this.typesGate = typesGate;
}
public String[] getTypessGate() {
return typesGate;
}
public String[] setFeaturesGate(String[] featuresGate) {
return this.featuresGate = featuresGate;
}
public String[] getFeaturesGate() {
return featuresGate;
}
public String toString() {
StringBuffer res = new StringBuffer();
res.append("Name: " + name + "\n");
res.append("Number: " + this.number + "\n");
res.append("Consnum: " + this.consnum + "\n");
for(int i = 0; i < typesGate.length; ++i) {
res.append("cons-" + new Integer(i + 1) + "\n");
res.append("Types: " + typesGate[i] + "\n");
res.append("Features: " + featuresGate[i] + "\n");
}
return res.toString();
}
}