package module7;
import gate.Annotation;
import gate.AnnotationSet;
import gate.Resource;
import gate.Utils;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.creole.metadata.RunTime;
/**
 * Example document statistics PR, configured using CREOLE
 * annotations. Requires a creole.xml file containing
 * {@code <JAR SCAN="true">module7.jar</JAR>}
 *
 * <p>When executed, the PR counts the annotations of a configurable type
 * in a configurable annotation set and stores the result as a document
 * feature. It also stores three hard-coded example statistics under the
 * document features {@code num_words}, {@code num_nouns} and
 * {@code num_verbs}.
 *
 * Copyright 2011 The University of Sheffield
 * Released under the GNU Lesser General Public Licence version 3
 */
@CreoleResource(name = "Document Statistics (@CR)",
    comment = "Counts the number of tokens in the document")
public class DocStats extends AbstractLanguageAnalyser {

  /** Name of the annotation set read by {@link #execute()}. */
  private String inputASName;

  /** Type of the annotations that are counted. */
  private String annotTypeToCount;

  /** Name of the document feature that receives the main count. */
  private String countFeatureName;

  /**
   * @return the name of the annotation set used for input
   */
  public String getInputASName() {
    return inputASName;
  }

  /**
   * Sets the name of the annotation set used for input. This is an
   * optional runtime parameter.
   *
   * @param inputASName the annotation set name
   */
  @Optional
  @RunTime
  @CreoleParameter(comment = "The annotation set used for input")
  public void setInputASName(String inputASName) {
    this.inputASName = inputASName;
  }

  /**
   * @return the annotation type that is counted
   */
  public String getAnnotTypeToCount() {
    return annotTypeToCount;
  }

  /**
   * Sets the annotation type to count. This is a runtime parameter whose
   * declared default value is {@code Token}.
   *
   * @param annotTypeToCount the annotation type
   */
  @RunTime
  @CreoleParameter(comment = "The annotation type to count",
      defaultValue = "Token")
  public void setAnnotTypeToCount(String annotTypeToCount) {
    this.annotTypeToCount = annotTypeToCount;
  }

  /**
   * @return the name of the document feature that receives the count
   */
  public String getCountFeatureName() {
    return countFeatureName;
  }

  /**
   * Sets the name of the document feature under which the count is
   * stored. This is a runtime parameter whose declared default value is
   * {@code token_count}.
   *
   * @param countFeatureName the feature name
   */
  @RunTime
  @CreoleParameter(
      comment = "The feature name under which the count should be stored",
      defaultValue = "token_count")
  public void setCountFeatureName(String countFeatureName) {
    this.countFeatureName = countFeatureName;
  }

  /**
   * Computes the statistics for the current document and stores them as
   * document features:
   * <ul>
   * <li>the number of {@code annotTypeToCount} annotations in the input
   * set, under the feature named by {@code countFeatureName};</li>
   * <li>{@code num_words}: the number of those annotations that have the
   * feature {@code kind=word};</li>
   * <li>{@code num_nouns} / {@code num_verbs}: the number of those
   * annotations whose {@code category} feature is a string starting with
   * {@code NN} / {@code VB} respectively.</li>
   * </ul>
   *
   * @throws ExecutionException if no document has been set, or if the
   *         annotation type or the count feature name is {@code null}
   */
  @Override
  public void execute() throws ExecutionException {
    // fail with a meaningful message instead of a NullPointerException
    if(document == null) {
      throw new ExecutionException("No document to process!");
    }
    if(annotTypeToCount == null) {
      throw new ExecutionException(
          "The annotTypeToCount parameter must be set");
    }
    if(countFeatureName == null) {
      throw new ExecutionException(
          "The countFeatureName parameter must be set");
    }

    AnnotationSet inputAS = document.getAnnotations(inputASName);
    AnnotationSet interestingAnnots = inputAS.get(annotTypeToCount);
    document.getFeatures().put(countFeatureName,
        interestingAnnots.size());

    // the following are hard-coded examples of other kinds
    // of things you might want to count, and will typically
    // only be meaningful if the annotTypeToCount is the default
    // of Token

    // use the AnnotationSet API to select annotations
    // based on feature value matching constraints.
    // Utils.featureMap is a shorthand to create a
    // FeatureMap from an array of alternating names
    // and values.
    AnnotationSet words = inputAS.get(annotTypeToCount,
        Utils.featureMap("kind", "word"));
    document.getFeatures().put("num_words", words.size());

    // count the number of nouns and verbs (based on POS category)
    int numNouns = 0;
    int numVerbs = 0;
    for(Annotation a : interestingAnnots) {
      // feature values are plain Objects, so check the type rather than
      // casting blindly; annotations whose category is missing or is not
      // a String are simply not counted
      Object category = a.getFeatures().get("category");
      if(category instanceof String) {
        String pos = (String)category;
        if(pos.startsWith("NN")) {
          numNouns++;
        } else if(pos.startsWith("VB")) {
          numVerbs++;
        }
      }
    }
    document.getFeatures().put("num_nouns", numNouns);
    document.getFeatures().put("num_verbs", numVerbs);
  }

  /**
   * Initialises this resource. This example implementation only prints a
   * message to standard output so that the call is visible.
   *
   * @return this resource
   * @throws ResourceInstantiationException declared by the overridden
   *         method; never thrown by this implementation
   */
  @Override
  public Resource init() throws ResourceInstantiationException {
    System.out.println("DocStats.init() called");
    return this;
  }
}