/**
* Counts the number of times the word "Goldfish" appears in a sentence. That total is
* added as a feature to every sentence annotation.
*
* Also adds summary information to each document, namely:
*
* - total number of characters in document
* - total number of tokens in document
* - total number of words in document
* - total number of sentences
* - total "Goldfish" count
*
* @author Andrew Golightly (acg4@cs.waikato.ac.nz)
* -- last updated 18/05/2003
*/
package andrewgolightly.nlp.gate.prs;
import gate.Annotation;
import gate.AnnotationSet;
import gate.creole.ExecutionException;
import gate.creole.ANNIEConstants;
import java.util.Iterator;
/**
 * Processing resource that looks for tokens whose string is exactly
 * "Goldfish" (case-sensitive).
 *
 * For every Sentence annotation in the input annotation set it:
 *
 * - adds a "Goldfish" annotation to the output annotation set over each
 *   matching token found within the sentence's offsets
 * - stores the number of matches on the sentence as an Integer feature named
 *   "Goldfish Count"
 *
 * It also stores summary counts on the document as String-valued features:
 * "Number of characters", "Number of tokens", "Number of sentences",
 * "Number of words" and "Total \"Goldfish\" count".
 *
 * Token and Sentence annotations must already be present in the input set.
 */
public class Goldfish extends gate.creole.AbstractLanguageAnalyser {

  /** Token string that is searched for; also the type of the annotation created. */
  private static final String GOLDFISH = "Goldfish";

  /** Value of the token "kind" feature that marks a token as a word. */
  private static final String WORD_KIND = "word";

  /** Name of the per-sentence feature holding that sentence's match count. */
  private static final String SENTENCE_COUNT_FEATURE = "Goldfish Count";

  /* Names of the document-level summary features written by execute(). */
  private static final String CHARACTERS_FEATURE = "Number of characters";
  private static final String TOKENS_FEATURE = "Number of tokens";
  private static final String SENTENCES_FEATURE = "Number of sentences";
  private static final String WORDS_FEATURE = "Number of words";
  private static final String TOTAL_FEATURE = "Total \"Goldfish\" count";

  private String inputASname, outputASname;

  // NOTE: the accessor names deliberately keep their original, unconventional
  // lower-case spelling; they are this class's public interface and renaming
  // them could break whatever binds the parameters by name.

  /**
   * @return the name of the annotation set that Token and Sentence
   *         annotations are read from; null or empty selects the document's
   *         default annotation set
   */
  public String getinputASname() {
    return inputASname;
  }

  /**
   * @param inputASname name of the annotation set to read from; null or
   *        empty selects the document's default annotation set
   */
  public void setinputASname(String inputASname) {
    this.inputASname = inputASname;
  }

  /**
   * @return the name of the annotation set that "Goldfish" annotations are
   *         written to; null or empty selects the document's default
   *         annotation set
   */
  public String getoutputASname() {
    return outputASname;
  }

  /**
   * @param outputASname name of the annotation set to write to; null or
   *        empty selects the document's default annotation set
   */
  public void setoutputASname(String outputASname) {
    this.outputASname = outputASname;
  }

  /**
   * Annotates every "Goldfish" token, records the per-sentence match count
   * and writes the summary features onto the document.
   *
   * @throws ExecutionException if no document is set, if the input set yields
   *         no Token or no Sentence annotation set, or if a "Goldfish"
   *         annotation cannot be created at a token's offsets
   */
  public void execute() throws ExecutionException {
    gate.Document doc = getDocument();
    if (doc == null) {
      throw new ExecutionException("No document to process!");
    }

    // Drop only the summary values this resource owns, so that a failed run
    // does not leave stale numbers behind. Features written by anything else
    // are left untouched.
    doc.getFeatures().remove(CHARACTERS_FEATURE);
    doc.getFeatures().remove(TOKENS_FEATURE);
    doc.getFeatures().remove(SENTENCES_FEATURE);
    doc.getFeatures().remove(WORDS_FEATURE);
    doc.getFeatures().remove(TOTAL_FEATURE);

    AnnotationSet inputAnnSet = selectAnnotationSet(doc, inputASname);
    AnnotationSet outputAnnSet = selectAnnotationSet(doc, outputASname);

    doc.getFeatures().put(CHARACTERS_FEATURE,
        String.valueOf(doc.getContent().toString().length()));

    AnnotationSet tokens = requireAnnotations(inputAnnSet,
        ANNIEConstants.TOKEN_ANNOTATION_TYPE,
        "You need to run the English Tokenizer first!");
    doc.getFeatures().put(TOKENS_FEATURE, String.valueOf(tokens.size()));

    AnnotationSet sentences = requireAnnotations(inputAnnSet,
        ANNIEConstants.SENTENCE_ANNOTATION_TYPE,
        "You need to run the Sentence Splitter first!");
    doc.getFeatures().put(SENTENCES_FEATURE, String.valueOf(sentences.size()));

    int wordCount = 0;
    int totalGoldfishCount = 0;

    // Explicit iterators with casts are kept (rather than a typed for-each)
    // so the loop does not depend on AnnotationSet being generic.
    Iterator sentenceIterator = sentences.iterator();
    while (sentenceIterator.hasNext()) {
      Annotation sentenceAnnotation = (Annotation) sentenceIterator.next();
      AnnotationSet sentenceTokens = inputAnnSet.get(
          ANNIEConstants.TOKEN_ANNOTATION_TYPE,
          sentenceAnnotation.getStartNode().getOffset(),
          sentenceAnnotation.getEndNode().getOffset());

      int sentenceGoldfishCount = 0;
      // A sentence without tokens simply contributes a count of zero.
      if (sentenceTokens != null) {
        Iterator tokenIterator = sentenceTokens.iterator();
        while (tokenIterator.hasNext()) {
          Annotation tokenAnnotation = (Annotation) tokenIterator.next();

          // Constant-first equals: a token lacking the feature is skipped
          // instead of raising a NullPointerException.
          if (WORD_KIND.equals(tokenAnnotation.getFeatures().get(
              ANNIEConstants.TOKEN_KIND_FEATURE_NAME))) {
            wordCount++;
          }
          if (GOLDFISH.equals(tokenAnnotation.getFeatures().get(
              ANNIEConstants.TOKEN_STRING_FEATURE_NAME))) {
            addGoldfishAnnotation(outputAnnSet, tokenAnnotation);
            sentenceGoldfishCount++;
          }
        }
      }

      // Stored as an Integer, unlike the String-valued document features.
      sentenceAnnotation.getFeatures().put(SENTENCE_COUNT_FEATURE,
          Integer.valueOf(sentenceGoldfishCount));
      totalGoldfishCount += sentenceGoldfishCount;
    }

    doc.getFeatures().put(WORDS_FEATURE, String.valueOf(wordCount));
    doc.getFeatures().put(TOTAL_FEATURE, String.valueOf(totalGoldfishCount));
  }

  /** Returns the named annotation set, or the default set when the name is null or empty. */
  private static AnnotationSet selectAnnotationSet(gate.Document doc, String name) {
    if (name == null || name.length() == 0) {
      return doc.getAnnotations();
    }
    return doc.getAnnotations(name);
  }

  /**
   * Fetches the annotations of one type, failing with the given message when
   * the lookup yields null.
   *
   * NOTE(review): presumably a null result means "no annotation of this
   * type"; whether get(type) returns null or an empty set in that case should
   * be confirmed against the GATE version in use. An empty set is accepted
   * here and yields a count of zero.
   */
  private static AnnotationSet requireAnnotations(AnnotationSet source,
      String type, String messageIfMissing) throws ExecutionException {
    AnnotationSet found = source.get(type);
    if (found == null) {
      throw new ExecutionException(messageIfMissing);
    }
    return found;
  }

  /** Adds a "Goldfish" annotation with an empty feature map over the token's offsets. */
  private static void addGoldfishAnnotation(AnnotationSet target,
      Annotation tokenAnnotation) throws ExecutionException {
    try {
      target.add(tokenAnnotation.getStartNode().getOffset(),
          tokenAnnotation.getEndNode().getOffset(), GOLDFISH,
          gate.Factory.newFeatureMap());
    }
    catch (gate.util.InvalidOffsetException ioe) {
      throw new ExecutionException(ioe);
    }
  }
}