package module7; import gate.Annotation; import gate.AnnotationSet; import gate.Resource; import gate.Utils; import gate.creole.AbstractLanguageAnalyser; import gate.creole.ExecutionException; import gate.creole.ResourceInstantiationException; import gate.creole.metadata.CreoleParameter; import gate.creole.metadata.CreoleResource; import gate.creole.metadata.Optional; import gate.creole.metadata.RunTime; /** * Example document statistics PR, configured using CREOLE * annotations. Requires a creole.xml file containing * <JAR SCAN="true">module7.jar</JAR> * * Copyright 2011 The University of Sheffield * Released under the GNU Lesser General Public Licence version 3 */ @CreoleResource(name = "Document Statistics (@CR)", comment = "Counts the number of tokens in the document") public class DocStats extends AbstractLanguageAnalyser { private String inputASName; private String annotTypeToCount; private String countFeatureName; public String getInputASName() { return inputASName; } @Optional @RunTime @CreoleParameter(comment = "The annotation set used for input") public void setInputASName(String inputASName) { this.inputASName = inputASName; } public String getAnnotTypeToCount() { return annotTypeToCount; } @RunTime @CreoleParameter(comment = "The annotation type to count", defaultValue = "Token") public void setAnnotTypeToCount(String annotTypeToCount) { this.annotTypeToCount = annotTypeToCount; } public String getCountFeatureName() { return countFeatureName; } @RunTime @CreoleParameter( comment = "The feature name under which the count should be stored", defaultValue = "token_count") public void setCountFeatureName(String countFeatureName) { this.countFeatureName = countFeatureName; } @Override public void execute() throws ExecutionException { AnnotationSet inputAS = document.getAnnotations(inputASName); AnnotationSet interestingAnnots = inputAS.get(annotTypeToCount); document.getFeatures().put(countFeatureName, interestingAnnots.size()); // the following are hard-coded examples of other kinds // of things you might want to count, and will typically // only be meaningful if the annotTypeToCount is the default // of Token // use the AnnotationSet API to select annotations // based on feature value matching constraints AnnotationSet words = inputAS.get(annotTypeToCount, Utils.featureMap("kind", "word")); // Utils.featureMap is a shorthand to create a // FeatureMap from an array of alternating names // and values. document.getFeatures().put("num_words", words.size()); // count the number of nouns and verbs (based on POS category) int numNouns = 0; int numVerbs = 0; for(Annotation a : interestingAnnots) { String category = (String)a.getFeatures().get("category"); if(category != null) { if(category.startsWith("NN")) { numNouns++; } else if(category.startsWith("VB")) { numVerbs++; } } } document.getFeatures().put("num_nouns", numNouns); document.getFeatures().put("num_verbs", numVerbs); } @Override public Resource init() throws ResourceInstantiationException { System.out.println("DocStats.init() called"); return this; } }