// Log in | Help | Print
// Home > wiki > TrainingCourseMay2011 > track-2-extras > DocStats.java
 
package module7;

import gate.Annotation;
import gate.AnnotationSet;
import gate.Resource;
import gate.Utils;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.creole.metadata.RunTime;

/**
 * Example document statistics PR, configured using CREOLE
 * annotations.  Requires a creole.xml file containing
 * {@code <JAR SCAN="true">module7.jar</JAR>}
 *
 * <p>When executed, the PR counts the annotations of a configurable type in
 * a configurable annotation set and stores the result as a feature on the
 * document.  It additionally stores three hard-coded example statistics
 * ({@code num_words}, {@code num_nouns} and {@code num_verbs}) that are
 * typically only meaningful when the counted type is the default of
 * {@code Token}.
 *
 * Copyright 2011 The University of Sheffield
 * Released under the GNU Lesser General Public Licence version 3
 */
@CreoleResource(name = "Document Statistics (@CR)",
    comment = "Counts the number of tokens in the document")
public class DocStats extends AbstractLanguageAnalyser {

  /** Document feature under which the number of "word" annotations is stored. */
  private static final String NUM_WORDS_FEATURE = "num_words";

  /** Document feature under which the number of noun annotations is stored. */
  private static final String NUM_NOUNS_FEATURE = "num_nouns";

  /** Document feature under which the number of verb annotations is stored. */
  private static final String NUM_VERBS_FEATURE = "num_verbs";

  /** Name of the annotation set to read from. */
  private String inputASName;

  /** Type of the annotations to count. */
  private String annotTypeToCount;

  /** Name of the document feature that receives the main count. */
  private String countFeatureName;

  /**
   * @return the name of the annotation set used for input
   */
  public String getInputASName() {
    return inputASName;
  }

  /**
   * Sets the name of the annotation set used for input.  This is an
   * optional runtime parameter.
   *
   * @param inputASName the annotation set name
   */
  @Optional
  @RunTime
  @CreoleParameter(comment = "The annotation set used for input")
  public void setInputASName(String inputASName) {
    this.inputASName = inputASName;
  }

  /**
   * @return the annotation type that is counted
   */
  public String getAnnotTypeToCount() {
    return annotTypeToCount;
  }

  /**
   * Sets the annotation type to count.  This is a runtime parameter whose
   * declared default value is {@code Token}.
   *
   * @param annotTypeToCount the annotation type
   */
  @RunTime
  @CreoleParameter(comment = "The annotation type to count",
      defaultValue = "Token")
  public void setAnnotTypeToCount(String annotTypeToCount) {
    this.annotTypeToCount = annotTypeToCount;
  }

  /**
   * @return the document feature name under which the count is stored
   */
  public String getCountFeatureName() {
    return countFeatureName;
  }

  /**
   * Sets the document feature name under which the count is stored.  This
   * is a runtime parameter whose declared default value is
   * {@code token_count}.
   *
   * @param countFeatureName the feature name
   */
  @RunTime
  @CreoleParameter(
      comment = "The feature name under which the count should be stored",
      defaultValue = "token_count")
  public void setCountFeatureName(String countFeatureName) {
    this.countFeatureName = countFeatureName;
  }

  /**
   * Computes the statistics for the current document and stores them as
   * document features:
   * <ul>
   * <li>the number of {@code annotTypeToCount} annotations, under
   * {@code countFeatureName};</li>
   * <li>{@code num_words}: annotations of that type whose {@code kind}
   * feature is {@code word};</li>
   * <li>{@code num_nouns} / {@code num_verbs}: annotations of that type
   * whose {@code category} feature starts with {@code NN} / {@code VB}.</li>
   * </ul>
   *
   * @throws ExecutionException if no document has been set on this PR
   */
  @Override
  public void execute() throws ExecutionException {
    // fail with the declared exception type rather than a bare
    // NullPointerException when the PR is run without a document
    if(document == null) {
      throw new ExecutionException("No document to process!");
    }

    AnnotationSet inputAS = document.getAnnotations(inputASName);
    AnnotationSet interestingAnnots = inputAS.get(annotTypeToCount);

    document.getFeatures().put(countFeatureName,
        interestingAnnots.size());

    // the following are hard-coded examples of other kinds
    // of things you might want to count, and will typically
    // only be meaningful if the annotTypeToCount is the default
    // of Token

    // use the AnnotationSet API to select annotations
    // based on feature value matching constraints

    AnnotationSet words = inputAS.get(annotTypeToCount,
        Utils.featureMap("kind", "word"));
    // Utils.featureMap is a shorthand to create a
    // FeatureMap from an array of alternating names
    // and values.

    document.getFeatures().put(NUM_WORDS_FEATURE, words.size());

    // count the number of nouns and verbs (based on POS category)
    int numNouns = 0;
    int numVerbs = 0;
    for(Annotation a : interestingAnnots) {
      Object category = a.getFeatures().get("category");
      // feature values are arbitrary objects, so only inspect the category
      // when it really is a String; anything else (including null) is
      // simply not counted instead of aborting with a ClassCastException
      if(category instanceof String) {
        String pos = (String)category;
        if(pos.startsWith("NN")) {
          numNouns++;
        } else if(pos.startsWith("VB")) {
          numVerbs++;
        }
      }
    }

    document.getFeatures().put(NUM_NOUNS_FEATURE, numNouns);
    document.getFeatures().put(NUM_VERBS_FEATURE, numVerbs);
  }

  /**
   * Initialises this resource.  Performs no set-up of its own; it only
   * prints a trace message to standard output so that the call is visible.
   *
   * @return this resource
   * @throws ResourceInstantiationException declared by the overridden
   *         method; not thrown by this implementation
   */
  @Override
  public Resource init() throws ResourceInstantiationException {
    System.out.println("DocStats.init() called");
    return this;
  }

}