1   package gate.ml;
2   
3   import java.util.*;
4   
5   import weka.core.*;
6   
7   import gate.*;
8   import gate.util.*;
9   
10  /**
11   * Detects the presence of annotations of given types.
12   */
13  public class AnnotationDetector implements InstanceDetector {
14  
15    public AnnotationDetector() {
16      annotationTypes = new ArrayList();
17    }
18  
19  
20    public void dataAdvance(Long offset) {
21      //check the annotations that start at the offset for the required types.
22      Set annotations = dataCollector.getStartingAnnotations(offset);
23      if(annotations != null && !annotations.isEmpty()){
24        Iterator annIter = annotations.iterator();
25        while(annIter.hasNext()){
26          Annotation annotation = (Annotation)annIter.next();
27          if(annotationTypes.contains(annotation.getType())){
28            //we found a new instance; generate the instance for it
29            //all the atributes plus the class
30            Instance instance = new Instance(
31              dataCollector.getAttributeDetectors().size() + 1);
32            instance.setDataset(dataCollector.getDataSet());
33            Iterator attDetIter = dataCollector.getAttributeDetectors().
34                                  iterator();
35            int currentAtt = 0;
36            while(attDetIter.hasNext()){
37              Object attValue = ((AttributeDetector)attDetIter.next()).
38                                getAttributeValue(annotation);
39              if(attValue != null){
40                if(attValue instanceof Number){
41                  instance.setValue(currentAtt, ((Number)attValue).doubleValue());
42                }else instance.setValue(currentAtt, attValue.toString());
43              }else{
44                instance.setMissing(currentAtt);
45              }
46              currentAtt ++;
47            }
48            //set the class
49            instance.setValue(currentAtt, annotation.getType());
50            dataCollector.addInstance(instance);
51          }//if(annotationTypes.contains(annotation.getType()))
52        }//while(annIter.hasNext()){
53      }//if(annotations != null && !annotations.isEmpty()){
54    }//public void dataAdvance(Long offset) {
55  
56    public void setDataCollector(DataCollector collector) {
57      this.dataCollector = collector;
58    }
59  
60    /**
61     * Sets the annotation types that constitute instences this detector is
62     * interested in.
63     * The types are represented as a string containing the annotation types
64     * separated by commas (e.g. "Person,Organisation,Location".
65     * @param typesList a String value.
66     */
67    public void setAnnotationTypes(String typesList){
68      StringTokenizer strTok = new StringTokenizer(typesList, ",", false);
69      annotationTypes.clear();
70      while(strTok.hasMoreTokens())
71        annotationTypes.add(strTok.nextToken().trim());
72    }
73  
74    /**
75     * Gets the definition for the attribute handled by this detector.
76     * @return an Attribute object.
77     */
78    public Attribute getClassAttribute(){
79      FastVector values = new FastVector(annotationTypes.size());
80      Iterator typesIter = annotationTypes.iterator();
81      while(typesIter.hasNext()) values.addElement(typesIter.next());
82      return new Attribute("AnnotationType", values);
83    }
84  
85    protected DataCollector dataCollector;
86  
87    protected List annotationTypes;
88  }