|
AnnotationDetector |
|
1 package gate.ml; 2 3 import java.util.*; 4 5 import weka.core.*; 6 7 import gate.*; 8 import gate.util.*; 9 10 /** 11 * Detects the presence of annotations of given types. 12 */ 13 public class AnnotationDetector implements InstanceDetector { 14 15 public AnnotationDetector() { 16 annotationTypes = new ArrayList(); 17 } 18 19 20 public void dataAdvance(Long offset) { 21 //check the annotations that start at the offset for the required types. 22 Set annotations = dataCollector.getStartingAnnotations(offset); 23 if(annotations != null && !annotations.isEmpty()){ 24 Iterator annIter = annotations.iterator(); 25 while(annIter.hasNext()){ 26 Annotation annotation = (Annotation)annIter.next(); 27 if(annotationTypes.contains(annotation.getType())){ 28 //we found a new instance; generate the instance for it 29 //all the atributes plus the class 30 Instance instance = new Instance( 31 dataCollector.getAttributeDetectors().size() + 1); 32 instance.setDataset(dataCollector.getDataSet()); 33 Iterator attDetIter = dataCollector.getAttributeDetectors(). 34 iterator(); 35 int currentAtt = 0; 36 while(attDetIter.hasNext()){ 37 Object attValue = ((AttributeDetector)attDetIter.next()). 38 getAttributeValue(annotation); 39 if(attValue != null){ 40 if(attValue instanceof Number){ 41 instance.setValue(currentAtt, ((Number)attValue).doubleValue()); 42 }else instance.setValue(currentAtt, attValue.toString()); 43 }else{ 44 instance.setMissing(currentAtt); 45 } 46 currentAtt ++; 47 } 48 //set the class 49 instance.setValue(currentAtt, annotation.getType()); 50 dataCollector.addInstance(instance); 51 }//if(annotationTypes.contains(annotation.getType())) 52 }//while(annIter.hasNext()){ 53 }//if(annotations != null && !annotations.isEmpty()){ 54 }//public void dataAdvance(Long offset) { 55 56 public void setDataCollector(DataCollector collector) { 57 this.dataCollector = collector; 58 } 59 60 /** 61 * Sets the annotation types that constitute instences this detector is 62 * interested in. 63 * The types are represented as a string containing the annotation types 64 * separated by commas (e.g. "Person,Organisation,Location". 65 * @param typesList a String value. 66 */ 67 public void setAnnotationTypes(String typesList){ 68 StringTokenizer strTok = new StringTokenizer(typesList, ",", false); 69 annotationTypes.clear(); 70 while(strTok.hasMoreTokens()) 71 annotationTypes.add(strTok.nextToken().trim()); 72 } 73 74 /** 75 * Gets the definition for the attribute handled by this detector. 76 * @return an Attribute object. 77 */ 78 public Attribute getClassAttribute(){ 79 FastVector values = new FastVector(annotationTypes.size()); 80 Iterator typesIter = annotationTypes.iterator(); 81 while(typesIter.hasNext()) values.addElement(typesIter.next()); 82 return new Attribute("AnnotationType", values); 83 } 84 85 protected DataCollector dataCollector; 86 87 protected List annotationTypes; 88 }
|
AnnotationDetector |
|