|
AnnotationLengthExtractor |
|
/*
 *  Copyright (c) 1998-2001, The University of Sheffield.
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Valentin Tablan 17 June 2002
 *
 *  $Id: AnnotationLengthExtractor.java,v 1.1 2002/06/27 17:12:32 valyt Exp $
 */
package gate.ml;

import java.util.*;

import weka.core.*;

import gate.*;
import gate.util.*;
import gate.creole.ANNIEConstants;


/**
 * Attribute extractor that measures an annotation by the number of Token
 * annotations that start inside its span.
 */
public class AnnotationLengthExtractor extends AbstractAttributeExtractor {

  /** Creates a new extractor; no initialisation is performed here. */
  public AnnotationLengthExtractor() {
  }

  /**
   * Builds the attribute descriptor for this extractor.
   *
   * @return a new {@link Attribute} named "Annotation length".
   */
  public Attribute getAttribute() {
    return new Attribute("Annotation length");
  }


  /**
   * Counts the Token annotations that start at an offset lying in the
   * half-open range [start offset, end offset) of the given annotation.
   * The offsets are visited in the order supplied by
   * <code>dataCollector.nextOffset()</code>; the walk stops as soon as that
   * call yields <code>null</code> or an offset that is not smaller than the
   * end offset of the annotation.
   *
   * @param data the annotation to measure; it is cast to
   * {@link Annotation} without any prior check.
   * @return the number of tokens found, wrapped in a {@link Double}.
   */
  public Object getAttributeValue(Object data) {
    //the incoming object is always treated as an annotation
    Annotation target = (Annotation)data;
    Long limit = target.getEndNode().getOffset();
    int total = 0;
    for(Long current = target.getStartNode().getOffset();
        current != null && current.compareTo(limit) < 0;
        current = dataCollector.nextOffset(current)){
      total += countTokensStartingAt(current);
    }
    return new Double(total);
  }

  /**
   * Counts the annotations of type
   * {@link ANNIEConstants#TOKEN_ANNOTATION_TYPE} among those that
   * <code>dataCollector</code> reports as starting at the given offset.
   *
   * @param offset the offset to inspect.
   * @return the number of Token annotations starting there; 0 when the
   * reported set is <code>null</code> or empty.
   */
  private int countTokensStartingAt(Long offset) {
    Set candidates = dataCollector.getStartingAnnotations(offset);
    if(candidates == null || candidates.isEmpty()){
      return 0;
    }
    int found = 0;
    for(Iterator it = candidates.iterator(); it.hasNext(); ){
      Annotation candidate = (Annotation)it.next();
      if(candidate.getType().equals(ANNIEConstants.TOKEN_ANNOTATION_TYPE)){
        found++;
      }
    }
    return found;
  }
}
|
AnnotationLengthExtractor |
|