package gate.merger;

import java.util.*;

import gate.Annotation;
import gate.AnnotationSet;
import gate.Factory;
import gate.FeatureMap;
import gate.ProcessingResource;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.util.AnnotationMerging;
import gate.util.IaaCalculation;
import gate.util.InvalidOffsetException;

/**
 * Processing resource that merges the annotations found in several named
 * annotation sets of the current document (one set per annotator) into a
 * single output annotation set.
 *
 * <p>
 * For every annotation type selected for merging, the per-annotator sets are
 * handed to {@link AnnotationMerging}; each merged annotation is then copied
 * into the output set together with an {@code "annotators"} feature listing
 * the names of the source sets that contributed to it (separated by
 * {@code "/"}).
 */
public class AnnotationMergingMain extends AbstractLanguageAnalyser implements
                                                                   ProcessingResource {

  /** Separator between entries of the list-valued string parameters. */
  private static final String TERM_SEPARATOR = ";";

  /** Separator between an annotation type and its feature name. */
  private static final String TYPE_FEATURE_SEPARATOR = "->";

  /** Output annotation set used when no output set name is configured. */
  private static final String DEFAULT_OUTPUT_SET = "mergedAnns";

  /** Feature that records which source sets contributed to a merged annotation. */
  private static final String ANNOTATORS_FEATURE = "annotators";

  /** Annotation sets for merging in one document. */
  private String annSetsForMerging;

  /** Annotation set for merged annotations. */
  private String annSetOutput;

  /** Merging method. */
  private MergingMethodsEnum mergingMethod;

  /**
   * Minimal number of annotators to agree for the MergingByAnnotatorNum method
   */
  private String minimalAnnNum;

  /** Specifying the annotation types and features for merging. */
  private String annTypesAndFeats;

  /** Should source annotations be kept when merged? */
  private Boolean keepSourceForMergedAnnotations;

  /**
   * Initialise this resource, and return it.
   *
   * @return this resource; no initialisation work is performed
   * @throws ResourceInstantiationException never thrown by this implementation
   */
  @Override
  public gate.Resource init() throws ResourceInstantiationException {
    return this;
  } // init()

  /**
   * Run the resource on the current document.
   *
   * <ol>
   * <li>Resolve the source annotation sets: the {@code ;}-separated names in
   * {@code annSetsForMerging}, or every non-blank named set of the document
   * (sorted by name) when that parameter is blank.</li>
   * <li>Resolve the annotation types (and optional feature per type, written
   * as {@code Type->feature}) from {@code annTypesAndFeats}, or all types
   * present in the source sets when that parameter is blank.</li>
   * <li>Merge each type with the configured merging method and add the
   * merged annotations to the output set.</li>
   * </ol>
   *
   * <p>
   * When {@code keepSourceForMergedAnnotations} is {@code false}, every
   * annotation in the source sets that is coextensive with a merged
   * annotation is removed. The match is made on offsets only, so coextensive
   * annotations of other types are removed as well. An unset ({@code null})
   * value is treated as "keep".
   *
   * @throws ExecutionException if there is no document, a requested source
   *           set does not exist, {@code minimalAnnNum} is not an integer, the
   *           merging method is undefined, or a merged annotation cannot be
   *           added to the output set
   */
  @Override
  public void execute() throws ExecutionException {
    if(document == null) {
      throw new ExecutionException("No document to process!");
    }

    Set<String> existingSetNames = document.getAnnotationSetNames();
    if(existingSetNames == null) {
      // Defensive: treat "no named sets" the same as an empty collection.
      existingSetNames = Collections.<String> emptySet();
    }

    String[] sourceSetNames = resolveSourceSetNames(existingSetNames);
    if(sourceSetNames.length == 0) {
      // Nothing to merge; the merging utilities and the clean-up step below
      // both require at least one source set.
      return;
    }

    Map<String, String> typeToFeature = resolveTypesAndFeatures(sourceSetNames);
    int minimalAnnotators = resolveMinimalAnnotatorCount(sourceSetNames.length);

    // Previously the fallback lookup's result was discarded, so a blank
    // output name silently wrote into the default annotation set.
    AnnotationSet outputSet = document.getAnnotations(resolveOutputSetName());

    // Only an explicit FALSE triggers deletion of source annotations.
    boolean keepSources = !Boolean.FALSE.equals(keepSourceForMergedAnnotations);

    for(Map.Entry<String, String> typeEntry : typeToFeature.entrySet()) {
      String annType = typeEntry.getKey();
      String featureName = typeEntry.getValue();

      // One slot per annotator; a slot stays null when get(type) yields null.
      AnnotationSet[] perAnnotator = new AnnotationSet[sourceSetNames.length];
      for(int i = 0; i < sourceSetNames.length; ++i) {
        perAnnotator[i] = document.getAnnotations(sourceSetNames[i]).get(annType);
      }

      // NOTE(review): the meaning of the second argument (0) is defined by
      // IaaCalculation and is not visible here; kept as in the original.
      boolean sameInstances =
        IaaCalculation.isSameInstancesForAnnotators(perAnnotator, 0);

      // Declared as HashMap because it is passed straight to AnnotationMerging.
      HashMap<Annotation, String> mergeInfo = new HashMap<Annotation, String>();
      runMerging(perAnnotator, featureName, mergeInfo, minimalAnnotators,
        sameInstances);

      for(Map.Entry<Annotation, String> merged : mergeInfo.entrySet()) {
        Annotation mergedAnn = merged.getKey();
        Long start = mergedAnn.getStartNode().getOffset();
        Long end = mergedAnn.getEndNode().getOffset();

        if(!keepSources) {
          removeCoextensiveSources(mergedAnn, start, end, sourceSetNames);
        }

        // Copy the features so the output annotation does not share its
        // feature map with the source annotation.
        FeatureMap features = Factory.newFeatureMap();
        FeatureMap sourceFeatures = mergedAnn.getFeatures();
        if(sourceFeatures != null) {
          features.putAll(sourceFeatures);
        }
        features.put(ANNOTATORS_FEATURE,
          describeAnnotators(merged.getValue(), sourceSetNames));

        try {
          outputSet.add(start, end, annType, features);
        } catch(InvalidOffsetException e) {
          // Surface the failure instead of printing a stack trace and
          // silently losing the merged annotation.
          throw new ExecutionException(e);
        }
      }

      // remove the annotator features from the first annotation set
      AnnotationSet firstSetOfType =
        document.getAnnotations(sourceSetNames[0]).get(annType);
      if(firstSetOfType != null) {
        for(Annotation ann : firstSetOfType) {
          FeatureMap annFeatures = ann.getFeatures();
          if(annFeatures != null) {
            annFeatures.remove(ANNOTATORS_FEATURE);
          }
        }
      }
    }// End the loop for annotation type
  }

  /** Returns true when the string is null or contains only whitespace. */
  private static boolean isBlank(String s) {
    return s == null || s.trim().length() == 0;
  }

  /**
   * Determines the names of the annotation sets to merge and verifies that
   * every explicitly requested set exists in the document.
   */
  private String[] resolveSourceSetNames(Set<String> existingSetNames)
    throws ExecutionException {
    List<String> names = new ArrayList<String>();
    if(isBlank(annSetsForMerging)) {
      // No explicit list: use every non-blank named set, in sorted order.
      // These names come from the document itself, so they are used verbatim.
      for(String name : existingSetNames) {
        if(!isBlank(name)) {
          names.add(name);
        }
      }
      Collections.sort(names);
    } else {
      for(String name : annSetsForMerging.trim().split(TERM_SEPARATOR)) {
        String trimmed = name.trim();
        if(!existingSetNames.contains(trimmed)) {
          throw new ExecutionException("The annotation set " + trimmed
            + " for merging doesn't exist in current document "
            + document.getName());
        }
        names.add(trimmed);
      }
    }
    return names.toArray(new String[names.size()]);
  }

  /**
   * Builds the map from annotation type to the feature used for merging
   * (null when no feature is given for the type).
   */
  private Map<String, String> resolveTypesAndFeatures(String[] sourceSetNames) {
    Map<String, String> typeToFeature = new HashMap<String, String>();
    if(isBlank(annTypesAndFeats)) {
      // No explicit list: merge every type present in any source set.
      for(String setName : sourceSetNames) {
        Set<String> types = document.getAnnotations(setName).getAllTypes();
        for(String type : types) {
          if(!typeToFeature.containsKey(type)) {
            typeToFeature.put(type, null);
          }
        }
      }
    } else {
      for(String rawEntry : annTypesAndFeats.split(TERM_SEPARATOR)) {
        String entry = rawEntry.trim();
        if(entry.length() == 0) {
          // Skip empty entries such as those produced by "A;;B".
          continue;
        }
        int separatorAt = entry.indexOf(TYPE_FEATURE_SEPARATOR);
        if(separatorAt >= 0) {
          String type = entry.substring(0, separatorAt).trim();
          String feature =
            entry.substring(separatorAt + TYPE_FEATURE_SEPARATOR.length()).trim();
          typeToFeature.put(type, feature);
        } else {
          typeToFeature.put(entry, null);
        }
      }
    }
    return typeToFeature;
  }

  /**
   * Parses {@code minimalAnnNum} and clamps it to the range
   * [1, numAnnotators]; a blank value means 1.
   */
  private int resolveMinimalAnnotatorCount(int numAnnotators)
    throws ExecutionException {
    if(isBlank(minimalAnnNum)) {
      return 1;
    }
    int requested;
    try {
      // Parse the trimmed value once; the blank check above also trims.
      requested = Integer.parseInt(minimalAnnNum.trim());
    } catch(NumberFormatException e) {
      throw new ExecutionException("The minimal number of annotators \""
        + minimalAnnNum + "\" is not a valid integer!", e);
    }
    if(requested < 1) {
      return 1;
    }
    if(requested > numAnnotators) {
      return numAnnotators;
    }
    return requested;
  }

  /** Returns the configured output set name, or the default when blank. */
  private String resolveOutputSetName() {
    return isBlank(annSetOutput) ? DEFAULT_OUTPUT_SET : annSetOutput;
  }

  /** Dispatches to the merging routine selected by {@code mergingMethod}. */
  private void runMerging(AnnotationSet[] perAnnotator, String featureName,
                          HashMap<Annotation, String> mergeInfo,
                          int minimalAnnotators, boolean sameInstances)
    throws ExecutionException {
    if(mergingMethod == null) {
      // switch on a null enum would throw a NullPointerException.
      throw new ExecutionException("The merging method is not defined!");
    }
    switch(mergingMethod){
      case MajorityVoting:
        AnnotationMerging.mergeAnnotationMajority(perAnnotator, featureName,
          mergeInfo, sameInstances);
        break;
      case MergingByAnnotatorNum:
        AnnotationMerging.mergeAnnotation(perAnnotator, featureName, mergeInfo,
          minimalAnnotators, sameInstances);
        break;
      default:
        throw new ExecutionException("The merging method is not defined!");
    }
  }

  /**
   * Removes from every source set the annotations that are coextensive with
   * the merged annotation (matched on offsets only, regardless of type).
   */
  private void removeCoextensiveSources(Annotation mergedAnn, Long start,
                                        Long end, String[] sourceSetNames) {
    for(String setName : sourceSetNames) {
      AnnotationSet sourceSet = document.getAnnotations(setName);
      AnnotationSet contained = sourceSet.getContained(start, end);
      if(contained == null) {
        continue;
      }
      // Iterate over a snapshot in case the returned set is backed by the
      // set we are removing from.
      for(Annotation candidate : new ArrayList<Annotation>(contained)) {
        if(candidate.coextensive(mergedAnn)) {
          sourceSet.remove(candidate);
        }
      }
    }
  }

  /**
   * Converts the "-"-separated annotator indexes produced by the merging
   * step into a "/"-separated list of the corresponding source set names.
   */
  private static String describeAnnotators(String annotatorIndexes,
                                           String[] sourceSetNames) {
    String[] indexes = annotatorIndexes.split("-");
    StringBuilder names = new StringBuilder();
    for(int i = 0; i < indexes.length; ++i) {
      if(i > 0) {
        names.append("/");
      }
      names.append(sourceSetNames[Integer.parseInt(indexes[i])]);
    }
    return names.toString();
  }

  /** Sets the {@code ;}-separated names of the annotation sets to merge. */
  public void setAnnSetsForMerging(String annSetSeq) {
    this.annSetsForMerging = annSetSeq;
  }

  /** Returns the {@code ;}-separated names of the annotation sets to merge. */
  public String getAnnSetsForMerging() {
    return this.annSetsForMerging;
  }

  /** Sets the name of the annotation set that receives merged annotations. */
  public void setAnnSetOutput(String annSet) {
    this.annSetOutput = annSet;
  }

  /** Returns the name of the annotation set that receives merged annotations. */
  public String getAnnSetOutput() {
    return this.annSetOutput;
  }

  /** Sets the {@code ;}-separated annotation types (optionally {@code Type->feature}). */
  public void setAnnTypesAndFeats(String annTypeSeq) {
    this.annTypesAndFeats = annTypeSeq;
  }

  /** Returns the annotation types and features specification. */
  public String getAnnTypesAndFeats() {
    return this.annTypesAndFeats;
  }

  /** Returns the merging method. */
  public MergingMethodsEnum getMergingMethod() {
    return this.mergingMethod;
  }

  /** Sets the merging method. */
  public void setMergingMethod(MergingMethodsEnum m) {
    this.mergingMethod = m;
  }

  /** Returns the minimal number of agreeing annotators, as configured. */
  public String getMinimalAnnNum() {
    return this.minimalAnnNum;
  }

  /** Sets the minimal number of agreeing annotators (an integer as text). */
  public void setMinimalAnnNum(String n) {
    this.minimalAnnNum = n;
  }

  /** Returns whether source annotations are kept when merged. */
  public Boolean getkeepSourceForMergedAnnotations() {
    return this.keepSourceForMergedAnnotations;
  }

  /** Sets whether source annotations are kept when merged. */
  public void setkeepSourceForMergedAnnotations(Boolean b) {
    this.keepSourceForMergedAnnotations = b;
  }
}