// GATE.ac.uk - releases/gate-5.1-beta2-build3402-ALL/plugins/Annotation_Merging/src/gate/merger/AnnotationMergingMain.java

package gate.merger;

import java.util.*;

import gate.Annotation;
import gate.AnnotationSet;
import gate.Factory;
import gate.FeatureMap;
import gate.ProcessingResource;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.util.AnnotationMerging;
import gate.util.IaaCalculation;
import gate.util.InvalidOffsetException;

public class AnnotationMergingMain extends AbstractLanguageAnalyser implements
                                                                   ProcessingResource {
  /** Annotation sets for merging in one document. */
  private String annSetsForMerging;

  /** Annotation set for merged annotations. */
  private String annSetOutput;

  /** Merging method. */
  private MergingMethodsEnum mergingMethod;

  /**
   * Minimal number of annotators to agree for the MergingByAnnotatorNum method
   */
  private String minimalAnnNum;

  /** Specifying the annotation types and features for merging. */
  private String annTypesAndFeats;

  /** Should source annotations be kept when merged? */
  private Boolean keepSourceForMergedAnnotations;

  /** Initialise this resource, and return it. */
  public gate.Resource init() throws ResourceInstantiationException {
    return this;
  } // init()

  /**
   * Run the resource.
   * 
   * @throws ExecutionException
   */
  public void execute() throws ExecutionException {
    // get the annotation sets for merging
    String termSeparator = ";";
    // Get all the existing annotation sets from the current document
    Set<String> annsExisting = document.getAnnotationSetNames();
    String[] annsArray;
    if(annSetsForMerging == null || annSetsForMerging.trim().length() == 0) {
      // throw new ExecutionException("No annotation set was specified for
      // merging!");
      int num = 0;
      for(Object obj : annsExisting) {
        if(obj!= null && obj.toString().trim().length()>0)
          ++num;
      }
      annsArray = new String[num];
      num=0;
      List<String>annsE = new Vector<String>(annsExisting);
      Collections.sort(annsE);
      for(Object obj : annsE) {
        if(obj!= null && obj.toString().trim().length()>0)
          annsArray[num++] = obj.toString();
      }
    }
    else {
      annSetsForMerging = annSetsForMerging.trim();
      annsArray = annSetsForMerging.split(termSeparator);
    }
    int numAnns = annsArray.length;
    for(int i = 0; i < numAnns; ++i)
      annsArray[i] = annsArray[i].trim();

    // Check if each annotation set for merging exists in the current
    // document
    for(int i = 0; i < numAnns; ++i)
      if(!annsExisting.contains(annsArray[i]))
        throw new ExecutionException("The annotation set" + annsArray[i]
          + "for merging doesn't exist in current document "
          + document.getName());
    // Collect the annotation types from annotation sets for merging
    HashMap<String, String> annsTypes = new HashMap<String, String>();
    if(this.annTypesAndFeats == null
      || this.annTypesAndFeats.trim().length() == 0)
      for(int i = 0; i < numAnns; ++i) {
        Set<String> types = document.getAnnotations(annsArray[i]).getAllTypes();
        for(String obj : types)
          if(!annsTypes.containsKey(obj)) annsTypes.put(obj, null);
      }
    else {
      String[] annTs = this.annTypesAndFeats.split(termSeparator);
      for(int i = 0; i < annTs.length; ++i) {
        annTs[i] = annTs[i].trim();
        if(annTs[i].contains("->")) {
          String ty = annTs[i].substring(0, annTs[i].indexOf("->"));
          String tf = annTs[i].substring(annTs[i].indexOf("->") + 2);
          annsTypes.put(ty.trim(), tf.trim());
        }
        else annsTypes.put(annTs[i], null);
      }
    }
    // merging annotation for each annotation type and put it into the
    // merged annotation set.
    int minimalAnnNumInt = 1;
    if(minimalAnnNum != null && minimalAnnNum.trim().length() > 0) {
      if(Integer.parseInt(minimalAnnNum) < 1) 
        minimalAnnNumInt = 1;
      else if (Integer.parseInt(minimalAnnNum) > numAnns)
        minimalAnnNumInt = numAnns;
      else minimalAnnNumInt = Integer.parseInt(minimalAnnNum);
    }
    else minimalAnnNumInt = 1;
    AnnotationSet annsDoc = document.getAnnotations(this.annSetOutput);
    for(String annT : annsTypes.keySet()) {
      // collect the annotation set for the current type for merging
      AnnotationSet[] annsA = new AnnotationSet[numAnns];
      for(int i = 0; i < numAnns; ++i) {
        AnnotationSet anns = document.getAnnotations(annsArray[i]);
        if(anns.get(annT) != null) annsA[i] = anns.get(annT);
      }
      boolean isTheSameInstances = IaaCalculation.isSameInstancesForAnnotators(
        annsA, 0);
      HashMap<Annotation, String> mergeInfor = new HashMap<Annotation, String>();
      //Call different merging methods
      switch(mergingMethod){
        case MajorityVoting:
          AnnotationMerging.mergeAnnotationMajority(annsA, annsTypes.get(annT),
            mergeInfor, isTheSameInstances);
          break;
        case MergingByAnnotatorNum:
          AnnotationMerging.mergeAnnotation(annsA, annsTypes.get(annT),
            mergeInfor, minimalAnnNumInt, isTheSameInstances);
          break;
        default:
          throw new ExecutionException("The merging method is not defined!");
      }
      if(annSetOutput != null && annSetOutput.trim().length() != 0)
        document.getAnnotations(annSetOutput);
      else document.getAnnotations("mergedAnns");
      //Add the merged annotations
      for(Annotation ann : mergeInfor.keySet()) {
        if (!keepSourceForMergedAnnotations) {
          // for each source annotation set
          for(String ASName : annsArray) {
            AnnotationSet sourceAS = document.getAnnotations(ASName);
            // find source annotations for the annotation merged
            // based only on their offsets
            AnnotationSet containedAS = sourceAS.getContained(
              ann.getStartNode().getOffset(), ann.getEndNode().getOffset());
            for (Annotation annotation : containedAS) {
              if (annotation.coextensive(ann)) {
                // delete source annotations
                sourceAS.remove(annotation);
              }
            }
          }
        }
        FeatureMap featM = Factory.newFeatureMap();
        FeatureMap feat0 = ann.getFeatures();
        for(Object obj : feat0.keySet()) {
          featM.put(obj, feat0.get(obj));
        }
        // Get the annotators (annotation set name for each merged annotation)
        String[] annIndex = mergeInfor.get(ann).split("-");
        StringBuffer annNames = new StringBuffer();
        for(int i = 0; i < annIndex.length; ++i) {
          if(i > 0) annNames.append("/");
          annNames.append(annsArray[Integer.valueOf(annIndex[i])]);
        }
        featM.put("annotators", annNames.toString());
        try {
          annsDoc.add(ann.getStartNode().getOffset(), ann.getEndNode()
            .getOffset(), annT, featM);
        }
        catch(InvalidOffsetException e) {
          e.printStackTrace();
        }
      }
      // remove the annotator features from the first annotation set
      AnnotationSet annsRe = document.getAnnotations(annsArray[0]).get(annT);
      for(Annotation ann : annsRe) {
        ann.getFeatures().remove("annotators");
      }

    }// End the loop for annotation type

  }

  public void setAnnSetsForMerging(String annSetSeq) {
    this.annSetsForMerging = annSetSeq;
  }

  public String getAnnSetsForMerging() {
    return this.annSetsForMerging;
  }

  public void setAnnSetOutput(String annSet) {
    this.annSetOutput = annSet;
  }

  public String getAnnSetOutput() {
    return this.annSetOutput;
  }

  public void setAnnTypesAndFeats(String annTypeSeq) {
    this.annTypesAndFeats = annTypeSeq;
  }

  public String getAnnTypesAndFeats() {
    return this.annTypesAndFeats;
  }

  public MergingMethodsEnum getMergingMethod() {
    return this.mergingMethod;
  }

  public void setMergingMethod(MergingMethodsEnum m) {
    this.mergingMethod = m;
  }

  public String getMinimalAnnNum() {
    return this.minimalAnnNum;
  }

  public void setMinimalAnnNum(String n) {
    this.minimalAnnNum = n;
  }

  public Boolean getkeepSourceForMergedAnnotations() {
    return this.keepSourceForMergedAnnotations;
  }

  public void setkeepSourceForMergedAnnotations(Boolean b) {
    this.keepSourceForMergedAnnotations = b;
  }

}