// Source: releases/gate-6.0-build3764-ALL/plugins/Alignment/src/gate/composite/impl/SegmentProcessingPR.java
 
package gate.composite.impl;

import gate.Annotation;
import gate.AnnotationSet;
import gate.Corpus;
import gate.CorpusController;
import gate.Factory;
import gate.FeatureMap;
import gate.Gate;
import gate.ProcessingResource;
import gate.Resource;
import gate.composite.CombiningMethod;
import gate.composite.CombiningMethodException;
import gate.composite.CompositeDocument;
import gate.compound.CompoundDocument;
import gate.compound.impl.CompoundDocumentImpl;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.util.OffsetComparator;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * As the name suggests, this PR is useful for processing segments of a
 * text. Given a controller, an annotation type and a document, this PR
 * creates a composite document for every annotation of the type
 * specified by the segment annotation type parameter. Since the
 * composite documents are linked with their original document, when the
 * controller processes a composite document, the composite document
 * takes care of transferring relevant annotations back to the original
 * document. This is a good way of processing just a segment of a
 * document.
 * 
 * @author niraj
 */
public class SegmentProcessingPR extends AbstractLanguageAnalyser implements
                                                                 ProcessingResource {

  /**
   * Controller that should be used to process segments.
   */
  private CorpusController controller;

  /**
   * annotation type that the segment is annotated with.
   */
  private String segmentAnnotationType;

  /**
   * Annotation set that contains the segment annotation and the
   * annotations to be copied to the composite document.
   */
  private String inputASName;

  /**
   * Used internally - this is the document that will be used for
   * holding the original document and the composite documents.
   */
  private CompoundDocument compoundDoc;

  /**
   * Method used for creating a new composite document.
   */
  protected CombiningMethod combiningMethodInst;

  private boolean debug = false;

  /** Initialise this resource, and return it. */
  public Resource init() throws ResourceInstantiationException {
    // a combining method that creates a composite document with the
    // annotation as identified by the annotation id
    combiningMethodInst = new CombineFromAnnotID();
    compoundDoc = new CompoundDocumentImpl();

    // initializing an empty compound document
    compoundDoc.init();
    return this;
  }

  /* this method is called to reinitialize the resource */
  public void reInit() throws ResourceInstantiationException {
    // reinitialization code
    init();
  }

  /**
   * Should be called to execute this PR on a document.
   */
  public void execute() throws ExecutionException {
    // if no document provided
    if(document == null) {
      throw new ExecutionException("Document is null!");
    }

    // annotation set to use
    AnnotationSet set = inputASName == null || inputASName.trim().length() == 0
            ? document.getAnnotations()
            : document.getAnnotations(inputASName);

    AnnotationSet segmentSet = set.get(segmentAnnotationType);
    if(set.isEmpty())
      throw new ExecutionException("Could not find annotations of type :"
              + segmentAnnotationType);

    String originalDocument = document.getName();

    if(document instanceof CompoundDocument) {
      if(debug) {
        System.out
                .println("Document is a compound document and using the memeber \""
                        + document.getName() + "\" for processing");
      }
      compoundDoc.addDocument(document.getName(), ((CompoundDocument)document)
              .getCurrentDocument());
    }
    else {
      if(debug) {
        System.out.println("Document is a normal GATE document with name \""
                + document.getName() + "\"");
      }

      // add the current document as a member of the compound document
      compoundDoc.addDocument(document.getName(), document);
    }

    Corpus tempCorpus = null;

    try {
      Map<String, Object> map = new HashMap<String, Object>();
      map.put(CombineFromAnnotID.INPUT_AS_NAME_FEATURE_NAME, inputASName);
      map.put(CombineFromAnnotID.DOCUMENT_ID_FEATURE_NAME, document.getName());
      FeatureMap hideMap = Factory.newFeatureMap();
      Gate.setHiddenAttribute(hideMap, true);
      tempCorpus = (Corpus)Factory.createResource("gate.corpora.CorpusImpl",
              Factory.newFeatureMap(), hideMap, "compoundDocCorpus");
      tempCorpus.add(compoundDoc);
      controller.setCorpus(tempCorpus);
      List<Annotation> segmentList = new ArrayList<Annotation>(segmentSet);
      Collections.sort(segmentList, new OffsetComparator());
      for(Annotation annotation : segmentList) {

        if(debug) {
          System.out.println("Processing annotation" + annotation.getType()
                  + "=>" + annotation.getId());
        }
        String nameForCompositeDoc = "Composite" + Gate.genSym();
        map.put(CombineFromAnnotID.ANNOTATION_ID_FEATURE_NAME, annotation
                .getId());
        CompositeDocument compositeDoc = null;
        
        try {
          if(debug) {
            System.out.println("Creating temp composite document:"+nameForCompositeDoc);
          }
          compositeDoc = combiningMethodInst.combine(compoundDoc, map);
          compositeDoc.setName(nameForCompositeDoc);
          compoundDoc.addDocument(nameForCompositeDoc, compositeDoc);

          // change focus to composite document
          compoundDoc.setCurrentDocument(nameForCompositeDoc);

          // now run the application on the composite document
          controller.execute();
          compoundDoc.removeDocument(nameForCompositeDoc);

        }
        catch(CombiningMethodException e) {
          throw new ExecutionException(e);
        }
        finally {
          // finally get rid of the composite document
          compoundDoc.removeDocument(nameForCompositeDoc);

          if(compositeDoc != null) {
            gate.Factory.deleteResource(compositeDoc);
          }
        }
      }
    }
    catch(ResourceInstantiationException e) {
      throw new ExecutionException(e);
    }
    finally {
      compoundDoc.removeDocument(originalDocument);
      if(tempCorpus != null) {
        gate.Factory.deleteResource(tempCorpus);
      }
    }
  }

  /**
   * Gets the set controller. The controller is used for processing the
   * segmented document.
   * 
   * @return
   */
  public CorpusController getController() {
    return controller;
  }

  /**
   * Sets the controller. The controller is used for processing the
   * segmented document.
   * 
   * @param controller
   */
  public void setController(CorpusController controller) {
    this.controller = controller;
  }

  /**
   * Annotation type that has been used for segmenting the document. The
   * PR uses annotations of this type to create new composite documents
   * and process them individually.
   * 
   * @return
   */
  public String getSegmentAnnotationType() {
    return segmentAnnotationType;
  }

  /**
   * Annotation type that has been used for segmenting the document. The
   * PR uses annotations of this type to create new composite documents
   * and process them individually.
   * 
   * @param unitAnnotationType
   */
  public void setSegmentAnnotationType(String segmentAnnotationType) {
    this.segmentAnnotationType = segmentAnnotationType;
  }

  /**
   * Annotation set to use for obtaining segment annotations and the
   * annotations to copy into the composite document.
   * 
   * @return
   */
  public String getInputASName() {
    return inputASName;
  }

  /**
   * Annotation set to use for obtaining segment annotations and the
   * annotations to copy into the composite document.
   * 
   * @param inputASName
   */
  public void setInputASName(String inputAS) {
    this.inputASName = inputAS;
  }
} // class SegmentProcessingPR