// Source: releases/gate-5.1-beta2-build3402-ALL/plugins/Alignment/src/gate/composite/impl/SegmentProcessingPR.java
 
package gate.composite.impl;

import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import gate.*;
import gate.composite.CombiningMethod;
import gate.composite.CombiningMethodException;
import gate.composite.CompositeDocument;
import gate.compound.CompoundDocument;
import gate.compound.impl.CompoundDocumentImpl;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.creole.SerialAnalyserController;

/**
 * A processing resource for running an application over individual segments of
 * a document rather than over the whole text. Given a controller, a segment
 * annotation type and a document, this PR creates one composite document for
 * every annotation of type {@code segmentAnnotationType} and executes the
 * controller on it. Since the composite documents are linked with their
 * original document, the composite document takes care of transferring the
 * relevant annotations back to the original document once the controller has
 * processed it.
 * 
 * @author niraj
 */
public class SegmentProcessingPR extends AbstractLanguageAnalyser implements
                                                                 ProcessingResource {

  /**
   * Controller that should be used to process segments.
   */
  private CorpusController controller;

  /**
   * Annotation type that the segments are annotated with.
   */
  private String segmentAnnotationType;

  /**
   * Name of the annotation set that contains the segment annotations and the
   * annotations to be copied to the composite document. A null or blank name
   * selects the document's default annotation set.
   */
  private String inputASName;

  /**
   * Used internally - this is the document that will be used for holding the
   * original document and the composite documents.
   */
  private CompoundDocument compoundDoc;

  /**
   * Method used for creating a new composite document.
   */
  protected CombiningMethod combiningMethodInst;

  /**
   * Initialises this resource: creates the combining method and an empty
   * compound document that {@link #execute()} later fills.
   * 
   * @return this resource
   * @throws ResourceInstantiationException if the compound document cannot be
   *           initialised
   */
  public Resource init() throws ResourceInstantiationException {
    // a combining method that creates a composite document with the
    // annotation as identified by the annotation id
    combiningMethodInst = new CombineFromAnnotID();

    // initializing an empty compound document
    compoundDoc = new CompoundDocumentImpl();
    compoundDoc.init();
    return this;
  }

  /**
   * Re-initialises this resource by running {@link #init()} again.
   * 
   * @throws ResourceInstantiationException if {@link #init()} fails
   */
  public void reInit() throws ResourceInstantiationException {
    init();
  }

  /**
   * Executes this PR on the current document: for every segment annotation a
   * composite document is built, made the current member of the internal
   * compound document, and processed by the controller.
   * 
   * @throws ExecutionException if no document or controller has been set, if
   *           the input annotation set holds no annotation of the segment
   *           type, or if creating/combining the temporary resources fails
   */
  public void execute() throws ExecutionException {
    // if no document provided
    if(document == null) { throw new ExecutionException("Document is null!"); }

    // fail early instead of hitting a NullPointerException after the temporary
    // corpus has already been created
    if(controller == null) {
      throw new ExecutionException("Controller is null!");
    }

    // annotation set to use: default set when no name is given
    boolean useDefaultSet =
      inputASName == null || inputASName.trim().length() == 0;
    AnnotationSet set =
      useDefaultSet ? document.getAnnotations() : document
        .getAnnotations(inputASName);

    // the check must be made on the segment annotations, not on the whole
    // input set, otherwise a document without segments is silently skipped
    AnnotationSet segmentSet = set.get(segmentAnnotationType);
    if(segmentSet == null || segmentSet.isEmpty()) {
      throw new ExecutionException("Could not find annotations of type :"
        + segmentAnnotationType);
    }

    // remember what the controller was working on so that it can be restored
    // once the temporary corpus has been deleted
    Corpus previousCorpus = controller.getCorpus();

    // add the current document as a member of the compound document
    compoundDoc.addDocument(document.getName(), document);
    Corpus tempCorpus = null;

    try {
      // parameters handed to the combining method; only the annotation id
      // changes from one segment to the next
      Map<String, Object> map = new HashMap<String, Object>();
      map.put(CombineFromAnnotID.INPUT_AS_NAME_FEATURE_NAME, inputASName);
      map.put(CombineFromAnnotID.DOCUMENT_ID_FEATURE_NAME, document.getName());

      // the temporary corpus is created with the hidden attribute set
      FeatureMap hideMap = Factory.newFeatureMap();
      Gate.setHiddenAttribute(hideMap, true);
      tempCorpus =
        (Corpus)Factory.createResource("gate.corpora.CorpusImpl", Factory
          .newFeatureMap(), hideMap, "compoundDocCorpus");
      tempCorpus.add(compoundDoc);
      controller.setCorpus(tempCorpus);

      for(Annotation annotation : segmentSet) {
        map.put(CombineFromAnnotID.ANNOTATION_ID_FEATURE_NAME, annotation
          .getId());
        CompositeDocument compositeDoc = null;
        try {
          compositeDoc = combiningMethodInst.combine(compoundDoc, map);

          // replace any previous composite member with the new one
          compoundDoc.removeDocument(CompositeDocument.COMPOSITE_DOC_NAME);
          compoundDoc.addDocument(CompositeDocument.COMPOSITE_DOC_NAME,
            compositeDoc);

          // change focus to composite document
          compoundDoc.setCurrentDocument(CompositeDocument.COMPOSITE_DOC_NAME);

          // now run the application on the composite document
          controller.execute();
        }
        catch(CombiningMethodException e) {
          throw new ExecutionException(e);
        }
        finally {
          // finally get rid of the composite document
          compoundDoc.removeDocument(CompositeDocument.COMPOSITE_DOC_NAME);
          if(compositeDoc != null) {
            gate.Factory.deleteResource(compositeDoc);
          }
        }
      }
    }
    catch(ResourceInstantiationException e) {
      throw new ExecutionException(e);
    }
    finally {
      // leave the compound document empty for the next execution
      compoundDoc.removeDocument(document.getName());
      compoundDoc.removeDocument(CompositeDocument.COMPOSITE_DOC_NAME);

      // do not leave the controller pointing at the corpus deleted below
      controller.setCorpus(previousCorpus);

      if(tempCorpus != null) {
        gate.Factory.deleteResource(tempCorpus);
      }
    }
  }

  /**
   * Gets the controller that is used for processing the segmented document.
   * 
   * @return the controller, or null if none has been set
   */
  public CorpusController getController() {
    return controller;
  }

  /**
   * Sets the controller that is used for processing the segmented document.
   * 
   * @param controller the controller to run on every composite document
   */
  public void setController(CorpusController controller) {
    this.controller = controller;
  }

  /**
   * Gets the annotation type that has been used for segmenting the document.
   * The PR uses annotations of this type to create new composite documents and
   * process them individually.
   * 
   * @return the segment annotation type
   */
  public String getSegmentAnnotationType() {
    return segmentAnnotationType;
  }

  /**
   * Sets the annotation type that has been used for segmenting the document.
   * The PR uses annotations of this type to create new composite documents and
   * process them individually.
   * 
   * @param segmentAnnotationType the segment annotation type
   */
  public void setSegmentAnnotationType(String segmentAnnotationType) {
    this.segmentAnnotationType = segmentAnnotationType;
  }

  /**
   * Gets the name of the annotation set used for obtaining segment annotations
   * and the annotations to copy into the composite document.
   * 
   * @return the input annotation set name; null or blank means the default
   *         annotation set
   */
  public String getInputASName() {
    return inputASName;
  }

  /**
   * Sets the name of the annotation set used for obtaining segment annotations
   * and the annotations to copy into the composite document.
   * 
   * @param inputAS the input annotation set name; null or blank selects the
   *          default annotation set
   */
  public void setInputASName(String inputAS) {
    this.inputASName = inputAS;
  }
} // class SegmentProcessingPR