Log in Help
Print
Home 〉 gate 〉 plugins 〉 Language_Identification 〉 src 〉 org 〉 knallgrau 〉 utils 〉 textcat 〉 FingerprintGenerator.java
 
/*
 * FingerprintGenerator
 * 
 * Copyright (c) 2011, The University of Sheffield.
 * 
 * This file is part of GATE (see http://gate.ac.uk/), and is free software,
 * licenced under the GNU Library General Public License, Version 3, June 2007
 * (in the distribution as file licence.html, and also available at
 * http://gate.ac.uk/gate/licence.html).
 * 
 * Mark A. Greenwood, 28/05/2011
 */
package org.knallgrau.utils.textcat;

import gate.Annotation;
import gate.AnnotationSet;
import gate.Controller;
import gate.Utils;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ControllerAwarePR;
import gate.creole.ExecutionException;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.creole.metadata.RunTime;
import gate.util.Files;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;

import at.knallgrau.textcat.FingerPrint;

/**
 * Builds a TextCat language fingerprint from the documents this resource is
 * executed over.
 * <p>
 * Text is accumulated in memory across calls to {@link #execute()} and the
 * fingerprint is only created and written to disk in
 * {@link #controllerExecutionFinished(Controller)}. The accumulation buffer is
 * created in {@link #controllerExecutionStarted(Controller)}, so this
 * resource relies on those controller callbacks being invoked around the
 * calls to {@link #execute()}.
 */
@CreoleResource(name = "TextCat Fingerprint Generator", comment = "Generate language fingerprints for use with the TextCat Language Indentification PR", icon = "fingerprint.png", helpURL="http://gate.ac.uk/userguide/sec:misc-creole:language-identification:fingerprints")
public class FingerprintGenerator extends AbstractLanguageAnalyser implements
        ControllerAwarePR {

  private static final long serialVersionUID = -1039437923175492426L;

  /** Location (expected to be a file based URL) the fingerprint is saved to. */
  private URL fingerprintURL;

  /**
   * Text collected so far during the current controller execution; null
   * outside of an execution.
   */
  private StringBuilder text;

  /**
   * The file resolved from {@link #fingerprintURL} when the controller
   * execution started; null outside of an execution.
   */
  private File fingerprintFile;

  /** Annotation type whose covered text is used; blank means whole document. */
  private String annotationType;

  /** Name of the annotation set the {@link #annotationType} is read from. */
  private String annotationSetName;

  /**
   * Sets the location the generated fingerprint is written to.
   *
   * @param fingerprintURL
   *          the destination; it must be convertible to a file when the
   *          controller execution starts
   */
  @RunTime
  @CreoleParameter(comment = "The file in which the generated fingerprint should be saved")
  public void setFingerprintURL(URL fingerprintURL) {
    this.fingerprintURL = fingerprintURL;
  }

  /**
   * @return the location the generated fingerprint is written to
   */
  public URL getFingerprintURL() {
    return fingerprintURL;
  }

  /**
   * Sets the annotation type whose covered text contributes to the
   * fingerprint.
   *
   * @param atype
   *          the annotation type; null or blank means the whole document
   *          content is used instead
   */
  @RunTime
  @Optional
  @CreoleParameter(comment = "The annotation type covering the text to use to build the fingerprint, if unspecifed the whole document will be used")
  public void setAnnotationType(String atype) {
    this.annotationType = atype;
  }

  /**
   * @return the annotation type whose covered text is used, possibly null
   */
  public String getAnnotationType() {
    return this.annotationType;
  }

  /**
   * Sets the name of the annotation set that annotations of the configured
   * type are read from. It is not consulted when the whole document is used.
   *
   * @param inputASName
   *          the annotation set name
   */
  @RunTime
  @Optional
  @CreoleParameter(comment = "The annotation set used for input/output (ignored when using the whole document)")
  public void setAnnotationSetName(String inputASName) {
    this.annotationSetName = inputASName;
  }

  /**
   * @return the name of the annotation set annotations are read from
   */
  public String getAnnotationSetName() {
    return annotationSetName;
  }

  /**
   * Appends the relevant text of the current document to the accumulated
   * text. Either the whole document content is used (when no annotation type
   * is configured) or the text under each annotation of the configured type.
   * Each appended piece of text is followed by three newlines.
   *
   * @throws IllegalStateException
   *           if the accumulation buffer has not been created, i.e.
   *           {@link #controllerExecutionStarted(Controller)} has not been
   *           called
   */
  @Override
  public void execute() {

    // the buffer only exists between the controller started/finished
    // callbacks; fail with a clear message rather than a bare
    // NullPointerException
    if(text == null) {
      throw new IllegalStateException(
              "The fingerprint generator can only be executed as part of a controller");
    }

    if(annotationType == null || annotationType.trim().equals("")) {
      // no annotation specified so use the whole document
      text.append(document.getContent().toString()).append("\n\n\n");
    } else {
      AnnotationSet annotations =
              document.getAnnotations(annotationSetName).get(annotationType);
      for(Annotation annotation : annotations) {
        // add the text from each annotation of the specified type
        text.append(Utils.stringFor(document, annotation)).append("\n\n\n");
      }
    }
  }

  /**
   * Resolves the destination file from the configured URL and creates an
   * empty buffer ready to collect text.
   *
   * @param c
   *          the controller that is about to run (not used)
   * @throws ExecutionException
   *           if the fingerprint URL cannot be converted into a file
   */
  public void controllerExecutionStarted(Controller c)
          throws ExecutionException {
    // check that the URL of the fingerprint we want to generate is a file://

    try {
      fingerprintFile = Files.fileFromURL(fingerprintURL);
    } catch(Exception e) {
      throw new ExecutionException(
              "Location of fingerprint must be a file based URL!", e);
    }

    // create a new place holder for the text we are going to process
    text = new StringBuilder();
  }

  /**
   * Creates a fingerprint from all the text collected during the run and
   * writes it, UTF-8 encoded, to the destination file. The collected text
   * and the destination file reference are released whether or not saving
   * succeeds.
   *
   * @param c
   *          the controller that has finished running (not used)
   * @throws ExecutionException
   *           if the fingerprint cannot be written to the file
   */
  public void controllerExecutionFinished(Controller c)
          throws ExecutionException {
    try {
      // save the fingerprint and...
      FingerPrint fp = new FingerPrint();
      fp.create(text.toString());

      FileOutputStream fos = new FileOutputStream(fingerprintFile);
      try {
        fos.write(fp.toString().getBytes("utf8"));
      } finally {
        // always close the stream so a failed write doesn't leak the handle
        fos.close();
      }
    } catch(IOException e) {
      throw new ExecutionException("Unable to save fingerprint file", e);
    } finally {
      // ...release any memory we have consumed, even if saving failed
      text = null;
      fingerprintFile = null;
    }
  }

  /**
   * Discards the collected text and the destination file reference without
   * writing a fingerprint.
   *
   * @param c
   *          the controller whose execution was aborted (not used)
   * @param t
   *          the cause of the abort (not used)
   */
  public void controllerExecutionAborted(Controller c, Throwable t)
          throws ExecutionException {
    // release any memory we have used
    fingerprintFile = null;
    text = null;
  }

}