/*
* FingerprintGenerator
*
* Copyright (c) 2011, The University of Sheffield.
*
* This file is part of GATE (see http://gate.ac.uk/), and is free software,
* licenced under the GNU Library General Public License, Version 3, June 2007
* (in the distribution as file licence.html, and also available at
* http://gate.ac.uk/gate/licence.html).
*
* Mark A. Greenwood, 28/05/2011
*/
package org.knallgrau.utils.textcat;
import gate.Annotation;
import gate.AnnotationSet;
import gate.Controller;
import gate.Utils;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ControllerAwarePR;
import gate.creole.ExecutionException;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.creole.metadata.RunTime;
import gate.util.Files;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import at.knallgrau.textcat.FingerPrint;
/**
 * A processing resource that builds a TextCat language fingerprint from the
 * documents it is run over.
 * <p>
 * The PR relies on the {@link ControllerAwarePR} callbacks: when the
 * controller starts, the target file is resolved and an empty text buffer is
 * created; each call to {@link #execute()} appends text from the current
 * document to that buffer; when the controller finishes, a
 * {@link FingerPrint} is created from the buffered text and written to the
 * target file. The buffer and the file reference are dropped again once the
 * run has finished or been aborted.
 */
@CreoleResource(name = "TextCat Fingerprint Generator", comment = "Generate language fingerprints for use with the TextCat Language Indentification PR", icon = "fingerprint.png", helpURL="http://gate.ac.uk/userguide/sec:misc-creole:language-identification:fingerprints")
public class FingerprintGenerator extends AbstractLanguageAnalyser implements
                                                                  ControllerAwarePR {

  private static final long serialVersionUID = -1039437923175492426L;

  /** Appended after every chunk of text added to the buffer. */
  private static final String CHUNK_SEPARATOR = "\n\n\n";

  /** Location the fingerprint is saved to (runtime parameter). */
  private URL fingerprintURL;

  /**
   * Text collected so far in the current controller run; {@code null} outside
   * of a run.
   */
  private StringBuilder text;

  /**
   * File resolved from {@link #fingerprintURL} for the current controller
   * run; {@code null} outside of a run.
   */
  private File fingerprintFile;

  /** Annotation type whose covered text is collected (optional). */
  private String annotationType;

  /** Name of the annotation set the annotations are read from (optional). */
  private String annotationSetName;

  /**
   * Sets the location the generated fingerprint is written to.
   *
   * @param fingerprintURL the target location; it is converted to a file when
   *          the controller starts
   */
  @RunTime
  @CreoleParameter(comment = "The file in which the generated fingerprint should be saved")
  public void setFingerprintURL(URL fingerprintURL) {
    this.fingerprintURL = fingerprintURL;
  }

  /**
   * @return the location the generated fingerprint is written to
   */
  public URL getFingerprintURL() {
    return fingerprintURL;
  }

  /**
   * Sets the type of the annotations whose text is used for the fingerprint.
   *
   * @param atype the annotation type; if {@code null} or blank the whole
   *          document content is used instead
   */
  @RunTime
  @Optional
  @CreoleParameter(comment = "The annotation type covering the text to use to build the fingerprint, if unspecifed the whole document will be used")
  public void setAnnotationType(String atype) {
    this.annotationType = atype;
  }

  /**
   * @return the annotation type whose text is used for the fingerprint
   */
  public String getAnnotationType() {
    return this.annotationType;
  }

  /**
   * Sets the name of the annotation set that is searched for annotations of
   * the configured type.
   *
   * @param inputASName the annotation set name; not consulted when the whole
   *          document is used
   */
  @RunTime
  @Optional
  @CreoleParameter(comment = "The annotation set used for input/output (ignored when using the whole document)")
  public void setAnnotationSetName(String inputASName) {
    this.annotationSetName = inputASName;
  }

  /**
   * @return the name of the annotation set that is searched for annotations
   */
  public String getAnnotationSetName() {
    return annotationSetName;
  }

  /**
   * Adds text from the current document to the buffer that the fingerprint
   * will be built from.
   * <p>
   * With no annotation type configured the whole document content is added;
   * otherwise the text under each annotation of that type (taken from the
   * configured annotation set) is added. Every added chunk is followed by
   * {@link #CHUNK_SEPARATOR}.
   *
   * @throws IllegalStateException if the text buffer has not been created,
   *           i.e. {@link #controllerExecutionStarted(Controller)} has not
   *           been called for the current run
   */
  @Override
  public void execute() {
    // the buffer only exists between the controller started/finished
    // callbacks, so fail with an explanation instead of a bare NPE
    if(text == null) {
      throw new IllegalStateException(
              "FingerprintGenerator must be run from within a controller");
    }

    if(annotationType == null || annotationType.trim().equals("")) {
      // no annotation specified so use the whole document
      text.append(document.getContent().toString()).append(CHUNK_SEPARATOR);
      return;
    }

    AnnotationSet annotations =
            document.getAnnotations(annotationSetName).get(annotationType);
    for(Annotation annotation : annotations) {
      // add the text from each annotation of the specified type
      text.append(Utils.stringFor(document, annotation)).append(
              CHUNK_SEPARATOR);
    }
  }

  /**
   * Resolves the target file and creates an empty text buffer for the run
   * that is about to start.
   *
   * @param c the controller that is starting
   * @throws ExecutionException if no fingerprint URL has been set or the URL
   *           cannot be converted to a file
   */
  public void controllerExecutionStarted(Controller c)
          throws ExecutionException {
    // report a missing parameter explicitly rather than through the generic
    // "file based URL" message below
    if(fingerprintURL == null) {
      throw new ExecutionException("No fingerprint URL has been specified");
    }

    // check that the URL of the fingerprint we want to generate is a file://
    try {
      fingerprintFile = Files.fileFromURL(fingerprintURL);
    } catch(Exception e) {
      throw new ExecutionException(
              "Location of fingerprint must be a file based URL!", e);
    }

    // create a new place holder for the text we are going to process
    text = new StringBuilder();
  }

  /**
   * Builds the fingerprint from the buffered text, writes it to the target
   * file and then drops the buffer and the file reference.
   *
   * @param c the controller that has finished
   * @throws ExecutionException if the run was never started or the
   *           fingerprint cannot be written
   */
  public void controllerExecutionFinished(Controller c)
          throws ExecutionException {
    try {
      if(text == null || fingerprintFile == null) {
        throw new ExecutionException(
                "Unable to save fingerprint, no text has been collected");
      }

      // build the fingerprint and encode it before touching the file so that
      // a failure here does not leave a truncated file behind
      FingerPrint fp = new FingerPrint();
      fp.create(text.toString());
      byte[] encoded = fp.toString().getBytes("utf8");

      FileOutputStream fos = new FileOutputStream(fingerprintFile);
      try {
        fos.write(encoded);
      } finally {
        // always close the stream, even when the write fails
        fos.close();
      }
    } catch(IOException e) {
      throw new ExecutionException("Unable to save fingerprint file", e);
    } finally {
      // release any memory we have consumed, whether or not saving worked
      text = null;
      fingerprintFile = null;
    }
  }

  /**
   * Drops the buffer and the file reference without writing anything.
   *
   * @param c the controller that was aborted
   * @param t the reason the run was aborted
   */
  public void controllerExecutionAborted(Controller c, Throwable t)
          throws ExecutionException {
    // release any memory we have used
    fingerprintFile = null;
    text = null;
  }
}