/* * EntityAnnotationJobBuilder.java * * Copyright (c) 1995-2014, The University of Sheffield. See the file * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt * * This file is part of GATE (see http://gate.ac.uk/), and is free * software, licenced under the GNU Library General Public License, * Version 3, June 2007 (in the distribution as file licence.html, * and also available at http://gate.ac.uk/gate/licence.html). * * $Id: EntityAnnotationJobBuilder.java 19726 2016-11-08 13:09:38Z domrout $ */ package gate.crowdsource.ne; import gate.Annotation; import gate.AnnotationSet; import gate.Resource; import gate.Utils; import gate.creole.AbstractLanguageAnalyser; import gate.creole.ExecutionException; import gate.creole.ExecutionInterruptedException; import gate.creole.ResourceInstantiationException; import gate.creole.metadata.CreoleParameter; import gate.creole.metadata.CreoleResource; import gate.creole.metadata.Optional; import gate.creole.metadata.RunTime; import gate.crowdsource.rest.CrowdFlowerClient; import gate.gui.ActionsPublisher; import java.util.ArrayList; import java.util.List; import javax.swing.Action; @CreoleResource(name = "Entity Annotation Job Builder", comment = "Build a CrowdFlower job asking users to annotate entities " + "within a snippet of text", helpURL = "http://gate.ac.uk/userguide/sec:crowd:annotation") public class EntityAnnotationJobBuilder extends AbstractLanguageAnalyser implements ActionsPublisher { private static final long serialVersionUID = -1584716901194104888L; private String apiKey; private Long jobId; private String snippetAnnotationType; private String snippetASName; private String tokenAnnotationType; private String tokenASName; private String detailFeatureName; private String defaultsASName; private String goldFeatureName; private String goldFeatureValue; private String goldReasonFeatureName; private String entityAnnotationType; private String entityASName; private Boolean skipExisting; protected CrowdFlowerClient crowdFlowerClient; public String getApiKey() { return apiKey; } @CreoleParameter(comment = "CrowdFlower API key") public void setApiKey(String apiKey) { this.apiKey = apiKey; } public Long getJobId() { return jobId; } @RunTime @CreoleParameter public void setJobId(Long jobId) { this.jobId = jobId; } public String getSnippetAnnotationType() { return snippetAnnotationType; } @RunTime @CreoleParameter(defaultValue = "Sentence", comment = "Annotation type for the \"snippet\" annotations. " + "One snippet = one CrowdFlower unit") public void setSnippetAnnotationType(String contextAnnotationType) { this.snippetAnnotationType = contextAnnotationType; } public String getSnippetASName() { return snippetASName; } @Optional @RunTime @CreoleParameter(comment = "Annotation set where snippet annotations can be found") public void setSnippetASName(String contextASName) { this.snippetASName = contextASName; } public String getTokenAnnotationType() { return tokenAnnotationType; } @RunTime @CreoleParameter(defaultValue = "Token", comment = "Annotation type " + "representing the \"tokens\" - the atomic units that " + "workers will have to select to mark entity annotations.") public void setTokenAnnotationType(String tokenAnnotationType) { this.tokenAnnotationType = tokenAnnotationType; } public String getTokenASName() { return tokenASName; } @Optional @RunTime @CreoleParameter(comment = "Annotation set where tokens can be found") public void setTokenASName(String tokenASName) { this.tokenASName = tokenASName; } public String getDetailFeatureName() { return detailFeatureName; } @Optional @RunTime @CreoleParameter(defaultValue = "detail", comment = "Feature on the " + "snippet annotations containing additional details to be shown " + "to the annotators. This is interpreted as HTML, and can be " + "used for example to show a list of clickable links extracted " + "from the snippet.") public void setDetailFeatureName(String detailFeatureName) { this.detailFeatureName = detailFeatureName; } public String getDefaultsASName() { return defaultsASName; } @Optional @RunTime @CreoleParameter(defaultValue = "crowdDefaults", comment = "Annotation set used" + "to mark the default values where available.") public void setDefaultsASName(String defaultsASName) { this.defaultsASName = defaultsASName; } public String getEntityAnnotationType() { return entityAnnotationType; } @RunTime @CreoleParameter(comment = "Annotation type representing the gold " + "standard annotations, i.e. the kind of entities that you want " + "workers to find.") public void setEntityAnnotationType(String entityAnnotationType) { this.entityAnnotationType = entityAnnotationType; } public String getEntityASName() { return entityASName; } @Optional @RunTime @CreoleParameter(comment = "Annotation set where gold entities can be found") public void setEntityASName(String entityASName) { this.entityASName = entityASName; } public String getGoldFeatureName() { return goldFeatureName; } @RunTime @CreoleParameter(defaultValue = "gold", comment = "Name of a feature that marks a snippet as \"gold\"") public void setGoldFeatureName(String goldFeatureName) { this.goldFeatureName = goldFeatureName; } public String getGoldFeatureValue() { return goldFeatureValue; } @RunTime @CreoleParameter(defaultValue = "yes", comment = "Value of the feature that marks a snippet as \"gold\"") public void setGoldFeatureValue(String goldFeatureValue) { this.goldFeatureValue = goldFeatureValue; } public String getGoldReasonFeatureName() { return goldReasonFeatureName; } @Optional @RunTime @CreoleParameter(defaultValue = "reason", comment = "Feature on gold snippet annotations explaining " + "why the snippet's entities are correct") public void setGoldReasonFeatureName(String goldReasonFeatureName) { this.goldReasonFeatureName = goldReasonFeatureName; } public Boolean getSkipExisting() { return skipExisting; } @Optional @RunTime @CreoleParameter(defaultValue = "true", comment = "Should we skip snippets that already " + "have a feature indicating that they have been processed before?") public void setSkipExisting(Boolean skipExisting) { this.skipExisting = skipExisting; } @Override public Resource init() throws ResourceInstantiationException { if(apiKey == null || "".equals(apiKey)) { throw new ResourceInstantiationException("API Key must be set"); } crowdFlowerClient = new CrowdFlowerClient(apiKey); return this; } @Override public void execute() throws ExecutionException { if(isInterrupted()) throw new ExecutionInterruptedException(); interrupted = false; try { if(jobId == null || jobId.longValue() <= 0) { throw new ExecutionException("Job ID must be provided"); } AnnotationSet tokens = getDocument().getAnnotations(tokenASName) .get(tokenAnnotationType); AnnotationSet snippetAnnotations = getDocument().getAnnotations(snippetASName).get( snippetAnnotationType); AnnotationSet defaultAnnotations = getDocument().getAnnotations(defaultsASName) .get(entityAnnotationType); AnnotationSet goldAS = getDocument().getAnnotations(entityASName).get( entityAnnotationType); List<Annotation> allSnippets = Utils.inDocumentOrder(snippetAnnotations); fireStatusChanged("Creating CrowdFlower units for " + allSnippets.size() + " " + snippetAnnotationType + " annotations for " + entityAnnotationType + " annotation task"); int snippetIdx = 0; for(Annotation snippet : allSnippets) { fireProgressChanged((100 * snippetIdx++) / allSnippets.size()); if(isInterrupted()) throw new ExecutionInterruptedException(); // skip existing units, if so configured if(skipExisting != null && skipExisting.booleanValue() && snippet.getFeatures().containsKey( entityAnnotationType + "_unit_id")) { continue; } AnnotationSet snippetTokens = Utils.getContainedAnnotations(tokens, snippet); String detail = null; if(detailFeatureName != null) { Object detailObj = snippet.getFeatures().get(detailFeatureName); if(detailObj != null) { detail = detailObj.toString(); } } AnnotationSet goldAnnots = null; String goldReason = null; if(goldFeatureValue.equals(snippet.getFeatures().get(goldFeatureName))) { goldAnnots = Utils.getContainedAnnotations(goldAS, snippet); if(goldReasonFeatureName != null) { Object goldReasonValue = snippet.getFeatures().get(goldReasonFeatureName); if(goldReasonValue != null) goldReason = goldReasonValue.toString(); } } AnnotationSet snippetDefaults = Utils.getContainedAnnotations(defaultAnnotations, snippet); long unitId = crowdFlowerClient.createAnnotationUnit(jobId, getDocument(), snippetASName, snippet, detail, snippetTokens, goldAnnots, snippetDefaults, goldReason); // store the unit ID - we use the entity annotation type as part // of this feature // name so the same sentences can hold units for different // annotation types // e.g. Person, Location, Organization snippet.getFeatures().put(entityAnnotationType + "_unit_id", Long.valueOf(unitId)); } fireProcessFinished(); fireStatusChanged(allSnippets.size() + " units created"); } finally { interrupted = false; } } private List<Action> actions = null; public List<Action> getActions() { if(actions == null) { actions = new ArrayList<Action>(); actions.add(new NewAnnotationJobAction(this)); } return actions; } }