// Source: GATE releases / gate-5.1-beta2-build3402-ALL / plugins / Gazetteer_LKB / src / com / ontotext / kim / gate / SesameEnrichment.java
 
package com.ontotext.kim.gate;

import gate.AnnotationSet;
import gate.Resource;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.log4j.Logger;
import org.openrdf.query.Binding;
import org.openrdf.query.BindingSet;
import org.openrdf.query.MalformedQueryException;
import org.openrdf.query.QueryEvaluationException;
import org.openrdf.query.QueryLanguage;
import org.openrdf.query.TupleQuery;
import org.openrdf.query.TupleQueryResult;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
import org.openrdf.repository.http.HTTPRepository;

/**
 * The Semantic Enrichment PR allows adding new data to semantic annotations 
 * by querying external RDF (Linked Data) repositories.
 *	
 * <p>
 * Here a semantic annotation is an annotation that is linked to an RDF entity by having 
 * the URI of the entity in the "inst" feature of the annotation. For every such annotation 
 * of the configured types, this PR runs a SPARQL query against the defined repository and 
 * puts a comma-separated list of the values returned by the query in the 
 * "connections" feature of the same annotation.
 * 
 * @author mnozchev
 *
 */
public class SesameEnrichment extends AbstractLanguageAnalyser {

	private static final long serialVersionUID = 3650L;
	
	/** Annotation feature that holds the URI of the linked RDF entity. */
	private static final String INSTANCE_FEATURE = "inst";
	
	/** Annotation feature that receives the values returned by the query. */
	private static final String CONNECTIONS_FEATURE = "connections";
	
	/** Open connection to the remote repository; null before init() and after cleanup(). */
	private RepositoryConnection conn;
	/** Remote repository handle; null before init() and after cleanup(). */
	private Repository rep;
	/** URL of the Sesame server. */
	private String server;
	/** Id of the repository on the server. */
	private String repositoryId;
	/** Name of the annotation set to read from and modify. */
	private String inputASName;
	/** Types of the annotations that are considered for enrichment. */
	private Set<String> annTypes = new HashSet<String>(Arrays.asList("Lookup"));
	/** When true, annotations whose query returns no rows are removed from the input set. */
	private boolean deleteOnNoRelations = true;
	/** Scratch buffer reused for every annotation; reset before each query. */
	private final StringBuilder outputData = new StringBuilder(2000);
	/**
	 * Query template in {@link String#format(String, Object...)} syntax; the entity
	 * URI is passed as the single format argument.
	 */
	private String query = 
		"SELECT ?Person WHERE { " +
		"?Person <http://dbpedia.org/ontology/birthplace> ?BirthPlace . " +
		"?BirthPlace <http://www.geonames.org/ontology#parentFeature> <%s> . " +
		"?Person a <http://sw.opencyc.org/2008/06/10/concept/en/Entertainer> .} LIMIT 100";
	
	private static final Logger log = Logger.getLogger(SesameEnrichment.class);
	
	/**
	 * Creates the HTTP repository for the configured server and repository id
	 * and opens a connection to it.
	 * 
	 * @return this resource
	 * @throws ResourceInstantiationException if a required parameter is missing 
	 *         or the repository connection cannot be obtained
	 */
	@Override
	public Resource init() throws ResourceInstantiationException {
		if (server == null || repositoryId == null) {
			throw new ResourceInstantiationException(
					"Both the server and the repositoryId must be set.");
		}
		try {
			rep = new HTTPRepository(server, repositoryId);
			// The Repository contract asks for initialize() before the repository is used.
			rep.initialize();
			conn = rep.getConnection();
		}
		catch (RepositoryException e) {
			// Do not leave a half-initialised repository behind.
			cleanup();
			throw new ResourceInstantiationException(e);
		}
		return this;
	}

	/**
	 * Runs the query for every annotation of the configured types that has a 
	 * String "inst" feature. A non-empty query output is stored in the 
	 * "connections" feature. Annotations whose query returns no rows are removed 
	 * from the input set when deleteOnNoRelations is set. A query that fails is 
	 * logged and the annotation is left as it is.
	 * 
	 * @throws ExecutionException if there is no document to process or the 
	 *         query template is not a valid format string
	 */
	@Override
	public void execute() throws ExecutionException {		
		if (document == null) {
			throw new ExecutionException("No document to process.");
		}
		AnnotationSet input = document.getAnnotations(inputASName);
		// Removal is deferred until after the loop so the set is not modified while it is being read.
		Set<gate.Annotation> deathRow = new HashSet<gate.Annotation>();
		
		for (gate.Annotation ann : input.get(annTypes)) {
			Object instFeature = ann.getFeatures().get(INSTANCE_FEATURE);
			if (!(instFeature instanceof String))
				continue;
			
			// NOTE(review): the URI is substituted into the query verbatim; a value 
			// containing characters such as '>' would change the query structure.
			String instQuery;
			try {
				instQuery = String.format(query, instFeature);
			} catch (java.util.IllegalFormatException e) {
				// A broken template fails for every annotation, so give up right away.
				log.error(String.format("Query template [%s] is not a valid format string", query), e);
				throw new ExecutionException(e);
			}
			
			TupleQueryResult tqr = null;
			try {
				outputData.setLength(0);
				TupleQuery tq = conn.prepareTupleQuery(QueryLanguage.SPARQL, instQuery);
				tqr = tq.evaluate();	
				if (!tqr.hasNext() && deleteOnNoRelations) {
					deathRow.add(ann);
				}
				populateResults(outputData, tqr);
				if (outputData.length() > 0) {
					ann.getFeatures().put(CONNECTIONS_FEATURE, outputData.toString());
				}
			} catch (MalformedQueryException e) {
				log.warn(String.format("Created invalid query [%s] for entity [%s] (in brackets). Parser reported: %s", instQuery, instFeature, e.getMessage()));
			} catch (Exception e) {
				// Best effort: one failing query must not stop the processing of the other annotations.
				log.warn(String.format("Error executing query [%s] for entity [%s] (in brackets)", instQuery, instFeature), e);
			} finally {
				closeQuietly(tqr);
			}
		}
		
		if (deleteOnNoRelations) {
			input.removeAll(deathRow);
		}
		
	}

	/**
	 * Appends the string form of every bound value of every remaining result row 
	 * to the given buffer. Each value is followed by a comma, so a non-empty 
	 * output ends with a trailing comma. The result is not closed here.
	 * 
	 * @param outputData the buffer to append to
	 * @param tqr the query result to drain
	 * @throws QueryEvaluationException if reading the result fails
	 */
	private void populateResults(StringBuilder outputData, TupleQueryResult tqr)
			throws QueryEvaluationException {
		while (tqr.hasNext()) {
			BindingSet bs = tqr.next();
			for (Object val : bs)
				outputData.append(((Binding)val).getValue().stringValue()).append(",");						
		}
	}
	
	/**
	 * Closes the given query result, logging instead of propagating a failure.
	 * 
	 * @param tqr the result to close; may be null
	 */
	private void closeQuietly(TupleQueryResult tqr) {
		if (tqr == null)
			return;
		try {
			tqr.close();
		}
		catch (QueryEvaluationException e) {
			log.warn("Could not close query result.", e);
		}
	}
	
	/**
	 * Releases the current connection and repository and creates new ones 
	 * from the current parameter values.
	 */
	@Override
	public void reInit() throws ResourceInstantiationException {
		cleanup();
		init();
	}
	
	/**
	 * Closes the connection and shuts the repository down. Each of the two is 
	 * released independently, so a failure to close the connection does not 
	 * prevent the repository shutdown. Failures are logged, not thrown.
	 */
	@Override
	public synchronized void cleanup() {
		if (conn != null) {
			try {
				conn.close();
			}
			catch (RepositoryException e) {
				log.error("Could not close connection.", e);
			}
			finally {
				conn = null;
			}
		}
		if (rep != null) {
			try {
				rep.shutDown();
			}
			catch (RepositoryException e) {
				log.error("Could not shut down repository.", e);
			}
			finally {
				rep = null;
			}
		}
	}
	
	
	/** @return the URL of the Sesame server */
	public String getServer() {
		return server;
	}

	/** @param server the URL of the Sesame server; used by the next init() */
	public void setServer(String server) {
		this.server = server;
	}

	/** @return the id of the repository on the server */
	public String getRepositoryId() {
		return repositoryId;
	}

	/** @param repositoryId the id of the repository on the server; used by the next init() */
	public void setRepositoryId(String repositoryId) {
		this.repositoryId = repositoryId;
	}

	/** @return the name of the annotation set that is processed */
	public String getInputASName() {
		return inputASName;
	}

	/** @param inputASName the name of the annotation set that is processed */
	public void setInputASName(String inputASName) {
		this.inputASName = inputASName;
	}
	
	/**
	 * Sets the annotation types to enrich. A null argument is ignored and the 
	 * current value is kept, in line with {@link #setDeleteOnNoRelations(Boolean)}.
	 * 
	 * @param annTypes the annotation types; duplicates are dropped
	 */
	public void setAnnotationTypes(List<String> annTypes) {
		if (annTypes != null)
			this.annTypes = new HashSet<String>(annTypes);
	}

	/** @return a fresh list with the annotation types to enrich */
	public List<String> getAnnotationTypes() {
		return new ArrayList<String>(annTypes);
	}

	/**
	 * Sets the query template. All backslash characters are stripped from the 
	 * given text. A null argument is ignored and the current template is kept.
	 * 
	 * @param query the query template in String.format syntax, taking the 
	 *        entity URI as its single argument
	 */
	public void setQuery(String query) {
		if (query != null)
			this.query = query.replace("\\", "");
	}

	/** @return the current query template */
	public String getQuery() {
		return query;
	}

	/**
	 * @param deleteOnNoRelations whether annotations with an empty query 
	 *        result are removed; null is ignored
	 */
	public void setDeleteOnNoRelations(Boolean deleteOnNoRelations) {
		if (deleteOnNoRelations != null)
			this.deleteOnNoRelations = deleteOnNoRelations;
	}

	/** @return whether annotations with an empty query result are removed */
	public Boolean getDeleteOnNoRelations() {
		return deleteOnNoRelations;
	}	
	
	/**
	 * @return the implementation version of the package of this class, 
	 *         or null when it is not available
	 */
	public String getVersion() {
		Package pkg = this.getClass().getPackage();
		return pkg == null ? null : pkg.getImplementationVersion();		
	}
	
	/**
	 * Does nothing; the version is read-only and the argument is ignored.
	 * 
	 * @param v ignored
	 */
	public void setVersion(String v) {
		
	}
}