package com.ontotext.kim.util.datastore; import gate.creole.ResourceInstantiationException; import gate.creole.gazetteer.GazetteerList; import gate.creole.gazetteer.GazetteerNode; import gate.creole.gazetteer.LinearDefinition; import gate.creole.gazetteer.LinearNode; import java.io.File; import java.io.IOException; import java.util.Collection; import java.util.Iterator; import java.util.Map; import org.apache.commons.io.FileUtils; import org.apache.commons.io.filefilter.WildcardFileFilter; import org.apache.log4j.Logger; import org.openrdf.model.impl.LiteralImpl; import org.openrdf.model.impl.URIImpl; import com.ontotext.kim.client.query.KIMQueryException; import com.ontotext.kim.client.semanticrepository.QueryResultListener; import com.ontotext.kim.client.semanticrepository.QueryResultListener.Feed; import com.ontotext.kim.model.Options; /** * @author mnozchev * */ public class GazetteerListFeed implements Feed { public static final String DEF_EXTENSION = "*.def"; private static Logger log = Logger.getLogger(GazetteerListFeed.class); private final File dictionaryPath; private Options options; public GazetteerListFeed(File dictionaryPath, Options opt) { this.dictionaryPath = dictionaryPath; this.options = opt; } /** * Feeds all list entries in all lists in all definitions to the * specified listener. */ public void feedTo(QueryResultListener listener) throws KIMQueryException { if(!dictionaryPath.exists()) { log.error("The specified dictionary path does not exist!"); throw new KIMQueryException("Invalid dictionary path specified!"); } try { listener.startTableQueryResult(); log.debug("Starting loading of definition lists from: " + dictionaryPath); loadDefinitions(listener); log.debug("Definition loading complete."); listener.endTableQueryResult(); } catch(IOException e) { throw new KIMQueryException(e); } catch(ResourceInstantiationException e) { throw new KIMQueryException(e); } } private void loadDefinitions(QueryResultListener listener) throws ResourceInstantiationException, IOException { Collection<File> definitionPaths = FileUtils.listFiles(dictionaryPath, new WildcardFileFilter(DEF_EXTENSION), null); for(File definitionPath : definitionPaths) { log.debug("Loading definition file: " + definitionPath.toString()); LinearDefinition definition = new LinearDefinition(); definition.setURL(definitionPath.toURI().toURL()); definition.load(); loadLists(listener, definition); } } @SuppressWarnings("rawtypes") private void loadLists(QueryResultListener listener, LinearDefinition definition) throws ResourceInstantiationException, IOException { String separator = options.getSeparator(); Map listsByNode = definition.loadLists(); Iterator inodes = definition.iterator(); LinearNode node; while(inodes.hasNext()) { node = (LinearNode)inodes.next(); if(null == node) { log.error("LinearNode node is null!"); throw new ResourceInstantiationException("LinearNode node is null!"); } GazetteerList gazList = (GazetteerList)listsByNode.get(node); if(null == gazList) { log.error("Gazetteer list not found by node"); throw new ResourceInstantiationException( "Gazetteer list not found by node"); } log.debug("Loading " + gazList.size() + " definitions for list: " + gazList.getName()); Iterator iline = gazList.iterator(); while(iline.hasNext()) { GazetteerNode gazNode = (GazetteerNode)iline.next(); String clazz = "urn:"+node.getMajorType(); String inst = "urn:"+node.getMinorType(); String entry = gazNode.getEntry(); // if a separator string is set, attempt to split the gazetteer // line by that separator and try to get the inst and/or class feature // values. if(!separator.equals("")) { // If the entry contains the separator, split it up and try to find // features class and inst. if(entry.contains(separator)) { String fields[] = entry.split(separator); entry = fields[0]; for(int i = 1; i<fields.length; i++) { if(fields[i].startsWith("inst=")) { inst = fields[i].substring(5); } else if(fields[i].startsWith("class=")) { clazz = fields[i].substring(6); } } } } addEntity(listener, inst, clazz,entry); } } } private void addEntity(QueryResultListener listener, String inst, String clazz, String label) throws IOException { listener.startTuple(); listener.tupleValue(new LiteralImpl(label)); listener.tupleValue(new URIImpl(inst)); listener.tupleValue(new URIImpl(clazz)); listener.endTuple(); } }