Log in Help
Print
HomegatepluginsGazetteer_LKBsrccomontotextkimutildatastore 〉 GazetteerListFeed.java
 
package com.ontotext.kim.util.datastore;

import gate.creole.ResourceInstantiationException;
import gate.creole.gazetteer.GazetteerList;
import gate.creole.gazetteer.GazetteerNode;
import gate.creole.gazetteer.LinearDefinition;
import gate.creole.gazetteer.LinearNode;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.Iterator;
import java.util.Map;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.WildcardFileFilter;
import org.apache.log4j.Logger;
import org.openrdf.model.impl.LiteralImpl;
import org.openrdf.model.impl.URIImpl;

import com.ontotext.kim.client.query.KIMQueryException;
import com.ontotext.kim.client.semanticrepository.QueryResultListener;
import com.ontotext.kim.client.semanticrepository.QueryResultListener.Feed;
import com.ontotext.kim.model.Options;

/**
 * @author mnozchev
 * 
 */
public class GazetteerListFeed implements Feed {

  public static final String DEF_EXTENSION = "*.def";

  private static Logger log = Logger.getLogger(GazetteerListFeed.class);
  
  private final File dictionaryPath;
  private Options options;

  public GazetteerListFeed(File dictionaryPath, Options opt) {
    this.dictionaryPath = dictionaryPath;
    this.options = opt;
  }

  /**
   * Feeds all list entries in all lists in all definitions to the
   * specified listener.
   */
  public void feedTo(QueryResultListener listener) throws KIMQueryException {

    if(!dictionaryPath.exists()) {
      log.error("The specified dictionary path does not exist!");
      throw new KIMQueryException("Invalid dictionary path specified!");
    }

    try {
      listener.startTableQueryResult();
      log.debug("Starting loading of definition lists from: " + dictionaryPath);
      loadDefinitions(listener);
      log.debug("Definition loading complete.");
      listener.endTableQueryResult();
    }
    catch(IOException e) {
      throw new KIMQueryException(e);
    }
    catch(ResourceInstantiationException e) {
      throw new KIMQueryException(e);
    }
  }

  private void loadDefinitions(QueryResultListener listener)
          throws ResourceInstantiationException, IOException {

    Collection<File> definitionPaths = FileUtils.listFiles(dictionaryPath,
            new WildcardFileFilter(DEF_EXTENSION), null);
    for(File definitionPath : definitionPaths) {

      log.debug("Loading definition file: " + definitionPath.toString());
      LinearDefinition definition = new LinearDefinition();
      definition.setURL(definitionPath.toURI().toURL());
      definition.load();
      loadLists(listener, definition);
    }
  }

  @SuppressWarnings("rawtypes")
  private void loadLists(QueryResultListener listener,
          LinearDefinition definition) throws ResourceInstantiationException,
          IOException {

    String separator = options.getSeparator();
    Map listsByNode = definition.loadLists();
    Iterator inodes = definition.iterator();
    LinearNode node;
    while(inodes.hasNext()) {

      node = (LinearNode)inodes.next();
      if(null == node) {
    	log.error("LinearNode node is null!");
        throw new ResourceInstantiationException("LinearNode node is null!");
      }

      GazetteerList gazList = (GazetteerList)listsByNode.get(node);
      if(null == gazList) {
   	    log.error("Gazetteer list not found by node");
        throw new ResourceInstantiationException(
                "Gazetteer list not found by node");
      }

      log.debug("Loading " + gazList.size() + " definitions for list: " + gazList.getName());
      Iterator iline = gazList.iterator();
      while(iline.hasNext()) {
        GazetteerNode gazNode = (GazetteerNode)iline.next();
        String clazz = "urn:"+node.getMajorType();
        String inst = "urn:"+node.getMinorType();
        String entry = gazNode.getEntry();
        // if a separator string is set, attempt to split the gazetteer
        // line by that separator and try to get the inst and/or class feature
        // values.
        if(!separator.equals("")) {
          // If the entry contains the separator, split it up and try to find
          // features class and inst. 
          if(entry.contains(separator)) {
            String fields[] = entry.split(separator);
            entry = fields[0];
            for(int i = 1; i<fields.length; i++) {
              if(fields[i].startsWith("inst=")) {
                inst = fields[i].substring(5);
              } else if(fields[i].startsWith("class=")) {
                clazz = fields[i].substring(6);
              }
            }
          }
        }
        
        addEntity(listener, inst, clazz,entry);
      }
      
      
      
    }
  }

  private void addEntity(QueryResultListener listener, String inst,
          String clazz, String label) throws IOException {

    listener.startTuple();
    listener.tupleValue(new LiteralImpl(label));
    listener.tupleValue(new URIImpl(inst));
    listener.tupleValue(new URIImpl(clazz));
    listener.endTuple();
  }

}