// Source listing: Home > releases > gate-6.0-build3764-ALL > plugins > Gazetteer_Ontology_Based > src > gate > clone > ql > OntoRootGaz.java
 
/*
 * OntoRootGaz.java
 * 
 * Copyright (c) 1998-2008, The University of Sheffield.
 * 
 * This file is part of GATE (see http://gate.ac.uk/), and is free software,
 * licenced under the GNU Library General Public License, Version 2, June 1991
 * (in the distribution as file licence.html, and also available at
 * http://gate.ac.uk/gate/licence.html).
 */
package gate.clone.ql;

import gate.Annotation;
import gate.Corpus;
import gate.Document;
import gate.Factory;
import gate.FeatureMap;
import gate.Gate;
import gate.Resource;
import gate.clone.ql.regex.ExpressionFinder;
import gate.creole.ANNIEConstants;
import gate.creole.ExecutionException;
import gate.creole.POSTagger;
import gate.creole.ResourceInstantiationException;
import gate.creole.SerialAnalyserController;
import gate.creole.gazetteer.DefaultGazetteer;
import gate.creole.gazetteer.FSMState;
import gate.creole.gazetteer.LinearDefinition;
import gate.creole.gazetteer.Lookup;
import gate.creole.morph.Morph;
import gate.creole.ontology.InvalidURIException;
import gate.creole.ontology.Ontology;
import gate.creole.ontology.URI;
import gate.creole.tokeniser.DefaultTokeniser;
import gate.util.OffsetComparator;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

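/**
 * A gazetteer whose entries are derived from an ontology. During
 * initialisation the URIs and property values of the ontology's
 * classes, instances and properties are collected, each candidate text
 * is run through a tokeniser, sentence splitter, POS tagger and
 * morphological analyser, and the resulting root form is added to the
 * gazetteer together with features describing the ontology resource it
 * came from.
 * 
 * @author Danica Damljanovic
 * 
 */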
public class OntoRootGaz extends DefaultGazetteer {
  /** Serialization version identifier. */
  private static final long serialVersionUID = 0L;

  /**
   * part-of-speech tagger added to the root finder application; init()
   * fails if it is not set
   */
  protected POSTagger posTagger;

  /**
   * tokeniser added to the root finder application; init() fails if it
   * is not set
   */
  protected DefaultTokeniser tokeniser;

  /**
   * sentence splitter added to the root finder application; created by
   * init() when null and deleted again when init() finishes
   */
  protected FakeSentenceSplitter sentenceSplitter;

  /**
   * morphological analyser added to the root finder application; init()
   * fails if it is not set
   */
  protected Morph morpher;

  /**
   * hidden pipeline (tokeniser, sentence splitter, POS tagger, morpher)
   * that is executed over each candidate entry; only exists while
   * init() is running
   */
  protected SerialAnalyserController rootFinderApplication;

  /** comparator used to sort token annotations by offset */
  protected OffsetComparator offsetComparator;

  /**
   * the ontology from which the gazetteer entries are derived; init()
   * fails if it is not set
   */
  protected Ontology ontology;

  /**
   * should camelCased words be separated so that projectName becomes
   * project Name; defaults to true when not set
   */
  protected Boolean separateCamelCasedWords;

  /**
   * should resource URI (usually called a fragment identifier - a set
   * of characters after / or #) be considered; for example, if there is
   * a resource with URI http://gate.ac.uk/ns/gate-ontology#POSTagger,
   * should POSTagger be considered or not; defaults to true when not
   * set
   */
  protected Boolean useResourceUri;

  /**
   * should properties be considered or not; NOTE: if this parameter is
   * set to false, then propertiesToInclude and propertiesToExclude will
   * be ignored; defaults to true when not set
   */
  protected Boolean considerProperties;

  /**
   * a list of lookups that will be created after processing of all
   * relevant data
   */
  protected List<Lookup> allLookups = new ArrayList<Lookup>();

  /**
   * hidden corpus the root finder application runs over; only exists
   * while init() is running
   */
  protected Corpus applicationCorpus;

  /**
   * a map of roots: a key is a lookup.list value, e.g. 'projects', and
   * the value is a root of that key, in this case that would be
   * 'project'
   */
  Map<String, String> listRoots = new HashMap<String, String>();

  /**
   * Should the rules be followed or not: if true, then a few heuristic
   * rules will apply: the words containing spaces will be split; for
   * example, if 'pos tagger for spanish' would be analysed, 'for' would
   * be considered a stop word and heuristically derived would be 'pos
   * tagger' and this would be further used to add 'pos tagger' with
   * heuristic level 0, and 'tagger' with heuristic level 1 to the
   * gazetteer list; at runtime a lower heuristic level should be
   * preferred; defaults to false when not set
   */
  protected Boolean considerHeuristicRules;

  /**
   * comma separated values of property names that will be considered
   * when initializing the gazetteer
   */
  protected String propertiesToInclude;

  /**
   * comma separated values of property names that will be excluded when
   * initializing the gazetteer. NOTE: setting propertiesToInclude to be
   * different from "" automatically means that all properties not in
   * the list will be excluded (in other words, if propertiesToInclude
   * is set, it is not necessary to set propertiesToExclude as all
   * properties not listed in propertiesToInclude will be excluded);
   */
  protected String propertiesToExclude;

  /*****************************************************************************
   * logger used to report progress and the entries added to the
   * gazetteer list
   ****************************************************************************/
  private static final Log logger = LogFactory.getLog(OntoRootGaz.class);

  /**
   * Re-initialises this gazetteer by running {@link #init()} again with
   * the currently configured parameters.
   * 
   * @throws ResourceInstantiationException if the initialisation fails
   */
  public void reInit() throws ResourceInstantiationException {
    init();
  }

  /**
   * Initialises the gazetteer from the configured ontology.
   * <p>
   * The method validates the required parameters, applies default
   * values to the optional Boolean parameters that are not set, indexes
   * the ontology through Ontology2MapManager, builds a hidden root
   * finder pipeline (tokeniser, sentence splitter, POS tagger, morpher)
   * over a hidden corpus, collects one candidate lookup per property
   * value and/or resource name, and hands them to
   * {@link #addLookups(List)}. The temporary GATE resources are
   * released and the candidate list is reset even when the
   * initialisation fails.
   * <p>
   * propertiesToInclude and propertiesToExclude are parsed
   * independently of each other, so setting only one of them is
   * honoured.
   * 
   * @return this gazetteer
   * @throws ResourceInstantiationException if the tokeniser, POS
   *           tagger, morpher or ontology is missing, if a property is
   *           listed as both included and excluded, or if a GATE
   *           resource cannot be created or executed
   */
  public Resource init() throws ResourceInstantiationException {
    logger.info("--------------------------------------\n");
    logger.info(" Initializing gazetteer...\n");
    logger.info("--------------------------------------\n");
    long startedInit = System.currentTimeMillis();

    /* fail fast, before any GATE resource is allocated */
    if(tokeniser == null)
      throw new ResourceInstantiationException("No tokeniser provided!");
    if(posTagger == null)
      throw new ResourceInstantiationException(
              "No Part-of-speach Tagger provided!");
    if(morpher == null)
      throw new ResourceInstantiationException(
              "No Morphological Analyzer provided!");
    if(ontology == null)
      throw new ResourceInstantiationException("No ontology provided!");

    /* set default values if they are not set already */
    if(useResourceUri == null) useResourceUri = true;
    if(considerProperties == null) considerProperties = true;
    if(separateCamelCasedWords == null) separateCamelCasedWords = true;
    if(considerHeuristicRules == null) considerHeuristicRules = false;

    /*
     * move properties to include and exclude from the CSV strings to
     * List objects; each parameter is parsed on its own so that setting
     * only one of them still takes effect
     */
    List<String> propertiesToIncludeList = new ArrayList<String>();
    List<String> propertiesToExcludeList = new ArrayList<String>();
    if(considerProperties) {
      propertiesToIncludeList = parsePropertyList(propertiesToInclude);
      propertiesToExcludeList = parsePropertyList(propertiesToExclude);
    }
    /* a property must not be both included and excluded */
    for(String propertyUri : propertiesToExcludeList) {
      if(propertiesToIncludeList.contains(propertyUri))
        throw new ResourceInstantiationException(
                "You specified that the same property should be both included and "
                        + "excluded!");
    }

    Ontology2MapManager.getInstance().addOntologyToIndex(ontology);
    fsmStates = new HashSet();
    initialState = new FSMState(this);

    try {
      createRootFinderResources();
      if(considerProperties) {
        /*
         * each table has 3 columns per row: resourceUri | propertyUri |
         * propertyValue. Lookups built from class rows do not carry
         * the property value feature (kept as in the previous
         * implementation).
         */
        collectPropertyLookups(Ontology2MapManager.getInstance()
                .getOntology2Map().getListOfInstances(),
                CATConstants.TYPE_INSTANCE, true, true,
                propertiesToIncludeList, propertiesToExcludeList);
        collectPropertyLookups(Ontology2MapManager.getInstance()
                .getOntology2Map().getListOfClasses(),
                CATConstants.TYPE_CLASS, false, false,
                propertiesToIncludeList, propertiesToExcludeList);
        collectPropertyLookups(Ontology2MapManager.getInstance()
                .getOntology2Map().getListOfProperties(),
                CATConstants.TYPE_PROPERTY, false, true,
                propertiesToIncludeList, propertiesToExcludeList);
      }
      if(useResourceUri) {
        /* class uris */
        String[] rows = Ontology2MapManager.getInstance().getOntology2Map()
                .getClassURIs().split(CATConstants.NEW_LINE);
        for(String eachRow : rows) {
          collectUriLookup(eachRow.trim(), CATConstants.TYPE_CLASS, false);
        }
        /* instance uris */
        Set<String> setOfInstanceTypes = Ontology2MapManager.getInstance()
                .getOntology2Map().getInstanceTypes().keySet();
        for(String uri : setOfInstanceTypes) {
          collectUriLookup(uri, CATConstants.TYPE_INSTANCE, true);
        }
        /* property uris */
        rows = Ontology2MapManager.getInstance().getOntology2Map()
                .getPropertyURIs().split(CATConstants.NEW_LINE);
        for(String eachRow : rows) {
          collectUriLookup(eachRow.trim(), CATConstants.TYPE_PROPERTY, false);
        }
      }
      addLookups(allLookups);
    }
    finally {
      /*
       * always reset the candidate list and release the temporary GATE
       * resources, so that a failed initialisation neither leaks them
       * nor leaves stale candidates behind for the next attempt
       */
      allLookups = new ArrayList<Lookup>();
      releaseRootFinderResources();
    }
    long currentTime = System.currentTimeMillis();
    logger.info("OntoRootGaz initialized for:" + (currentTime - startedInit)
            + " ms");
    return this;
  }

  /** namespaces whose resources are ignored when creating the gazetteer list */
  private static final List<String> NAMESPACES_TO_IGNORE = Collections
          .unmodifiableList(java.util.Arrays.asList(
                  "http://www.w3.org/2002/07/owl#",
                  "http://www.w3.org/2000/01/rdf-schema#",
                  "http://www.w3.org/1999/02/22-rdf-syntax-ns#"));

  /**
   * Splits a comma separated list of property names into its trimmed,
   * non-empty items; returns an empty list for null.
   */
  private static List<String> parsePropertyList(String csv) {
    List<String> names = new ArrayList<String>();
    if(csv == null) return names;
    for(String item : csv.split(",")) {
      String name = item.trim();
      if(name.length() > 0) names.add(name);
    }
    return names;
  }

  /**
   * Creates the hidden root finder pipeline and the hidden corpus it
   * runs over, creating the sentence splitter first when none is set.
   */
  private void createRootFinderResources()
          throws ResourceInstantiationException {
    if(sentenceSplitter == null) {
      sentenceSplitter = (FakeSentenceSplitter)Factory
              .createResource("gate.clone.ql.FakeSentenceSplitter");
    }
    /* set the hidden feature to true */
    FeatureMap features = Factory.newFeatureMap();
    FeatureMap parameters = Factory.newFeatureMap();
    Gate.setHiddenAttribute(features, true);
    rootFinderApplication = (SerialAnalyserController)Factory.createResource(
            "gate.creole.SerialAnalyserController", parameters, features);
    rootFinderApplication.add(tokeniser);
    rootFinderApplication.add(sentenceSplitter);
    rootFinderApplication.add(posTagger);
    rootFinderApplication.add(morpher);
    /* create a corpus and hide it inside the GATE GUI */
    FeatureMap corpusParams = Factory.newFeatureMap();
    corpusParams.put("name", this.getClass().getCanonicalName());
    FeatureMap corpusFeatures = Factory.newFeatureMap();
    Gate.setHiddenAttribute(corpusFeatures, true);
    applicationCorpus = (Corpus)Factory.createResource(
            "gate.corpora.CorpusImpl", corpusParams, corpusFeatures);
    rootFinderApplication.setCorpus(applicationCorpus);
    offsetComparator = new OffsetComparator();
  }

  /**
   * Releases whatever createRootFinderResources() managed to create.
   * The processing resources are removed from the pipeline before the
   * pipeline itself is deleted, in the same order as before.
   */
  private void releaseRootFinderResources() {
    if(applicationCorpus != null) {
      Factory.deleteResource(applicationCorpus);
      applicationCorpus = null;
    }
    if(rootFinderApplication != null) {
      rootFinderApplication.remove(morpher);
      rootFinderApplication.remove(posTagger);
      if(sentenceSplitter != null) {
        rootFinderApplication.remove(sentenceSplitter);
      }
    }
    if(sentenceSplitter != null) {
      Factory.deleteResource(sentenceSplitter);
      sentenceSplitter = null;
    }
    if(rootFinderApplication != null) {
      rootFinderApplication.remove(tokeniser);
      Factory.deleteResource(rootFinderApplication);
      rootFinderApplication = null;
    }
  }

  /**
   * Adds one candidate lookup to allLookups for every accepted row of a
   * "resourceUri | propertyUri | propertyValue" table; the property
   * value becomes the lookup text. Rows that do not have exactly three
   * columns, rows whose property is filtered out and rows whose
   * resource lives in an ignored namespace are skipped; rows with an
   * invalid resource URI are logged and skipped.
   * 
   * @param instanceRows true when the rows describe instances, in
   *          which case the class features of the instance are added
   * @param storePropertyValue true when the property value should also
   *          be stored as a feature of the lookup
   */
  private void collectPropertyLookups(String table, Object resourceType,
          boolean instanceRows, boolean storePropertyValue,
          List<String> include, List<String> exclude) {
    for(String eachRow : table.split(CATConstants.NEW_LINE)) {
      String[] columns = eachRow.split("\\|");
      if(columns.length != 3) continue;
      String uri = columns[0].trim();
      try {
        /* create uriUri for validation purposes */
        URI uriUri = new URI(uri, false);
        String propUri = columns[1].trim();
        boolean included = include.isEmpty() || include.contains(propUri);
        if(!included || exclude.contains(propUri)) continue;
        if(NAMESPACES_TO_IGNORE.contains(uriUri.getNameSpace())) continue;
        String propValue = columns[2].trim();
        Map<String, Object> lookupFeatures = new HashMap<String, Object>();
        lookupFeatures.put(CATConstants.ONTORES_TYPE, resourceType);
        lookupFeatures.put(CATConstants.FEATURE_URI, uri);
        lookupFeatures.put(CATConstants.FEATURE_PROPERTY_URI, propUri);
        if(storePropertyValue) {
          lookupFeatures.put(CATConstants.FEATURE_PROPERTY_VALUE, propValue);
        }
        if(instanceRows) putInstanceClasses(lookupFeatures, uri);
        queueLookup(propValue, lookupFeatures);
      }
      catch(InvalidURIException e) {
        logger.info("URI:'" + uri + "' is not valid."
                + (instanceRows ? " Skipping...\n" : "\n"));
      }
    }
  }

  /**
   * Adds a candidate lookup to allLookups whose text is the resource
   * name of the given URI, unless the URI is invalid (logged) or lives
   * in an ignored namespace.
   * 
   * @param instance true when the URI identifies an instance, in which
   *          case the class features of the instance are added
   */
  private void collectUriLookup(String uri, Object resourceType,
          boolean instance) {
    try {
      URI uriUri = new URI(uri, false);
      String shortName = uriUri.getResourceName();
      if(NAMESPACES_TO_IGNORE.contains(uriUri.getNameSpace())) return;
      Map<String, Object> lookupFeatures = new HashMap<String, Object>();
      lookupFeatures.put(CATConstants.ONTORES_TYPE, resourceType);
      lookupFeatures.put(CATConstants.FEATURE_URI, uri);
      if(instance) putInstanceClasses(lookupFeatures, uri);
      queueLookup(shortName, lookupFeatures);
    }
    catch(InvalidURIException e) {
      logger.info("URI:" + uri + " is not valid.\n");
    }
  }

  /**
   * Stores the set of classes recorded for an instance and the first
   * class of that set in the given features. An instance without a
   * recorded class no longer aborts the initialisation: the missing
   * feature(s) are simply left out.
   */
  private void putInstanceClasses(Map<String, Object> lookupFeatures,
          String instanceUri) {
    Set<String> classUris = Ontology2MapManager.getInstance()
            .getOntology2Map().getInstanceTypes().get(instanceUri);
    if(classUris == null) {
      logger.info("No class found for instance:" + instanceUri);
      return;
    }
    lookupFeatures.put(CATConstants.CLASS_URI_LIST, classUris);
    if(!classUris.isEmpty()) {
      lookupFeatures.put(CATConstants.CLASS_URI, classUris.iterator().next());
    }
  }

  /** Creates a lookup for the given text and features and queues it in allLookups. */
  private void queueLookup(String text, Map<String, Object> lookupFeatures) {
    Lookup aLookup = new Lookup(text, "", null, null);
    aLookup.features = lookupFeatures;
    allLookups.add(aLookup);
  }

  /**
   * Computes the root of every given lookup and adds the lookups to the
   * gazetteer under their root.
   * <p>
   * The lookups are first run through the root finder application.
   * While doing so, variants of an entry (underscores replaced by
   * spaces, camelCased words separated when separateCamelCasedWords is
   * true) are queued in additionalList; these are processed in further
   * passes until no new variant is produced, so that variants of
   * variants are not lost. additionalList is always left empty, which
   * keeps derived lookups of one run from leaking into the next.
   * <p>
   * Each processed lookup is then registered under the root recorded
   * for its text in listRoots with heuristic level 0. If
   * considerHeuristicRules is true and the root contains spaces, copies
   * of the lookup are additionally registered for every suffix obtained
   * by dropping leading words, with an increasing heuristic level.
   * Lookups without a root, or with a blank root, are logged and
   * skipped.
   * 
   * @param lookups the lookups to process and add to the gazetteer
   * @throws ResourceInstantiationException if the root finder
   *           application fails
   */
  protected void addLookups(List<Lookup> lookups)
          throws ResourceInstantiationException {
    List<Lookup> all = new ArrayList<Lookup>();
    try {
      all.addAll(runRootFinderApplication(lookups));
      int passes = 0;
      while(!additionalList.isEmpty()) {
        if(++passes > MAX_DERIVATION_PASSES) {
          /* safety net: a variant should never keep producing variants */
          logger.warn("Giving up on " + additionalList.size()
                  + " derived lookups after " + MAX_DERIVATION_PASSES
                  + " passes");
          break;
        }
        List<Lookup> pending = additionalList;
        additionalList = new ArrayList<Lookup>();
        all.addAll(runRootFinderApplication(pending));
      }
    }
    finally {
      additionalList = new ArrayList<Lookup>();
    }
    for(Lookup aLookup : all) {
      String root = listRoots.get(aLookup.list);
      if(root == null) {
        logger.info("root is null for lookup:" + aLookup);
        continue;
      }
      String trimmedRoot = root.trim();
      if(trimmedRoot.length() == 0) {
        /* an empty string must not become a gazetteer entry */
        logger.info("root is empty for lookup:" + aLookup);
        continue;
      }
      if(root.contains(" ") && Boolean.TRUE.equals(considerHeuristicRules)) {
        /* the whole root gets heuristic level 0 ... */
        addLookup(trimmedRoot, copyWithHeuristics(aLookup, 0, root));
        logger.info("NEW ENTRY: " + root + "\n");
        /* ... and every suffix of it the next higher level */
        int hLevel = 0;
        String newRoot = trimmedRoot;
        int firstIndex = newRoot.indexOf(' ');
        while(firstIndex >= 0) {
          newRoot = newRoot.substring(firstIndex + 1).trim();
          hLevel++;
          addLookup(newRoot, copyWithHeuristics(aLookup, hLevel, newRoot));
          logger.info("NEW ENTRY: " + newRoot + "\n");
          firstIndex = newRoot.indexOf(' ');
        }
      }
      else {
        /* no spaces in the root, or considerHeuristicRules=false */
        aLookup.features.put(CATConstants.FEATURE_HEURISTIC_LEVEL, 0);
        addLookup(trimmedRoot, aLookup);
        logger.info("NEW ENTRY: " + root + "\n");
      }
    }
  }

  /** upper bound on the number of passes over derived lookups */
  private static final int MAX_DERIVATION_PASSES = 10;

  /**
   * Returns a new lookup with the same text as the source and a copy of
   * its features, extended with the given heuristic level and value.
   */
  private Lookup copyWithHeuristics(Lookup source, int heuristicLevel,
          String heuristicValue) {
    Map<String, Object> copiedFeatures = new HashMap<String, Object>();
    for(Object key : source.features.keySet()) {
      copiedFeatures.put((String)key, source.features.get(key));
    }
    copiedFeatures.put(CATConstants.FEATURE_HEURISTIC_LEVEL, heuristicLevel);
    copiedFeatures.put(CATConstants.FEATURE_HEURISTIC_VALUE, heuristicValue);
    Lookup copy = new Lookup(source.list, "", null, null);
    copy.features = copiedFeatures;
    return copy;
  }

  /**
   * this list is populated during the processing of all lookups, when
   * some entries have multiple interpretations; for example, when
   * processing Project_Name, 'Project_Name' would be processed in the
   * first iteration, while 'Project Name' would be added to the
   * additionalList for later processing
   */
  List<Lookup> additionalList = new ArrayList<Lookup>();

  /**
   * Runs the root finder application over the text of every given
   * lookup and records the resulting root in listRoots, keyed by the
   * content of the temporary document created for that text.
   * <p>
   * Lookups whose text is null or blank are dropped. For every other
   * lookup, variants of its text are queued in additionalList for a
   * later call of this method: one with underscores replaced by spaces
   * (when the text contains an underscore) and one with camelCased
   * words separated (when separateCamelCasedWords is true and the text
   * contains no space).
   * 
   * @param lookups the lookups whose text should be analysed
   * @return the given lookups without those that have a null or blank
   *         text
   * @throws ResourceInstantiationException if the temporary document
   *           cannot be created, the application fails, or a token has
   *           no root
   */
  private List<Lookup> runRootFinderApplication(List<Lookup> lookups)
          throws ResourceInstantiationException {
    List<Lookup> lookupsToBeReturned = new ArrayList<Lookup>();
    for(Lookup lookup : lookups) {
      String list = lookup.list;
      if(list == null || list.trim().length() == 0) continue;
      if(list.contains("_")) {
        Lookup aLookup = new Lookup(list.replace('_', ' '), "", null, null);
        aLookup.features = lookup.features;
        additionalList.add(aLookup);
      }
      // if text is camel cased add space between words
      if(separateCamelCasedWords && list.indexOf(" ") < 0) {
        String separatedCamelCase = ExpressionFinder
                .findAndSeparateCamelCases(list,
                        CATConstants.REGEX_CAMEL_CASE, " ");
        if(separatedCamelCase != null && !list.equals(separatedCamelCase)) {
          Lookup aLookup = new Lookup(separatedCamelCase, "", null, null);
          aLookup.features = lookup.features;
          additionalList.add(aLookup);
        }
      }
      lookupsToBeReturned.add(lookup);
      recordRoot(list);
    }
    return lookupsToBeReturned;
  }

  /**
   * Analyses the given text in a hidden temporary document and stores
   * its root in listRoots. The corpus is emptied and the document
   * deleted even when the analysis fails.
   */
  private void recordRoot(String text) throws ResourceInstantiationException {
    /* set new documents to be hidden inside the GATE GUI */
    FeatureMap docParams = Factory.newFeatureMap();
    docParams.put("stringContent", text);
    FeatureMap docFeatures = Factory.newFeatureMap();
    Gate.setHiddenAttribute(docFeatures, true);
    Document aDocument = null;
    try {
      aDocument = (Document)Factory.createResource(
              "gate.corpora.DocumentImpl", docParams, docFeatures);
      applicationCorpus.add(aDocument);
      rootFinderApplication.execute();
      listRoots.put(aDocument.getContent().toString(), buildRoot(aDocument));
    }
    catch(ExecutionException ee) {
      throw new ResourceInstantiationException(ee);
    }
    finally {
      applicationCorpus.clear();
      if(aDocument != null) Factory.deleteResource(aDocument);
    }
  }

  /**
   * Concatenates the "root" features of the tokens of an analysed
   * document in offset order; every run of space tokens becomes a
   * single space. If considerHeuristicRules is true, everything from
   * the first token with category "IN" onwards is dropped.
   */
  private String buildRoot(Document document)
          throws ResourceInstantiationException {
    Set<String> tokenTypes = new HashSet<String>();
    tokenTypes.add(ANNIEConstants.TOKEN_ANNOTATION_TYPE);
    tokenTypes.add(ANNIEConstants.SPACE_TOKEN_ANNOTATION_TYPE);
    List<Annotation> tokenList = new ArrayList<Annotation>(document
            .getAnnotations().get(tokenTypes));
    Collections.sort(tokenList, offsetComparator);
    StringBuilder rootForText = new StringBuilder();
    boolean lastAnnWasSpace = false;
    for(Annotation ann : tokenList) {
      if(ann.getType().equals(ANNIEConstants.TOKEN_ANNOTATION_TYPE)) {
        lastAnnWasSpace = false;
        String category = (String)ann.getFeatures().get(
                ANNIEConstants.TOKEN_CATEGORY_FEATURE_NAME);
        /*
         * category "IN" means it is a preposition, and these are used
         * as stop words, so crop everything afterwards, but ONLY if
         * parameter considerHeuristicRules is set to be true; a token
         * without a category is never treated as a stop word
         */
        if(Boolean.TRUE.equals(considerHeuristicRules)
                && "IN".equals(category)) {
          break;
        }
        String root = (String)ann.getFeatures().get("root");
        if(root == null) {
          throw new ResourceInstantiationException(
                  "No root found for annotation " + ann.toString());
        }
        rootForText.append(root);
      }
      else if(ann.getType().equals(
              ANNIEConstants.SPACE_TOKEN_ANNOTATION_TYPE)) {
        if(!lastAnnWasSpace) {
          rootForText.append(' ');
        }
        lastAnnWasSpace = true;
      }
      else {
        // malfunction
        throw new ResourceInstantiationException(
                "Invalid annotation type: " + ann);
      }
    }
    return rootForText.toString();
  }

  /**
   * @return the morphological analyser that init() adds to the root
   *         finder application, or null if none has been set
   */
  public Morph getMorpher() {
    return morpher;
  }

  /**
   * @param morpher the morphological analyser to use; init() fails if
   *          it is null
   */
  public void setMorpher(Morph morpher) {
    this.morpher = morpher;
  }

  /**
   * @return the part-of-speech tagger that init() adds to the root
   *         finder application, or null if none has been set
   */
  public POSTagger getPosTagger() {
    return posTagger;
  }

  /**
   * @param posTagger the part-of-speech tagger to use; init() fails if
   *          it is null
   */
  public void setPosTagger(POSTagger posTagger) {
    this.posTagger = posTagger;
  }

  /**
   * @return the tokeniser that init() adds to the root finder
   *         application, or null if none has been set
   */
  public DefaultTokeniser getTokeniser() {
    return tokeniser;
  }

  /**
   * @param tokeniser the tokeniser to use; init() fails if it is null
   */
  public void setTokeniser(DefaultTokeniser tokeniser) {
    this.tokeniser = tokeniser;
  }

  /**
   * @return the ontology from which the gazetteer entries are derived,
   *         or null if none has been set
   */
  public Ontology getOntology() {
    return ontology;
  }

  /**
   * @param ontology the ontology from which the gazetteer entries are
   *          derived; init() fails if it is null
   */
  public void setOntology(Ontology ontology) {
    this.ontology = ontology;
  }

  /**
   * @return whether property values are used to create gazetteer
   *         entries; null until it is set or defaulted to true by
   *         init()
   */
  public Boolean getConsiderProperties() {
    return considerProperties;
  }

  /**
   * @param considerProperties whether property values should be used
   *          to create gazetteer entries; null makes init() use true
   */
  public void setConsiderProperties(Boolean considerProperties) {
    this.considerProperties = considerProperties;
  }

  /**
   * @return whether resource names taken from URIs are used to create
   *         gazetteer entries; null until it is set or defaulted to
   *         true by init()
   */
  public Boolean getUseResourceUri() {
    return useResourceUri;
  }

  /**
   * @param useResourceUri whether resource names taken from URIs
   *          should be used to create gazetteer entries; null makes
   *          init() use true
   */
  public void setUseResourceUri(Boolean useResourceUri) {
    this.useResourceUri = useResourceUri;
  }

  /**
   * @return whether camelCased words are separated (projectName becomes
   *         project Name); null until it is set or defaulted to true by
   *         init()
   */
  public Boolean getSeparateCamelCasedWords() {
    return separateCamelCasedWords;
  }

  /**
   * @param separateCamelCasedWords whether camelCased words should be
   *          separated; null makes init() use true
   */
  public void setSeparateCamelCasedWords(Boolean separateCamelCasedWords) {
    this.separateCamelCasedWords = separateCamelCasedWords;
  }

  /**
   * @return the comma separated names of the properties that are
   *         excluded when initializing the gazetteer, or null if not
   *         set
   */
  public String getPropertiesToExclude() {
    return propertiesToExclude;
  }

  /**
   * @param propertiesToExclude comma separated names of the properties
   *          to exclude when initializing the gazetteer
   */
  public void setPropertiesToExclude(String propertiesToExclude) {
    this.propertiesToExclude = propertiesToExclude;
  }

  /**
   * @return the comma separated names of the properties that are
   *         considered when initializing the gazetteer, or null if not
   *         set
   */
  public String getPropertiesToInclude() {
    return propertiesToInclude;
  }

  /**
   * @param propertiesToInclude comma separated names of the properties
   *          to consider when initializing the gazetteer
   */
  public void setPropertiesToInclude(String propertiesToInclude) {
    this.propertiesToInclude = propertiesToInclude;
  }

  /**
   * @return whether the heuristic rules (cropping at prepositions and
   *         adding suffixes of multi-word roots with increasing
   *         heuristic levels) are applied; null until it is set or
   *         defaulted to false by init()
   */
  public Boolean getConsiderHeuristicRules() {
    return considerHeuristicRules;
  }

  /**
   * @param considerHeuristicRules whether the heuristic rules should
   *          be applied; null makes init() use false
   */
  public void setConsiderHeuristicRules(Boolean considerHeuristicRules) {
    this.considerHeuristicRules = considerHeuristicRules;
  }

  /**
   * Supplies a linear definition for this gazetteer. A fresh, empty
   * definition is created on every call; the method exists so that Gaze
   * does not complain when rendering views and showing initialisation
   * parameters.
   * 
   * @return a newly created linear definition
   */
  public LinearDefinition getLinearDefinition() {
    LinearDefinition emptyDefinition = new LinearDefinition();
    return emptyDefinition;
  }
}