/* * OntoRootGaz.java * * Copyright (c) 1998-2008, The University of Sheffield. * * This file is part of GATE (see http://gate.ac.uk/), and is free software, * licenced under the GNU Library General Public License, Version 2, June 1991 * (in the distribution as file licence.html, and also available at * http://gate.ac.uk/gate/licence.html). */ package gate.clone.ql; import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.logging.ConsoleHandler; import java.util.logging.FileHandler; import java.util.logging.Formatter; import java.util.logging.Level; import java.util.logging.LogRecord; import java.util.logging.Logger; import gate.Annotation; import gate.Corpus; import gate.Document; import gate.Factory; import gate.FeatureMap; import gate.Gate; import gate.Resource; import gate.clone.ql.CATConstants; import gate.clone.ql.FakeSentenceSplitter; import gate.clone.ql.Ontology2MapManager; import gate.clone.ql.regex.ExpressionFinder; import gate.creole.ANNIEConstants; import gate.creole.ExecutionException; import gate.creole.POSTagger; import gate.creole.ResourceInstantiationException; import gate.creole.SerialAnalyserController; import gate.creole.gazetteer.DefaultGazetteer; import gate.creole.gazetteer.FSMState; import gate.creole.gazetteer.LinearDefinition; import gate.creole.gazetteer.Lookup; import gate.creole.morph.Morph; import gate.creole.ontology.*; import gate.creole.tokeniser.DefaultTokeniser; import gate.util.OffsetComparator; /** * * @author Valentin Tablan, Danica Damljanovic * */ public class OntoRootGaz extends DefaultGazetteer { private static final long serialVersionUID = 0L; protected POSTagger posTagger; protected DefaultTokeniser tokeniser; protected FakeSentenceSplitter sentenceSplitter; protected Morph morpher; protected SerialAnalyserController rootFinderApplication; protected OffsetComparator offsetComparator; protected Ontology ontology; /** * should camelCased words be separated so that projectName becomes project * Name */ protected Boolean separateCamelCasedWords; /** * should resource URI (usually called a fragment identifier - a set of * characters after / or #) be considered; for example, if there is a resource * with URI http://gate.ac.uk/ns/gate-ontology#POSTagger, should POSTagger be * considered or not */ protected Boolean useResourceUri; /** * should properties be considered or not; NOTE: if this parameter is set to * false, than propertiesToInlcude and propertiesToExclude will be ignored */ protected Boolean considerProperties; /** * a list of lookups that will be created after processing of all relevant * data */ protected List<Lookup> allLookups = new ArrayList<Lookup>(); protected Corpus applicationCorpus; /** * a map of roots: a key is a lookup.list value, e.g. 'projects', and the * value is a root of that key, in this case that would be 'project' */ Map<String, String> listRoots = new HashMap<String, String>(); /** * Should the rules be followed or not: if true then, few heuristic rules will * apply: the words containing spaces will be split; for example, if 'pos * tagger for spanish' would be analysed, 'for' would be considered a stop * word and heuristically derived would be 'pos tagger' and this would be * further used to add 'pos tagger' with heuristical level 0, and 'tagger' * with hl 1 to the gazetteer list; at runtime lower heuristical level should * be prefered */ protected Boolean considerHeuristicRules; /** * comma separated values of property names that will be considered when * initializing the gazetteer */ protected String propertiesToInclude; /** * comma separated values of property names that will be excluded when * initializing the gazetteer NOTE: setting propertiesToInclude to be * different from "" automatically means that all properties not in the list * will be excluded (in other words, if propertiesToInclude is set, it is not * necessary to set propertiesToExclude as all properties not listed in * propertiesToInclude will be excluded); */ protected String propertiesToExclude; /***************************************************************************** * setting logger to log entries to the gazetteer list ****************************************************************************/ private static Logger logger = null; static { logger = Logger.getLogger("OntoRootGaz"); logger.setUseParentHandlers(false); File logDir = null; // find the top directory String classFileName = OntoRootGaz.class.getCanonicalName(); classFileName = classFileName.replace('.', '/'); classFileName = "/" + classFileName + ".class"; URL classUrl = OntoRootGaz.class.getResource(classFileName); if(classUrl.getProtocol().equalsIgnoreCase("jar")) { String pathStr = classUrl.getPath(); pathStr = pathStr.substring(0, pathStr.indexOf('!')); File jarFile = null; try { jarFile = new File(new URL(pathStr).getPath()); } catch(MalformedURLException e) { e.printStackTrace(); } File jarDir = new File(jarFile.getParent()); logDir = new File(jarDir, "logs"); } if(logDir != null) { if(!logDir.exists()) logDir.mkdirs(); try { FileHandler logHandler = new FileHandler(logDir.getCanonicalPath() + "/" + OntoRootGaz.class.getSimpleName() + "-%u.log", false); logHandler.setFormatter(new Formatter() { /** * This method format log record to show *message only* */ public String format(LogRecord record) { return record.getMessage(); } }); logHandler.setLevel(Level.ALL); // add the new file handler for everything logger.addHandler(logHandler); // add the handler for Output messages ConsoleHandler outHandler = new ConsoleHandler(); outHandler.setLevel(Level.parse(CATConstants.LOGGER_OUPUT_LEVEL)); outHandler.setFormatter(new Formatter() { /** * This method format log record to show *message only* */ public String format(LogRecord record) { return record.getMessage(); } }); logger.addHandler(outHandler); } catch(SecurityException e) { e.printStackTrace(); } catch(IOException e) { e.printStackTrace(); } } /*************************************************************************** * end setting the logger **************************************************************************/ } public void reInit() throws ResourceInstantiationException { this.init(); } public Resource init() throws ResourceInstantiationException { //list of namespaces to be ignored when creating gazetteer list List<String> nsToIgnore = new ArrayList<String>(); nsToIgnore.add("http://www.w3.org/2002/07/owl#"); nsToIgnore.add("http://www.w3.org/2000/01/rdf-schema#"); nsToIgnore.add("http://www.w3.org/1999/02/22-rdf-syntax-ns#"); logger.info("--------------------------------------\n"); logger.info(" Initializing gazetteer for ontology from location:\n"); logger.info(ontology.getURL().toString()); logger.info("--------------------------------------\n"); long startedInit = System.currentTimeMillis(); List<String> propertiesToIncludeList = new ArrayList<String>(); List<String> propertiesToExcludeList = new ArrayList<String>(); if(tokeniser == null) throw new ResourceInstantiationException("No tokeniser provided!"); if(sentenceSplitter == null) { sentenceSplitter = (FakeSentenceSplitter)Factory .createResource("gate.clone.ql.FakeSentenceSplitter"); } if(posTagger == null) throw new ResourceInstantiationException( "No Part-of-speach Tagger provided!"); if(morpher == null) throw new ResourceInstantiationException( "No Morphological Analyzer provided!"); if(ontology == null) { throw new ResourceInstantiationException("No ontology provided!"); } else { Ontology2MapManager.getInstance().addOntologyToIndex(ontology); } /* set default values if they are not set already */ if(this.useResourceUri == null) useResourceUri = true; if(considerProperties == null) considerProperties = true; if(separateCamelCasedWords == null) separateCamelCasedWords = true; if(considerHeuristicRules == null) considerHeuristicRules = false; fsmStates = new HashSet(); initialState = new FSMState(this); /* set the hidden feature to true */ FeatureMap features = Factory.newFeatureMap(); FeatureMap parameters = Factory.newFeatureMap(); Gate.setHiddenAttribute(features, true); rootFinderApplication = (SerialAnalyserController)Factory.createResource( "gate.creole.SerialAnalyserController", parameters, features); rootFinderApplication.add(tokeniser); rootFinderApplication.add(sentenceSplitter); rootFinderApplication.add(posTagger); rootFinderApplication.add(morpher); /* create a corpus and hide it inside the GATE GUI */ FeatureMap corpusParams = Factory.newFeatureMap(); corpusParams.put("name", this.getClass().getCanonicalName()); FeatureMap corpusFeatures = Factory.newFeatureMap(); Gate.setHiddenAttribute(corpusFeatures, true); applicationCorpus = (Corpus)Factory.createResource("gate.corpora.CorpusImpl", corpusParams, corpusFeatures); rootFinderApplication.setCorpus(applicationCorpus); offsetComparator = new OffsetComparator(); /* * move properties to include and exclude from the list of CSV to the actual * List objects */ if(considerProperties && propertiesToInclude != null && propertiesToExclude != null) { String[] listInclude = propertiesToInclude.split(","); for(String item : listInclude) { if(!"".equals(item.trim())) propertiesToIncludeList.add(item.trim()); } String[] listExclude = propertiesToExclude.split(","); for(String item : listExclude) { if(!"".equals(item.trim())) propertiesToExcludeList.add(item.trim()); } } /* * check validity: if a property is in both 'to be excluded' and 'to be * included' list throw an exception */ if(propertiesToExcludeList.size() > 0 && propertiesToIncludeList.size() > 0) { for(String propertyUri : propertiesToExcludeList) { if(propertiesToIncludeList.contains(propertyUri)) throw new ResourceInstantiationException( "You specified that the same property should be both included and " + "excluded!"); } } if(considerProperties) { /************************************************************************* * instances with all set properties returned in a table with 3 columns: * ... instanceUri, propertyUri, propertyValue [new line] instanceUri, * propertyUri, propertyValue [new line] ... ************************************************************************/ String[] rows = Ontology2MapManager.getInstance().getOntology2Map( ontology.getURL().toString()).getListOfInstances().split( CATConstants.NEW_LINE); for(String eachRow : rows) { String[] columns = eachRow.split("\\|"); if(columns.length == 3) { String uri = columns[0].trim(); try { /* create uriURI for validation purposes */ URI uriUri = new URI(uri, false); String propUri = columns[1].trim(); if((propertiesToIncludeList.size() == 0 || propertiesToIncludeList .contains(propUri)) && (propertiesToExcludeList.size() == 0 || !(propertiesToExcludeList .contains(propUri)))) { if(!nsToIgnore.contains(uriUri.getNameSpace())) { String propValue = columns[2].trim(); Map<String, Object> lookupFeatures = new HashMap<String, Object>(); lookupFeatures.put(CATConstants.ONTORES_TYPE, CATConstants.TYPE_INSTANCE); lookupFeatures.put(CATConstants.FEATURE_URI, uri); lookupFeatures.put(CATConstants.FEATURE_PROPERTY_URI, propUri); lookupFeatures.put(CATConstants.FEATURE_PROPERTY_VALUE, propValue); lookupFeatures.put(CATConstants.CLASS_URI_LIST, Ontology2MapManager.getInstance().getOntology2Map( ontology.getURL().toString()) .getInstanceTypes().get(uri)); lookupFeatures.put(CATConstants.CLASS_URI, new ArrayList<String>(Ontology2MapManager.getInstance() .getOntology2Map(ontology.getURL().toString()) .getInstanceTypes().get(uri)).get(0)); Lookup aLookup = new Lookup(propValue, "", null, null); aLookup.features = lookupFeatures; allLookups.add(aLookup); }// if uri is in the list of ignored namespaces: nsToIgnore }// end if propertiesToIncludeList==0 ... } catch(InvalidURIException e) { logger.info("URI:'" + uri + "' is not valid. Skipping...\n"); } } } /************************************************************************* * classes with all set properties returned in a table with 3 columns: * classUri, propertyUri, propertyValue * ************************************************************ * ************************************************************************/ rows = Ontology2MapManager.getInstance().getOntology2Map( ontology.getURL().toString()).getListOfClasses().split( CATConstants.NEW_LINE); for(String eachRow : rows) { String[] columns = eachRow.split("\\|"); if(columns.length == 3) { String uri = columns[0].trim(); try { URI uriUri = new URI(uri, false); String propUri = columns[1].trim(); if((propertiesToIncludeList.size() == 0 || propertiesToIncludeList .contains(propUri)) && (propertiesToExcludeList.size() == 0 || !(propertiesToExcludeList .contains(propUri)))) { if(!nsToIgnore.contains(uriUri.getNameSpace())) { String propValue = columns[2].trim(); Map<String, Object> lookupFeatures = new HashMap<String, Object>(); lookupFeatures.put(CATConstants.ONTORES_TYPE, CATConstants.TYPE_CLASS); lookupFeatures.put(CATConstants.FEATURE_URI, uri); lookupFeatures.put(CATConstants.FEATURE_PROPERTY_URI, propUri); Lookup aLookup = new Lookup(propValue, "", null, null); aLookup.features = lookupFeatures; allLookups.add(aLookup); }// end if propertiesToIncludeList==0 ... }// if uri is in the list of ignored namespaces: nsToIgnore } catch(InvalidURIException e) { logger.info("URI:'" + uri + "' is not valid.\n"); } } } /************************************************************************* * properties with all set properties returned in a table with 3 columns: * propertyUri, setPropertyUri, propertyValue * ************************************************************ * ************************************************************************/ rows = Ontology2MapManager.getInstance().getOntology2Map( ontology.getURL().toString()).getListOfProperties() .split(CATConstants.NEW_LINE); for(String eachRow : rows) { String[] columns = eachRow.split("\\|"); if(columns.length == 3) { String uri = columns[0].trim(); try { URI uriUri = new URI(uri, false); String propUri = columns[1].trim(); if((propertiesToIncludeList.size() == 0 || propertiesToIncludeList .contains(propUri)) && (propertiesToExcludeList.size() == 0 || !(propertiesToExcludeList .contains(propUri)))) { if(!nsToIgnore.contains(uriUri.getNameSpace())) { String propValue = columns[2].trim(); Map<String, Object> lookupFeatures = new HashMap<String, Object>(); lookupFeatures.put(CATConstants.ONTORES_TYPE, CATConstants.TYPE_PROPERTY); lookupFeatures.put(CATConstants.FEATURE_URI, uri); lookupFeatures.put(CATConstants.FEATURE_PROPERTY_URI, propUri); lookupFeatures.put(CATConstants.FEATURE_PROPERTY_VALUE, propValue); Lookup aLookup = new Lookup(propValue, "", null, null); aLookup.features = lookupFeatures; allLookups.add(aLookup); }// end if propertiesToIncludeList==0 ... } } catch(InvalidURIException e) { logger.info("URI:'" + uri + "' is not valid.\n"); } } } }// end consider properties /* uri retrieval */ if(useResourceUri) { /************************************************************************* * class uris ************************************************************************/ String[] rows = Ontology2MapManager.getInstance().getOntology2Map( ontology.getURL().toString()).getClassURIs().split( CATConstants.NEW_LINE); for(String eachRow : rows) { String uri = eachRow.trim(); try { URI uriUri = new URI(uri, false); String shortName = uriUri.getResourceName(); if(!nsToIgnore.contains(uriUri.getNameSpace())) { Map<String, Object> lookupFeatures = new HashMap<String, Object>(); lookupFeatures.put(CATConstants.ONTORES_TYPE, CATConstants.TYPE_CLASS); lookupFeatures.put(CATConstants.FEATURE_URI, uri.trim()); Lookup aLookup = new Lookup(shortName, "", null, null); aLookup.features = lookupFeatures; allLookups.add(aLookup); } } catch(InvalidURIException e) { logger.info("URI:" + uri + " is not valid.\n"); } } /************************************************************************* * instance uris ************************************************************************/ Set<String> setOfInstanceTypes = Ontology2MapManager.getInstance().getOntology2Map( ontology.getURL().toString()).getInstanceTypes().keySet(); for(String uri : setOfInstanceTypes) { try { URI uriUri = new URI(uri, false); String shortName = uriUri.getResourceName(); if(!nsToIgnore.contains(uriUri.getNameSpace())) { Map<String, Object> lookupFeatures = new HashMap<String, Object>(); lookupFeatures.put(CATConstants.ONTORES_TYPE, CATConstants.TYPE_INSTANCE); lookupFeatures.put(CATConstants.FEATURE_URI, uri); Set<String> l = Ontology2MapManager.getInstance().getOntology2Map( ontology.getURL().toString()).getInstanceTypes() .get(uri); lookupFeatures.put(CATConstants.CLASS_URI_LIST, l); lookupFeatures.put(CATConstants.CLASS_URI, new ArrayList<String>(l) .get(0)); Lookup aLookup = new Lookup(shortName, "", null, null); aLookup.features = lookupFeatures; allLookups.add(aLookup); } } catch(InvalidURIException e) { logger.info("URI:" + uri + " is not valid.\n"); } } /************************************************************************* * property uris ************************************************************************/ rows = Ontology2MapManager.getInstance().getOntology2Map( ontology.getURL().toString()).getPropertyURIs().split( CATConstants.NEW_LINE); for(String eachRow : rows) { String uri = eachRow.trim(); try { URI uriUri = new URI(uri, false); String shortName = uriUri.getResourceName(); if(!nsToIgnore.contains(uriUri.getNameSpace())) { Map<String, Object> lookupFeatures = new HashMap<String, Object>(); lookupFeatures.put(CATConstants.ONTORES_TYPE, CATConstants.TYPE_PROPERTY); lookupFeatures.put(CATConstants.FEATURE_URI, uri); Lookup aLookup = new Lookup(shortName, "", null, null); aLookup.features = lookupFeatures; allLookups.add(aLookup); } } catch(InvalidURIException e) { logger.info("URI:" + uri + " is not valid.\n"); } } } addLookups(allLookups); allLookups = new ArrayList<Lookup>(); /* release GATE resources */ Factory.deleteResource(applicationCorpus); applicationCorpus = null; rootFinderApplication.remove(morpher); rootFinderApplication.remove(posTagger); rootFinderApplication.remove(sentenceSplitter); Factory.deleteResource(sentenceSplitter); sentenceSplitter = null; rootFinderApplication.remove(tokeniser); Factory.deleteResource(rootFinderApplication); rootFinderApplication = null; long currentTime = System.currentTimeMillis(); logger.info("OntoRootGaz initialized for:" + (currentTime - startedInit) + " ms"); return this; } /** * This method takes a list of lookups as a parameter, process them and * returns a list of new Lookups that are than added to the gazetteer. * 'Processing' means replacing lookup.list feature with its root. * Additionally during the processing a new list if Lookups is created called * additionalList: this list contains a new Lookups that needs to be processed * by calling this method again afterwards: - if lookup.list contains "-" or * "_", replace these chars by space, add new lookups to the additionalList * and then extract the root in the next call to this method - if * separateCamelCasedWords=true, separate them by adding a space, add new * lookups to the additionalList and then extract the root later - if * considerHeuristicRules=true then separate words as proposed by these rules, * add new lookups to the additionalList and then extract the root later * * @param List * <Lookup> lookups * @throws ResourceInstantiationException */ protected void addLookups(List<Lookup> lookups) throws ResourceInstantiationException { List<Lookup> lookupsToBeAdded = runRootFinderApplication(lookups); List<Lookup> additionalListTemp = new ArrayList<Lookup>(); additionalListTemp.addAll(additionalList); additionalList = new ArrayList<Lookup>(); List<Lookup> addition = runRootFinderApplication(additionalListTemp); List<Lookup> all = new ArrayList<Lookup>(); all.addAll(lookupsToBeAdded); all.addAll(addition); for(Lookup aLookup : all) { String root = listRoots.get(aLookup.list); int hLevel = 0; if(root != null) { /* * check if the root has spaces and if considerHeuristicRules is set to * true, if yes, than split words and add heuristical_level to each */ if(root.contains(" ") && considerHeuristicRules == true) { Lookup aNewLookup = new Lookup(aLookup.list, "", null, null); Map<String, Object> newFeatures = new HashMap<String, Object>(); for(Object key : aLookup.features.keySet()) { newFeatures.put((String)key, aLookup.features.get(key)); } aNewLookup.features = newFeatures; aNewLookup.features.put(CATConstants.FEATURE_HEURISTIC_LEVEL, hLevel); aNewLookup.features.put(CATConstants.FEATURE_HEURISTIC_VALUE, root); addLookup(root.trim(), aNewLookup); logger.info("NEW ENTRY: " + root + "\n"); int firstIndex = root.trim().indexOf(" "); String newRoot = root.trim(); while(firstIndex >= 0) { newRoot = newRoot.substring(firstIndex + 1, newRoot.length()); hLevel++; Lookup anotherLookup = new Lookup(aLookup.list, "", null, null); Map<String, Object> anotherFeatures = new HashMap<String, Object>(); for(Object key : aLookup.features.keySet()) { anotherFeatures.put((String)key, aLookup.features.get(key)); } anotherLookup.features = anotherFeatures; anotherLookup.features.put(CATConstants.FEATURE_HEURISTIC_LEVEL, hLevel); anotherLookup.features.put(CATConstants.FEATURE_HEURISTIC_VALUE, newRoot.trim()); addLookup(newRoot.trim(), anotherLookup); logger.info("NEW ENTRY: " + newRoot + "\n"); firstIndex = newRoot.trim().indexOf(" "); } } else {// if it doesn't have spaces or // considerHeuristicRules=false aLookup.features.put(CATConstants.FEATURE_HEURISTIC_LEVEL, 0); addLookup(root.trim(), aLookup); logger.info("NEW ENTRY: " + root + "\n"); } } else { logger.info("root is null for lookup:" + aLookup); } } } /* * this list is populated during the processing of all lookups, when some * entries have multiple interpretations; for example, when processing * Project-Name, 'Project-Name' would be added in the first iteration, while * 'Project Name' would be added to the additionalList for later processing */ List<Lookup> additionalList = new ArrayList<Lookup>(); /** * This method process given lookups so that their entries are converted to * the root of the entry i.e. lookup.list is processed and 'root' feature is * used to be lookup.list for resulting lookups. All unprocessed lookups are * added to the additionalList and they are processed later with the same * method */ private List<Lookup> runRootFinderApplication(List<Lookup> lookups) throws ResourceInstantiationException { List<Lookup> lookupsToBeReturned = new ArrayList<Lookup>(); for(Lookup lookup : lookups) { String list = lookup.list; if(list != null && list.trim().length() > 0) { if(list.contains("_")) { String newText = list.replace('_', ' '); Lookup aLookup = new Lookup(newText, "", null, null); aLookup.features = lookup.features; additionalList.add(aLookup); } // if text is camel cased add space between words if(separateCamelCasedWords && list.indexOf(" ") < 0) { String separatedCamelCase = ExpressionFinder.findAndSeparateCamelCases(list, CATConstants.REGEX_CAMEL_CASE, " "); if(list != null && (!list.equals(separatedCamelCase))) { Lookup aLookup = new Lookup(separatedCamelCase, "", null, null); aLookup.features = lookup.features; additionalList.add(aLookup); } } lookupsToBeReturned.add(lookup); /* set new documents to be hidden inside the GATE GUI */ FeatureMap docParams = Factory.newFeatureMap(); docParams.put("stringContent", list); FeatureMap docFeatures = Factory.newFeatureMap(); Gate.setHiddenAttribute(docFeatures, true); Document aDocument = null; try { aDocument = (Document)Factory.createResource("gate.corpora.DocumentImpl", docParams, docFeatures); applicationCorpus.add(aDocument); rootFinderApplication.execute(); } catch(ExecutionException ee) { throw new ResourceInstantiationException(ee); } Iterator it = applicationCorpus.iterator(); while(it.hasNext()) { Document doc = (Document)it.next(); Set<String> tokenTypes = new HashSet<String>(); tokenTypes.add(ANNIEConstants.TOKEN_ANNOTATION_TYPE); tokenTypes.add(ANNIEConstants.SPACE_TOKEN_ANNOTATION_TYPE); List<Annotation> tokenList = new ArrayList<Annotation>(aDocument.getAnnotations().get( tokenTypes)); Collections.sort(tokenList, offsetComparator); StringBuffer rootForText = new StringBuffer(""); boolean lastAnnWasSpace = false; for(Annotation ann : tokenList) { if(ann.getType().equals(ANNIEConstants.TOKEN_ANNOTATION_TYPE)) { lastAnnWasSpace = false; String category = (String)ann.getFeatures().get( ANNIEConstants.TOKEN_CATEGORY_FEATURE_NAME); /* * category "IN" means it is a preposition, and these are used to * be a stop words, so crop everything afterwards, but ONLY if * parameter considerHeuristicRules is set to be true */ if(considerHeuristicRules == true && category.equals("IN")) { break; } else { String root = (String)ann.getFeatures().get("root"); if(root != null) { rootForText.append(root); } else { throw new ResourceInstantiationException( "No root found for annotation " + ann.toString()); } } } else if(ann.getType().equals( ANNIEConstants.SPACE_TOKEN_ANNOTATION_TYPE)) { if(!lastAnnWasSpace) { rootForText.append(' '); } lastAnnWasSpace = true; } else { // malfunction throw new ResourceInstantiationException( "Invalid annotation type: " + ann); } } listRoots.put(doc.getContent().toString(), rootForText.toString()); } applicationCorpus.clear(); Factory.deleteResource(aDocument); aDocument = null; } } return lookupsToBeReturned; } public Morph getMorpher() { return morpher; } public void setMorpher(Morph morpher) { this.morpher = morpher; } public POSTagger getPosTagger() { return posTagger; } public void setPosTagger(POSTagger posTagger) { this.posTagger = posTagger; } public DefaultTokeniser getTokeniser() { return tokeniser; } public void setTokeniser(DefaultTokeniser tokeniser) { this.tokeniser = tokeniser; } public Ontology getOntology() { return ontology; } public void setOntology(Ontology ontology) { this.ontology = ontology; } public Boolean getConsiderProperties() { return considerProperties; } public void setConsiderProperties(Boolean considerProperties) { this.considerProperties = considerProperties; } public Boolean getUseResourceUri() { return useResourceUri; } public void setUseResourceUri(Boolean useResourceUri) { this.useResourceUri = useResourceUri; } /** * @return the separateCamelCasedWords */ public Boolean getSeparateCamelCasedWords() { return separateCamelCasedWords; } /** * @param separateCamelCasedWords * the separateCamelCasedWords to set */ public void setSeparateCamelCasedWords(Boolean separateCamelCasedWords) { this.separateCamelCasedWords = separateCamelCasedWords; } /** * @return the propertiesToExclude */ public String getPropertiesToExclude() { return propertiesToExclude; } /** * @param propertiesToExclude * the propertiesToExclude to set */ public void setPropertiesToExclude(String propertiesToExclude) { this.propertiesToExclude = propertiesToExclude; } /** * @return the propertiesToInclude */ public String getPropertiesToInclude() { return propertiesToInclude; } /** * @param propertiesToInclude * the propertiesToInclude to set */ public void setPropertiesToInclude(String propertiesToInclude) { this.propertiesToInclude = propertiesToInclude; } /** * * @return */ public Boolean getConsiderHeuristicRules() { return considerHeuristicRules; } /** * * @param considerHeuristicRules */ public void setConsiderHeuristicRules(Boolean considerHeuristicRules) { this.considerHeuristicRules = considerHeuristicRules; } /** * Gets the linear definition of the gazetteer. This method is added so that * Gaze does not complain when rendering views and showing initialisation * parameters. * * @return the linear definition of the gazetteer */ public LinearDefinition getLinearDefinition() { return new LinearDefinition(); } }