package com.ontotext.russie.morph; import gate.Factory; import gate.FeatureMap; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Set; /** * LemmaImpl.java Implementation of the Lemma inteface that contains the * wordforms of a word with their morpho-syntactic types. NOTE: The current * implementation considers that the types are distinct in a single lemma. * <p> * Title: RussIE * </p> * <p> * Description: Russian Information Extraction based on GATE * </p> * <p> * Copyright: Copyright (c) 2003 * </p> * <p> * Company: Ontotext Lab. * </p> * * @author borislav popov * @version 1.0 */ public class LemmaImpl implements Lemma { /** the main form in the lemma */ private String mainForm; /** the type of the main form in the lemma */ private String mainFormType; private String annotationType = ""; private FeatureMap fm = Factory.newFeatureMap(); public String getAnnotationType() { return annotationType; } public void setAnnotationType(String type) { annotationType = type; } public FeatureMap getFeatureMap() { return fm; } public void setFeatureMap(FeatureMap fm) { this.fm = fm; } /** the so-called root of the lemma */ private String root; /** map of types vs word-form suffixes */ private Map<String, String> typeVsSuffix; /** the nest of suffixes */ private SuffixNest suffixNest = new SuffixNestImpl(); public LemmaImpl() { typeVsSuffix = new HashMap<String, String>(); } /** * Sets the main word-form with its type. * * @param wf * word form * @param type * the type of the word form */ public void setMainForm(String wf, String type) { mainForm = wf; type = TypePool.getDistinctType(type); mainFormType = type; addWordForm(wf, type); } // setMainForm(wf,type) /** * Fetch the main word-form. * * @return the main word-form */ public String getMainForm() { return mainForm; } /** * Fetch the main word-form type. * * @return the main word-form type */ public String getMainFormType() { return mainFormType; } /** * Fetch the root of the lemma. * * @return the root of the lemma */ public String getRoot() { return root; } /** * Adds a word form with its type * * @param wf * word-form * @param type * the type of the word-form */ public void addWordForm(String wf, String type) { // fit the new wf to the root and retain wf suffix. String suffix = adjustRoot(wf); if(suffixNest == null) { suffixNest = new SuffixNestImpl(); } type = TypePool.getDistinctType(type); suffixNest.add(suffix, type); typeVsSuffix.put(type, suffix); } /** * Get word-form by type. * * @param type * the word-form type * @return the word-form that has this type */ public String getWordForm(String type) { return root + typeVsSuffix.get(type); } /** * Get suffix by type. * * @param type * the word-form type * @return the suffix that has this type */ public String getSuffix(String type) { return typeVsSuffix.get(type); } /** * Get the set of types relevant to a word-form * * @param wf * the word-form * @return the set of types relevant to the word-form */ public Set<String> getTypeByWF(String wf) { return suffixNest.getType(wf.substring(root.length())); } /** * Get the set of types relevant to a word-form suffix * * @param suffix * the suffix of the wf * @return the set of types relevant to the word-form with this suffix */ public Set<String> getTypeBySuffix(String suffix) { return suffixNest.getType(suffix); } /** * Fetch a set of the word-forms in the lemma. * * @return the word-forms in this lemma */ public Set<String> getWordForms() { Set<String> sufs = suffixNest.getSuffixes(); Iterator<String> it = sufs.iterator(); Set<String> wfs = new HashSet<String>(); while(it.hasNext()) { wfs.add(root + it.next()); } return wfs; } // / getWordForms() /** * Fetch a set of the word-form suffixes in the lemma according to the root. * * @return the word-form suffixes in this lemma */ public Set<String> getSuffixes() { return suffixNest.getSuffixes(); } // / getSuffixes() /** * Fetch the set of word-form types in the lemma * * @return the set of word-form types in the lemma */ public Set<String> getTypes() { return typeVsSuffix.keySet(); } /** * It is needed to adjust inflation suffixes while dynamically building the * root. adds to the suffixes in the internal representation. * * @param prefix * the prefix to be added */ private void add2Suffixes(String prefix) { if(prefix == null || prefix.length() == 0) return; Iterator<String> ki = typeVsSuffix.keySet().iterator(); String type; String suffix; while(ki.hasNext()) { type = ki.next(); suffix = typeVsSuffix.get(type); typeVsSuffix.put(type, prefix + suffix); } // while keys suffixNest.addPrefix2Suffixes(prefix); } // add2Suffixes(prefix) /** * Adjusts the root according to a new word-form and returns the suffix of the * wf according to this new root. Accordingly adjusts the other suffixes in * the lemma if the root changes. * * @param wf * the new word-form * @return the suffix according to the new root */ private String adjustRoot(String wf) { String suffix = ""; // the common suffix as a part of the root for the wforms so far. String rootSuffix = ""; if(root == null) { root = wf; return ""; } if(root.length() < wf.length()) { suffix = wf.substring(root.length()); wf = wf.substring(0, root.length()); } else { if(root.length() > wf.length()) { rootSuffix = root.substring(wf.length()); root = root.substring(0, wf.length()); } } // else while(!root.equals(wf)) { rootSuffix = root.charAt(root.length() - 1) + rootSuffix; root = root.substring(0, root.length() - 1); suffix = wf.charAt(wf.length() - 1) + suffix; wf = wf.substring(0, wf.length() - 1); } // while root is found add2Suffixes(rootSuffix); return suffix; }// adjustRoot(wf) /** * Compares roots, suffixes and set of types - if all equal - considers the * objects are equal */ public boolean equals(Object obj) { if(!(obj instanceof Lemma)) return false; Lemma l2 = (Lemma)obj; if(!l2.getRoot().equals(this.getRoot())) return false; if(!l2.getSuffixes().equals(this.getSuffixes())) return false; if(!l2.getTypes().equals(this.getTypes())) return false; return true; } // equals(obj) /** * Get the suffix nest associated with this lemma. * * @return the nest */ public SuffixNest getSuffixNest() { return suffixNest; } /** * Synchronizes the current nest with the pool of nests and sets the unique * nest to the lemma. To be called after finishing the incremental uploading * of suffixes and types to the lemma */ public void synchWithSuffixPool() { suffixNest = SuffixPool.getDistinctNestAs(suffixNest); } /** * Calculates the difference between the specified name and this lemma in * terms of finding a common root and returns the count of characters * different at the suffix of the name compared to the lemma. * * @param name * @return */ public int difference2(String name) { int diff = 0; String mf = this.mainForm; if(mf.length() < name.length()) { diff = name.length() - mf.length(); name = name.substring(0, mf.length()); } else { if(mf.length() > name.length()) { diff = mf.length() - name.length(); mf = mf.substring(0, name.length()); } } // at this point both mf and name are with equal length // and diff has been accumulated while(!mf.equals(name)) { mf = mf.substring(0, mf.length() - 1); name = name.substring(0, name.length() - 1); diff++; } // return diff; } // difference2 } // class LemmaImpl