Log in Help
Print
Home 〉 gate 〉 plugins 〉 Lang_Russian 〉 src 〉 com 〉 ontotext 〉 russie 〉 morph 〉 LemmaImpl.java
 
package com.ontotext.russie.morph;

import gate.Factory;
import gate.FeatureMap;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

/**
 * LemmaImpl.java Implementation of the Lemma interface that contains the
 * word-forms of a word with their morpho-syntactic types. NOTE: The current
 * implementation assumes that the types are distinct within a single lemma.
 * <p>
 * Title: RussIE
 * </p>
 * <p>
 * Description: Russian Information Extraction based on GATE
 * </p>
 * <p>
 * Copyright: Copyright (c) 2003
 * </p>
 * <p>
 * Company: Ontotext Lab.
 * </p>
 * 
 * @author borislav popov
 * @version 1.0
 */
public class LemmaImpl implements Lemma {

  /** the main form in the lemma */
  private String mainForm;

  /** the type of the main form in the lemma */
  private String mainFormType;

  /** the annotation type associated with this lemma; empty by default */
  private String annotationType = "";

  /** the feature map associated with this lemma */
  private FeatureMap fm = Factory.newFeatureMap();

  /**
   * the so-called root of the lemma: the longest prefix shared by all
   * word-forms added so far; <code>null</code> until the first word-form is
   * added
   */
  private String root;

  /** map of types vs word-form suffixes (suffixes are relative to the root) */
  private final Map<String, String> typeVsSuffix =
    new HashMap<String, String>();

  /** the nest of suffixes */
  private SuffixNest suffixNest = new SuffixNestImpl();

  /** Creates an empty lemma: no root, no main form and no word-forms. */
  public LemmaImpl() {
  }

  /**
   * Fetch the annotation type of this lemma.
   * 
   * @return the annotation type (the empty string unless it has been set)
   */
  public String getAnnotationType() {
    return annotationType;
  }

  /**
   * Sets the annotation type of this lemma.
   * 
   * @param type
   *          the new annotation type
   */
  public void setAnnotationType(String type) {
    annotationType = type;
  }

  /**
   * Fetch the feature map of this lemma.
   * 
   * @return the feature map instance held by the lemma (not a copy)
   */
  public FeatureMap getFeatureMap() {
    return fm;
  }

  /**
   * Replaces the feature map of this lemma.
   * 
   * @param fm
   *          the new feature map; stored as is, without copying
   */
  public void setFeatureMap(FeatureMap fm) {
    this.fm = fm;
  }

  /**
   * Sets the main word-form with its type. The main form is also registered as
   * an ordinary word-form of the lemma.
   * 
   * @param wf
   *          word form
   * @param type
   *          the type of the word form
   */
  public void setMainForm(String wf, String type) {
    mainForm = wf;
    type = TypePool.getDistinctType(type);
    mainFormType = type;
    addWordForm(wf, type);
  } // setMainForm(wf,type)

  /**
   * Fetch the main word-form.
   * 
   * @return the main word-form
   */
  public String getMainForm() {
    return mainForm;
  }

  /**
   * Fetch the main word-form type.
   * 
   * @return the main word-form type
   */
  public String getMainFormType() {
    return mainFormType;
  }

  /**
   * Fetch the root of the lemma.
   * 
   * @return the root of the lemma, or <code>null</code> if no word-form has
   *         been added yet
   */
  public String getRoot() {
    return root;
  }

  /**
   * Adds a word form with its type. The root is shrunk if necessary so that it
   * remains a prefix of every word-form, and the suffixes that are already
   * stored are extended accordingly.
   * 
   * @param wf
   *          word-form
   * @param type
   *          the type of the word-form
   */
  public void addWordForm(String wf, String type) {

    // fit the new wf to the root and retain wf suffix.
    String suffix = adjustRoot(wf);

    if(suffixNest == null) {
      suffixNest = new SuffixNestImpl();
    }

    // the type as returned by the pool is what gets stored
    type = TypePool.getDistinctType(type);
    suffixNest.add(suffix, type);

    typeVsSuffix.put(type, suffix);
  }

  /**
   * Get word-form by type.
   * 
   * @param type
   *          the word-form type
   * @return the word-form that has this type, or <code>null</code> if no
   *         word-form is registered under this type
   */
  public String getWordForm(String type) {
    String suffix = typeVsSuffix.get(type);
    if(suffix == null) {
      // unknown type: do not fabricate a word-form such as root + "null"
      return null;
    }
    return root + suffix;
  }

  /**
   * Get suffix by type.
   * 
   * @param type
   *          the word-form type
   * @return the suffix that has this type, or <code>null</code> if no suffix
   *         is registered under this type
   */
  public String getSuffix(String type) {
    return typeVsSuffix.get(type);
  }

  /**
   * Get the set of types relevant to a word-form. The suffix is obtained by
   * cutting off the first <code>root.length()</code> characters of the
   * word-form; it is not verified that the word-form actually starts with the
   * root.
   * 
   * @param wf
   *          the word-form
   * @return the set of types relevant to the word-form
   * @throws NullPointerException
   *           if <code>wf</code> is null or the lemma has no root yet
   * @throws StringIndexOutOfBoundsException
   *           if <code>wf</code> is shorter than the root
   */
  public Set<String> getTypeByWF(String wf) {
    return suffixNest.getType(wf.substring(root.length()));
  }

  /**
   * Get the set of types relevant to a word-form suffix
   * 
   * @param suffix
   *          the suffix of the wf
   * @return the set of types relevant to the word-form with this suffix
   */
  public Set<String> getTypeBySuffix(String suffix) {
    return suffixNest.getType(suffix);
  }

  /**
   * Fetch a set of the word-forms in the lemma.
   * 
   * @return a newly created set with the word-forms in this lemma (the root
   *         concatenated with every suffix in the nest)
   */
  public Set<String> getWordForms() {
    Set<String> wfs = new HashSet<String>();
    for(String suffix : suffixNest.getSuffixes()) {
      wfs.add(root + suffix);
    }
    return wfs;
  } // / getWordForms()

  /**
   * Fetch a set of the word-form suffixes in the lemma according to the root.
   * 
   * @return the word-form suffixes in this lemma, as provided by the nest
   */
  public Set<String> getSuffixes() {
    return suffixNest.getSuffixes();
  } // / getSuffixes()

  /**
   * Fetch the set of word-form types in the lemma
   * 
   * @return the set of word-form types in the lemma; this is the live key set
   *         of the internal type-to-suffix map, not a copy
   */
  public Set<String> getTypes() {
    return typeVsSuffix.keySet();
  }

  /**
   * It is needed to adjust inflection suffixes while dynamically building the
   * root. Prepends the given prefix to all the suffixes in the internal
   * representation (both the type-to-suffix map and the suffix nest).
   * 
   * @param prefix
   *          the prefix to be added; null or empty means nothing to do
   */
  private void add2Suffixes(String prefix) {
    if(prefix == null || prefix.length() == 0) return;

    // replacing values through the entries does not alter the key set, so it
    // is safe to do while iterating
    for(Map.Entry<String, String> entry : typeVsSuffix.entrySet()) {
      entry.setValue(prefix + entry.getValue());
    }

    suffixNest.addPrefix2Suffixes(prefix);
  } // add2Suffixes(prefix)

  /**
   * Adjusts the root according to a new word-form and returns the suffix of the
   * wf according to this new root. Accordingly adjusts the other suffixes in
   * the lemma if the root changes.
   * 
   * @param wf
   *          the new word-form
   * @return the suffix according to the new root
   */
  private String adjustRoot(String wf) {
    if(root == null) {
      // the very first word-form becomes the root and has an empty suffix
      root = wf;
      return "";
    }

    int common = commonPrefixLength(root, wf);

    // the tail cut off the old root must be pushed to the front of every
    // suffix stored so far, so that root + suffix still yields the same
    // word-forms
    String rootSuffix = root.substring(common);
    String suffix = wf.substring(common);
    root = root.substring(0, common);

    add2Suffixes(rootSuffix);

    return suffix;
  }// adjustRoot(wf)

  /**
   * Counts the leading characters that two strings have in common.
   * 
   * @param a
   *          the first string, not null
   * @param b
   *          the second string, not null
   * @return the length of the longest common prefix of a and b
   */
  private static int commonPrefixLength(String a, String b) {
    int limit = Math.min(a.length(), b.length());
    int i = 0;
    while(i < limit && a.charAt(i) == b.charAt(i)) {
      i++;
    }
    return i;
  }

  /**
   * Null-tolerant equality check.
   * 
   * @param a
   *          the object whose equals method is used, may be null
   * @param b
   *          the object to compare with, may be null
   * @return true if both are null or a.equals(b)
   */
  private static boolean sameOrEqual(Object a, Object b) {
    return a == null ? b == null : a.equals(b);
  }

  /**
   * Compares roots, suffixes and set of types - if all equal - considers the
   * objects are equal. Lemmas that have no root yet (null) are compared
   * without throwing.
   */
  @Override
  public boolean equals(Object obj) {
    if(obj == this) return true;
    if(!(obj instanceof Lemma)) return false;
    Lemma l2 = (Lemma)obj;
    return sameOrEqual(l2.getRoot(), this.getRoot())
      && sameOrEqual(l2.getSuffixes(), this.getSuffixes())
      && sameOrEqual(l2.getTypes(), this.getTypes());
  } // equals(obj)

  /**
   * Hash code built from the same three components that {@link #equals(Object)}
   * compares: the root, the set of suffixes and the set of types. NOTE: these
   * change whenever a word-form is added, so a lemma must not be modified
   * while it is stored in a hash-based collection.
   */
  @Override
  public int hashCode() {
    String r = getRoot();
    Set<String> suffixes = getSuffixes();
    Set<String> types = getTypes();
    int result = (r == null) ? 0 : r.hashCode();
    result = 31 * result + (suffixes == null ? 0 : suffixes.hashCode());
    result = 31 * result + (types == null ? 0 : types.hashCode());
    return result;
  } // hashCode()

  /**
   * Get the suffix nest associated with this lemma.
   * 
   * @return the nest
   */
  public SuffixNest getSuffixNest() {
    return suffixNest;
  }

  /**
   * Synchronizes the current nest with the pool of nests and sets the unique
   * nest to the lemma. To be called after finishing the incremental uploading
   * of suffixes and types to the lemma
   */
  public void synchWithSuffixPool() {
    suffixNest = SuffixPool.getDistinctNestAs(suffixNest);
  }

  /**
   * Calculates the difference between the specified name and the main form of
   * this lemma in terms of finding a common root: everything after the longest
   * common prefix of the two strings counts as different.
   * 
   * @param name
   *          the string to compare with the main form, not null
   * @return the number of characters following the common prefix in the
   *         longer of the two strings; 0 if the strings are equal
   * @throws NullPointerException
   *           if name is null or no main form has been set
   */
  public int difference2(String name) {
    String mf = this.mainForm;
    int common = commonPrefixLength(mf, name);
    // |length difference| + (shorter length - common) == longer length - common
    return Math.max(mf.length(), name.length()) - common;
  } // difference2

} // class LemmaImpl