// Web-page navigation residue (not part of the original source): Log in | Help | Print
// Home / releases / gate-7.1-build4485-ALL / plugins / TermRaider / src / gate / termraider / bank / HyponymyTermbank.java
 
/*
 *  Copyright (c) 2008--2012, The University of Sheffield. See the file
 *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  $Id: HyponymyTermbank.java 16337 2012-11-27 14:42:40Z adamfunk $
 */
package gate.termraider.bank;

import gate.creole.metadata.*;
import gate.gui.ActionsPublisher;
import gate.*;
import gate.termraider.util.*;
import java.util.*;



@CreoleResource(name = "HyponymyTermbank",
        icon = "termbank-lr.png",
        comment = "TermRaider Termbank derived from head/string hyponymy")

public class HyponymyTermbank extends AbstractTermbank
    implements ActionsPublisher  {
  private static final long serialVersionUID = 6846311108437600048L;
  

  
  /* EXTRA CREOLE PARAMETERS */
  protected List<String> inputHeadFeatures;

  
  /* EXTRA DATA FOR ANALYSIS */
  private Map<Term, Set<String>> termHeads;
  private Map<Term, Set<String>> termHyponyms;

  
  /* Methods for the debugging GUI to get the data   */
  public Map<Term, Set<String>> getTermHeads() {
    return this.termHeads;
  }

  public Map<Term, Set<String>> getTermHyponyms() {
    return this.termHyponyms;
  }

  
  
  private double calculateOneRawScore(Term term) {
    double docFreq = (double) getSetFromMap(termDocuments, term).size();
    double hyponyms = (double) getSetFromMap(termHyponyms, term).size();
    return docFreq * (1.0F + hyponyms);
  }

  
  protected void addData(Document document) {
    String documentSource = Utilities.sourceOrName(document);
    AnnotationSet candidates = document.getAnnotations(inputASName).get(inputAnnotationTypes);
    
    for (Annotation candidate : candidates) {
      Term term = makeTerm(candidate, document);

      FeatureMap features = candidate.getFeatures();
      String head = Utils.stringFor(document, candidate);
      
      for (String key : inputHeadFeatures) {
        if (features.containsKey(key)) {
          head = features.get(key).toString();
          break;
        }
      }
      
      addToMapSet(termDocuments, term, documentSource);
      addToMapSet(termHeads, term, head);
      incrementTermFreq(term, 1);
    }
  }

  
  private void addToMapSet(Map<Term, Set<String>> map, Term key, String value) {
    Set<String> valueSet;
    if (map.containsKey(key)) {
      valueSet = map.get(key);
    }
    else {
      valueSet = new HashSet<String>();
    }
    
    valueSet.add(value);
    map.put(key, valueSet);
  }
  
  private Set<String> getSetFromMap(Map<Term, Set<String>> map, Term key) {
    if (map.containsKey(key)) {
      return map.get(key);
    }
    
    //implied else
    Set<String> valueSet = new HashSet<String>();
    map.put(key, valueSet);
    return valueSet;
  }
  
  

  public void calculateScores() {
    Set<Term> terms = termHeads.keySet();
    Set<String> headsI, headsJ;
    
    for (Term termI : terms) {
      headsI = termHeads.get(termI);
      
      for (Term termJ : terms) {
        if (termJ.getTermString().contains(termI.getTermString())
                && (! termI.equals(termJ))) {
          headsJ = termHeads.get(termJ);
          
          hyponymLoop:
            for (String headI : headsI) {
              for (String headJ : headsJ) {
                if (headI.endsWith(headJ)) {
                  addToMapSet(termHyponyms, termI, termJ.getTermString());
                  break hyponymLoop;
                }
              }
            }
        }
      }
    }
    
    for (Term term : terms) {
      double rawScore = calculateOneRawScore(term);
      rawTermScores.put(term, rawScore);
      double score = Utilities.normalizeScore(rawScore);
      termScores.put(term, score);
    }
    
    termsByDescendingScore = new ArrayList<Term>(termScores.keySet());
    Collections.sort(termsByDescendingScore, new TermComparatorByDescendingScore(termScores));
    
    termsByDescendingFrequency = new ArrayList<Term>(termScores.keySet());
    Collections.sort(termsByDescendingFrequency, new TermComparatorByDescendingScore(termFrequencies));
    
    termsByDescendingDocFrequency = new ArrayList<Term>(termScores.keySet());
    Collections.sort(termsByDescendingFrequency, new TermComparatorByDescendingScore(docFrequencies));
    
    if (debugMode) {
      System.out.println("Termbank: nbr of terms = " + termsByDescendingScore.size());
    }
  }
  
  
  protected void resetScores() {
    termHeads       = new HashMap<Term, Set<String>>();
    termHyponyms    = new HashMap<Term, Set<String>>();
    termDocuments   = new HashMap<Term, Set<String>>();
    termScores      = new HashMap<Term, Double>();
    rawTermScores   = new HashMap<Term, Double>();
    termsByDescendingScore     = new ArrayList<Term>();
    termsByDescendingFrequency = new ArrayList<Term>();
    termsByDescendingDocFrequency = new ArrayList<Term>();
    termFrequencies = new HashMap<Term, Integer>();
    docFrequencies = new HashMap<Term, Integer>();
  }

  
  /***** CREOLE PARAMETERS *****/

  @CreoleParameter(comment = "Annotation features (in order) to be scanned as terms' heads")
  public void setInputHeadFeatures(List<String> list) {
    this.inputHeadFeatures = list;
  }
  
  public List<String> getInputHeadFeatures() {
    return this.inputHeadFeatures;
  }
  
  
  /* override default value from AbstractTermbank   */
  @CreoleParameter(defaultValue = "kyotoDomainRelevance")
  public void setScoreProperty(String name) {
    super.setScoreProperty(name);
  }


}