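// Web source-viewer listing (page navigation text "Log in", "Help", "Print" omitted).
// Path: Home 〉 releases 〉 gate-8.4-build5748-ALL 〉 plugins 〉 TermRaider 〉 src 〉 gate 〉 termraider 〉 bank 〉 HyponymyTermbank.java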
 
/*
 *  Copyright (c) 2008--2014, The University of Sheffield. See the file
 *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  $Id: HyponymyTermbank.java 18970 2015-10-26 14:46:14Z adamfunk $
 */
package gate.termraider.bank;

import gate.Annotation;
import gate.AnnotationSet;
import gate.Document;
import gate.FeatureMap;
import gate.Utils;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.gui.ActionsPublisher;
import gate.termraider.modes.Normalization;
import gate.termraider.util.ScoreType;
import gate.termraider.util.Term;
import gate.termraider.util.Utilities;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;



/**
 * TermRaider termbank whose scores are derived from head/string hyponymy.
 *
 * <p>For every candidate annotation the bank records the term, the documents
 * it occurs in and its "head" string.  A term J is then counted as a hyponym
 * of a term I when J's string contains I's string, the two terms differ, and
 * at least one pair of their recorded heads passes the head test in
 * {@link #calculateScores()}.  The raw score of a term is
 * {@code documentFrequency * (1 + hyponymCount)}; the default score is that
 * raw score passed through the configured {@link Normalization}.</p>
 *
 * <p>Several members used here ({@code scores}, {@code termDocuments},
 * {@code documentCount}, {@code languages}, {@code types}, {@code scoreTypes},
 * {@code scoreProperty}, {@code debugMode}, ...) are not declared in this
 * class; they are presumably inherited from {@link AbstractTermbank}.</p>
 */
@CreoleResource(name = "HyponymyTermbank",
        icon = "termbank-lr.png",
        comment = "TermRaider Termbank derived from head/string hyponymy",
        helpURL = "http://gate.ac.uk/userguide/sec:creole:termraider:hyponymy")

public class HyponymyTermbank extends AbstractTermbank
    implements ActionsPublisher  {

  private static final long serialVersionUID = -2382834479385875682L;


  /* EXTRA CREOLE PARAMETERS */

  /** Feature names, in priority order, scanned on each candidate for its head; may be null. */
  protected List<String> inputHeadFeatures;

  /** Normalization mode applied to the raw score to obtain the default score. */
  private Normalization normalization;


  /* EXTRA DATA FOR ANALYSIS */

  /** Head strings recorded for each term while processing documents. */
  private Map<Term, Set<String>> termHeads;

  /** For each term, the term strings of the hyponyms found by {@link #calculateScores()}. */
  private Map<Term, Set<String>> termHyponyms;

  /** Score types registered by {@link #initializeScoreTypes()} (besides the default one). */
  private ScoreType termFrequencyST, hyponymsST, localDocFrequencyST, rawScoreST;


  /* Methods for the debugging GUI to get the data   */

  /**
   * @return the live (not copied) map from each term to its recorded head strings
   */
  public Map<Term, Set<String>> getTermHeads() {
    return this.termHeads;
  }

  /**
   * @return the live (not copied) map from each term to the strings of its hyponyms
   */
  public Map<Term, Set<String>> getTermHyponyms() {
    return this.termHyponyms;
  }



  /**
   * Computes the raw (un-normalized) score of one term and, as a side effect,
   * stores the term's hyponym count under the hyponym score type.
   *
   * @param term the term to score
   * @return {@code docFreq * (1 + hyponyms)}, where both counts are the sizes
   *         of the sets looked up for {@code term}
   */
  private double calculateOneRawScore(Term term) {
    // Primitive counts: avoids the needless Integer boxing and the float
    // literal of the previous version; the numeric result is unchanged.
    int hyponyms = Utilities.getStringSetFromMap(termHyponyms, term).size();
    int docFreq = Utilities.getStringSetFromMap(termDocuments, term).size();
    Utilities.setScoreTermValue(scores, hyponymsST, term, hyponyms);
    return docFreq * (1.0 + hyponyms);
  }


  /**
   * Collects the raw data of one document: for each candidate annotation it
   * records the source document and the head string of the term, and
   * increments the term's frequency.
   *
   * <p>The head is the value of the first feature named in
   * {@link #inputHeadFeatures} that is present with a non-null value on the
   * candidate; if there is none (or the parameter is unset), the head is the
   * document text covered by the candidate.</p>
   *
   * @param document the document to scan
   * @param index    position of the document, passed on to
   *                 {@code Utilities.docIdentifier}
   */
  protected void processDocument(Document document, int index) {
    documentCount++;
    String documentSource = Utilities.docIdentifier(document, idDocumentFeature, index);
    AnnotationSet candidates = document.getAnnotations(inputASName).get(inputAnnotationTypes);

    for (Annotation candidate : candidates) {
      Term term = makeTerm(candidate, document);

      FeatureMap features = candidate.getFeatures();
      // Default head: the text span of the candidate itself.
      String head = Utils.stringFor(document, candidate);

      // inputHeadFeatures has no default value, so it can legitimately be
      // null; a feature may also be present with a null value.  Both cases
      // used to raise a NullPointerException and are now treated as "no
      // head feature here".
      if (inputHeadFeatures != null && features != null) {
        for (String key : inputHeadFeatures) {
          Object value = features.get(key);
          if (value != null) {
            head = value.toString();
            break;
          }
        }
      }

      Utilities.addToMapSet(termDocuments, term, documentSource);
      Utilities.addToMapSet(termHeads, term, head);
      Utilities.incrementScoreTermValue(scores, termFrequencyST, term, 1);
    }
  }



  /**
   * Derives the hyponym sets from the collected heads and then fills in the
   * raw, normalized (default) and local-document-frequency scores for every
   * term.  Also records each term's language code and type.
   *
   * <p>The hyponym search compares every term with every other term, so its
   * cost grows quadratically with the number of terms.</p>
   */
  public void calculateScores() {
    Set<Term> terms = termHeads.keySet();

    for (Term termI : terms) {
      Set<String> headsI = termHeads.get(termI);
      // Loop-invariant for the inner loop, so fetched once per outer term.
      String stringI = termI.getTermString();

      for (Term termJ : terms) {
        // Candidate hyponym: J's string contains I's string and J is a
        // different term.
        if (termJ.getTermString().contains(stringI)
                && (! termI.equals(termJ))) {
          Set<String> headsJ = termHeads.get(termJ);

          // One matching pair of heads is enough; the labelled break leaves
          // both head loops as soon as J has been recorded.
          // NOTE(review): the test is headI.endsWith(headJ), i.e. a head of
          // the contained term must end with a head of the containing term
          // -- confirm this direction is the intended one.
          hyponymLoop:
            for (String headI : headsI) {
              for (String headJ : headsJ) {
                if (headI.endsWith(headJ)) {
                  Utilities.addToMapSet(termHyponyms, termI, termJ.getTermString());
                  break hyponymLoop;
                }
              }
            }
        }
      }
    }

    for (Term term : terms) {
      this.languages.add(term.getLanguageCode());
      this.types.add(term.getType());

      double rawScore = calculateOneRawScore(term);
      double normalized = Normalization.calculate(normalization, rawScore);
      Utilities.setScoreTermValue(scores, rawScoreST, term, rawScore);
      Utilities.setScoreTermValue(scores, getDefaultScoreType(), term, normalized);
      // processDocument adds every term to termDocuments and termHeads
      // together, so the lookup below finds an entry for each term.
      int localDF = this.termDocuments.get(term).size();
      Utilities.setScoreTermValue(scores, localDocFrequencyST, term, localDF);
    }

    if (debugMode) {
      System.out.println("Termbank: nbr of terms = " + termDocuments.size());
    }
  }


  /**
   * Discards all collected data: creates an empty score map for every
   * registered score type and empty head, hyponym, document, language and
   * type collections.
   */
  protected void resetScores() {
    scores = new HashMap<ScoreType, Map<Term,Number>>();
    for (ScoreType st : scoreTypes) {
      scores.put(st, new HashMap<Term, Number>());
    }
    termHeads       = new HashMap<Term, Set<String>>();
    termHyponyms    = new HashMap<Term, Set<String>>();
    termDocuments   = new HashMap<Term, Set<String>>();
    languages = new HashSet<String>();
    types = new HashSet<String>();
  }


  /**
   * Registers this bank's score types, in order: the score named by
   * {@code scoreProperty}, its raw variant ({@code scoreProperty + RAW_SUFFIX}),
   * {@code "termFrequency"}, {@code "hyponymCount"} and
   * {@code "localDocFrequency"}.
   */
  protected void initializeScoreTypes() {
    this.scoreTypes = new ArrayList<ScoreType>();
    this.scoreTypes.add(new ScoreType(scoreProperty));
    this.rawScoreST = new ScoreType(scoreProperty + RAW_SUFFIX);
    this.scoreTypes.add(rawScoreST);
    this.termFrequencyST = new ScoreType("termFrequency");
    this.scoreTypes.add(termFrequencyST);
    this.hyponymsST = new ScoreType("hyponymCount");
    this.scoreTypes.add(hyponymsST);
    this.localDocFrequencyST = new ScoreType("localDocFrequency");
    this.scoreTypes.add(localDocFrequencyST);
  }



  /***** CREOLE PARAMETERS *****/

  /**
   * @param list feature names, in priority order, whose value is used as a
   *             candidate's head; may be null, in which case the candidate's
   *             own text is always used
   */
  @CreoleParameter(comment = "Annotation features (in order) to be scanned as terms' heads")
  public void setInputHeadFeatures(List<String> list) {
    this.inputHeadFeatures = list;
  }

  /**
   * @return the configured head feature names, possibly null
   */
  public List<String> getInputHeadFeatures() {
    return this.inputHeadFeatures;
  }


  /* override default value from AbstractTermbank   */
  /**
   * Delegates to the superclass setter; redeclared here only so that the
   * annotation can supply a different default value.
   *
   * @param name name of the score property
   */
  @Override
  @CreoleParameter(defaultValue = "kyotoDomainRelevance")
  public void setScoreProperty(String name) {
    super.setScoreProperty(name);
  }


  /**
   * @param mode normalization applied to raw scores in {@link #calculateScores()}
   */
  @CreoleParameter(comment = "score normalization",
          defaultValue = "Sigmoid")
  public void setNormalization(Normalization mode) {
    this.normalization = mode;
  }

  /**
   * @return the configured normalization mode
   */
  public Normalization getNormalization() {
    return this.normalization;
  }


  /**
   * @return a new map with two display entries: the number of documents
   *         processed and the number of entries in the default score map
   */
  @Override
  public Map<String, String> getMiscDataForGui() {
    Map<String, String> result = new HashMap<String, String>();
    result.put("nbr of local documents", String.valueOf(this.documentCount));
    result.put("nbr of terms", String.valueOf(this.getDefaultScores().size()));
    return result;
  }

}