// Source: releases/gate-8.4-build5748-ALL/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java
 
/*
 *  Copyright (c) 2008--2014, The University of Sheffield. See the file
 *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  $Id: AbstractPairbank.java 18970 2015-10-26 14:46:14Z adamfunk $
 */
package gate.termraider.bank;

import gate.Corpus;
import gate.Document;
import gate.Factory;
import gate.Resource;
import gate.creole.ResourceInstantiationException;
import gate.gui.ActionsPublisher;
import gate.termraider.gui.ActionSaveCsv;
import gate.termraider.output.PairCsvGenerator;
import gate.termraider.util.UnorderedTermPair;
import gate.util.GateException;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import javax.swing.Action;



public abstract class AbstractPairbank extends AbstractBank
    implements ActionsPublisher {

  private static final long serialVersionUID = 3544077331310241919L;

  protected transient List<Action> actionsList;
  
  protected Map<UnorderedTermPair, Double> scores;
  protected Map<UnorderedTermPair, Set<String>> documents;
  protected Map<UnorderedTermPair, Integer> pairCount;



  public Resource init() throws ResourceInstantiationException {
    prepare();
    resetScores();
    processCorpora();
    calculateScores();
    return this;
  }
  

  public void cleanup() {
    super.cleanup();
  }

  public Set<UnorderedTermPair> getPairs() {
    return this.pairCount.keySet();
  }
  
  public Map<UnorderedTermPair, Double> getScores() {
    return this.scores;
  }
  
  public Map<UnorderedTermPair, Set<String>> getDocuments() {
    return this.documents;
  }
  
  public int getDocumentCount(UnorderedTermPair pair) {
    if (this.documents.containsKey(pair)) {
      return this.documents.get(pair).size();
    }
    
    return 0;
  }
  
  public int getPairCount(UnorderedTermPair pair) {
    if (this.pairCount.containsKey(pair)) {
      return this.pairCount.get(pair);
    }
    // implied else
    return 0;
  }
  
  
  public Double getMinScore() {
    if (this.scores.isEmpty()) {
      return 0.0;
    }
    // implied else
    return Collections.min(this.scores.values());
  }
  
  public Double getMaxScore() {
    if (this.scores.isEmpty()) {
      return 0.0;
    }
    // implied else
    return Collections.max(this.scores.values());
  }
  
  
  protected void prepare() throws ResourceInstantiationException {
    if ( (corpora == null) || (corpora.size() == 0) ) {
      throw new ResourceInstantiationException("No corpora given");
    }
  }
  
  protected void createActions() {
    actionsList = new ArrayList<Action>();
    actionsList.add(new ActionSaveCsv("Save as CSV...", this));
  }
  
  
  protected void processCorpora() {
    for (Corpus corpus : corpora) {
      processCorpus(corpus);
      if (debugMode) {
        System.out.println("Pairbank " + this.getName() + ": added corpus " + corpus.getName() + " with " + corpus.size() + " documents");
      }
    }
    scanTypesAndLanguages();
  }
  
  
  private void scanTypesAndLanguages() {
    this.types = new TreeSet<String>();
    this.languages = new TreeSet<String>();
    for (UnorderedTermPair pair : this.pairCount.keySet()) {
      this.languages.add(pair.getTerm0().getLanguageCode());
      this.languages.add(pair.getTerm1().getLanguageCode());
      this.types.add(pair.getTerm0().getType());
      this.types.add(pair.getTerm1().getType());
    }
  }

  
  protected void processCorpus(Corpus corpus) {
    for (int i=0 ; i < corpus.size() ; i++) {
      boolean wasLoaded = corpus.isDocumentLoaded(i);
      Document document = (Document) corpus.get(i);
      
      addData(document, i);

      // datastore safety
      if (! wasLoaded) {
        corpus.unloadDocument(document);
        Factory.deleteResource(document);
      }
    }
  }
  
  
  protected void resetScores() {
    this.documents = new HashMap<UnorderedTermPair, Set<String>>();
    this.scores = new HashMap<UnorderedTermPair, Double>();
    this.pairCount = new HashMap<UnorderedTermPair, Integer>();
    resetImplScores();
  }


  /* BEHOLD THE GUBBINS to distinguish the various (potential) types of Pairbanks*/

  protected abstract void addData(Document document, int index);
  
  protected abstract void calculateScores(); 
  
  protected abstract void resetImplScores();

  
  
  public Double getScore(UnorderedTermPair pair) {
    if (scores.containsKey(pair)) {
      return scores.get(pair);
    }
    
    // error code
    return null;
  }
  
  
  
  
  /* Methods for saving as CSV */
  
  public void saveAsCsv(Number threshold, File outputFile) throws GateException {
    PairCsvGenerator generator = new PairCsvGenerator();
    generator.generateAndSaveCsv(this, threshold, outputFile);
  }

  /**
   * Convenience method to save everything in the termbank.
   * @param outputFile
   * @throws GateException
   */
  public void saveAsCsv(File outputFile) throws GateException {
    PairCsvGenerator generator = new PairCsvGenerator();
    generator.generateAndSaveCsv(this, -100.0F, outputFile);
  }
  
  
  
  @Override
  public List<Action> getActions() {
    // lazy instantiation because it's transient
    if (this.actionsList == null) {
      createActions();
    }
    
    return this.actionsList;
  }

  
  /***** CREOLE PARAMETERS *****/

  

}