Log in Help
Print
Home 〉 releases 〉 gate-8.4-build5748-ALL 〉 plugins 〉 TermRaider 〉 src 〉 gate 〉 termraider 〉 modes 〉 IdfCalculation.java
 
/*
 *  Copyright (c) 2012--2014, The University of Sheffield. See the file
 *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  $Id: IdfCalculation.java 17538 2014-03-04 21:48:49Z adamfunk $
 */
package gate.termraider.modes;

import gate.termraider.util.Utilities;

/**
 * The selectable formulas for deriving an IDF-style weight from a
 * document-frequency count and a corpus size.  Every formula adds one to the
 * document frequency before dividing by it.
 */
public enum IdfCalculation {
  Logarithmic,
  LogarithmicScaled,
  Scaled,
  Natural;

  /* These calculations are partly based on Manning & Schütze,
   * Foundations of Statistical NLP, section 15.2 (p.544).
   */

  /**
   * Computes the weight for the given mode.
   *
   * <ul>
   *   <li>{@code LogarithmicScaled}: {@code 1 + log2(corpusSize / (rawDF + 1))}</li>
   *   <li>{@code Logarithmic}: {@code 1 + log2(1 / (rawDF + 1))}</li>
   *   <li>{@code Scaled}: {@code (1 + corpusSize) / (rawDF + 1)}</li>
   *   <li>{@code Natural} (and any other value, including {@code null}):
   *       {@code 1 / (rawDF + 1)}</li>
   * </ul>
   *
   * @param mode the formula to apply
   * @param rawDF the document-frequency count (presumably the number of
   *        documents containing the term -- confirm against callers)
   * @param corpusSize the corpus size (presumably the total number of
   *        documents -- confirm against callers)
   * @return the weight produced by the selected formula
   */
  public static double calculate(IdfCalculation mode, int rawDF, int corpusSize) {
    // Shared denominator: the +1 keeps it non-zero when rawDF is 0.
    final double smoothedDf = ((double) rawDF) + 1.0;
    final double docCount = (double) corpusSize;

    // A null mode matches none of the named formulas, so it gets the same
    // result as Natural; guard here because switching on null would throw.
    if (mode == null) {
      return 1.0 / smoothedDf;
    }

    switch (mode) {
      case LogarithmicScaled:
        return 1.0 + Utilities.log2(docCount / smoothedDf);

      case Logarithmic:
        return 1.0 + Utilities.log2(1.0 / smoothedDf);

      case Scaled:
        return (1.0 + docCount) / smoothedDf;

      default:
        // must be Natural
        return 1.0 / smoothedDf;
    }
  }

}