Log in Help
Print
Homereleasesgate-8.4-build5748-ALLpluginsTermRaidersrcgatetermraidermodes 〉 Normalization.java
 
/*
 *  Copyright (c) 2013--2014, The University of Sheffield. See the file
 *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  $Id: Normalization.java 18978 2015-10-29 15:55:39Z adamfunk $
 */
package gate.termraider.modes;

public enum Normalization {
  None,
  Hundred,
  Sigmoid,
  SigmoidOld;
  
  
  // Old scale was mushing too many terms together where rounding made them
  // all appear tied for 100.0.
  // The old scale is available as a non-default option for backward compatibility
  private static double xScaleDefault = 80;
  private static double xScaleOld = 4.8;

  
  
  public static double calculate(Normalization mode, Number raw) {
    if (mode == None) {
      return raw.doubleValue();
    }
    
    if (mode == Hundred) {
      return 100.0 * raw.doubleValue();
    }
    
    if (mode == SigmoidOld) {
      return normalizeScore(raw.doubleValue(), xScaleOld);
    }
    
    
    // must be sigmoid
    return normalizeScore(raw.doubleValue(), xScaleDefault);
  }
  
  
  /**
   * The following produces the right half of a sigmoid 
   * curve adjusted so that
   * f(0) = 0; f(inf) = 100; f(x>0) > 0
   * @param score from 0 to inf 
   * @return score from 0 to 100
   */
  private static double normalizeScore(double score, double xScale) {
    double norm = 2.0 / (1.0 + Math.exp(-score / xScale)) - 1.0;
    return (double) (100.0F * norm);
  }
  
  /* Note: Normalization mode does not apply to the AnnotationTermbank, since it
   * is derived from (presumably) already normalized tf.idf values.
   */

  
}