// Source: releases/gate-7.1-build4485-ALL/plugins/TermRaider/src/gate/termraider/util/Utilities.java
 
/*
 *  Copyright (c) 2008--2012, The University of Sheffield. See the file
 *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  $Id: Utilities.java 16320 2012-11-23 13:56:00Z adamfunk $
 */
package gate.termraider.util;

import gate.*;
import gate.creole.ANNIEConstants;
import java.io.*;
import java.net.*;
import java.util.*;
import gate.termraider.bank.*;


public class Utilities implements ANNIEConstants {

  /** File extension string for CSV output, without a leading dot. */
  public static final String EXTENSION_CSV = "csv";
  /** File extension string for RDF output, without a leading dot. */
  public static final String EXTENSION_RDF = "rdf";

  private static double log10of2;
  
  static {
    log10of2 = Math.log10(2.0);
  }


  /**
   * Computes the arithmetic mean of a list of doubles.
   *
   * @param list the values to average; must not be null or contain null elements
   * @return the mean of the values, or 0.0 when the list is empty
   */
  public static double meanDoubleList(List<Double> list) {
    final int count = list.size();
    if (count == 0) {
      return 0.0;
    }

    // Accumulate in list order so the floating-point result is stable.
    double sum = 0.0;
    Iterator<Double> values = list.iterator();
    while (values.hasNext()) {
      sum += values.next().doubleValue();
    }
    return sum / count;
  }
  
  
  /**
   * Maps a raw score onto a 0-100 style scale using
   * {@code 100 * (1 - 1 / (1 + log10(1 + score)))}.
   * A score of 0 yields 0; larger scores approach 100.
   *
   * @param score the raw score
   * @return the normalized score
   */
  public static double normalizeScore(double score) {
    final double logTerm = Math.log10(1.0 + score);
    final double reciprocal = 1.0 / (1.0 + logTerm);
    return 100.0 * (1.0 - reciprocal);
  }

  

  /**
   * Converts an arbitrary object to a {@code Double}.
   * {@link Number} instances are converted with {@code doubleValue()};
   * anything else is parsed from its {@code toString()} form.
   *
   * @param x the object to convert; must not be null
   * @return the numeric value as a Double
   * @throws NumberFormatException if a non-Number's string form is not a parsable double
   */
  public static Double convertToDouble(Object x) {
    if (!(x instanceof Number)) {
      return Double.valueOf(x.toString());
    }

    return Double.valueOf(((Number) x).doubleValue());
  }

  
  /**
   * Matches each maximal run of characters that are neither ASCII
   * alphanumerics nor Unicode upper/lower-case letters.  Compiled once
   * rather than on every call.
   */
  private static final java.util.regex.Pattern UNCLEAN_RUN =
      java.util.regex.Pattern.compile("[^\\p{Alnum}\\p{Lu}\\p{Ll}]+");

  /**
   * Produces a string suitable for embedding in URIs: the input is trimmed,
   * then every run of characters other than ASCII alphanumerics and Unicode
   * upper/lower-case letters is replaced by a single underscore.
   *
   * @param input the raw string; must not be null
   * @return the cleaned string
   */
  public static String veryCleanString(String input) {
    return UNCLEAN_RUN.matcher(input.trim()).replaceAll("_");
  }

  
  
  /**
   * Builds an identifier consisting of the prefix, a freshly generated
   * random UUID in its string form, and the suffix.
   *
   * @param prefix text placed before the UUID
   * @param suffix text placed after the UUID
   * @return the concatenated identifier
   */
  public static String generateID(String prefix, String suffix) {
    StringBuilder id = new StringBuilder();
    id.append(prefix);
    id.append(java.util.UUID.randomUUID());
    id.append(suffix);
    return id.toString();
  }

  
  /**
   * Looks up a resource through the class loader that loaded the given
   * termbank's class.
   *
   * @param termbank the termbank whose class loader is used for the lookup
   * @param filename the resource name passed to {@code ClassLoader.getResource}
   * @return the resource URL as returned by the class loader
   */
  public static URL getUrlInJar(AbstractTermbank termbank, String filename) {
    return termbank.getClass().getClassLoader().getResource(filename);
  }
  
  /**
   * Returns the {@code toString()} form of every key in a feature map,
   * in the iteration order of its key set.
   *
   * @param fm the feature map; may be null
   * @return a new list of key strings, empty when {@code fm} is null
   */
  public static List<String> keysAsStrings(FeatureMap fm) {
    List<String> names = new ArrayList<String>();
    if (fm == null) {
      return names;
    }

    Iterator<?> keys = fm.keySet().iterator();
    while (keys.hasNext()) {
      names.add(keys.next().toString());
    }
    return names;
  }


  /**
   * Returns the {@code toString()} form of every value in a feature map.
   * Values are fetched key by key, in the iteration order of the key set.
   *
   * @param fm the feature map; may be null
   * @return a new list of value strings, empty when {@code fm} is null
   */
  public static List<String> valuesAsStrings(FeatureMap fm) {
    List<String> strings = new ArrayList<String>();
    if (fm == null) {
      return strings;
    }

    for (Iterator<?> keys = fm.keySet().iterator(); keys.hasNext(); ) {
      Object value = fm.get(keys.next());
      strings.add(value.toString());
    }
    return strings;
  }
  
  
  /**
   * Computes the lemma-based canonical form of a token and stores it on the
   * token under the feature name {@code "canonical"}.
   *
   * @param token the annotation to update
   * @param doc the document containing the annotation
   * @param lemmaFeatureName name of the feature holding the token's lemma
   */
  public static void setCanonicalFromLemma(Annotation token, Document doc, String lemmaFeatureName) {
    final String lemmaBased = getCanonicalFromLemma(token, doc, lemmaFeatureName);
    final FeatureMap features = token.getFeatures();
    features.put("canonical", lemmaBased);
  }

  
  /**
   * Derives a lower-cased canonical form for a token, preferring its lemma.
   * <p>
   * The lemma feature is used when it is present, non-null, non-empty and
   * not the placeholder {@code "<unknown>"} (compared after lower-casing).
   * Otherwise the result falls back to
   * {@link #getCanonicalFromString(Annotation, Document)}, i.e. the token's
   * string feature or, failing that, the document text under the annotation.
   * <p>
   * Lower-casing uses {@code String.toLowerCase()} and therefore the JVM's
   * default locale.
   *
   * @param token the annotation to read
   * @param doc the document containing the annotation
   * @param lemmaFeatureName name of the feature holding the token's lemma
   * @return the canonical form
   */
  public static String getCanonicalFromLemma(Annotation token, Document doc, String lemmaFeatureName) {
    // Single lookup; a feature that is present but null is treated like a
    // missing one instead of causing a NullPointerException.
    Object lemma = token.getFeatures().get(lemmaFeatureName);
    if (lemma != null) {
      String canonical = lemma.toString().toLowerCase();
      if (!"".equals(canonical) && !"<unknown>".equals(canonical)) {
        return canonical;
      }
    }

    // No usable lemma: fall back to the token string / document text.
    return getCanonicalFromString(token, doc);
  }


  /**
   * Computes the string-based canonical form of a token and stores it on the
   * token under the feature name {@code "canonical"}.
   *
   * @param token the annotation to update
   * @param doc the document containing the annotation
   */
  public static void setCanonicalFromString(Annotation token, Document doc) {
    final String stringBased = getCanonicalFromString(token, doc);
    final FeatureMap features = token.getFeatures();
    features.put("canonical", stringBased);
  }

  
  /**
   * Derives a lower-cased canonical form for a token from its surface string.
   * <p>
   * The value of the {@code TOKEN_STRING_FEATURE_NAME} feature is used when it
   * is present and non-null; otherwise the document text covered by the
   * annotation is used.
   * <p>
   * Lower-casing uses {@code String.toLowerCase()} and therefore the JVM's
   * default locale.
   *
   * @param token the annotation to read
   * @param doc the document containing the annotation
   * @return the canonical form
   */
  public static String getCanonicalFromString(Annotation token, Document doc) {
    // Single lookup; a feature that is present but null is treated like a
    // missing one instead of causing a NullPointerException.
    Object tokenString = token.getFeatures().get(TOKEN_STRING_FEATURE_NAME);
    if (tokenString != null) {
      return tokenString.toString().toLowerCase();
    }

    return gate.Utils.stringFor(doc, token).toLowerCase();
  }

  
  /**
   * Returns a label for a document: its source URL in string form when one
   * is set, otherwise the document's name.
   *
   * @param document the document to describe
   * @return the source URL string, or the document name if the URL is null
   */
  public static String sourceOrName(Document document) {
    final URL source = document.getSourceUrl();
    return (source != null) ? source.toString() : document.getName();
  }
  
  
  
  
  
  /**
   * Appends an extension to a file name that does not already contain a dot.
   * A file whose name contains {@code '.'} anywhere is returned unchanged.
   *
   * @param file the file to check
   * @param extension the extension to add, with or without a leading dot
   * @return the same file if its name contains a dot, otherwise a new file
   *         in the same parent directory with the extension appended
   */
  public static File addExtensionIfNotExtended(File file, String extension) {
    final String baseName = file.getName();
    if (baseName.indexOf('.') >= 0) {
      return file;
    }

    // Ensure exactly one dot separates the name from the extension.
    final String dotted = extension.startsWith(".") ? extension : "." + extension;
    return new File(file.getParentFile(), baseName + dotted);
  }

  
  /**
   * Null-tolerant conversion of an Integer to its decimal string.
   *
   * @param i the value to render; may be null
   * @return the decimal string, or the literal {@code "<null>"} when {@code i} is null
   */
  public static String integerToString(Integer i) {
    return (i == null) ? "<null>" : i.toString();
  }
  
  
  /**
   * Computes the base-2 logarithm of a value.
   *
   * @param input the value whose logarithm is taken
   * @return log2(input), computed from base-10 logarithms
   */
  public static double log2(double input) {
    // Change of base: log_2(x) = log_10(x) / log_10(2).
    final double log10OfInput = Math.log10(input);
    return log10OfInput / log10of2;
  }


}