// Extracted from the GATE source browser: Home / releases / gate-5.1-beta2-build3402-ALL / plugins / Gazetteer_Ontology_Based / src / gate / clone / ql / regex / ExpressionFinder.java
 
/*
 *  ExpressionFinder.java
 *
 *  Copyright (c) 1998-2008, The University of Sheffield.
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 */
package gate.clone.ql.regex;

import java.util.logging.Logger;

import gate.clone.ql.CATConstants;

import org.apache.oro.text.awk.AwkCompiler;
import org.apache.oro.text.awk.AwkMatcher;
import org.apache.oro.text.regex.MalformedPatternException;
import org.apache.oro.text.regex.MatchResult;
import org.apache.oro.text.regex.Pattern;
import org.apache.oro.text.regex.PatternCompiler;
import org.apache.oro.text.regex.PatternMatcher;
import org.apache.oro.text.regex.PatternMatcherInput;
import org.apache.oro.text.regex.StringSubstitution;
import org.apache.oro.text.regex.Substitution;
import org.apache.oro.text.regex.Util;
/**
 * Regular-expression helpers for detecting camelCase words and for splitting
 * them into separate words. Patterns are compiled and matched with the
 * Jakarta ORO AWK classes ({@link AwkCompiler} / {@link AwkMatcher}).
 *
 * @author Danica Damljanovic
 *
 */
public class ExpressionFinder {
  // Kept as declared for compatibility; the static helpers below do not use it.
  Logger logger = Logger.getLogger(ExpressionFinder.class.getName());

  /**
   * Finds camelCase boundaries inside the input string and inserts the
   * substitute string (usually one space) between the lower-case and the
   * upper-case character of every occurrence. What counts as a boundary is
   * defined by {@code regularExpression}, which is expected to match exactly
   * two characters (the lower-case one followed by the upper-case one).
   *
   * Example: inputString="detectCamelCaseWord" call method: String
   * resultString = findAndSeparateCamelCases(inputString,
   * CATConstants.REGEX_CAMEL_CASE, " ");
   *
   * Result: "detect Camel Case Word"
   *
   * @param inputString the text to scan
   * @param regularExpression AWK-syntax pattern matching one camelCase
   *          boundary
   * @param substituteString the text inserted after the first character of
   *          every match
   * @return the input with the substitute inserted at every match; the
   *         unchanged input when nothing matches
   * @throws IllegalArgumentException if the pattern is malformed or matches
   *           fewer than two characters
   */
  public static String findAndSeparateCamelCases(String inputString,
    String regularExpression, String substituteString) {
    Pattern pattern = compilePattern(regularExpression);
    PatternMatcher matcher = new AwkMatcher();

    String current = inputString;
    PatternMatcherInput input = new PatternMatcherInput(current);
    // Each pass rewrites the first match and then rescans the rewritten text
    // from its start, until no match is left.
    while(matcher.contains(input, pattern)) {
      MatchResult result = matcher.getMatch();
      String foundMatch = result.toString();
      if(foundMatch.length() < 2) {
        throw new IllegalArgumentException("Pattern '" + regularExpression
          + "' must match at least two characters but matched '" + foundMatch
          + "'");
      }
      // Slice the match directly instead of indexing into split(""): the
      // leading empty element the old indices relied on is no longer
      // produced since Java 8. Characters after the first one are kept, so
      // a longer match does not lose text.
      String substitute =
        foundMatch.substring(0, 1) + substituteString
          + foundMatch.substring(1);

      Substitution aSubstitution = new StringSubstitution(substitute);
      // Replace only the first occurrence, i.e. the match found above.
      String newString =
        Util.substitute(matcher, pattern, aSubstitution, current, 1);
      if(newString.equals(current)) {
        // A no-op substitution (e.g. empty substitute string) would match
        // the same text again on every pass; stop instead of looping forever.
        break;
      }
      current = newString;
      input = new PatternMatcherInput(current);
    }
    return current;
  }

  /**
   * Tells whether the input string contains at least one match of the given
   * camelCase pattern.
   *
   * @param inputString the text to scan
   * @param regularExpression AWK-syntax pattern matching one camelCase
   *          boundary
   * @return true if the pattern matches anywhere in the input, false
   *         otherwise
   * @throws IllegalArgumentException if the pattern is malformed
   */
  public static boolean isCamelCase(String inputString, String regularExpression) {
    Pattern pattern = compilePattern(regularExpression);
    PatternMatcher matcher = new AwkMatcher();
    // The first successful search already answers the question.
    return matcher.contains(new PatternMatcherInput(inputString), pattern);
  }

  /**
   * Compiles an AWK-syntax pattern, reporting a malformed pattern as an
   * unchecked exception so that callers are not terminated.
   */
  private static Pattern compilePattern(String regularExpression) {
    PatternCompiler compiler = new AwkCompiler();
    try {
      return compiler.compile(regularExpression);
    }
    catch(MalformedPatternException e) {
      throw new IllegalArgumentException("Bad pattern: " + regularExpression,
        e);
    }
  }

  /**
   * Main method for testing.
   *
   * @param args ignored
   */
  public static final void main(String[] args) {

    String inputString = "camelCaseDetection";
    String resultString =
      findAndSeparateCamelCases(inputString, CATConstants.REGEX_CAMEL_CASE,
        " ");
    System.out.println(resultString);
  }
}