// Source: Home / gate / plugins / Lang_Welsh / src / wnlt / morph / Interpret.java
 
/*
 *  Interpret.java
 *  This file is part of Welsh Natural Language Toolkit (WNLT)
 *  (see http://gate.ac.uk/), and is free software, licenced under 
 *  the GNU Library General Public License, Version 2, June 1991
 *  
 */
package wnlt.morph;

import gate.creole.ResourceInstantiationException;
import java.lang.reflect.Method;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Pattern;

import wnlt.LexiconCY;

/**
 * <p>
 * Title: Interpret.java
 * </p>
 * <p>
 * Description: This is the main class which should be invoked to load the
 * rule file in the system and then to execute the program to find the root word
 * and its affix. The class has been modified for the purposes of the
 * Welsh Natural Language Toolkit.
 * </p>
 */
public class Interpret {

	/**
	 * instance of the ReadFile class which reads the file and stores each line
	 * of the given program in the arraylist which can be read using different
	 * methods of the ReadFile class. Set to null again at the end of
	 * init(URL, URL, String).
	 */
	private ReadFile file;

	/** Boolean variables to keep track on which section is being read */
	private boolean isDefineVarSession, isDefineRulesSession;

	/**
	 * Instance of Storage class, which is used store all the variables details.
	 * Set to null again at the end of init(URL, URL, String).
	 */
	private Storage variables;

	/** This variables keeps the record of available methods for the morphing */
	private Method[] methods;

	/** This variables holds the affix */
	private String affix;

	/**
	 * Category pattern: "VB" optionally followed by one of D, G, N, P, Z, or
	 * the literal "MD". Used together with RHS.isVerb() when selecting rules.
	 */
	private Pattern vPat = Pattern.compile("((VB)[DGNPZ]?)|(MD)");

	/**
	 * Category pattern: "NN" optionally followed by one of P, M, F, S.
	 * Used together with RHS.isNoun() when selecting rules.
	 */
	private Pattern nPat = Pattern.compile("(NN)[PMFS]?");

	/** Receives the input word and performs the stemming function of a rule */
	MorphFunctions morphInst;

	/** Compiled left-hand-side pattern of each rule, addressed by pattern index */
	List<Pattern> patterns = new ArrayList<Pattern>();
	/** Character/state pairs grouped by the index passed to addState/getState */
	List<List<CharClass>> fsms = new ArrayList<List<CharClass>>();
	
	/**
	 * The initial state of the FSM that backs this morpher
	 */
	protected FSMState initialState;

	//protected Set lastStates;
	
	/** Lexicon of lemma input read from an external file */ 
	LexiconCY lexicon;
	

	/**
	 * Loads the rule file, builds the FSM from it and then loads the lexicon.
	 *
	 * @param ruleFileURL location of the rule file handed to ReadFile
	 * @param lexiconURL  location of the lexicon; must not be null
	 * @param encoding    encoding handed to the LexiconCY constructor
	 * @throws ResourceInstantiationException if the rule file cannot be read
	 *         or interpreted, if lexiconURL is null, or if creating the
	 *         lexicon throws any exception (which is wrapped)
	 */
	public void init(URL ruleFileURL, URL lexiconURL, String encoding) throws ResourceInstantiationException {
		// state needed only while the rule file is being parsed
		variables = new Storage();
		prepareListOfMorphMethods();
		file = new ReadFile(ruleFileURL);
		isDefineVarSession = false;
		isDefineRulesSession = false;
		affix = null;
		morphInst = new MorphFunctions();

		// read the program first, then turn it into the FSM rooted at initialState
		readProgram();
		initialState = new FSMState(-1);
		interpretProgram();

		// the parsing helpers are not needed once the rules are interpreted
		variables = null;
		file = null;

		// the lexicon is instantiated last
		if (lexiconURL == null) {
			throw new ResourceInstantiationException(
					"NoURL provided for the lexicon!");
		}
		try {
			this.lexicon = new LexiconCY(lexiconURL, encoding);
		} catch (Exception e) {
			throw new ResourceInstantiationException(e);
		}
	}
	
	/**
	 * Initialises this instance from an already initialised one. The rule
	 * patterns, FSM tables, initial state and lexicon are shared by reference
	 * (not copied); the affix, section flags and MorphFunctions instance are
	 * private to this instance.
	 *
	 * @param existingInterpret the instance whose shared state is reused
	 */
	public void init(Interpret existingInterpret) {
		// per-instance state
		this.morphInst = new MorphFunctions();
		this.isDefineVarSession = false;
		this.isDefineRulesSession = false;
		this.affix = null;

		// state shared with the existing instance
		this.lexicon = existingInterpret.lexicon;
		this.initialState = existingInterpret.initialState;
		this.patterns = existingInterpret.patterns;
		this.fsms = existingInterpret.fsms;
	}

	/**
	 * Pairs a character with an FSM state. Instances are stored in the
	 * per-index lists of {@code fsms} by addState and looked up by getState.
	 */
	class CharClass {
		// the character this entry is registered for
		char ch;
		// the state registered for that character
		FSMState st;
	}
	
	/**
	 * Registers a state for a character in the list at the given index. A new
	 * list is appended when index equals the current number of lists. If the
	 * character is already registered at that index nothing is changed.
	 *
	 * @param ch    the character to register
	 * @param fsm   the state to associate with the character
	 * @param index position of the list in {@code fsms}
	 */
	public void addState(char ch, FSMState fsm, int index) {
		if (fsms.size() == index) {
			fsms.add(new ArrayList<CharClass>());
		}

		List<CharClass> bucket = fsms.get(index);
		for (CharClass existing : bucket) {
			if (existing.ch == ch) {
				// first registration wins
				return;
			}
		}

		CharClass entry = new CharClass();
		entry.ch = ch;
		entry.st = fsm;
		bucket.add(entry);
	}

	
	/**
	 * Looks up the state registered for a character at the given index.
	 *
	 * @param ch    the character to look up
	 * @param index position of the list in {@code fsms}
	 * @return the registered state, or null when index is beyond the last
	 *         list or the character is not registered there
	 */
	public FSMState getState(char ch, int index) {
		if (index < fsms.size()) {
			for (CharClass entry : fsms.get(index)) {
				if (entry.ch == ch) {
					return entry.st;
				}
			}
		}
		return null;
	}
	
	/**
	 * Advances every state in the given set by one character.
	 *
	 * @param ch     the character to consume
	 * @param states the states to advance from
	 * @return the non-null states returned by {@code next} for both the
	 *         CHILD_STATE and the ADJ_STATE transition kinds; may be empty
	 */
	private Set<FSMState> getStates(char ch, Set<FSMState> states) {
		Set<FSMState> reached = new HashSet<FSMState>();
		for (FSMState current : states) {
			FSMState viaChild = current.next(ch, FSMState.CHILD_STATE);
			if (viaChild != null) {
				reached.add(viaChild);
			}

			FSMState viaAdjacent = current.next(ch, FSMState.ADJ_STATE);
			if (viaAdjacent != null) {
				reached.add(viaAdjacent);
			}
		}
		return reached;
	}

	/**
	 * Tells whether rules may be applied for the given category.
	 *
	 * @param category the category string to test
	 * @return true when category is "*" or fully matches vPat or nPat
	 */
	private boolean validCategory(String category) {
		return category.equals("*")
				|| vPat.matcher(category).matches()
				|| nPat.matcher(category).matches();
	}
	
	/**
	 * Finds the root of a word, consulting the lexicon first and the rules
	 * second.
	 * <p>
	 * The lower-cased word is looked up in the lexicon. If that yields a
	 * non-empty result it is returned and no rule is run. Otherwise, provided
	 * the category is accepted by validCategory, the word is fed character by
	 * character through the FSM, the rules attached to the reached states are
	 * ordered by pattern index, and the answer of the first rule that fires
	 * is returned.
	 *
	 * @param word     the word to analyse
	 * @param category the category of the word ("*" accepts any rule)
	 * @return the lexicon result, the root produced by the first rule that
	 *         fires, or the word itself when neither applies
	 */
	public String runMorpher(String word, String category) {
		// Forget the affix of the previous word before anything else, so that
		// getAffix() cannot report a stale affix after a lexicon hit.
		affix = null;

		// 1. lexicon lookup
		// NOTE(review): several lemmas for one word are joined without any
		// separator, exactly as before - confirm this is what callers expect.
		List<String> lemmas = lexicon.get(word.toLowerCase());
		if (lemmas != null) {
			StringBuilder lexiconResult = new StringBuilder();
			for (String lemma : lemmas) {
				lexiconResult.append(lemma);
			}
			if (lexiconResult.length() > 0) {
				return lexiconResult.toString();
			}
		}

		// 2. no match in the lexicon, so fall back to the rules file
		if (!validCategory(category)) {
			return word;
		}

		foundRule = false;
		Set<FSMState> states = new HashSet<FSMState>();
		states.add(initialState);
		for (int i = 0; i < word.length(); i++) {
			states = getStates(word.charAt(i), states);
			if (states.isEmpty()) {
				// no rule pattern can match this word
				return word;
			}
		}

		// we have all states here, collect their RHSes in pattern order
		SortedSet<RHS> rhses = new TreeSet<RHS>(new Comparator<RHS>() {
			@Override
			public int compare(RHS r1, RHS r2) {
				return r1.getPatternIndex() - r2.getPatternIndex();
			}
		});
		for (FSMState st : states) {
			rhses.addAll(st.getRHSes());
		}

		if (rhses.isEmpty()) {
			return word;
		}

		return executeRHSes(rhses, word, category);
	}
	/**
	 * Pattern index of the rule that most recently fired in executeRHSes;
	 * -1 until a rule has fired.
	 * NOTE(review): it is not reset between runMorpher calls, so it may
	 * refer to an earlier word.
	 */
	protected int patternIndex = -1;
	/**
	 * @return the current value of patternIndex
	 */
	public int getPatternIndex() {
	  return patternIndex;
	}
	
	/**
	 * Tries the given rules in their sorted order and stops at the first one
	 * that fires, recording its pattern index in patternIndex.
	 *
	 * @param rhses    the candidate rules, already sorted
	 * @param word     the word to analyse
	 * @param category the category of the word
	 * @return the answer of the first rule that fires, or the word itself
	 *         when no rule fires
	 */
	protected String executeRHSes(SortedSet<RHS> rhses, String word, String category) {
		foundRule = false;
		for (RHS candidate : rhses) {
			// executeRHS reports success through the foundRule flag
			String answer = executeRHS(word, category, candidate);
			if (foundRule) {
				patternIndex = candidate.getPatternIndex();
				return answer;
			}
		}
		return word;
	}
	
	/** Set by executeRule: true when the last executed rule produced an answer */
	protected boolean foundRule = false;

	/**
	 * Executes a rule if it is applicable to the category: always for "*",
	 * for verb rules when the category matches vPat, and for noun rules when
	 * the category matches nPat.
	 *
	 * @param word     the word to analyse
	 * @param category the category of the word
	 * @param rhs      the rule to try
	 * @return the result of executeRule, or the word itself when the rule is
	 *         not applicable to the category
	 */
	protected String executeRHS(String word, String category, RHS rhs) {
		boolean applicable = category.equals("*")
				|| (rhs.isVerb() && vPat.matcher(category).matches())
				|| (rhs.isNoun() && nPat.matcher(category).matches());
		return applicable ? executeRule(word, rhs) : word;
	}

	/**
	 * Applies one rule to the word. The word must fully match the rule's
	 * pattern; the stemming function selected by the rule's method index is
	 * then invoked on morphInst with the rule's parameters. On success the
	 * affix is stored and foundRule is set to true; otherwise foundRule is
	 * set to false.
	 *
	 * @param word the word to analyse
	 * @param rhs  the rule to apply
	 * @return the answer of the stemming function, or the word itself when
	 *         the pattern does not match or no answer is produced
	 */
	private String executeRule(String word, RHS rhs) {
		Pattern rulePattern = patterns.get(rhs.getPatternIndex());
		short function = rhs.getMethodIndex();

		if (!rulePattern.matcher(word).matches()) {
			foundRule = false;
			return word;
		}

		String[] args = rhs.getParameters();

		// hand the word to the morph functions before calling one of them
		morphInst.setInput(word);

		String stemmed;
		switch (function) {
		case ParsingFunctions.IRREG_STEM:
			stemmed = morphInst.irreg_stem(args[0], args[1]);
			break;
		case ParsingFunctions.NULL_STEM:
			stemmed = morphInst.null_stem();
			break;
		case ParsingFunctions.SEMIREG_STEM:
			stemmed = morphInst.semi_reg_stem(Integer.parseInt(args[0]), args[1]);
			break;
		case ParsingFunctions.STEM:
			stemmed = morphInst.stem(Integer.parseInt(args[0]), args[1], args[2]);
			break;
		default:
			// unknown method index: treated as "no answer"
			stemmed = null;
			break;
		}

		if (stemmed == null) {
			foundRule = false;
			return word;
		}

		this.affix = morphInst.getAffix();
		foundRule = true;
		return stemmed;
	}

	/**
	 * This method prepares the list of available methods in the MorphFunctions
	 * class. The list holds every method declared by MorphFunctions itself
	 * (as returned by getDeclaredMethods) and is consulted by
	 * isMethodAvailable when a rule is validated.
	 */
	private void prepareListOfMorphMethods()
			throws ResourceInstantiationException {
		methods = MorphFunctions.class.getDeclaredMethods();
	}

	/**
	 * Reads the program (rule) file through the ReadFile instance.
	 *
	 * @throws ResourceInstantiationException if ReadFile reports that the
	 *         file could not be read
	 */
	private void readProgram() throws ResourceInstantiationException {
		// read() returns false when reading was not successful
		if (!file.read()) {
			// the message previously ran "the" and "program" together
			generateError("Some errors reading program file.. please check the "
					+ "program and try again");
		}
	}

	/**
	 * Reads each line of the program and interprets it. Null and blank lines
	 * are skipped; every other line is trimmed and dispatched on the command
	 * type returned by findCommandType:
	 * -1 comment, 0 defineVars, 1 defineRules, 2 variable declaration,
	 * 3 rule declaration; anything else is a syntax error.
	 *
	 * @throws ResourceInstantiationException on a syntax error or when one
	 *         of the command handlers reports an error
	 */
	private void interpretProgram() throws ResourceInstantiationException {
		while (file.hasNext()) {
			String raw = file.getNext();
			if (raw == null) {
				continue;
			}

			// remove the surrounding white space
			String currentLine = raw.trim();
			if (currentLine.length() == 0) {
				continue;
			}

			switch (findCommandType(currentLine)) {
			case -1:
				// comment, nothing to do
				break;
			case 0:
				defineVarsCommand();
				break;
			case 1:
				defineRulesCommand();
				break;
			case 2:
				variableDeclarationCommand(currentLine);
				break;
			case 3:
				ruleDeclarationCommand(currentLine);
				break;
			default:
				generateError("Syntax Error at line " + file.getPointer()
						+ " : " + currentLine);
				break;
			}
		}
	}

	/**
	 * Interprets a line and returns an integer indicating the type of the
	 * command it contains.
	 *
	 * @param line
	 *            The (trimmed) program command to be interpreted
	 * @return -1 for a comment (line starting with "//" or "#"), 0 for
	 *         "defineVars", 1 for "defineRules", 2 for a variable declaration
	 *         (only inside the variable section), 3 for a rule declaration
	 *         (only inside the rule section, line starting with "&lt;"), and
	 *         Codes.ERROR_CODE otherwise
	 */
	private int findCommandType(String line) {
		// startsWith is used instead of substring(0, 2) / charAt(0) so that a
		// one-character line such as a lone "#" no longer throws
		// StringIndexOutOfBoundsException
		if (line.startsWith("//") || line.startsWith("#")) {
			return -1;
		}
		if (line.equals("defineVars")) {
			return 0;
		}
		if (line.equals("defineRules")) {
			return 1;
		}

		// declarations must consist of exactly two parts around "==>"
		boolean twoParts = line.split("==>").length == 2;
		if (isDefineVarSession && twoParts) {
			return 2;
		}
		if (isDefineRulesSession && line.startsWith("<") && twoParts) {
			return 3;
		}
		return Codes.ERROR_CODE;
	}

	/**
	 * Processes the command that opens the variable section. The section may
	 * be opened only once and only before the rule section.
	 *
	 * @throws ResourceInstantiationException if the variable section is
	 *         already open or the rule section has already been opened
	 */
	private void defineVarsCommand() throws ResourceInstantiationException {
		if (!isDefineVarSession && !isDefineRulesSession) {
			isDefineVarSession = true;
			return;
		}

		if (isDefineVarSession) {
			// variable section can only be defined once
			generateError("Variable Section already defined - see line "
					+ file.getPointer());
		} else {
			generateError("Variable section must be declared before the Rule Section - see line "
					+ file.getPointer());
		}
	}

	/**
	 * Processes the command that opens the rule section. Opening the rule
	 * section closes the variable section.
	 *
	 * @throws ResourceInstantiationException if the rule section has already
	 *         been opened
	 */
	private void defineRulesCommand() throws ResourceInstantiationException {
		if (isDefineRulesSession) {
			generateError("Rule Section already defined - see line "
					+ file.getPointer());
			return;
		}
		isDefineRulesSession = true;
		isDefineVarSession = false;
	}

	/**
	 * Processes a variable declaration of the form {@code name ==> value}:
	 * determines the type of the value, creates the matching Variable
	 * instance, assigns the value and stores the variable's pattern under its
	 * name.
	 * 
	 * @param line the trimmed declaration line
	 * @throws ResourceInstantiationException if the line does not have
	 *         exactly two parts, the value type is not recognised, the value
	 *         cannot be assigned, or the variable is already defined
	 */
	private void variableDeclarationCommand(String line)
			throws ResourceInstantiationException {
		// split once and reuse both halves
		String[] parts = line.split("==>");
		if (parts.length != 2) {
			generateError("Error in declaring variable at line : "
					+ file.getPointer() + " : " + line);
			return;
		}
		String varName = parts[0].trim();
		String varValue = parts[1].trim();

		// based on the variable type create the instance
		int valueType = ParsingFunctions.findVariableType(varValue);
		Variable varInst;
		switch (valueType) {
		case Codes.CHARACTER_RANGE_CODE:
			varInst = new CharacterRange();
			break;
		case Codes.CHARACTER_SET_CODE:
			varInst = new CharacterSet();
			break;
		case Codes.STRING_SET_CODE:
			varInst = new StringSet();
			break;
		default:
			// covers Codes.ERROR_CODE and any other unexpected code, which
			// previously left varInst null and failed with an NPE below
			generateError(varName + " - Variable Syntax Error - see line "
					+ file.getPointer() + " : " + line);
			return;
		}

		// set the values in the variable
		if (!varInst.set(varName, varValue)) {
			generateError(varName
					+ " - Syntax Error while assigning value to the "
					+ "variable - see line " + file.getPointer() + " : " + line);
		}

		// and finally add the variable in
		if (!variables.add(varName, varInst.getPattern())) {
			generateError(varName + " - Variable already defined - see "
					+ "line " + file.getPointer() + " : " + line);
		}

		varInst.resetPointer();
	}

	/**
	 * Processes a rule declaration of the form
	 * {@code <category>pattern ==> method(arguments)}.
	 * <p>
	 * The right-hand side is validated first (the method must be available),
	 * then the category is taken from between the leading '&lt;' and the
	 * first '&gt;', the remaining pattern is converted to a regular
	 * expression and compiled, and finally the rule is attached to every FSM
	 * state produced for each normalised form of the pattern.
	 * 
	 * @param line the trimmed rule line
	 * @throws ResourceInstantiationException if the line does not have
	 *         exactly two parts, the method is not available, or the
	 *         category is not closed by '&gt;'
	 */
	private void ruleDeclarationCommand(String line)
			throws ResourceInstantiationException {
		// LHS is the part to be parsed, RHS names the function to call
		String[] ruleParts = line.split("==>");
		if (ruleParts.length != 2) {
			generateError("Error in declaring rule at line : "
					+ file.getPointer() + " : " + line);
		}

		// RHS first: the called method must exist in MorphFunctions
		String methodCalled = ruleParts[1].trim();
		if (!isMethodAvailable(methodCalled)) {
			generateError("Syntax error - method does not exists - see line "
					+ file.getPointer() + " : " + line);
		}

		// LHS: the category sits between position 1 and the first '>'
		String lhs = ruleParts[0];
		int close = lhs.indexOf('>', 1);
		if (close < 0) {
			generateError("Syntax error - pattern not written properly - see line "
					+ file.getPointer() + " : " + line);
		}
		String category = lhs.substring(1, close);

		// the rule's pattern index is the position its pattern is about to take
		RHS rhs = new RHS(ruleParts[1], category, (short) patterns.size());
		String patternText = lhs.substring(close + 1).trim();
		String regExp = ParsingFunctions.convertToRegExp(patternText, variables);
		patterns.add(Pattern.compile(regExp));

		for (String rule : ParsingFunctions.normlizePattern(regExp)) {
			// every normalised form starts again from the initial state
			Set<FSMState> start = new HashSet<FSMState>();
			start.add(initialState);
			Set<Set<FSMState>> lss = new HashSet<Set<FSMState>>();
			lss.add(start);

			for (PatternPart part : ParsingFunctions.getPatternParts(rule.trim())) {
				lss = ParsingFunctions.createFSMs(part.getPartString(),
						part.getType(), lss, this);
			}

			// attach the rule to every state left after the last part
			for (Set<FSMState> lastStates : lss) {
				for (FSMState st : lastStates) {
					st.addRHS(rhs);
				}
			}
		}
	}

	/**
	 * Computes the intersection of two sets of states.
	 *
	 * @param a the set whose elements are tested
	 * @param b the set the elements are tested against
	 * @return a new set holding the elements of a that are contained in b
	 */
	@SuppressWarnings("unused")
	private Set<FSMState> intersect(Set<FSMState> a, Set<FSMState> b) {
		Set<FSMState> common = new HashSet<FSMState>();
		for (FSMState candidate : a) {
			if (b.contains(candidate)) {
				common.add(candidate);
			}
		}
		return common;
	}

	/**
	 * Debugging aid: prints the FSM to standard output, starting from the
	 * initial state with no indentation.
	 */
	@SuppressWarnings("unused")
	private void drawFSM() {
		System.out.println("Initial:");
		drawFSM(initialState, "");
	}

	/**
	 * Debugging aid: prints the transitions of one state to standard output.
	 * Child transitions are followed recursively with two more spaces of
	 * indentation; adjacent transitions are only listed.
	 *
	 * @param st    the state to print
	 * @param space the indentation prefix for this level
	 */
	private void drawFSM(FSMState st, String space) {
		CharMap transitions = st.getTransitionFunction();

		char[] childKeys = transitions.getItemsKeys();
		if (childKeys != null) {
			System.out.println(space + "Child:");
			for (char key : childKeys) {
				System.out.println(space + "'" + key + "':");
				drawFSM(transitions.get(key, FSMState.CHILD_STATE), space + "  ");
			}
		}

		char[] adjacentKeys = transitions.getAdjitemsKeys();
		if (adjacentKeys != null) {
			System.out.println("ADJ:");
			for (char key : adjacentKeys) {
				// adjacent states are listed but not descended into
				System.out.println(space + "'" + key + "' :");
			}
		}
	}

	/**
	 * Takes the right-hand side of a rule, e.g. {@code stem(2,"","s")}, and
	 * checks whether a method with that name and number of arguments is
	 * declared by MorphFunctions.
	 * <p>
	 * Each argument must be a double-quoted string, a boolean or an integer
	 * (as recognised by ParsingFunctions). {@code null_stem} is accepted
	 * whatever its arguments. For any other method an overload with the same
	 * arity must exist and all of its parameters must be of type String,
	 * boolean or int.
	 * 
	 * @param method the trimmed method call as written in the rule
	 * @return <tt>true</tt> when the call is acceptable, <tt>false</tt>
	 *         otherwise
	 */
	private boolean isMethodAvailable(String method) {
		// the call must look like name(...)
		int index = method.indexOf("(");
		if (index == -1 || index == 0
				|| method.charAt(method.length() - 1) != ')') {
			return false;
		}

		String methodName = method.substring(0, index);
		String arguments = method.substring(index + 1, method.length() - 1);

		// An empty argument list is a zero-length array rather than null, so
		// the arity comparison below can no longer fail with an NPE for
		// calls such as "stem()".
		String[] parameters = arguments.trim().length() == 0
				? new String[0]
				: arguments.split(",");

		// only three kinds of argument are recognised: String, boolean, int
		for (String parameter : parameters) {
			boolean quoted = parameter.startsWith("\"")
					&& parameter.endsWith("\"");
			if (!quoted && !ParsingFunctions.isBoolean(parameter)
					&& !ParsingFunctions.isInteger(parameter)) {
				// type cannot be recognised
				return false;
			}
		}

		// NOTE(review): as before, only the arity is compared with the
		// declared methods; the argument kinds found above are not matched
		// against the declared parameter types.
		for (Method candidate : methods) {
			if (!candidate.getName().equals(methodName)) {
				continue;
			}
			if (methodName.equals("null_stem")) {
				return true;
			}

			Class<?>[] methodParams = candidate.getParameterTypes();
			if (methodParams.length != parameters.length) {
				// another overload may still match
				continue;
			}

			// every declared parameter must be of a supported type
			for (Class<?> paramType : methodParams) {
				String typeName = paramType.getName();
				if (!typeName.equals("java.lang.String")
						&& !typeName.equals("boolean")
						&& !typeName.equals("int")) {
					return false;
				}
			}
			return true;
		}

		// no method with that name and arity
		return false;
	}

	/**
	 * Reports an error on standard output and aborts by throwing. This
	 * method never returns normally.
	 * 
	 * @param mess
	 *            message to be printed and carried by the exception
	 * @throws ResourceInstantiationException always, with the message
	 *         prefixed by two line breaks
	 */
	private void generateError(String mess)
			throws ResourceInstantiationException {
		String report = "\n\n" + mess;
		System.out.println(report);
		System.out.println("Program terminated...");
		throw new ResourceInstantiationException(report);
	}

	/**
	 * Command-line entry point: loads the rules and the lexicon, analyses one
	 * word and prints its root and affix.
	 * <p>
	 * Arguments: rules file URL, word, POS category, and optionally the
	 * lexicon URL and the lexicon encoding. Previously the rules file URL was
	 * also passed as the encoding, which cannot be a valid encoding name; the
	 * encoding now defaults to "UTF-8". When no lexicon URL is given the
	 * rules file URL is still used for it, as before.
	 * NOTE(review): confirm that "UTF-8" is the right default for LexiconCY.
	 * 
	 * @param args rules URL, word, POS [, lexicon URL [, encoding]]
	 * @throws ResourceInstantiationException if initialisation fails
	 */
	public static void main(String[] args)
			throws ResourceInstantiationException {
		if (args == null || args.length < 3) {
			System.out
					.println("Usage : Interpret <Rules fileName> <word> <POS>"
							+ " [<Lexicon URL> [<encoding>]]");
			System.exit(-1);
		}

		// optional arguments, with backward-compatible defaults
		String lexiconLocation = args.length > 3 ? args[3] : args[0];
		String encoding = args.length > 4 ? args[4] : "UTF-8";

		Interpret interpret = new Interpret();
		try {
			interpret.init(new URL(args[0]), new URL(lexiconLocation), encoding);
		} catch (MalformedURLException mue) {
			throw new RuntimeException(mue);
		}
		String rootWord = interpret.runMorpher(args[1], args[2]);
		String affix = interpret.getAffix();
		System.out.println("Root : " + rootWord);
		System.out.println("affix : " + affix);
	}

	/**
	 * This method tells what was the affix to the provided word. The value is
	 * the one recorded by the most recent rule that produced an answer, or
	 * null if none has been recorded.
	 * 
	 * @return affix, possibly null
	 */
	public String getAffix() {
		return this.affix;
	}

	/**
	 * @return the initial state of the FSM that backs this morpher; null
	 *         until one of the init methods has assigned it
	 */
	public FSMState getInitialState() {
		return initialState;
	}
}