Log in Help
Print
Home 〉 gate 〉 plugins 〉 Parser_SUPPLE 〉 src 〉 shef 〉 nlp 〉 supple 〉 category 〉 Category.java
 
package shef.nlp.supple.category;

import gate.FeatureMap;
import gate.creole.ExecutionException;
import gate.util.Err;
import gate.util.SimpleFeatureMapImpl;

import java.util.Hashtable;
import java.util.Iterator;
import java.util.TreeSet;

import shef.nlp.supple.utils.EmptyFeatureMap;
import shef.nlp.supple.utils.IllegalCategoryName;

/**
 * A named category (a part-of-speech tag such as "n" or "v", or the
 * non-POS category "list_np") together with a map of feature values.
 *
 * The class keeps static registries of the valid category names and of the
 * default feature map for each of them, and can render an instance as a
 * textual term through {@link #toSUPPLEFormat()}.
 *
 * Instances are mutable. Two categories are equal when both their names and
 * their feature maps are equal.
 */
public class Category
{
	/* Every valid category name: the union of NE_CAT and POS_CAT. */
	static TreeSet ALL_CAT = new TreeSet();
	/* Names of the non-POS categories. */
	static TreeSet NE_CAT = new TreeSet();
	/* Names of the part-of-speech categories. */
	static TreeSet POS_CAT = new TreeSet();
	/* Category name -> default FeatureMap for that category. */
	static Hashtable DEF_CAT = new Hashtable();

	/* DEFAULT FEATURE MAPS FOR POS AND SYNTACTIC CATEGORIES */
	static FeatureMap DEF_POS = new SimpleFeatureMapImpl();
	static FeatureMap N_POS = new SimpleFeatureMapImpl();
	static FeatureMap A_POS = new SimpleFeatureMapImpl();
	static FeatureMap V_POS = new SimpleFeatureMapImpl();
	static FeatureMap TOP_POS = new SimpleFeatureMapImpl();
	static FeatureMap BOTTOM_POS = new SimpleFeatureMapImpl();

	/* CONSTANT FEATURE NAMES VALID FOR BUCHART PARSING */
	/* Left non-final so that existing code assigning to them still compiles; treat them as constants. */
	public static String S_FORM = "s_form";
	public static String M_ROOT = "m_root";
	public static String M_AFFIX = "m_affix";
	public static String TEXT = "text";
	public static String EDGE = "edge";
	public static String SEM = "sem";
	public static String HEAD = "head";
	public static String SOURCE = "source";
	public static String PERSON = "person";
	public static String NUMBER = "number";
	public static String GENDER = "gender";
	public static String TENSE = "tense";
	public static String ASPECT = "aspect";
	public static String VOICE = "voice";
	public static String VFORM = "vform";
	public static String DEGREE = "degree";
	public static String NE_TAG = "ne_tag";
	public static String NE_TYPE = "ne_type";

	/* ANY VALUE */
	static String ANY = "_";
	/* Empty feature value. */
	static String EMPTY = "";
	/* Default value of the TEXT feature. */
	static String BODY = "body";

	/* Category name; stays null until set when the no-argument constructor is used. */
	String name;

	/** @return the category name, or null if none has been set */
	public String getCategory() { return name; }

	/** @param Name the new category name (not validated) */
	public void  setCategory(String Name) { name = Name; }

	/* Feature values of this category. */
	FeatureMap features = new SimpleFeatureMapImpl();

	/** @return the live feature map of this category (not a copy) */
	public FeatureMap getFeatures() { return features; }

	/** @param Features the map to use from now on; it is stored as is, not copied */
	public void setFeatures(FeatureMap Features) { features = Features; }

	/**
	 * Tells whether a name is one of the non-POS categories.
	 *
	 * @param type candidate category name, may be null
	 * @return true if NE_CAT contains the name; false for null
	 */
	static boolean is_ne(String type)
	{
		// A TreeSet with natural ordering rejects null look-ups, so answer directly.
		return type != null && NE_CAT.contains(type);
	}

	/**
	 * Compares name and feature map, treating two nulls as equal.
	 *
	 * @param o the object to compare with
	 * @return true if o is a Category with an equal name and equal features
	 */
	@Override
	public boolean equals(Object o)
	{
		if (this == o) return true;
		if (!(o instanceof Category)) return false;

		Category c = (Category)o;

		return sameOrBothNull(name, c.name) && sameOrBothNull(features, c.features);
	}

	/**
	 * Hash code derived from the category name only. Equal categories always
	 * have equal names, so this is consistent with {@link #equals(Object)}
	 * without depending on how the feature map computes its own hash.
	 *
	 * @return the hash of the name, or 0 when the name is null
	 */
	@Override
	public int hashCode()
	{
		return (name == null) ? 0 : name.hashCode();
	}

	static
	{
		/* FEATURES FOR DEFAULT POS CATEGORIES */
		putBaseFeatures(DEF_POS, ANY, EMPTY);

		/* FEATURES FOR NOUN POS CATEGORIES */
		putBaseFeatures(N_POS, ANY, EMPTY);
		N_POS.put(PERSON,ANY);
		N_POS.put(NUMBER,ANY);

		/* FEATURES FOR ADJ AND ADV CATEGORIES */
		putBaseFeatures(A_POS, ANY, EMPTY);
		A_POS.put(DEGREE,ANY);

		/* FEATURES FOR VERB CATEGORIES */
		putBaseFeatures(V_POS, ANY, EMPTY);
		V_POS.put(PERSON,ANY);
		V_POS.put(NUMBER,ANY);
		V_POS.put(TENSE,ANY);
		V_POS.put(VFORM,ANY);

		/* TOP */
		putBaseFeatures(TOP_POS, "top", "top");

		/* BOTTOM */
		putBaseFeatures(BOTTOM_POS, "bottom", "bottom");

		/* SET OF POS CATS FOR BUCHART */
		String[] posNames = {
			"n", "pn", "v", "jj", "rb", "fw", "cd", "cc", "dt", "ex",
			"in", "ls", "md", "pdt", "pos", "pp", "pps", "rp", "to", "uh",
			"wdt", "wp", "wrb", "sym", "period", "comma", "top", "bottom"
		};
		for (int i = 0; i < posNames.length; i++)
		{
			POS_CAT.add(posNames[i]);
		}

		// NON POS CATS
		NE_CAT.add("list_np");

		/* ALL VALID CATEGORIES */
		ALL_CAT.addAll(NE_CAT);
		ALL_CAT.addAll(POS_CAT);

		// Every POS category starts with the generic defaults...
		Iterator ite_pos = POS_CAT.iterator();
		while(ite_pos.hasNext())
		{
			DEF_CAT.put(ite_pos.next(),DEF_POS);
		}

		/* DEFAULT SET OF CATEGORIES AND THEIR FEATURES */
		// ...and the categories below replace that entry with a richer map.
		DEF_CAT.put("n",N_POS);
		DEF_CAT.put("v",V_POS);
		DEF_CAT.put("jj",A_POS);
		DEF_CAT.put("pn",N_POS);
		DEF_CAT.put("rb",A_POS);
		DEF_CAT.put("top",TOP_POS);
		DEF_CAT.put("bottom",BOTTOM_POS);
		DEF_CAT.put("list_np",DEF_POS);
	}

	/** Creates a category with no name and an empty feature map. */
	public Category() {}

	/**
	 * Creates a category with the given name and a private copy of the given
	 * features. The name is not validated.
	 *
	 * @param Name     the category name
	 * @param Features the features to copy; null yields an empty feature map
	 */
	public Category(String Name, FeatureMap Features)
	{
		name = Name;
		features = copyOf(Features);
	}

	/**
	 * Creates a category with a validated name and an empty feature map.
	 *
	 * @param Name the category name; must be contained in ALL_CAT
	 * @throws IllegalCategoryName if the name is null or not a known category
	 */
	public Category(String Name) throws IllegalCategoryName
	{
		if(Name == null || !ALL_CAT.contains(Name))
		{
			throw new IllegalCategoryName(Name+" is invalid");
		}
		name = Name;
	}

	/**
	 * Builds a category carrying a copy of the default features registered
	 * for the given name.
	 *
	 * @param Name the category name
	 * @return a new category; its feature map is empty when no defaults are
	 *         registered for the name
	 */
	public static Category getDefaultCategory(String Name)
	{
		return new Category(Name, lookupDefaults(Name));
	}

	/**
	 * Wraps a value in single quotes, doubling every embedded single quote
	 * and replacing every newline with a space.
	 *
	 * @param value the text to quote; must not be null
	 * @return the quoted text
	 * @throws NullPointerException if value is null
	 */
	public static String quoteValue(String value)
	{
		if (value == null)
		{
			throw new NullPointerException("value must not be null");
		}

		int len = value.length();
		// A builder avoids re-copying the whole string for each appended character.
		StringBuilder output = new StringBuilder(len + 2);
		output.append('\'');
		for(int i=0; i<len; i++)
		{
			char ch = value.charAt(i);
			if(ch=='\'')
			{
				output.append("''");
			}
			else if(ch=='\n')
			{
				output.append(' ');
			}
			else
			{
				output.append(ch);
			}
		}
		output.append('\'');
		return output.toString();
	}

	/**
	 * Renders this category as <code>name(feature:value,...)</code>.
	 *
	 * POS categories emit s_form, m_root, m_affix and text as quoted values,
	 * followed by unquoted person/number for "n" and "pn", unquoted
	 * person/number/tense/vform for "v", and unquoted degree for "jj" and
	 * "rb". The category "list_np" emits the four base features plus ne_tag,
	 * ne_type (only when present) and gender, all quoted.
	 *
	 * Category names are compared by content, so a name that was read at
	 * run time is treated the same as a string literal.
	 *
	 * @return the rendered term, or an empty string (after reporting the
	 *         problem through Err) when the category name is not known
	 * @throws EmptyFeatureMap    declared for callers; not thrown by this implementation
	 * @throws ExecutionException declared for callers; not thrown by this implementation
	 * @throws NullPointerException if a feature that must be quoted is missing
	 * @throws ClassCastException   if a feature that must be quoted is not a String
	 */
	public String toSUPPLEFormat() throws EmptyFeatureMap, ExecutionException
	{
		String cat = this.getCategory();
		FeatureMap fm = this.getFeatures();
		StringBuilder output = new StringBuilder();

		if (cat != null && POS_CAT.contains(cat))
		{
			/* FOR POS CATEGORIES */
			output.append(cat).append('(');
			appendBaseFeatures(output, fm);

			if ("n".equals(cat) || "pn".equals(cat))
			{
				output.append(',');
				appendRawFeature(output, fm, PERSON);
				output.append(',');
				appendRawFeature(output, fm, NUMBER);
			}
			else if ("v".equals(cat))
			{
				output.append(',');
				appendRawFeature(output, fm, PERSON);
				output.append(',');
				appendRawFeature(output, fm, NUMBER);
				output.append(',');
				appendRawFeature(output, fm, TENSE);
				output.append(',');
				appendRawFeature(output, fm, VFORM);
			}
			else if ("jj".equals(cat) || "rb".equals(cat))
			{
				output.append(',');
				appendRawFeature(output, fm, DEGREE);
			}
			/* every other POS category only has the base features */

			output.append(')');
		}
		else if ("list_np".equals(cat) && NE_CAT.contains(cat))
		{
			output.append(cat).append('(');
			appendBaseFeatures(output, fm);
			output.append(',');
			appendQuotedFeature(output, fm, NE_TAG);
			output.append(',');

			// ne_type is optional and simply left out when absent.
			if (fm.get(NE_TYPE) != null)
			{
				appendQuotedFeature(output, fm, NE_TYPE);
				output.append(',');
			}

			appendQuotedFeature(output, fm, GENDER);
			output.append(')');
		}
		else
		{
			// The closing parenthesis is only written together with an opening
			// one, so an unknown category yields "" rather than a lone ")".
			Err.println("There is no category "+cat);
		}

		return output.toString();
	}

	/**
	 * Returns the default features registered for a category name.
	 *
	 * @param Name the category name
	 * @return a fresh copy of the defaults, so that callers cannot alter the
	 *         shared maps; null when no defaults are registered for the name
	 */
	public static FeatureMap getDefaultFeatureMap(String Name)
	{
		FeatureMap defaults = lookupDefaults(Name);
		return (defaults == null) ? null : copyOf(defaults);
	}

	/**
	 * Resets this instance to an empty noun ("n") or verb ("v") category
	 * carrying a private copy of the corresponding default features.
	 *
	 * @param Name "n" or "v", compared by content
	 * @throws IllegalCategoryName for any other name, including null
	 */
	public void emptyCategory(String Name) throws IllegalCategoryName
	{
		if ("n".equals(Name))
		{
			this.setCategory("n");
			// Copy: installing N_POS itself would let setFeatureValue modify the shared defaults.
			this.setFeatures(copyOf(N_POS));
			return;
		}
		if ("v".equals(Name))
		{
			this.setCategory("v");
			this.setFeatures(copyOf(V_POS));
			return;
		}

		throw new IllegalCategoryName(Name+" is invalid");
	}

	/**
	 * Stores one feature value in this category's feature map.
	 *
	 * @param fv supplies the feature name and its value; must not be null
	 */
	public void setFeatureValue(FeatureValue fv)
	{
		this.features.put(fv.feature,fv.value);
	}

	/** Prints the category name and the feature map to standard output. */
	public void show()
	{
		System.out.println("CAT: "+name);
		System.out.println("FEATURES: "+features);
	}

	/** Prints the feature map to standard output. */
	public void showFeatureMap()
	{
		System.out.println("Features "+features);
	}

	/* Null-safe equality: true when both are null or a.equals(b). */
	private static boolean sameOrBothNull(Object a, Object b)
	{
		return (a == null) ? (b == null) : a.equals(b);
	}

	/* Returns a new feature map holding the entries of source; empty when source is null. */
	private static FeatureMap copyOf(FeatureMap source)
	{
		FeatureMap copy = new SimpleFeatureMapImpl();
		if (source != null)
		{
			copy.putAll(source);
		}
		return copy;
	}

	/* Returns the shared default map registered for Name, or null (Hashtable rejects null keys). */
	private static FeatureMap lookupDefaults(String Name)
	{
		return (Name == null) ? null : (FeatureMap) DEF_CAT.get(Name);
	}

	/* Stores the four features every default map has: s_form, m_root, empty m_affix and text "body". */
	private static void putBaseFeatures(FeatureMap map, String sForm, String mRoot)
	{
		map.put(S_FORM,sForm);
		map.put(M_ROOT,mRoot);
		map.put(M_AFFIX,EMPTY);
		map.put(TEXT,BODY);
	}

	/* Appends s_form, m_root, m_affix and text as comma-separated quoted pairs (no trailing comma). */
	private static void appendBaseFeatures(StringBuilder out, FeatureMap fm)
	{
		appendQuotedFeature(out, fm, S_FORM);
		out.append(',');
		appendQuotedFeature(out, fm, M_ROOT);
		out.append(',');
		appendQuotedFeature(out, fm, M_AFFIX);
		out.append(',');
		appendQuotedFeature(out, fm, TEXT);
	}

	/* Appends key:'value' with the value passed through quoteValue. */
	private static void appendQuotedFeature(StringBuilder out, FeatureMap fm, String key)
	{
		out.append(key).append(':').append(quoteValue((String) fm.get(key)));
	}

	/* Appends key:value with the value written as is (a missing value prints as "null"). */
	private static void appendRawFeature(StringBuilder out, FeatureMap fm, String key)
	{
		out.append(key).append(':').append(fm.get(key));
	}
}