// Decompiled by Jad v1.5.8e. Copyright 2001 Pavel Kouznetsov.
// Jad home page: http://www.geocities.com/kpdus/jad.html
// Decompiler options: packimports(3)
// Source File Name: POSMapper.java
package com.ontotext.russie.morph;
import gate.Annotation;
import gate.AnnotationSet;
import gate.Resource;
import gate.annotation.AnnotationSetImpl;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.creole.metadata.RunTime;
import gate.util.OffsetComparator;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.StringTokenizer;
import com.ontotext.russie.RussIEConstants;
/**
 * Processing resource that writes a simplified part-of-speech tag onto
 * {@code Token} annotations.
 *
 * <p>For every {@code MSD} annotation in the input annotation set the value of
 * its {@code "type"} feature is looked up in a fixed table built by
 * {@link #init()}. The resulting tag (for example {@code "NN"} or
 * {@code "JJ"}) is stored in the {@code "category"} feature of the
 * {@code Token} annotation that {@code getStrict} returns for the MSD
 * annotation's start and end offsets.
 */
@CreoleResource(name = "POS Mapper",
comment = "Map complex Russian morphology tags into simpler POS categories",
helpURL = "http://gate.ac.uk/userguide/sec:misc-creole:language-plugins:russian")
public class POSMapper extends AbstractLanguageAnalyser implements
RussIEConstants {

  private static final long serialVersionUID = 748287388332117288L;

  protected static final boolean DEBUG = false;

  protected static final boolean DETAILED_DEBUG = false;

  /**
   * Name of the annotation set to read from; {@code null} or the empty string
   * selects the document's default annotation set.
   */
  protected String inputASName;

  /** Lookup table from MSD {@code "type"} value to simplified POS tag. */
  protected Map<String, String> categoriesMap;

  /**
   * Builds the MSD-type to POS-tag table and reports progress to the status
   * listeners.
   *
   * @return this resource
   * @throws ResourceInstantiationException declared by the overridden
   *           signature; not thrown by the code in this method
   */
  public Resource init() throws ResourceInstantiationException {
    fireStatusChanged("Init POS Mapper structures ...");
    initMap();
    fireProcessFinished();
    return this;
  }

  /**
   * (Re)creates {@link #categoriesMap} and fills it with the hard-coded
   * MSD-type to POS-tag pairs. Entries are registered in a fixed order; the
   * current size of the map is reported after several of the groups.
   */
  private void initMap() {
    categoriesMap = new HashMap<String, String>();

    final String adjTypes =
        "Au,Aupfpaa,Aupfpai,Aupfpd,Aupfpg,Aupfpi,Aupfpl,Aupfpn,Aupfsfa,Aupfsfd,Aupfsfg,Aupfsfi,Aupfsfl,Aupfsfn,Aupfsmaa,Aupfsmai,Aupfsmd,Aupfsmg,Aupfsmi,Aupfsml,Aupfsmn,Aupfsna,Aupfsnd,Aupfsng,Aupfsni,Aupfsnl,Aupfsnn,Aupsp,Aupssf,Aupssm,Aupssn";
    mapAll(adjTypes, "JJ");
    reportMapSize();

    categoriesMap.put("Auc", "JJR");
    categoriesMap.put("C", "CC");
    categoriesMap.put("D", "RB");
    categoriesMap.put("T", "RP");
    categoriesMap.put("P", "PP");
    categoriesMap.put("R", "IN");
    reportMapSize();

    final String nnTypes =
        "Nfa,Nfi,Nma,Nmi,Nmi2g,Nmi2l,Nmi2lg,Nmi2lgs2g,Nmi2lgs2l,Nmi2ls2g,Nna,Nni,Nfasa,Nfasd,Nfasg,Nfasi,Nfasl,Nfasn,Nfisa,Nfisd,Nfisg,Nfisi,Nfisl,Nfisn,Nmasa,Nmasd,Nmasg,Nmasi,Nmasl,Nmasn,Nmi2gsa,Nmi2gsd,Nmi2gsg,Nmi2gsi,Nmi2gsl,Nmi2gsn,Nmi2lgsa,Nmi2lgsd,Nmi2lgsg,Nmi2lgsi,Nmi2lgsl,Nmi2lgsn,Nmi2lsa,Nmi2lsd,Nmi2lsg,Nmi2lsi,Nmi2lsl,Nmi2lsn,Nmisa,Nmisd,Nmisg,Nmisi,Nmisl,Nmisn,Nnasa,Nnasd,Nnasg,Nnasi,Nnasl,Nnasn,Nnisa,Nnisd,Nnisg,Nnisi,Nnisl,Nnisn";
    mapAll(nnTypes, "NN");
    reportMapSize();

    final String nnsTypes =
        "Nfapa,Nfapd,Nfapg,Nfapi,Nfapl,Nfapn,Nfipa,Nfipd,Nfipg,Nfipi,Nfipl,Nfipn,Nmapa,Nmapd,Nmapg,Nmapi,Nmapl,Nmapn,Nmi2gpa,Nmi2gpd,Nmi2gpg,Nmi2gpi,Nmi2gpl,Nmi2gpn,Nmi2lgpa,Nmi2lgpd,Nmi2lgpg,Nmi2lgpi,Nmi2lgpl,Nmi2lgpn,Nmi2lpa,Nmi2lpd,Nmi2lpg,Nmi2lpi,Nmi2lpl,Nmi2lpn,Nmipa,Nmipd,Nmipg,Nmipi,Nmipl,Nmipn,Nnapa,Nnapd,Nnapg,Nnapi,Nnapl,Nnapn,Nnipa,Nnipd,Nnipg,Nnipi,Nnipl,Nnipn,Np,Nppa,Nppd,Nppg,Nppi,Nppl,Nppn";
    mapAll(nnsTypes, "NNS");

    final String nnpTypes =
        "Npfa,Npfi,Npma,Npmi,Npmi2g,Npmi2l,Npmi2lg,Npmi2lgs2g,Npmi2lgs2l,Npmi2ls2g,Npna,Npni,Npfasa,Npfasd,Npfasg,Npfasi,Npfasl,Npfasn,Npfisa,Npfisd,Npfisg,Npfisi,Npfisl,Npfisn,Npmasa,Npmasd,Npmasg,Npmasi,Npmasl,Npmasn,Npmi2gsa,Npmi2gsd,Npmi2gsg,Npmi2gsi,Npmi2gsl,Npmi2gsn,Npmi2lgsa,Npmi2lgsd,Npmi2lgsg,Npmi2lgsi,Npmi2lgsl,Npmi2lgsn,Npmi2lsa,Npmi2lsd,Npmi2lsg,Npmi2lsi,Npmi2lsl,Npmi2lsn,Npmisa,Npmisd,Npmisg,Npmisi,Npmisl,Npmisn,Npnasa,Npnasd,Npnasg,Npnasi,Npnasl,Npnasn,Npnisa,Npnisd,Npnisg,Npnisi,Npnisl,Npnisn";
    mapAll(nnpTypes, "NNP");

    final String nnpsTypes =
        "Npfapa,Npfapd,Npfapg,Npfapi,Npfapl,Npfapn,Npfipa,Npfipd,Npfipg,Npfipi,Npfipl,Npfipn,Npmapa,Npmapd,Npmapg,Npmapi,Npmapl,Npmapn,Npmi2gpa,Npmi2gpd,Npmi2gpg,Npmi2gpi,Npmi2gpl,Npmi2gpn,Npmi2lgpa,Npmi2lgpd,Npmi2lgpg,Npmi2lgpi,Npmi2lgpl,Npmi2lgpn,Npmi2lpa,Npmi2lpd,Npmi2lpg,Npmi2lpi,Npmi2lpl,Npmi2lpn,Npmipa,Npmipd,Npmipg,Npmipi,Npmipl,Npmipn,Npnapa,Npnapd,Npnapg,Npnapi,Npnapl,Npnapn,Npnipa,Npnipd,Npnipg,Npnipi,Npnipl,Npnipn,Npp,Npppa,Npppd,Npppg,Npppi,Npppl,Npppn";
    mapAll(nnpsTypes, "NNPS");

    reportMapSize();
    showMessage("Categories in map count: " + categoriesMap.size());
  }

  /**
   * Registers every comma-separated MSD type in {@code msdTypes} under the
   * same POS tag, overwriting any earlier entry for the same type.
   *
   * @param msdTypes comma-separated list of MSD type codes
   * @param category POS tag to associate with each of them
   */
  private void mapAll(String msdTypes, String category) {
    StringTokenizer tokens = new StringTokenizer(msdTypes, ",");
    while(tokens.hasMoreTokens()) {
      categoriesMap.put(tokens.nextToken(), category);
    }
  }

  /** Sends the current number of map entries to the status listeners. */
  private void reportMapSize() {
    fireStatusChanged("Categories in map count: " + categoriesMap.size());
  }

  /**
   * Maps the MSD annotations of the current document onto Token categories.
   *
   * <p>Reads from the annotation set named by {@link #inputASName}, or from
   * the document's default set when that name is {@code null} or empty.
   *
   * @throws ExecutionException if no document has been set
   */
  public void execute() throws ExecutionException {
    if(super.document == null) {
      throw new ExecutionException("No document to process!");
    }

    boolean useDefaultSet = inputASName == null || inputASName.equals("");
    AnnotationSet annotationSet =
        useDefaultSet ? super.document.getAnnotations() : super.document
            .getAnnotations(inputASName);
    if(annotationSet == null) {
      showMessage("No annotation set!");
      return;
    }

    fireStatusChanged("Mapping of morphology information...");
    mapCategories(annotationSet);
    fireProcessFinished();
    fireStatusChanged("POS Mapper processing finished!");
  }

  /**
   * Walks the {@code MSD} annotations of {@code annSet} in
   * {@link OffsetComparator} order, translates their {@code "type"} feature
   * through {@link #categoriesMap} and stores the space-separated result in
   * the {@code "category"} feature of the matching {@code Token} annotation.
   *
   * <p>An MSD annotation whose {@code "type"} feature is missing, is not a
   * {@code String}, or has no entry in the table contributes no tag.
   *
   * @param annSet annotation set expected to hold both {@code MSD} and
   *          {@code Token} annotations
   */
  private void mapCategories(AnnotationSet annSet) {
    AnnotationSet msdSet = annSet.get("MSD");
    if(msdSet == null) {
      showMessage("No annotations from type MSD");
      return;
    }
    // getStrict(...) is called on the implementation class, hence the cast.
    AnnotationSetImpl tokenSet = (AnnotationSetImpl)annSet.get("Token");
    if(tokenSet == null) {
      showMessage("No annotations from type Token");
      return;
    }

    Annotation[] msdAnnotations = msdSet.toArray(new Annotation[msdSet.size()]);
    Arrays.sort(msdAnnotations, new OffsetComparator());

    // Reused for every group; emptied at the end of each outer iteration.
    HashSet<String> mappedTypes = new HashSet<String>();
    int index = 0;
    while(index < msdAnnotations.length) {
      // Collect the tags of one run of consecutive annotations that compare
      // equal to each other.
      // NOTE(review): the run is delimited with Annotation.compareTo, not with
      // the OffsetComparator used for sorting. If compareTo orders by
      // annotation id rather than by offsets, every run has length one and a
      // later MSD annotation on the same span overwrites the category written
      // by an earlier one - confirm against the GATE version in use.
      Annotation msdAnn;
      do {
        msdAnn = msdAnnotations[index];
        Object msdType = msdAnn.getFeatures().get("type");
        // Feature values are untyped; anything but a String cannot be a key
        // of the table, so it is treated like an unmapped type.
        if(msdType instanceof String) {
          String category = categoriesMap.get(msdType);
          if(category != null) {
            mappedTypes.add(category);
          }
        }
        index++;
      } while(index < msdAnnotations.length
          && msdAnn.compareTo(msdAnnotations[index]) == 0);

      StringBuilder joined = new StringBuilder();
      for(String category : mappedTypes) {
        if(joined.length() > 0) {
          joined.append(' ');
        }
        joined.append(category);
      }
      String tokType = joined.toString();

      // Offsets are taken from the last annotation of the run.
      AnnotationSet singleToken =
          tokenSet.getStrict(msdAnn.getStartNode().getOffset(), msdAnn
              .getEndNode().getOffset());
      if(singleToken.size() == 0) {
        showMessage("No token annotation for MSD annotation: " + msdAnn);
      } else if(tokType.length() == 0) {
        showMessage("No recognized category for MSD annotation: " + msdAnn);
      } else {
        // Only the first annotation returned by the iterator is updated.
        Annotation tokAnn = singleToken.iterator().next();
        tokAnn.getFeatures().put("category", tokType);
      }
      mappedTypes.clear();
    }
  }

  /**
   * Sets the name of the annotation set to read from.
   *
   * @param newInputASName annotation set name; {@code null} or empty selects
   *          the default annotation set
   */
  @Optional
  @RunTime
  @CreoleParameter(comment = "The name of the annotation set containing "
  + "MSD and Token annotations")
  public void setInputASName(String newInputASName) {
    inputASName = newInputASName;
  }

  /**
   * Returns the name of the annotation set to read from.
   *
   * @return the configured annotation set name, possibly {@code null}
   */
  public String getInputASName() {
    return inputASName;
  }

  /**
   * Diagnostic hook; the body is empty, so messages are discarded.
   * NOTE(review): presumably the output was guarded by {@link #DEBUG} and
   * dropped at compile time - confirm before adding logging here.
   *
   * @param s the message to report
   */
  private void showMessage(String s) {
    // Intentionally empty.
  }
}