package shef.nlp.supple;
/**
*
* <p>Title: SUPPLE</p>
* <p>Copyright: Copyright (c) 2003-2006</p>
* @version 1.0
*/
//gate stuff
import gate.Annotation;
import gate.AnnotationSet;
import gate.Document;
import gate.DocumentContent;
import gate.Factory;
import gate.FeatureMap;
import gate.ProcessingResource;
import gate.Resource;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.gui.STreeNode;
import gate.util.InvalidOffsetException;
import gate.util.OffsetComparator;
import gate.util.SimpleFeatureMapImpl;
import gate.util.Files;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.Serializable;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import shef.nlp.supple.category.Chart;
import shef.nlp.supple.prolog.Prolog;
import shef.nlp.supple.utils.BestParseOutput;
import shef.nlp.supple.utils.SemOutput;
import shef.nlp.supple.utils.SynOutput;
import shef.nlp.supple.utils.SynSemTriple;
public class SUPPLE extends AbstractLanguageAnalyser implements ProcessingResource, Serializable
{
/** Prefix of the temporary file that holds the charts written for the parser (its input). */
protected static String InTempFileName = "SUPPLE--IN--";
/** Prefix of the temporary file that receives the parser output. */
protected static String OutTempFileName = "SUPPLE--OUT--";
/** Prefix intended for a separate semantic output file; its creation is commented out in execute(). */
protected static String SemTempFileName = "SUPPLE--SEM--";
/**
 * Temporary files of the current run. InTempFile and OutTempFile are created
 * by execute(); no assignment to SemTempFile is visible in this class.
 */
public File InTempFile, OutTempFile, SemTempFile;
/** URL of the SUPPLE file handed to the Prolog wrapper; converted to {@link #suppleFile} in init(). */
private URL suppleFileUrl;
/** Local file resolved from {@link #suppleFileUrl} in init() and passed to Prolog.init(). */
private File suppleFile;
public void setSUPPLEFile(URL suppleFile) { suppleFileUrl = suppleFile; }
public URL getSUPPLEFile() { return suppleFileUrl; }
/*
protected String syntaxSetName;
public void setSyntaxSetName(String n) { syntaxSetName = n; }
public String getSyntaxSetName() { return syntaxSetName; }
*/
/**
 * Name of the annotation set that receives the "semantics" annotations;
 * execute() uses the default annotation set when this is null or empty.
 */
protected String semanticsSetName;
public void setSemanticsSetName(String n) { semanticsSetName = n; }
public String getSemanticsSetName() { return semanticsSetName; }
/** The document under analysis. */
protected Document document;
public Document getDocument() { return document; }
public void setDocument(Document doc) { document = doc; }
/** URL of the mapping (configuration) file: "Gate" lines each followed by a "SUPPLE" line, read in init(). */
public URL configFile;
public void setConfigFile(URL configFile) {this.configFile = configFile; }
public URL getConfigFile() { return configFile;}
/** URL of the feature table: one category per line followed by its features, ';' separated, read in init(). */
public URL featureFile;
public void setFeatureFile(URL featureFile) { this.featureFile = featureFile; }
public URL getFeatureFile() { return featureFile;}
/**
 * When true, execute() filters the chart edges of each sentence through
 * keepLongest(); init() defaults this to true when it is null.
 */
public Boolean longestMatch;
public Boolean getLongestMatch() { return longestMatch;}
public void setLongestMatch(Boolean m) { longestMatch=m;}
/** Prolog wrapper instantiated reflectively in init() from {@link #prologImpl}. */
private Prolog prolog;
/** Fully qualified class name of the Prolog implementation to load. */
private String prologImpl;
public String getPrologImplementation() { return prologImpl;}
public void setPrologImplementation(String prologImpl) { this.prologImpl=prologImpl; }
/**
 * Debug flag: passed to Prolog.parse(); when true execute() keeps the
 * temporary files. init() defaults this to false when it is null.
 */
private Boolean debug;
public Boolean getDebug() { return debug; }
public void setDebug(Boolean debug) { this.debug = debug; }
/**
 * GATE side of the mappings, three parallel lists of FeatureMaps filled in
 * init(): annotation set/type, constant feature constraints, and
 * feature-to-variable bindings. Entry i belongs to the i-th "Gate" line.
 */
public ArrayList gateAnnotations;
public ArrayList gateConstraints;
public ArrayList gateVariables;
/** Maps an annotation set name ("Default" for the unnamed set) to the Set of annotation types to read from it. */
public Hashtable annotationSetTable;
/**
 * SUPPLE side of the mappings, parallel to the gate* lists: constant
 * attribute values and attribute-to-variable bindings of the i-th "SUPPLE" line.
 */
private ArrayList buchartConstraints;
private ArrayList buchartVariables;
/**
 * Dumps every GATE-to-SUPPLE mapping loaded by init() to standard output:
 * the annotation specification, then the GATE and SUPPLE constraints, then
 * the GATE and SUPPLE variable bindings.
 */
public void showMapping()
{
  int mappingCount = gateConstraints.size();
  int pos = 0;
  while(pos < mappingCount)
  {
    System.out.println("Annotation " + pos);
    System.out.println(gateAnnotations.get(pos));

    System.out.println("Constraint " );
    System.out.println(gateConstraints.get(pos));
    System.out.println(buchartConstraints.get(pos));

    System.out.println("Variables ");
    System.out.println(gateVariables.get(pos));
    System.out.println(buchartVariables.get(pos));

    pos++;
  }
}
/** Maps each category named in the feature table to the ArrayList of its feature names, in file order. */
private Hashtable buchartCategories;
/**
 * Maps each category to an Integer equal to its line position in the feature
 * table (0 for the first category). keepLongest() prefers the smaller value
 * when two edges have the same start and length.
 */
public Hashtable priorityList;
/** Separator between the category and its features in the feature table. */
private static final String CAT_DELIMITER=";";
/** Separator between attribute=value pairs in the mapping file. */
private static final String ATT_DELIMITER=";";
/** Prefix of a mapping-file line that describes the GATE side of a mapping. */
private static final String GATE_LINE="Gate";
/** Prefix of a mapping-file line that describes the SUPPLE side of a mapping. */
private static final String SUPPLE_LINE="SUPPLE";
/** Not referenced in the visible code; execute() uses the literal "category" directly. */
private static final String SUPPLE_CAT="category";
/** Name of the configuration file parameter. */
public static final String CONFIG_FILE_PAR="configFile";
/** Name of the feature file parameter. */
public static final String FEATURE_FILE_PAR="featureFile";
/**
 * Initialises the resource: resolves the SUPPLE file, loads the Prolog
 * wrapper named by the prologImplementation parameter, then reads the
 * feature table and the GATE/SUPPLE mapping file.
 *
 * The feature table is read line by line; the first ';'-separated token is a
 * category, the remaining tokens its features. The line position of a
 * category becomes its priority.
 *
 * In the mapping file every line starting with "Gate" must be immediately
 * followed by a line starting with "SUPPLE"; all other lines are ignored.
 *
 * @return this resource
 * @throws ResourceInstantiationException if a parameter is missing or
 *         invalid, the Prolog wrapper cannot be loaded, a file cannot be
 *         read, or the mapping file is malformed
 */
public Resource init() throws ResourceInstantiationException
{
  if(suppleFileUrl == null)
  {
    throw new ResourceInstantiationException(
      "SUPPLEFile parameter must be a valid file: URL");
  }
  try
  {
    suppleFile = Files.fileFromURL(suppleFileUrl);
  }
  catch(IllegalArgumentException iae)
  {
    throw new ResourceInstantiationException(
      "SUPPLEFile parameter must be a valid file: URL");
  }
  /* check the specified prolog saved state */
  if (!suppleFile.exists() || !suppleFile.isFile())
  {
    throw new ResourceInstantiationException("SUPPLEFile parameter does not point to a file");
  }
  /* load the requested Prolog wrapper reflectively */
  try
  {
    Class c = Class.forName(prologImpl);
    prolog = (Prolog)c.newInstance();
    prolog.init(suppleFile);
  }
  catch (Exception e)
  {
    e.printStackTrace();
    throw new ResourceInstantiationException("Unable to correctly load and initialise the Prolog interface");
  }
  if(featureFile == null)
  {
    throw new ResourceInstantiationException("No " + FEATURE_FILE_PAR + " parameter specified");
  }
  if(configFile == null)
  {
    throw new ResourceInstantiationException("No " + CONFIG_FILE_PAR + " parameter specified");
  }

  /* start from empty tables so that a re-initialisation does not accumulate entries */
  gateConstraints=new ArrayList();
  gateVariables=new ArrayList();
  gateAnnotations=new ArrayList();
  buchartConstraints=new ArrayList();
  buchartVariables=new ArrayList();
  annotationSetTable=new Hashtable();
  priorityList=new Hashtable();
  buchartCategories=new Hashtable();

  /* the feature table */
  BufferedReader in = null;
  try
  {
    in = new BufferedReader(new InputStreamReader(featureFile.openStream()));
    int priority=0;
    String line;
    while((line = in.readLine()) != null)
    {
      StringTokenizer tokenizer=new StringTokenizer(line,CAT_DELIMITER);
      /* lines without any token are skipped */
      if(tokenizer.hasMoreElements())
      {
        /* first element is the category */
        String cat=tokenizer.nextToken();
        priorityList.put(cat,new Integer(priority));
        priority++;
        /* remaining elements are the features in edge order */
        ArrayList features=new ArrayList();
        while(tokenizer.hasMoreElements())
        {
          features.add(tokenizer.nextToken());
        }
        buchartCategories.put(cat,features);
      }
    }
  }
  catch(IOException ioe)
  {
    throw new ResourceInstantiationException(ioe + " while reading " + getFeatureFile());
  }
  finally
  {
    if(in != null)
    {
      try { in.close(); } catch(IOException ignored) { /* nothing useful can be done on close failure */ }
    }
  }

  /* the mapping file */
  in = null;
  try
  {
    in = new BufferedReader(new InputStreamReader(configFile.openStream()));
    int lnum=0;
    String line;
    while((line = in.readLine()) != null)
    {
      lnum++;
      if(!line.startsWith(GATE_LINE))
      {
        continue;
      }
      FeatureMap constraints=Factory.newFeatureMap();
      FeatureMap variables=Factory.newFeatureMap();
      FeatureMap annotations=Factory.newFeatureMap();
      processGateLine(line,annotations,constraints,variables);
      try
      {
        updateAnnotationSets(annotations, annotationSetTable);
      }
      catch(Exception e)
      {
        throw new ResourceInstantiationException(e.getMessage()+ " config file (" + lnum + ")");
      }
      gateConstraints.add(constraints);
      gateVariables.add(variables);
      gateAnnotations.add(annotations);

      /* the line right after a Gate line must be its SUPPLE counterpart */
      line = in.readLine();
      lnum++;
      /* line is null when the file ends on a Gate line: report it as a format error */
      if(line == null || !line.startsWith(SUPPLE_LINE))
      {
        throw new ResourceInstantiationException("Format error in config file line " + lnum);
      }
      constraints=Factory.newFeatureMap();
      variables=Factory.newFeatureMap();
      processLine(line,constraints,variables);
      buchartConstraints.add(constraints);
      buchartVariables.add(variables);
    }
  }
  catch(IOException ioe)
  {
    throw new ResourceInstantiationException(ioe + " while reading " + getConfigFile());
  }
  finally
  {
    if(in != null)
    {
      try { in.close(); } catch(IOException ignored) { /* nothing useful can be done on close failure */ }
    }
  }

  /* defaults for the optional runtime flags */
  if(longestMatch==null) { longestMatch=new Boolean(true);}
  if(debug==null) { debug=new Boolean(false);}
  return this;
}
/**
 * Re-runs init() so that all tables are reloaded from the current parameter
 * values, then returns this resource.
 * NOTE(review): the name starts with a capital R; presumably the GATE
 * framework hook is spelled reInit() - confirm whether this method is ever
 * invoked by the framework.
 *
 * @throws ResourceInstantiationException propagated from init()
 */
public Resource ReInit() throws ResourceInstantiationException
{
init();
return this;
}
/**
 * Registers the annotation type named in <code>annotations</code> under its
 * annotation set in <code>table</code>.
 *
 * @param annotations map with the key 'AnnotationType' (mandatory) and the
 *        key 'AnnotationSet' (optional, "Default" is used when absent)
 * @param table maps a set name to the Set of type names collected so far;
 *        updated in place
 * @throws Exception if 'AnnotationType' is missing
 */
public static void updateAnnotationSets(FeatureMap annotations, Hashtable table) throws Exception
{
  String setName = annotations.containsKey("AnnotationSet")
    ? (String)annotations.get("AnnotationSet")
    : "Default";

  if(!annotations.containsKey("AnnotationType"))
  {
    throw new Exception("No 'AnnotationType' specified");
  }
  String typeName = (String)annotations.get("AnnotationType");

  /* a Hashtable never stores null values, so null means "no entry yet" */
  Set typesOfSet = (Set)table.get(setName);
  if(typesOfSet == null)
  {
    typesOfSet = new HashSet();
  }
  typesOfSet.add(typeName);
  table.put(setName,typesOfSet);
}
/**
 * Parses the current document with SUPPLE.
 *
 * For every "Sentence" annotation of the default annotation set (in offset
 * order) a chart is built from the annotations selected by the mapping file
 * and written to a temporary input file. The Prolog wrapper is then run on
 * that file and its output is turned into annotations: syntax tree nodes
 * (see createSyntacticAnnotations), "semantics" annotations carrying a
 * "qlf" feature, and "parse" annotations carrying a "best_parse" feature.
 *
 * The input writer is always closed, and unless the debug flag is set the
 * temporary files are removed even when the run fails.
 *
 * @throws ExecutionException if the document has no sentences, a temporary
 *         file cannot be created or written, a configured named annotation
 *         set is empty, or a chart edge has a category that is missing from
 *         the feature table
 */
public void execute() throws ExecutionException
{
  DocumentContent dc=document.getContent();
  String stringContent=dc.toString();
  AnnotationSet all=document.getAnnotations();
  AnnotationSet sentences=all.get("Sentence");
  /* checked before any temporary file exists so that nothing is left behind */
  if(sentences==null || sentences.isEmpty())
  {
    throw new ExecutionException("No sentences to parse");
  }
  ArrayList sentList=new ArrayList(sentences);
  /* put them in order */
  Collections.sort(sentList,new OffsetComparator());

  PrintWriter out = null;
  try
  {
    InTempFile = File.createTempFile(InTempFileName, "");
    OutTempFile = File.createTempFile(OutTempFileName, "");
    out = new PrintWriter(new FileWriter(InTempFile));
  }
  catch(IOException ioe)
  {
    throw new ExecutionException("Problems creating temporary files" + ioe.toString());
  }

  try
  {
    /* one chart per sentence, numbered from 1 */
    try
    {
      for(int s=0;s<sentList.size();s++)
      {
        Chart chart=buildSentenceChart(s+1,(Annotation) sentList.get(s),stringContent);
        out.println(chart.toSUPPLEFormat(buchartCategories));
        out.flush();
      }
    }
    finally
    {
      out.close();
    }
    /* PrintWriter never throws on write failures; it only records them */
    if(out.checkError())
    {
      throw new ExecutionException("Problems writing the chart file " + InTempFile);
    }

    prolog.parse(InTempFile,OutTempFile, debug.booleanValue());

    SynSemTriple parserOutput=extractSynSem();
    createSyntacticAnnotations(parserOutput.getSyntax());
    ArrayList semantics=parserOutput.getSemnatics();
    ArrayList best=parserOutput.getBestParse();

    /* an empty set name means the default annotation set */
    if(semanticsSetName != null && semanticsSetName.equals("")) semanticsSetName = null;
    AnnotationSet semAnnotationSet = (semanticsSetName == null) ? document.getAnnotations() : document.getAnnotations(semanticsSetName);

    for(int i=0;i<semantics.size();i++)
    {
      SemOutput sem=(SemOutput) semantics.get(i);
      FeatureMap semfm=Factory.newFeatureMap();
      semfm.put("qlf",sem.getSemantics());
      try
      {
        semAnnotationSet.add(sem.getStart(),sem.getEnd(),"semantics",semfm);
      }
      catch(InvalidOffsetException ioe)
      {
        ioe.printStackTrace();
      }
    }

    /* best parses always go to the default annotation set */
    for(int i=0;i<best.size();i++)
    {
      BestParseOutput bestParse=(BestParseOutput) best.get(i);
      FeatureMap parsefm=Factory.newFeatureMap();
      parsefm.put("best_parse",bestParse.getBestParse());
      try
      {
        all.add(new Long(bestParse.getStart()),new Long(bestParse.getEnd()),"parse",parsefm);
      }
      catch(InvalidOffsetException ioe)
      {
        ioe.printStackTrace();
      }
    }
  }
  finally
  {
    /* in debug mode the files are kept for inspection */
    if(debug!=null && !debug.booleanValue())
    {
      InTempFile.delete();
      OutTempFile.delete();
    }
  }
}

/** Builds the chart of one sentence: a "top" edge, the mapped edges in offset order, and a "bottom" edge. */
private Chart buildSentenceChart(int sentenceNumber,Annotation sentence,String stringContent) throws ExecutionException
{
  Long startSent=sentence.getStartNode().getOffset();
  Long endSent=sentence.getEndNode().getOffset();
  Chart chart=new Chart(new Long(sentenceNumber),startSent,endSent,new SUPPLERecord("top",startSent,startSent,Factory.newFeatureMap()));

  ArrayList inSentenceList=collectSentenceAnnotations(startSent,endSent);
  ArrayList buchartList=new ArrayList();
  for(int a=0;a<inSentenceList.size();a++)
  {
    Object[] pair=(Object[])inSentenceList.get(a);
    SUPPLERecord record=mapToRecord((String)pair[0],(Annotation)pair[1],stringContent);
    /* annotations that match no mapping are silently ignored */
    if(record!=null)
    {
      buchartList.add(record);
    }
  }
  /* sort the list of buchart categories by offset */
  Collections.sort(buchartList,SUPPLERecord.SUPPLERecordComparator());

  ArrayList workingList=buchartList;
  if(getLongestMatch().booleanValue())
  {
    /* keeps one element per valid start offset */
    workingList=keepLongest(buchartList,priorityList);
  }
  for(int c=0;c<workingList.size();c++)
  {
    SUPPLERecord record=(SUPPLERecord) workingList.get(c);
    chart.setNext(record);
    String cat=record.getCategory();
    /* every edge needs an entry in the feature table to be printable */
    if(!buchartCategories.containsKey(cat))
    {
      throw new ExecutionException(cat + " without printable features!");
    }
  }
  chart.setFinal(new SUPPLERecord("bottom",endSent,endSent,Factory.newFeatureMap()));
  return chart;
}

/**
 * Collects, from every configured annotation set, the annotations of the
 * configured types found between the two offsets. Returns a list of
 * (set name, annotation) pairs sorted by annotation start offset.
 */
private ArrayList collectSentenceAnnotations(Long startSent,Long endSent) throws ExecutionException
{
  ArrayList inSentenceList=new ArrayList();
  Iterator iteKey=annotationSetTable.keySet().iterator();
  while(iteKey.hasNext())
  {
    String annotationSet=(String)iteKey.next();
    AnnotationSet auxSet;
    if(annotationSet.compareTo("Default")==0)
    {
      auxSet=document.getAnnotations();
    }
    else if (document.getNamedAnnotationSets().containsKey(annotationSet))
    {
      auxSet = document.getAnnotations(annotationSet);
      if (auxSet == null || auxSet.isEmpty())
      {
        throw new ExecutionException(annotationSet+" does not exist");
      }
    }
    else
    {
      /* a set the document does not have contributes nothing */
      auxSet=null;
    }
    if(auxSet==null || auxSet.isEmpty())
    {
      continue;
    }
    Iterator iteTypes=((Set) annotationSetTable.get(annotationSet)).iterator();
    while(iteTypes.hasNext())
    {
      String annotationType=(String)iteTypes.next();
      AnnotationSet found=auxSet.get(annotationType, startSent,endSent);
      if(found==null || found.isEmpty())
      {
        continue;
      }
      Iterator iteAnnotation = found.iterator();
      while(iteAnnotation.hasNext())
      {
        Object[] pair=new Object[2];
        pair[0]=annotationSet;
        pair[1]=iteAnnotation.next();
        inSentenceList.add(pair);
      }
    }
  }
  Collections.sort(inSentenceList,pairComparator());
  return inSentenceList;
}

/**
 * Turns one GATE annotation into a chart edge using the first mapping that
 * matches it (see getIndex). Returns null when no mapping matches.
 */
private SUPPLERecord mapToRecord(String annotationSetName,Annotation annotation,String stringContent)
{
  int index=getIndex(annotationSetName,annotation,gateAnnotations,gateConstraints);
  if(index<0)
  {
    return null;
  }
  Long annStart=annotation.getStartNode().getOffset();
  Long annEnd=annotation.getEndNode().getOffset();
  FeatureMap fm=annotation.getFeatures();

  /* instantiate the GATE-side variables, with defaults for 'string' and 'text' */
  FeatureMap fmvar=(FeatureMap)gateVariables.get(index);
  FeatureMap variables=Factory.newFeatureMap();
  Iterator ite=fmvar.keySet().iterator();
  while(ite.hasNext())
  {
    String feature=(String) ite.next();
    String var=(String)fmvar.get(feature);
    String val;
    if(fm.containsKey(feature))
    {
      /* feature values are not necessarily Strings, so convert instead of casting */
      Object raw=fm.get(feature);
      val=(raw==null) ? null : raw.toString();
    }
    else if(feature.compareTo("text")==0)
    {
      val="body";
    }
    else if(feature.compareTo("string")==0)
    {
      val=stringContent.substring(annStart.intValue(),annEnd.intValue());
    }
    else
    {
      val = "_";
    }
    variables.put(var,val);
  }

  /* resolve the SUPPLE-side variables; unbound ones become "_" */
  FeatureMap fmbuchartvars=(FeatureMap)buchartVariables.get(index);
  FeatureMap buchartMap=Factory.newFeatureMap();
  ite=fmbuchartvars.keySet().iterator();
  while(ite.hasNext())
  {
    String feature=(String) ite.next();
    String var=(String)fmbuchartvars.get(feature);
    if(variables.containsKey(var))
    {
      buchartMap.put(feature,variables.get(var));
    }
    else
    {
      buchartMap.put(feature,"_");
    }
  }
  /* constant attribute values of the SUPPLE line override variable bindings */
  buchartMap.putAll((FeatureMap)buchartConstraints.get(index));
  return new SUPPLERecord((String) buchartMap.get("category"),annStart,annEnd,buchartMap);
}
/**
 * Reduces a list of (set name, annotation) pairs to non-overlapping entries.
 *
 * The list must be sorted by annotation start offset in ascending order.
 * Among pairs with the same start offset as the last kept pair, the longest
 * annotation wins. A pair that starts inside the last kept annotation is
 * dropped. A pair that starts at or after the end of the last kept
 * annotation is kept, the same rule the overload taking a priority table
 * applies.
 *
 * @param list pairs as Object[2]: index 0 the set name, index 1 the Annotation
 * @return a new list with the kept pairs, in input order
 */
public static ArrayList keepLongest(ArrayList list)
{
  ArrayList working=new ArrayList();
  if(list.size()==0)
  {
    return working;
  }
  Object[] kept=(Object[])list.get(0);
  working.add(kept);
  for(int e=1;e<list.size();e++)
  {
    Annotation keptAnn=(Annotation) kept[1];
    long keptStart=keptAnn.getStartNode().getOffset().longValue();
    long keptEnd=keptAnn.getEndNode().getOffset().longValue();

    Object[] pair=(Object[]) list.get(e);
    Annotation curAnn=(Annotation)pair[1];
    long start=curAnn.getStartNode().getOffset().longValue();
    long end=curAnn.getEndNode().getOffset().longValue();

    if(start==keptStart)
    {
      /* same start: the longer annotation replaces the kept one */
      if(end-start>keptEnd-keptStart)
      {
        working.set(working.size()-1,pair);
        kept=pair;
      }
    }
    else if(start>=keptEnd)
    {
      /* an annotation starting exactly where the kept one ends does not overlap it */
      working.add(pair);
      kept=pair;
    }
  }
  return working;
}
/**
 * Reduces a list of SUPPLERecords, sorted by start offset in ascending
 * order, to non-overlapping records.
 *
 * For records sharing the start offset of the last kept record the longer
 * one wins; on equal length the one whose category has the smaller priority
 * value wins. A record starting before the end of the last kept record is
 * dropped, any other record is kept.
 *
 * @param list the SUPPLERecords to filter
 * @param priorities maps a category name to its Integer priority
 * @return a new list with the kept records
 * @throws ExecutionException if the category of any record is not a key of
 *         <code>priorities</code>
 */
public static ArrayList keepLongest(ArrayList list,Hashtable priorities) throws ExecutionException
{
  ArrayList kept=new ArrayList();
  if(list.size()==0)
  {
    return kept;
  }

  SUPPLERecord last=(SUPPLERecord)list.get(0);
  int lastPriority=priorityOf(last,priorities);
  kept.add(last);

  for(int pos=1;pos<list.size();pos++)
  {
    SUPPLERecord candidate=(SUPPLERecord) list.get(pos);
    /* looked up first so that unknown categories are reported even for dropped records */
    int candidatePriority=priorityOf(candidate,priorities);

    long lastStart=last.getStart().longValue();
    long lastEnd=last.getEnd().longValue();
    long lastLength=lastEnd-lastStart;
    long candidateStart=candidate.getStart().longValue();
    long candidateLength=candidate.getEnd().longValue()-candidateStart;

    boolean replaceLast=false;
    boolean append=false;
    if(candidateStart==lastStart)
    {
      replaceLast=candidateLength>lastLength
        || (candidateLength==lastLength && candidatePriority<lastPriority);
    }
    else
    {
      append=candidateStart>=lastEnd;
    }

    if(replaceLast)
    {
      kept.set(kept.size()-1,candidate);
    }
    else if(append)
    {
      kept.add(candidate);
    }
    if(replaceLast || append)
    {
      last=candidate;
      lastPriority=candidatePriority;
    }
  }
  return kept;
}

/** Returns the priority registered for the record's category, or fails when there is none. */
private static int priorityOf(SUPPLERecord record,Hashtable priorities) throws ExecutionException
{
  String category=(String)record.getCategory();
  /* a Hashtable never stores null values, so null means the key is absent */
  Object priority=priorities.get(category);
  if(priority==null)
  {
    throw new ExecutionException(category + " not found in feature table");
  }
  return ((Integer) priority).intValue();
}
/**
 * Finds the first mapping that applies to an annotation.
 *
 * A mapping applies when its 'AnnotationType' equals the annotation type,
 * its 'AnnotationSet' ("Default" when absent) equals
 * <code>annotationSet</code>, and the annotation features subsume the
 * mapping's constraints.
 *
 * @param annotationSet name of the set the annotation was taken from
 * @param annotation the annotation to look up
 * @param annotations FeatureMaps holding set/type of each mapping
 * @param constraints FeatureMaps holding the constraints of each mapping,
 *        parallel to <code>annotations</code>
 * @return the position of the first applicable mapping, or -1 if none
 */
public static int getIndex(String annotationSet,Annotation annotation, ArrayList annotations, ArrayList constraints)
{
  String wantedType=annotation.getType();
  FeatureMap features=annotation.getFeatures();

  int total=annotations.size();
  int pos=0;
  while(pos<total)
  {
    FeatureMap spec=(FeatureMap)annotations.get(pos);
    FeatureMap required=(FeatureMap)constraints.get(pos);
    String specType=(String)spec.get("AnnotationType");
    String specSet=spec.containsKey("AnnotationSet")
      ? (String) spec.get("AnnotationSet")
      : "Default";

    boolean sameType=specType.compareTo(wantedType)==0;
    if(sameType && specSet.compareTo(annotationSet)==0 && features.subsumes(required))
    {
      return pos;
    }
    pos++;
  }
  return -1;
}
/**
 * Creates a comparator for (set name, annotation) pairs stored as Object[2].
 * Pairs are ordered by the start offset of the annotation at index 1.
 *
 * @return the comparator
 */
public static Comparator pairComparator()
{
  return new Comparator()
  {
    public int compare(Object left, Object right)
    {
      Object[] leftPair = (Object[]) left;
      Object[] rightPair = (Object[]) right;
      Annotation leftAnn = (Annotation) leftPair[1];
      Annotation rightAnn = (Annotation) rightPair[1];
      Long leftStart = leftAnn.getStartNode().getOffset();
      Long rightStart = rightAnn.getStartNode().getOffset();
      return leftStart.compareTo(rightStart);
    }
  };
}
/**
 * Parses the "Gate" line of a mapping and distributes its attribute=value
 * pairs over the three maps.
 *
 * The line is split on ';' and the first field is skipped. Fields that do
 * not have the form attribute=value are ignored. 'AnnotationSet' and
 * 'AnnotationType' go to <code>annotations</code>; a value starting with
 * '&amp;' is a variable and goes to <code>variables</code>; everything else
 * goes to <code>constraints</code>.
 *
 * @param line the line to parse
 * @param annotations receives annotation set and type
 * @param constraints receives constant feature values
 * @param variables receives feature-to-variable bindings
 */
public static void processGateLine(String line, FeatureMap annotations, FeatureMap constraints, FeatureMap variables)
{
  Pattern pairPattern=Pattern.compile("(.*)=(.*)");
  StringTokenizer fields=new StringTokenizer(line,ATT_DELIMITER);
  /* the first field is the line marker, not a pair */
  fields.nextElement();
  while(fields.hasMoreElements())
  {
    Matcher pair=pairPattern.matcher(fields.nextToken());
    if(!pair.matches())
    {
      continue;
    }
    String key=pair.group(1);
    String val=pair.group(2);
    boolean describesAnnotation="AnnotationSet".equals(key) || "AnnotationType".equals(key);
    if(describesAnnotation)
    {
      annotations.put(key,val);
    }
    else if(val.startsWith("&"))
    {
      variables.put(key, val);
    }
    else
    {
      constraints.put(key, val);
    }
  }
}
/**
 * Parses the "SUPPLE" line of a mapping.
 *
 * The line is split on ';' and the first field is skipped. Fields that do
 * not have the form attribute=value are ignored. A value starting with
 * '&amp;' is a variable and goes to <code>variables</code>; any other value
 * goes to <code>constraints</code>.
 *
 * @param line the line to parse
 * @param constraints receives constant attribute values
 * @param variables receives attribute-to-variable bindings
 */
public static void processLine(String line, FeatureMap constraints, FeatureMap variables)
{
  Pattern pairPattern=Pattern.compile("(.*)=(.*)");
  StringTokenizer fields=new StringTokenizer(line,ATT_DELIMITER);
  /* the first field is the line marker, not a pair */
  fields.nextElement();
  while(fields.hasMoreElements())
  {
    Matcher pair=pairPattern.matcher(fields.nextToken());
    if(!pair.matches())
    {
      continue;
    }
    String key=pair.group(1);
    String val=pair.group(2);
    FeatureMap target=val.startsWith("&") ? variables : constraints;
    target.put(key,val);
  }
}
/**
 * Reads the parser output file and collects syntax, semantics and best
 * parses.
 *
 * Lines whose first space-separated token is "syntax" are read as
 * <code>syntax start end category (skipped) constituents</code>. The level
 * of such a line is its position within the current run of syntax lines,
 * and any other non-blank line starts a new run.
 *
 * A line matching <code>semantics start end</code> closes the current parse:
 * the syntax lines gathered since the previous semantics line are folded
 * into one bracketed string (the best parse), and the following lines are
 * collected as semantic terms until an empty line, a line starting with a
 * space, a line starting with "syntax", or the end of the file. A "syntax"
 * line that ends the terms is then processed as a normal syntax line
 * instead of being discarded.
 *
 * An IOException while reading is printed and the data gathered so far is
 * returned.
 *
 * @return the syntax entries, the semantic outputs and the best parses
 */
public SynSemTriple extractSynSem()
{
  DocumentContent dc=document.getContent();
  Pattern offsets=Pattern.compile("semantics (\\d+) (\\d+)");
  String SYN="syntax";
  ArrayList output = new ArrayList();
  ArrayList outList=new ArrayList();
  ArrayList bestParseOut= new ArrayList();
  /* syntax lines of the parse currently being read */
  ArrayList bestParse=new ArrayList();
  boolean first=true;
  int level=0;
  if(OutTempFile.isFile())
  {
    BufferedReader in = null;
    try
    {
      in = new BufferedReader(new FileReader(OutTempFile.getAbsolutePath()));
      String line = in.readLine();
      while(line != null)
      {
        /* a syntax line read while collecting semantic terms, still to be processed */
        String pending = null;
        StringTokenizer tokeniser = new StringTokenizer(line," ");
        if(tokeniser.hasMoreTokens())
        {
          String test = tokeniser.nextToken();
          if(test.compareTo(SYN)==0)
          {
            if(first)
            {
              first=false;
              level=0;
            }
            else
            {
              level++;
            }
            /* next two are offsets */
            String start = tokeniser.nextToken();
            String end = tokeniser.nextToken();
            String category = tokeniser.nextToken();
            /* skip one */
            tokeniser.nextToken();
            String constituents = tokeniser.nextToken();
            output.add(new SynOutput(start,end,category,constituents,level));
            bestParse.add(new SynOutput(start,end,category,constituents,level));
          }
          else
          {
            Matcher match=offsets.matcher(line);
            if(match.matches())
            {
              /* fold the syntax lines, last one first, into one bracketed string */
              ArrayList stack=new ArrayList();
              for(int b=bestParse.size()-1;b>=0;b--)
              {
                SynOutput aux=(SynOutput)bestParse.get(b);
                String categ=aux.getCategory();
                int consti=Integer.parseInt(aux.getConstituens());
                if(consti==0)
                {
                  /* leaf: category plus the covered document text */
                  try
                  {
                    stack.add(0, "( " + categ + " \"" +
                      dc.getContent(new Long(aux.getStart()),
                        new Long(aux.getEnd())) + "\"" +
                      " )");
                  }
                  catch(InvalidOffsetException ioe)
                  {
                    ioe.printStackTrace();
                  }
                }
                else
                {
                  /* create (cat (c1) (c2) .... (cn)) and put it into the stack */
                  String element="";
                  for(int c=0;c<consti && !stack.isEmpty();c++)
                  {
                    element=element + " " + (String) stack.get(0);
                    stack.remove(0);
                  }
                  element="( " +categ+ " " + element + " )";
                  stack.add(0,element);
                }
              }
              bestParse=new ArrayList();

              String startSem=match.group(1);
              String endSem=match.group(2);
              SemOutput singleSem=new SemOutput();
              singleSem.setStart(new Long(startSem));
              singleSem.setEnd(new Long(endSem));
              /* without any usable syntax line there is no best parse to report */
              if(!stack.isEmpty())
              {
                bestParseOut.add(new BestParseOutput(startSem,endSem,(String)stack.get(0)));
              }

              /* collect the semantic terms */
              ArrayList terms=new ArrayList();
              String term;
              while((term = in.readLine()) != null
                && term.length()!=0 && !term.startsWith(" ") && !term.startsWith(SYN))
              {
                terms.add(term);
              }
              if(term != null && term.startsWith(SYN))
              {
                pending = term;
              }
              singleSem.setSemantics(terms);
              outList.add(singleSem);
            }
            /* re start with syntax */
            first=true;
          }
        }
        line = (pending != null) ? pending : in.readLine();
      }
    }
    catch(IOException ioe)
    {
      ioe.printStackTrace();
    }
    finally
    {
      if(in != null)
      {
        try { in.close(); } catch(IOException ignored) { /* nothing useful can be done on close failure */ }
      }
    }
  }
  return new SynSemTriple(output,outList,bestParseOut);
}
/**
 * Reads only the syntax lines of the parser output file.
 *
 * Lines whose first space-separated token is "syntax" are read as
 * <code>syntax start end category (skipped) constituents</code>. The level
 * of a line is its position within the current run of syntax lines; any
 * other non-blank line starts a new run. Blank lines are ignored.
 *
 * An IOException while reading is printed and the entries gathered so far
 * are returned. The reader is always closed.
 *
 * @return the SynOutput entries in file order; empty when the output file
 *         does not exist
 */
public ArrayList readSynFile()
{
  ArrayList output = new ArrayList();
  String SYN="syntax";
  boolean first=true;
  int level=0;
  if(OutTempFile.isFile())
  {
    BufferedReader in = null;
    try
    {
      in = new BufferedReader(new FileReader(OutTempFile.getAbsolutePath()));
      String line;
      while((line = in.readLine()) != null)
      {
        StringTokenizer tokeniser = new StringTokenizer(line," ");
        if(!tokeniser.hasMoreTokens())
        {
          continue;
        }
        String test = tokeniser.nextToken();
        if(test.compareTo(SYN)!=0)
        {
          /* any other line ends the current run of syntax lines */
          first=true;
          continue;
        }
        if(first)
        {
          first=false;
          level=0;
        }
        else
        {
          level++;
        }
        /* next two are offsets */
        String start = tokeniser.nextToken();
        String end = tokeniser.nextToken();
        String category = tokeniser.nextToken();
        /* skip one */
        tokeniser.nextToken();
        String constituents = tokeniser.nextToken();
        output.add(new SynOutput(start,end,category,constituents,level));
      }
    }
    catch(IOException ioe)
    {
      ioe.printStackTrace();
    }
    finally
    {
      if(in != null)
      {
        try { in.close(); } catch(IOException ignored) { /* nothing useful can be done on close failure */ }
      }
    }
  }
  return output;
}
/**
 * Consumes one constituent (and, recursively, its children) from the front
 * of <code>constituents</code> and creates the matching "SyntaxTreeNode"
 * annotations.
 *
 * Every node gets the features "cat", "consists" (ids of its children) and
 * "father" (a list holding the parent id, empty for a root). A constituent
 * that declares children consumes that many following constituents. A leaf
 * covering more than one Token gets one child node per Token, in offset
 * order, whose "cat" is the lower-cased Token "category" feature or, when
 * that is missing, the Token "string" feature. Any other leaf becomes a
 * single childless node.
 *
 * @param constituents SynOutput entries in parser output order; consumed
 *        entries are removed from the list
 * @param docAnnotations annotation set that receives the nodes and supplies
 *        the Token annotations
 * @param yourFather id of the parent node; 0 or null marks a root
 * @return the id of the created node, or null if its offsets were invalid
 * @throws IndexOutOfBoundsException if <code>constituents</code> is empty
 */
public static Integer SynTreeBack(ArrayList constituents,AnnotationSet docAnnotations,Integer yourFather)
{
  if(constituents.size()==0)
  {
    System.out.println("Error!!!");
    throw new IndexOutOfBoundsException("SynTreeBack: no constituent left to consume");
  }
  SynOutput next = (SynOutput) constituents.get(0);
  String category=next.getCategory();
  int nro=Integer.parseInt(next.getConstituens());
  Long start = new Long(next.getStart());
  Long end = new Long(next.getEnd());
  constituents.remove(0);

  /* an empty list marks a root; a null father is what a parent whose creation failed passes on */
  ArrayList father=new ArrayList();
  if(yourFather!=null && yourFather.intValue()!=0)
  {
    father.add(yourFather);
  }

  FeatureMap fm=Factory.newFeatureMap();
  Integer id;
  if(nro>0)
  {
    /* the node is created first because the children need its id */
    id=addSyntaxTreeNode(docAnnotations,start,end,fm,true);
    ArrayList components = new ArrayList();
    for(int i=0; i<nro; i++)
    {
      components.add(SynTreeBack(constituents,docAnnotations,id));
    }
    fm.put("consists",components);
    fm.put("cat",category);
    fm.put("father",father);
    return id;
  }

  /* both lookups are null-checked, as this file does for its other AnnotationSet lookups */
  AnnotationSet allTokens=docAnnotations.get("Token");
  AnnotationSet auxTokens=(allTokens==null) ? null : allTokens.get(start,end);
  if(auxTokens!=null && auxTokens.size()>1)
  {
    /* leaf spanning several tokens: simulate a 'tree' structure */
    fm.put("father",father);
    id=addSyntaxTreeNode(docAnnotations,start,end,fm,true);
    /* snapshot taken before new annotations are added; sorted so that children follow the text */
    ArrayList auxList=new ArrayList(auxTokens);
    Collections.sort(auxList,new OffsetComparator());
    ArrayList neComponents=new ArrayList();
    for(int h=0;h<auxList.size();h++)
    {
      Annotation auxToken=(Annotation) auxList.get(h);
      FeatureMap tokenfm=auxToken.getFeatures();
      String tokenCat;
      if(tokenfm.containsKey("category"))
      {
        tokenCat=((String) tokenfm.get("category")).toLowerCase();
      }
      else
      {
        tokenCat=(String) tokenfm.get("string");
      }
      ArrayList tokenFather=new ArrayList();
      tokenFather.add(id);
      FeatureMap fm1=Factory.newFeatureMap();
      fm1.put("consists",new ArrayList());
      fm1.put("cat",tokenCat);
      fm1.put("father",tokenFather);
      Integer id1=addSyntaxTreeNode(docAnnotations,
        auxToken.getStartNode().getOffset(),auxToken.getEndNode().getOffset(),fm1,false);
      if(id1!=null)
      {
        neComponents.add(id1);
      }
    }
    fm.put("consists",neComponents);
    fm.put("cat",category);
    return id;
  }

  /* is a token */
  fm.put("consists",new ArrayList());
  fm.put("cat",category);
  fm.put("father",father);
  return addSyntaxTreeNode(docAnnotations,start,end,fm,false);
}

/** Adds one "SyntaxTreeNode" annotation; returns its id, or null after printing the error when the offsets are invalid. */
private static Integer addSyntaxTreeNode(AnnotationSet docAnnotations,Long start,Long end,FeatureMap fm,boolean allowsChildren)
{
  try
  {
    Integer id=docAnnotations.add(start,end,"SyntaxTreeNode",fm);
    /* NOTE(review): the STreeNode is never used afterwards; it is still built as the original code did, in case construction has side effects - confirm and remove */
    STreeNode node=new STreeNode(docAnnotations.get(id));
    node.setAllowsChildren(allowsChildren);
    return id;
  }
  catch(InvalidOffsetException ioe)
  {
    ioe.printStackTrace();
    return null;
  }
}
/**
 * Reads the semantic output file (SemTempFile) into SemOutput entries.
 *
 * Each line matching <code>semantics start end</code> opens a new entry; the
 * lines up to the next such line (or the end of the file) become its
 * semantics. Lines before the first header are discarded.
 *
 * When SemTempFile is not set or is not a file a message is printed and an
 * empty list is returned. An IOException while reading is printed and the
 * entries completed so far are returned. The reader is always closed.
 *
 * @return the SemOutput entries in file order
 */
public ArrayList ExtractQLF()
{
  ArrayList outList=new ArrayList();
  /* SemTempFile stays null unless a caller assigns it: no assignment is visible in this class */
  if(SemTempFile==null || !SemTempFile.isFile())
  {
    System.out.println("Can't read semantic output!!!");
    return outList;
  }
  Pattern offsets=Pattern.compile("semantics (\\d+) (\\d+)");
  BufferedReader in = null;
  try
  {
    in = new BufferedReader(new FileReader(SemTempFile.getAbsolutePath()));
    ArrayList list=new ArrayList();
    SemOutput singleSem=null;
    String line;
    while ( (line = in.readLine()) != null)
    {
      Matcher match=offsets.matcher(line);
      if(!match.matches())
      {
        list.add(line);
        continue;
      }
      /* a new header closes the entry opened by the previous one */
      if(singleSem!=null)
      {
        singleSem.setSemantics(list);
        outList.add(singleSem);
      }
      list=new ArrayList();
      singleSem=new SemOutput();
      singleSem.setStart(new Long(match.group(1)));
      singleSem.setEnd(new Long(match.group(2)));
    }
    /* close the last entry; there is none when the file has no header at all */
    if(singleSem!=null)
    {
      singleSem.setSemantics(list);
      outList.add(singleSem);
    }
  }
  catch (IOException ioe)
  {
    ioe.printStackTrace();
  }
  finally
  {
    if(in != null)
    {
      try { in.close(); } catch(IOException ignored) { /* nothing useful can be done on close failure */ }
    }
  }
  return outList;
}
/**
 * Creates the syntactic annotations from the parser output file.
 *
 * The syntax entries are read with readSynFile() (any exception is printed
 * and treated as "no entries"). The tree nodes are then created by
 * createSyntacticAnnotations(ArrayList). Finally every entry is also added
 * to the default annotation set as an annotation whose type is the entry's
 * category, with the features "constituents" and "level".
 */
public void createSyntacticAnnotations()
{
  ArrayList synOut = new ArrayList();
  try
  {
    synOut= readSynFile();
  }
  catch(Exception e)
  {
    e.printStackTrace();
  }
  /* copy taken first because building the tree nodes empties synOut */
  ArrayList entries=new ArrayList(synOut);
  createSyntacticAnnotations(synOut);

  AnnotationSet theAnnotationSet=document.getAnnotations();
  Iterator synIte = entries.iterator();
  while(synIte.hasNext())
  {
    SynOutput auxSyn = (SynOutput) synIte.next();
    FeatureMap fm = new SimpleFeatureMapImpl();
    fm.put("constituents",auxSyn.getConstituens());
    fm.put("level",new Integer(auxSyn.getLevel()));
    try
    {
      theAnnotationSet.add(new Long(auxSyn.getStart()), new Long(auxSyn.getEnd()),auxSyn.getCategory(),fm);
    }
    catch(InvalidOffsetException ioe)
    {
      ioe.printStackTrace();
    }
  }
}
/**
 * Creates the "SyntaxTreeNode" annotations in the default annotation set.
 *
 * The entries of <code>synOut</code> are turned into trees with
 * SynTreeBack(), which removes them from the list. Afterwards every Token
 * whose span contains no SyntaxTreeNode gets a childless, fatherless node of
 * its own. The "cat" of such a node is the Token "category" feature or,
 * when that is missing, the Token "string" feature; it is left out when the
 * Token has neither.
 *
 * @param synOut SynOutput entries in parser output order; emptied by this call
 */
public void createSyntacticAnnotations(ArrayList synOut)
{
  AnnotationSet theAnnotationSet=document.getAnnotations();
  AnnotationSet theTokens=theAnnotationSet.get("Token");
  /* snapshot of the tokens taken before any node is added; null is treated as "no tokens" */
  ArrayList tokens=(theTokens==null) ? new ArrayList() : new ArrayList(theTokens);
  Collections.sort(tokens,new OffsetComparator());

  /* each call consumes one complete tree from the front of the list */
  while(synOut.size()>0)
  {
    SynTreeBack(synOut,theAnnotationSet,new Integer(0));
  }

  /* for tokens without STreeNode we should create one */
  for(int i=0;i<tokens.size();i++)
  {
    Annotation auxToken=(Annotation) tokens.get(i);
    Long tokenStart=auxToken.getStartNode().getOffset();
    Long tokenEnd =auxToken.getEndNode().getOffset();
    AnnotationSet auxSynSet=theAnnotationSet.get("SyntaxTreeNode",tokenStart,tokenEnd);
    if(auxSynSet!=null && !auxSynSet.isEmpty())
    {
      continue;
    }
    FeatureMap auxfm=auxToken.getFeatures();
    Object tokenCat=auxfm.get("category");
    if(tokenCat==null)
    {
      /* same fallback SynTreeBack uses for tokens without a category */
      tokenCat=auxfm.get("string");
    }
    FeatureMap fm_token=Factory.newFeatureMap();
    fm_token.put("father",new ArrayList());
    fm_token.put("consists",new ArrayList());
    if(tokenCat!=null)
    {
      fm_token.put("cat",tokenCat.toString());
    }
    try
    {
      theAnnotationSet.add(tokenStart,tokenEnd,"SyntaxTreeNode",fm_token);
    }
    catch(InvalidOffsetException ioe)
    {
      ioe.printStackTrace();
    }
  }
}
}