package shef.nlp.supple;

/*
 * Title: SUPPLE
 * Copyright: Copyright (c) 2003-2006
 */

//gate stuff
import gate.Annotation;
import gate.AnnotationSet;
import gate.Document;
import gate.DocumentContent;
import gate.Factory;
import gate.FeatureMap;
import gate.ProcessingResource;
import gate.Resource;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.gui.STreeNode;
import gate.util.InvalidOffsetException;
import gate.util.OffsetComparator;
import gate.util.SimpleFeatureMapImpl;
import gate.util.Files;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.Serializable;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import shef.nlp.supple.category.Chart;
import shef.nlp.supple.prolog.Prolog;
import shef.nlp.supple.utils.BestParseOutput;
import shef.nlp.supple.utils.SemOutput;
import shef.nlp.supple.utils.SynOutput;
import shef.nlp.supple.utils.SynSemTriple;

/**
 * GATE processing resource that wraps the SUPPLE (BuChart) Prolog chart parser.
 *
 * <p>{@link #init()} loads a Prolog back-end, a feature table (one category per
 * line followed by its printable features) and a mapping file (pairs of
 * "Gate" / "SUPPLE" lines). {@link #execute()} converts the annotations of each
 * "Sentence" into chart edges, writes them to a temporary file, runs the
 * parser and turns its output into "SyntaxTreeNode", "semantics" and "parse"
 * annotations.</p>
 *
 * <p>The code deliberately sticks to pre-generics Java, matching the rest of
 * the file.</p>
 *
 * @version 1.0
 */
public class SUPPLE extends AbstractLanguageAnalyser implements ProcessingResource, Serializable {

  /** Name of the temp file prefix for the input buchart **/
  protected static String InTempFileName = "SUPPLE--IN--";

  /** Name of the temp file prefix for the output from buchart **/
  protected static String OutTempFileName = "SUPPLE--OUT--";

  /** Name of the temp file prefix for the semantic output from buchart **/
  protected static String SemTempFileName = "SUPPLE--SEM--";

  /**
   * Temporary files used to talk to the parser. NOTE: {@code SemTempFile} is
   * never created by {@link #execute()}; it is only read by {@link #ExtractQLF()}.
   */
  public File InTempFile, OutTempFile, SemTempFile;

  /** The name of the executable BuChart **/
  private URL suppleFileUrl;
  private File suppleFile;

  public void setSUPPLEFile(URL suppleFile) {
    suppleFileUrl = suppleFile;
  }

  public URL getSUPPLEFile() {
    return suppleFileUrl;
  }

  /*
  protected String syntaxSetName;
  public void setSyntaxSetName(String n) { syntaxSetName = n; }
  public String getSyntaxSetName() { return syntaxSetName; }
  */

  /* where to store the semantics of each parsed chunk */
  protected String semanticsSetName;

  public void setSemanticsSetName(String n) {
    semanticsSetName = n;
  }

  public String getSemanticsSetName() {
    return semanticsSetName;
  }

  /** The document under analysis. */
  protected Document document;

  public Document getDocument() {
    return document;
  }

  public void setDocument(Document doc) {
    document = doc;
  }

  /** the configuration file **/
  public URL configFile;

  public void setConfigFile(URL configFile) {
    this.configFile = configFile;
  }

  public URL getConfigFile() {
    return configFile;
  }

  /** the feature table **/
  public URL featureFile;

  public void setFeatureFile(URL featureFile) {
    this.featureFile = featureFile;
  }

  public URL getFeatureFile() {
    return featureFile;
  }

  /** only pass longest chunk to the chart parser **/
  public Boolean longestMatch;

  public Boolean getLongestMatch() {
    return longestMatch;
  }

  public void setLongestMatch(Boolean m) {
    longestMatch = m;
  }

  /** Different Prolog Implementations **/
  private Prolog prolog;
  private String prologImpl;

  public String getPrologImplementation() {
    return prologImpl;
  }

  public void setPrologImplementation(String prologImpl) {
    this.prologImpl = prologImpl;
  }

  /** Debug flag */
  private Boolean debug;

  public Boolean getDebug() {
    return debug;
  }

  public void setDebug(Boolean debug) {
    this.debug = debug;
  }

  /** Gate specification **/
  public ArrayList gateAnnotations;
  public ArrayList gateConstraints;
  public ArrayList gateVariables;

  /** Gate annotation sets and annotations to consider */
  public Hashtable annotationSetTable;

  /** Buchart specification **/
  private ArrayList buchartConstraints;
  private ArrayList buchartVariables;

  /** show mapping Gate-Buchart **/
  public void showMapping() {
    for (int i = 0; i < gateConstraints.size(); i++) {
      System.out.println("Annotation " + i);
      System.out.println(gateAnnotations.get(i));
      System.out.println("Constraint ");
      System.out.println(gateConstraints.get(i));
      System.out.println(buchartConstraints.get(i));
      System.out.println("Variables ");
      System.out.println(gateVariables.get(i));
      System.out.println(buchartVariables.get(i));
    }
  }

  /** valid categories and their buchart attributes **/
  private Hashtable buchartCategories;

  /** priorities associated with the buchart categories if longest match to be used */
  public Hashtable priorityList;

  private static final String CAT_DELIMITER = ";";
  private static final String ATT_DELIMITER = ";";
  private static final String GATE_LINE = "Gate";
  private static final String SUPPLE_LINE = "SUPPLE";
  private static final String SUPPLE_CAT = "category";
  public static final String CONFIG_FILE_PAR = "configFile";
  public static final String FEATURE_FILE_PAR = "featureFile";

  /** First token of a syntax line in the parser output. */
  private static final String SYNTAX_LINE = "syntax";

  /** Matches one "attribute=value" item of a mapping line (greedy on the attribute side). */
  private static final Pattern ATT_VAL_PATTERN = Pattern.compile("(.*)=(.*)");

  /** Matches the header line of a semantics section in the parser output. */
  private static final Pattern SEMANTICS_PATTERN = Pattern.compile("semantics (\\d+) (\\d+)");

  /**
   * Initialises the resource: resolves the SUPPLE file, instantiates the
   * configured {@link Prolog} implementation, then reads the feature table and
   * the Gate/SUPPLE mapping file.
   *
   * @return this resource
   * @throws ResourceInstantiationException if a parameter is missing or
   *         invalid, the Prolog back-end cannot be loaded, or one of the
   *         configuration files cannot be read or is malformed
   */
  public Resource init() throws ResourceInstantiationException {
    requireParameter(suppleFileUrl, "SUPPLEFile");
    try {
      suppleFile = Files.fileFromURL(suppleFileUrl);
    } catch (IllegalArgumentException iae) {
      throw new ResourceInstantiationException(
          "SUPPLEFile parameter must be a valid file: URL");
    }

    /** Check the specified prolog saved state **/
    if (!suppleFile.exists() || !suppleFile.isFile()) {
      throw new ResourceInstantiationException("SUPPLEFile parameter does not point to a file");
    }

    /** Check the specified prolog **/
    try {
      Class c = Class.forName(prologImpl);
      prolog = (Prolog) c.newInstance();
      prolog.init(suppleFile);
    } catch (Exception e) {
      e.printStackTrace();
      throw new ResourceInstantiationException(
          "Unable to correctly load and initialise the Prolog interface");
    }

    /* gate constraints and variables */
    gateConstraints = new ArrayList();
    gateVariables = new ArrayList();
    gateAnnotations = new ArrayList();

    /* buchart constraints and variables */
    buchartConstraints = new ArrayList();
    buchartVariables = new ArrayList();

    /* annotations to pass */
    annotationSetTable = new Hashtable();

    readFeatureTable();
    readMappingFile();

    /* initialize longest match at true */
    if (longestMatch == null) {
      longestMatch = Boolean.TRUE;
    }
    if (debug == null) {
      debug = Boolean.FALSE;
    }
    return this;
  }

  /** Fails with a descriptive message when a required init parameter is unset. */
  private static void requireParameter(Object value, String name)
      throws ResourceInstantiationException {
    if (value == null) {
      throw new ResourceInstantiationException(name + " parameter must be set");
    }
  }

  /**
   * Reads the feature table: each non-empty line is "category;feature;feature;...".
   * The line order defines the category priority (earlier line = lower number).
   */
  private void readFeatureTable() throws ResourceInstantiationException {
    requireParameter(featureFile, FEATURE_FILE_PAR);
    priorityList = new Hashtable();
    int priority = 0;
    BufferedReader in = null;
    try {
      in = new BufferedReader(new InputStreamReader(featureFile.openStream()));
      buchartCategories = new Hashtable();
      String line;
      while ((line = in.readLine()) != null) {
        StringTokenizer tokenizer = new StringTokenizer(line, CAT_DELIMITER);
        /* first element is the category */
        if (tokenizer.hasMoreElements()) {
          String cat = tokenizer.nextToken();
          priorityList.put(cat, new Integer(priority));
          priority++;
          /* rest elements are the features in edge order */
          ArrayList features = new ArrayList();
          while (tokenizer.hasMoreElements()) {
            features.add(tokenizer.nextToken());
          }
          /* create entry in table */
          buchartCategories.put(cat, features);
        }
      }
    } catch (IOException ioe) {
      throw new ResourceInstantiationException(ioe + " while reading " + getFeatureFile());
    } finally {
      closeQuietly(in);
    }
  }

  /**
   * Reads the mapping file. The expected format is a line starting with "Gate"
   * immediately followed by a line starting with "SUPPLE"; any other line
   * outside such a pair is ignored.
   */
  private void readMappingFile() throws ResourceInstantiationException {
    requireParameter(configFile, CONFIG_FILE_PAR);
    BufferedReader in = null;
    try {
      in = new BufferedReader(new InputStreamReader(configFile.openStream()));
      String line;
      int lnum = 0;
      while ((line = in.readLine()) != null) {
        lnum++;
        if (!line.startsWith(GATE_LINE)) {
          continue;
        }
        FeatureMap constraints = Factory.newFeatureMap();
        FeatureMap variables = Factory.newFeatureMap();
        FeatureMap annotations = Factory.newFeatureMap();
        processGateLine(line, annotations, constraints, variables);
        try {
          updateAnnotationSets(annotations, annotationSetTable);
        } catch (Exception e) {
          throw new ResourceInstantiationException(e.getMessage() + " config file (" + lnum + ")");
        }
        gateConstraints.add(constraints);
        gateVariables.add(variables);
        gateAnnotations.add(annotations);

        /* get next line; a Gate line at end of file is a format error, not an NPE */
        line = in.readLine();
        lnum++;
        if (line == null || !line.startsWith(SUPPLE_LINE)) {
          throw new ResourceInstantiationException("Format error in config file line " + lnum);
        }
        constraints = Factory.newFeatureMap();
        variables = Factory.newFeatureMap();
        processLine(line, constraints, variables);
        buchartConstraints.add(constraints);
        buchartVariables.add(variables);
      }
    } catch (IOException ioe) {
      throw new ResourceInstantiationException(ioe + " while reading " + getConfigFile());
    } finally {
      closeQuietly(in);
    }
  }

  /** Closes a reader, ignoring failures: nothing useful can be done at that point. */
  private static void closeQuietly(BufferedReader reader) {
    if (reader != null) {
      try {
        reader.close();
      } catch (IOException ignored) {
        /* best effort */
      }
    }
  }

  /** the same for reinitialization **/
  public Resource ReInit() throws ResourceInstantiationException {
    init();
    return this;
  }

  /**
   * Records, in {@code table}, that the annotation type named by the
   * 'AnnotationType' key must be read from the set named by the
   * 'AnnotationSet' key ("Default" when that key is absent).
   *
   * @param annotations map holding 'AnnotationType' and optionally 'AnnotationSet'
   * @param table       set name -> {@link Set} of annotation type names; updated in place
   * @throws Exception if no 'AnnotationType' is specified
   */
  public static void updateAnnotationSets(FeatureMap annotations, Hashtable table) throws Exception {
    String annSet;
    if (annotations.containsKey("AnnotationSet")) {
      annSet = (String) annotations.get("AnnotationSet");
    } else {
      annSet = "Default";
    }
    if (!annotations.containsKey("AnnotationType")) {
      throw new Exception("No 'AnnotationType' specified");
    }
    String annType = (String) annotations.get("AnnotationType");

    Set auxSet = (Set) table.get(annSet);
    if (auxSet == null) {
      auxSet = new HashSet();
    }
    auxSet.add(annType);
    table.put(annSet, auxSet);
  }

  /**
   * Creates the chart edges according to the information provided in the
   * config files, runs the parser and stores its output on the document.
   *
   * @throws ExecutionException if the document has no "Sentence" annotations,
   *         the temporary files cannot be created or written, a configured
   *         named annotation set is empty, or a category is missing from the
   *         feature table
   */
  public void execute() throws ExecutionException {
    AnnotationSet all = document.getAnnotations();
    AnnotationSet sentences = all.get("Sentence");
    /* checked before any temp file is created so nothing leaks on this exit */
    if (sentences == null || sentences.isEmpty()) {
      throw new ExecutionException("No sentences to parse");
    }
    ArrayList sentList = new ArrayList(sentences);
    /* put them in order */
    Collections.sort(sentList, new OffsetComparator());

    String stringContent = document.getContent().toString();
    try {
      writeCharts(sentList, stringContent);

      /** Mark **/
      prolog.parse(InTempFile, OutTempFile, debug.booleanValue());
      /** Horacio **/
      // callParser(InTempFile,OutTempFile,SemTempFile,"flag");

      storeParserOutput(all);
    } finally {
      /* temp files are kept only when debugging, whether or not the run succeeded */
      if (debug == null || !debug.booleanValue()) {
        if (InTempFile != null) {
          InTempFile.delete();
        }
        if (OutTempFile != null) {
          OutTempFile.delete();
        }
      }
    }
  }

  /** Creates the temp files and writes one chart per sentence to the parser input file. */
  private void writeCharts(ArrayList sentList, String stringContent) throws ExecutionException {
    PrintWriter out;
    try {
      InTempFile = File.createTempFile(InTempFileName, "");
      OutTempFile = File.createTempFile(OutTempFileName, "");
      //SemTempFile = File.createTempFile(SemTempFileName,"");
      out = new PrintWriter(new FileWriter(InTempFile));
    } catch (IOException ioe) {
      throw new ExecutionException("Problems creating temporary files" + ioe.toString());
    }

    try {
      for (int s = 0; s < sentList.size(); s++) {
        Annotation sentence = (Annotation) sentList.get(s);
        Long startSent = sentence.getStartNode().getOffset();
        Long endSent = sentence.getEndNode().getOffset();

        /* create the empty chart */
        Chart chart = new Chart(new Long(s + 1), startSent, endSent,
            new SUPPLERecord("top", startSent, startSent, Factory.newFeatureMap()));

        ArrayList inSentenceList = collectSentenceAnnotations(startSent, endSent);
        Collections.sort(inSentenceList, pairComparator());

        ArrayList buchartList = toSuppleRecords(inSentenceList, stringContent);
        /* sort the list of buchart categories by offset */
        Collections.sort(buchartList, SUPPLERecord.SUPPLERecordComparator());

        /* organise by offset and longest match */
        ArrayList workingList;
        if (getLongestMatch().booleanValue()) {
          /* keeps one element per valid start offset */
          workingList = keepLongest(buchartList, priorityList);
        } else {
          workingList = buchartList;
        }

        for (int c = 0; c < workingList.size(); c++) {
          SUPPLERecord record = (SUPPLERecord) workingList.get(c);
          chart.setNext(record);
          String cat = record.getCategory();
          if (!buchartCategories.containsKey(cat)) {
            throw new ExecutionException(cat + " without printable features!");
          }
        }
        chart.setFinal(new SUPPLERecord("bottom", endSent, endSent, Factory.newFeatureMap()));
        out.println(chart.toSUPPLEFormat(buchartCategories));
        out.flush();
      }
      /* PrintWriter never throws; without this check a failed write goes unnoticed */
      if (out.checkError()) {
        throw new ExecutionException("Problems writing temporary file " + InTempFile);
      }
    } finally {
      out.close();
    }
  }

  /**
   * Collects, for every configured annotation set and type, the annotations
   * returned for the given sentence offsets.
   *
   * @return list of {@code Object[2]} pairs: {set name, annotation}
   */
  private ArrayList collectSentenceAnnotations(Long startSent, Long endSent)
      throws ExecutionException {
    ArrayList inSentenceList = new ArrayList();
    Iterator iteKey = annotationSetTable.keySet().iterator();
    while (iteKey.hasNext()) {
      String annotationSet = (String) iteKey.next();
      AnnotationSet auxSet;
      if (annotationSet.compareTo("Default") == 0) {
        auxSet = document.getAnnotations();
      } else if (document.getNamedAnnotationSets().containsKey(annotationSet)) {
        auxSet = document.getAnnotations(annotationSet);
        if (auxSet == null || auxSet.isEmpty()) {
          throw new ExecutionException(annotationSet + " does not exist");
        }
      } else {
        /* unknown named sets are silently skipped */
        auxSet = null;
      }
      if (auxSet == null || auxSet.isEmpty()) {
        continue;
      }

      Set annotationTypes = (Set) annotationSetTable.get(annotationSet);
      Iterator iteTypes = annotationTypes.iterator();
      while (iteTypes.hasNext()) {
        String annotationType = (String) iteTypes.next();
        AnnotationSet found = auxSet.get(annotationType, startSent, endSent);
        if (found == null || found.isEmpty()) {
          continue;
        }
        Iterator iteAnnotation = found.iterator();
        while (iteAnnotation.hasNext()) {
          Object[] pair = new Object[2];
          pair[0] = annotationSet;
          pair[1] = iteAnnotation.next();
          inSentenceList.add(pair);
        }
      }
    }
    return inSentenceList;
  }

  /**
   * Maps each (set name, annotation) pair that matches a Gate line of the
   * mapping file to a {@link SUPPLERecord}; pairs without a matching line are
   * dropped.
   */
  private ArrayList toSuppleRecords(ArrayList inSentenceList, String stringContent) {
    ArrayList buchartList = new ArrayList();
    for (int a = 0; a < inSentenceList.size(); a++) {
      Object[] pair = (Object[]) inSentenceList.get(a);
      String annotationSetName = (String) pair[0];
      Annotation annotation = (Annotation) pair[1];
      Long annStart = annotation.getStartNode().getOffset();
      Long annEnd = annotation.getEndNode().getOffset();
      FeatureMap fm = annotation.getFeatures();

      /** look for constraints **/
      int index = getIndex(annotationSetName, annotation, gateAnnotations, gateConstraints);
      if (index < 0) {
        // throw new ExecutionException("Restriction not found for annotation type " + type);
        continue;
      }

      /* instantiate variables and consider defaults for 'string' and 'text' */
      FeatureMap fmvar = (FeatureMap) gateVariables.get(index);
      FeatureMap variables = Factory.newFeatureMap();
      Iterator ite = fmvar.keySet().iterator();
      while (ite.hasNext()) {
        String feature = (String) ite.next();
        String var = (String) fmvar.get(feature);
        String val;
        if (fm.containsKey(feature)) {
          /* feature values need not be Strings; avoid a ClassCastException */
          Object raw = fm.get(feature);
          val = (raw == null) ? null : raw.toString();
        } else if (feature.compareTo("text") == 0) {
          val = "body";
        } else if (feature.compareTo("string") == 0) {
          val = stringContent.substring(annStart.intValue(), annEnd.intValue());
        } else {
          val = "_";
        }
        variables.put(var, val);
      }

      /* create a feature map for the mapping */
      FeatureMap fmbuchartvars = (FeatureMap) buchartVariables.get(index);
      FeatureMap buchartMap = Factory.newFeatureMap();
      ite = fmbuchartvars.keySet().iterator();
      while (ite.hasNext()) {
        String feature = (String) ite.next();
        String var = (String) fmbuchartvars.get(feature);
        if (variables.containsKey(var)) {
          buchartMap.put(feature, (String) variables.get(var));
        } else {
          buchartMap.put(feature, "_");
        }
      }
      /* constants from the SUPPLE line override variable bindings */
      buchartMap.putAll((FeatureMap) buchartConstraints.get(index));
      buchartList.add(
          new SUPPLERecord((String) buchartMap.get(SUPPLE_CAT), annStart, annEnd, buchartMap));
    }
    return buchartList;
  }

  /** Reads the parser output and adds the syntax, "semantics" and "parse" annotations. */
  private void storeParserOutput(AnnotationSet all) {
    /* check instantiation of this resource */
    SynSemTriple parserOutput = extractSynSem();
    createSyntacticAnnotations(parserOutput.getSyntax());
    ArrayList list = parserOutput.getSemnatics();
    ArrayList best = parserOutput.getBestParse();

    if (semanticsSetName != null && semanticsSetName.equals("")) {
      semanticsSetName = null;
    }
    AnnotationSet semAnnotationSet = (semanticsSetName == null)
        ? document.getAnnotations()
        : document.getAnnotations(semanticsSetName);

    for (int i = 0; i < list.size(); i++) {
      SemOutput aux = (SemOutput) list.get(i);
      Long semStart = aux.getStart();
      Long semEnd = aux.getEnd();
      FeatureMap semfm = Factory.newFeatureMap();
      semfm.put("qlf", aux.getSemantics());
      try {
        semAnnotationSet.add(semStart, semEnd, "semantics", semfm);
      } catch (InvalidOffsetException ioe) {
        ioe.printStackTrace();
      }
    }

    for (int i = 0; i < best.size(); i++) {
      BestParseOutput auxBest = (BestParseOutput) best.get(i);
      String startBest = auxBest.getStart();
      String endBest = auxBest.getEnd();
      FeatureMap semfm = Factory.newFeatureMap();
      semfm.put("best_parse", auxBest.getBestParse());
      try {
        all.add(new Long(startBest), new Long(endBest), "parse", semfm);
      } catch (InvalidOffsetException ioe) {
        ioe.printStackTrace();
      }
    }
  }

  /**
   * Keeps, for runs of annotations sharing a start offset, the longest one,
   * and drops annotations that start inside the previously kept one.
   *
   * <p>NOTE(review): an annotation starting exactly at the previous end offset
   * is dropped here ({@code >}) but kept by
   * {@link #keepLongest(ArrayList, Hashtable)} ({@code >=}); left unchanged
   * because the intended behaviour cannot be determined from this file.</p>
   *
   * @param list pairs (set name, annotation) sorted by annotation start offset, ascending
   * @return the filtered list of pairs
   */
  public static ArrayList keepLongest(ArrayList list) {
    ArrayList working = new ArrayList();
    if (list.size() == 0) {
      return working;
    }

    Object[] previous = (Object[]) list.get(0);
    working.add(previous);
    for (int e = 1; e < list.size(); e++) {
      Annotation preann = (Annotation) previous[1];
      Long prestart = preann.getStartNode().getOffset();
      Long preend = preann.getEndNode().getOffset();
      long presize = preend.longValue() - prestart.longValue();

      Object[] pair = (Object[]) list.get(e);
      Annotation curann = (Annotation) pair[1];
      Long start = curann.getStartNode().getOffset();
      Long end = curann.getEndNode().getOffset();
      long cursize = end.longValue() - start.longValue();

      if (start.compareTo(prestart) == 0) {
        if (cursize > presize) {
          working.remove(working.size() - 1);
          working.add(pair);
        }
      } else if (start.compareTo(preend) > 0) {
        working.add(pair);
      }
      previous = (Object[]) working.get(working.size() - 1);
    }
    return working;
  }

  /**
   * Keeps, for records sharing a start offset, the longest one (ties are won
   * by the numerically lowest priority), and drops records that start before
   * the end of the previously kept one.
   *
   * @param list       {@link SUPPLERecord}s sorted by offset, ascending
   * @param priorities category name -> {@link Integer} priority
   * @return the filtered list of records
   * @throws ExecutionException if a record's category has no priority entry
   */
  public static ArrayList keepLongest(ArrayList list, Hashtable priorities)
      throws ExecutionException {
    ArrayList working = new ArrayList();
    if (list.size() == 0) {
      return working;
    }

    SUPPLERecord previous = (SUPPLERecord) list.get(0);
    int prepriority = priorityOf(previous, priorities);
    working.add(previous);
    for (int e = 1; e < list.size(); e++) {
      Long prestart = previous.getStart();
      Long preend = previous.getEnd();
      long presize = preend.longValue() - prestart.longValue();

      SUPPLERecord current = (SUPPLERecord) list.get(e);
      int curpriority = priorityOf(current, priorities);
      Long start = current.getStart();
      Long end = current.getEnd();
      long cursize = end.longValue() - start.longValue();

      if (start.compareTo(prestart) == 0) {
        boolean longer = cursize > presize;
        boolean sameSizeHigherPriority = cursize == presize && curpriority < prepriority;
        if (longer || sameSizeHigherPriority) {
          working.remove(working.size() - 1);
          working.add(current);
        }
      } else if (start.compareTo(preend) >= 0) {
        working.add(current);
      }
      previous = (SUPPLERecord) working.get(working.size() - 1);
      prepriority = priorityOf(previous, priorities);
    }
    return working;
  }

  /** Looks up the priority of a record's category, failing if the category is unknown. */
  private static int priorityOf(SUPPLERecord record, Hashtable priorities)
      throws ExecutionException {
    String category = (String) record.getCategory();
    if (!priorities.containsKey(category)) {
      throw new ExecutionException(category + " not found in feature table");
    }
    return ((Integer) priorities.get(category)).intValue();
  }

  /**
   * Finds the first mapping entry whose annotation type and set match the
   * given annotation and whose constraints are subsumed by its features.
   *
   * @param annotationSet name of the set the annotation came from ("Default" for the unnamed set)
   * @param annotation    the annotation to match
   * @param annotations   per-entry maps holding 'AnnotationType' / 'AnnotationSet'
   * @param constraints   per-entry constraint feature maps, parallel to {@code annotations}
   * @return the index of the matching entry, or -1 if none matches
   */
  public static int getIndex(String annotationSet, Annotation annotation, ArrayList annotations,
      ArrayList constraints) {
    String type = annotation.getType();
    FeatureMap fm = annotation.getFeatures();
    for (int i = 0; i < annotations.size(); i++) {
      FeatureMap annfm = (FeatureMap) annotations.get(i);
      FeatureMap consfm = (FeatureMap) constraints.get(i);
      String auxType = (String) annfm.get("AnnotationType");
      String auxSet;
      if (annfm.containsKey("AnnotationSet")) {
        auxSet = (String) annfm.get("AnnotationSet");
      } else {
        auxSet = "Default";
      }
      if (auxType.compareTo(type) == 0 && auxSet.compareTo(annotationSet) == 0
          && fm.subsumes(consfm)) {
        return i;
      }
    }
    return -1;
  }

  /**
   * @return a comparator ordering (set name, annotation) pairs by the start
   *         offset of the annotation
   */
  public static Comparator pairComparator() {
    return new Comparator() {
      public int compare(Object o1, Object o2) {
        Annotation an1 = (Annotation) ((Object[]) o1)[1];
        Annotation an2 = (Annotation) ((Object[]) o2)[1];
        return an1.getStartNode().getOffset().compareTo(an2.getStartNode().getOffset());
      }
    };
  }

  /**
   * Parses a "Gate" line of the mapping file. The first ';'-separated item is
   * skipped; every following "attribute=value" item is stored in
   * {@code annotations} (attributes 'AnnotationSet' / 'AnnotationType'),
   * {@code variables} (value starts with '&amp;') or {@code constraints}
   * (anything else). Items that are not of the form attribute=value are ignored.
   */
  public static void processGateLine(String line, FeatureMap annotations, FeatureMap constraints,
      FeatureMap variables) {
    parseAttributes(line, annotations, constraints, variables);
  }

  /**
   * Parses a "SUPPLE" line of the mapping file. The first ';'-separated item
   * is skipped; every following "attribute=value" item is stored in
   * {@code variables} (value starts with '&amp;') or {@code constraints}
   * (anything else).
   */
  public static void processLine(String line, FeatureMap constraints, FeatureMap variables) {
    parseAttributes(line, null, constraints, variables);
  }

  /**
   * Shared parser for mapping lines.
   *
   * @param annotations receives the 'AnnotationSet' / 'AnnotationType' items,
   *                    or {@code null} to treat them like any other attribute
   */
  private static void parseAttributes(String line, FeatureMap annotations, FeatureMap constraints,
      FeatureMap variables) {
    StringTokenizer tokenizer = new StringTokenizer(line, ATT_DELIMITER);
    if (!tokenizer.hasMoreElements()) {
      /* nothing to parse; previously this raised NoSuchElementException */
      return;
    }
    /* consume first element */
    tokenizer.nextElement();
    while (tokenizer.hasMoreElements()) {
      Matcher m = ATT_VAL_PATTERN.matcher(tokenizer.nextToken());
      if (!m.matches()) {
        continue;
      }
      String attribute = m.group(1);
      String value = m.group(2);
      boolean isAnnotationKey = attribute.compareTo("AnnotationSet") == 0
          || attribute.compareTo("AnnotationType") == 0;
      if (annotations != null && isAnnotationKey) {
        annotations.put(attribute, value);
      } else if (value.indexOf("&") == 0) {
        /* variable */
        variables.put(attribute, value);
      } else {
        /* constant value */
        constraints.put(attribute, value);
      }
    }
  }

  /**
   * Reads {@code OutTempFile} and splits it into syntax edges, semantic
   * sections and one bracketed best-parse string per semantic section.
   *
   * <p>A line whose first token is "syntax" is read as
   * {@code syntax start end category <skipped> constituents}. A line matching
   * "semantics start end" closes the preceding run of syntax lines and is
   * followed by semantic terms up to the first empty line, line starting with
   * a space, or line starting with "syntax". NOTE(review): that terminating
   * line is consumed and not re-examined, so a "syntax" line directly after
   * the terms would be lost - confirm against the parser's output format.</p>
   *
   * @return the parsed output; all three lists are empty when the file is
   *         missing or unreadable
   */
  public SynSemTriple extractSynSem() {
    DocumentContent dc = document.getContent();
    ArrayList output = new ArrayList();
    ArrayList outList = new ArrayList();
    ArrayList bestParseOut = new ArrayList();

    if (OutTempFile != null && OutTempFile.isFile()) {
      /* syntax edges seen since the last semantics header */
      ArrayList bestParse = new ArrayList();
      boolean first = true;
      int level = 0;
      BufferedReader in = null;
      try {
        in = new BufferedReader(new FileReader(OutTempFile.getAbsolutePath()));
        String line;
        while ((line = in.readLine()) != null) {
          StringTokenizer tokeniser = new StringTokenizer(line, " ");
          if (!tokeniser.hasMoreTokens()) {
            continue;
          }
          String test = tokeniser.nextToken();
          if (test.compareTo(SYNTAX_LINE) == 0) {
            if (first) {
              first = false;
              level = 0;
            } else {
              level++;
            }
            /* next two are offsets */
            String start = tokeniser.nextToken();
            String end = tokeniser.nextToken();
            String category = tokeniser.nextToken();
            /* skip one */
            tokeniser.nextToken();
            /* constituents */
            String constituents = tokeniser.nextToken();
            output.add(new SynOutput(start, end, category, constituents, level));
            bestParse.add(new SynOutput(start, end, category, constituents, level));
          } else {
            Matcher match = SEMANTICS_PATTERN.matcher(line);
            if (match.matches()) {
              String tree = buildBestParse(bestParse, dc);
              bestParse = new ArrayList();

              ArrayList list = new ArrayList();
              SemOutput singleSem = new SemOutput();
              String startSem = match.group(1);
              String endSem = match.group(2);
              singleSem.setStart(new Long(startSem));
              singleSem.setEnd(new Long(endSem));
              /* no syntax lines preceded this header: there is no tree to report */
              if (tree != null) {
                bestParseOut.add(new BestParseOutput(startSem, endSem, tree));
              }

              /* loop on each term */
              boolean inSem = true;
              while (inSem && (line = in.readLine()) != null) {
                if (line.length() != 0 && !line.startsWith(" ")
                    && !line.startsWith(SYNTAX_LINE)) {
                  list.add(line);
                } else {
                  inSem = false;
                }
              }
              singleSem.setSemantics(list);
              outList.add(singleSem);
            }
            /* re start with syntax */
            first = true;
          }
        }
      } catch (IOException ioe) {
        ioe.printStackTrace();
      } finally {
        closeQuietly(in);
      }
    }
    return new SynSemTriple(output, outList, bestParseOut);
  }

  /**
   * Folds a run of syntax edges into one bracketed string by walking the
   * edges backwards with a stack: an edge with 0 constituents becomes
   * {@code ( cat "text" )}, any other edge pops that many entries and wraps them.
   *
   * @return the entry left on top of the stack, or {@code null} if the stack is empty
   */
  private static String buildBestParse(ArrayList edges, DocumentContent dc) {
    ArrayList stack = new ArrayList();
    for (int b = edges.size() - 1; b >= 0; b--) {
      SynOutput aux = (SynOutput) edges.get(b);
      String categ = aux.getCategory();
      int consti = (new Integer(aux.getConstituens())).intValue();
      if (consti == 0) {
        /* to the stack */
        try {
          stack.add(0, "( " + categ + " \""
              + dc.getContent(new Long(aux.getStart()), new Long(aux.getEnd()))
              + "\"" + " )");
        } catch (InvalidOffsetException ioe) {
          ioe.printStackTrace();
        }
      } else {
        /* create (cat (c1) (c2) .... (cn)) and put it into the stack */
        StringBuffer element = new StringBuffer();
        /* stop early on malformed output rather than indexing an empty stack */
        for (int c = 0; c < consti && !stack.isEmpty(); c++) {
          element.append(" ").append((String) stack.get(0));
          stack.remove(0);
        }
        stack.add(0, "( " + categ + " " + element + " )");
      }
    }
    return stack.isEmpty() ? null : (String) stack.get(0);
  }

  /**
   * Reads only the "syntax" lines of {@code OutTempFile}.
   *
   * @return the list of {@link SynOutput} edges; empty when the file is
   *         missing or unreadable
   */
  public ArrayList readSynFile() {
    ArrayList output = new ArrayList();
    if (OutTempFile == null || !OutTempFile.isFile()) {
      return output;
    }

    boolean first = true;
    int level = 0;
    BufferedReader in = null;
    try {
      in = new BufferedReader(new FileReader(OutTempFile.getAbsolutePath()));
      String line;
      while ((line = in.readLine()) != null) {
        StringTokenizer tokeniser = new StringTokenizer(line, " ");
        if (!tokeniser.hasMoreTokens()) {
          continue;
        }
        String test = tokeniser.nextToken();
        if (test.compareTo(SYNTAX_LINE) == 0) {
          if (first) {
            first = false;
            level = 0;
          } else {
            level++;
          }
          /* next two are offsets */
          String start = tokeniser.nextToken();
          String end = tokeniser.nextToken();
          String category = tokeniser.nextToken();
          /* skip one */
          tokeniser.nextToken();
          /* constituents */
          String constituents = tokeniser.nextToken();
          output.add(new SynOutput(start, end, category, constituents, level));
        } else {
          first = true;
        }
      }
    } catch (IOException ioe) {
      ioe.printStackTrace();
    } finally {
      closeQuietly(in);
    }
    return output;
  }

  /**
   * Consumes the head of {@code constituents} (and, recursively, its
   * children) and adds the corresponding "SyntaxTreeNode" annotations with
   * features "cat", "consists" (child annotation ids) and "father".
   *
   * <p>An edge with no constituents that covers more than one "Token" gets
   * one child node per token, in offset order.</p>
   *
   * @param constituents   remaining edges; the consumed ones are removed
   * @param docAnnotations annotation set to add the nodes to
   * @param yourFather     id of the parent node; 0 marks the root.
   *                       NOTE(review): 0 may also be a real annotation id - confirm.
   * @return the id of the node created for the head edge, or {@code null} if
   *         the list was empty or the annotation could not be added
   */
  public static Integer SynTreeBack(ArrayList constituents, AnnotationSet docAnnotations,
      Integer yourFather) {
    if (constituents.size() == 0) {
      /* malformed parser output: fewer edges than announced */
      System.out.println("Error!!!");
      return null;
    }

    SynOutput next = (SynOutput) constituents.get(0);
    String category = next.getCategory();
    int nro = (new Integer(next.getConstituens())).intValue();
    Long start = new Long(next.getStart());
    Long end = new Long(next.getEnd());
    constituents.remove(0);

    Integer id = null;
    if (nro > 0) {
      /* for each component, construct a tree recursively */
      FeatureMap fm = Factory.newFeatureMap();
      try {
        id = docAnnotations.add(start, end, "SyntaxTreeNode", fm);
        STreeNode node = new STreeNode(docAnnotations.get(id));
        node.setAllowsChildren(true);
      } catch (InvalidOffsetException ioe) {
        ioe.printStackTrace();
      }
      ArrayList components = new ArrayList();
      for (int i = 0; i < nro; i++) {
        components.add(SynTreeBack(constituents, docAnnotations, id));
      }
      fm.put("consists", components);
      fm.put("cat", category);
      fm.put("father", fatherList(yourFather));
      return id;
    }

    /* get("Token") is null-checked like the other annotation lookups in this class */
    AnnotationSet allTokens = docAnnotations.get("Token");
    AnnotationSet auxTokens = (allTokens == null) ? null : allTokens.get(start, end);
    if (auxTokens != null && auxTokens.size() > 1) {
      /* simulate a 'tree' structure over the tokens spanning start and end */
      ArrayList neComponents = new ArrayList();
      ArrayList auxList = new ArrayList(auxTokens);
      /* set iteration order is unspecified; keep children in text order */
      Collections.sort(auxList, new OffsetComparator());

      FeatureMap fm = Factory.newFeatureMap();
      fm.put("father", fatherList(yourFather));
      try {
        id = docAnnotations.add(start, end, "SyntaxTreeNode", fm);
        STreeNode node = new STreeNode(docAnnotations.get(id));
        node.setAllowsChildren(true);
      } catch (InvalidOffsetException ioe) {
        ioe.printStackTrace();
      }

      for (int h = 0; h < auxList.size(); h++) {
        Annotation auxToken = (Annotation) auxList.get(h);
        FeatureMap tokenfm = auxToken.getFeatures();
        Long startToken = auxToken.getStartNode().getOffset();
        Long endToken = auxToken.getEndNode().getOffset();

        String tokenCat;
        if (tokenfm.containsKey("category")) {
          tokenCat = (String) tokenfm.get("category");
          tokenCat = tokenCat.toLowerCase();
        } else {
          tokenCat = (String) tokenfm.get("string");
        }

        ArrayList father = new ArrayList();
        father.add(id);
        FeatureMap fm1 = Factory.newFeatureMap();
        fm1.put("consists", new ArrayList());
        fm1.put("cat", tokenCat);
        fm1.put("father", father);
        try {
          Integer id1 = docAnnotations.add(startToken, endToken, "SyntaxTreeNode", fm1);
          STreeNode node1 = new STreeNode(docAnnotations.get(id1));
          node1.setAllowsChildren(false);
          neComponents.add(id1);
        } catch (InvalidOffsetException ioe) {
          ioe.printStackTrace();
        }
      }
      fm.put("consists", neComponents);
      fm.put("cat", category);
    } else {
      /* is a token */
      FeatureMap fm = Factory.newFeatureMap();
      fm.put("consists", new ArrayList());
      fm.put("cat", category);
      fm.put("father", fatherList(yourFather));
      try {
        id = docAnnotations.add(start, end, "SyntaxTreeNode", fm);
        STreeNode node = new STreeNode(docAnnotations.get(id));
        node.setAllowsChildren(false);
      } catch (InvalidOffsetException ioe) {
        ioe.printStackTrace();
      }
    }
    return id;
  }

  /**
   * Builds the "father" feature value: a list holding the parent id, or an
   * empty list for the root (id 0) or when the parent could not be created.
   */
  private static ArrayList fatherList(Integer yourFather) {
    ArrayList father = new ArrayList();
    if (yourFather != null && yourFather.intValue() != 0) {
      father.add(yourFather);
    }
    return father;
  }

  /**
   * Reads {@code SemTempFile}: each "semantics start end" line opens a
   * section and every following line is one of its terms.
   *
   * @return the list of {@link SemOutput}; empty when the file is missing,
   *         unreadable, or contains no section header
   */
  public ArrayList ExtractQLF() {
    ArrayList outList = new ArrayList();
    if (SemTempFile == null || !SemTempFile.isFile()) {
      System.out.println("Can't read semantic output!!!");
      return outList;
    }

    ArrayList list = new ArrayList();
    SemOutput singleSem = null;
    BufferedReader in = null;
    try {
      in = new BufferedReader(new FileReader(SemTempFile.getAbsolutePath()));
      String line;
      while ((line = in.readLine()) != null) {
        Matcher match = SEMANTICS_PATTERN.matcher(line);
        if (match.matches()) {
          if (singleSem != null) {
            singleSem.setSemantics(list);
            outList.add(singleSem);
          }
          list = new ArrayList();
          singleSem = new SemOutput();
          singleSem.setStart(new Long(match.group(1)));
          singleSem.setEnd(new Long(match.group(2)));
        } else {
          list.add(line);
        }
      }
      /* flush the last section; there is none if no header was ever seen */
      if (singleSem != null) {
        singleSem.setSemantics(list);
        outList.add(singleSem);
      }
    } catch (IOException ioe) {
      ioe.printStackTrace();
    } finally {
      closeQuietly(in);
    }
    return outList;
  }

  /**
   * Reads the syntax edges from {@code OutTempFile}, builds the
   * "SyntaxTreeNode" tree, gives uncovered tokens a node of their own and
   * finally adds one annotation per edge, typed by the edge category, with
   * features "constituents" and "level".
   */
  public void createSyntacticAnnotations() {
    AnnotationSet theAnnotationSet = document.getAnnotations();

    ArrayList synOut = new ArrayList();
    try {
      synOut = readSynFile();
    } catch (Exception e) {
      e.printStackTrace();
    }
    /* SynTreeBack consumes its input, so keep a copy for the last step */
    ArrayList synOut1 = new ArrayList(synOut);

    createSyntacticAnnotations(synOut);

    // AnnotationSet syntax = document.getAnnotations(getSyntaxSetName());
    Iterator synIte = synOut1.iterator();
    while (synIte.hasNext()) {
      SynOutput auxSyn = (SynOutput) synIte.next();
      FeatureMap fm = new SimpleFeatureMapImpl();
      fm.put("constituents", auxSyn.getConstituens());
      fm.put("level", new Integer(auxSyn.getLevel()));
      try {
        theAnnotationSet.add(new Long(auxSyn.getStart()), new Long(auxSyn.getEnd()),
            auxSyn.getCategory(), fm);
      } catch (InvalidOffsetException ioe) {
        ioe.printStackTrace();
      }
    }
  }

  /**
   * Builds the "SyntaxTreeNode" tree for the given edges and gives every
   * "Token" not covered by a node a node of its own.
   *
   * @param synOut list of {@link SynOutput} edges; emptied by this call
   */
  public void createSyntacticAnnotations(ArrayList synOut) {
    AnnotationSet theAnnotationSet = document.getAnnotations();
    AnnotationSet theTokens = theAnnotationSet.get("Token");
    /* snapshot the tokens before the tree is built */
    ArrayList tokens = (theTokens == null) ? new ArrayList() : new ArrayList(theTokens);
    Collections.sort(tokens, new OffsetComparator());

    while (synOut.size() > 0) {
      SynTreeBack(synOut, theAnnotationSet, new Integer(0));
    }

    /* for tokens without STreeNode we should create one */
    for (int i = 0; i < tokens.size(); i++) {
      Annotation auxToken = (Annotation) tokens.get(i);
      Long tokenStart = auxToken.getStartNode().getOffset();
      Long tokenEnd = auxToken.getEndNode().getOffset();
      AnnotationSet auxSynSet = theAnnotationSet.get("SyntaxTreeNode", tokenStart, tokenEnd);
      if (auxSynSet != null && !auxSynSet.isEmpty()) {
        continue;
      }

      /* the label is only looked up when a node is needed; fall back to the
         token string as SynTreeBack does, and skip tokens with neither */
      FeatureMap auxfm = auxToken.getFeatures();
      Object tokenCat = auxfm.get("category");
      if (tokenCat == null) {
        tokenCat = auxfm.get("string");
      }
      if (tokenCat == null) {
        continue;
      }

      FeatureMap fm_token = Factory.newFeatureMap();
      fm_token.put("father", new ArrayList());
      fm_token.put("consists", new ArrayList());
      fm_token.put("cat", tokenCat.toString());
      try {
        theAnnotationSet.add(tokenStart, tokenEnd, "SyntaxTreeNode", fm_token);
      } catch (InvalidOffsetException ioe) {
        ioe.printStackTrace();
      }
    }
  }
}