/* * CopyAS2AnoDocMain.java * * Yaoyong Li 08/10/2007 * * $Id: CopyAS2AnoDocMain.java, v 1.0 2009-05-10 11:44:16 +0000 yaoyong $ */ package gate.copyAS2AnoDoc; import gate.Annotation; import gate.AnnotationSet; import gate.Document; import gate.Factory; import gate.ProcessingResource; import gate.creole.AbstractLanguageAnalyser; import gate.creole.ExecutionException; import gate.creole.ResourceInstantiationException; import gate.util.ExtensionFileFilter; import gate.util.InvalidOffsetException; import java.io.File; import java.net.MalformedURLException; import java.net.URL; import java.util.Arrays; import java.util.Comparator; import java.util.HashSet; import java.util.List; public class CopyAS2AnoDocMain extends AbstractLanguageAnalyser implements ProcessingResource { URL sourceFilesURL = null; private String inputASName; private String outputASName; private List annotationTypes; /** Initialise this resource, and return it. */ public gate.Resource init() throws ResourceInstantiationException { return this; } // init() /** * Run the resource. * * @throws ExecutionException */ File[] xmlFiles = null; boolean [] fileNotUsedYet = null; public void execute() throws ExecutionException { // now we need to see if the corpus is provided if(corpus == null) throw new ExecutionException("Provided corpus is null!"); if(corpus.size() == 0) throw new ExecutionException("No Document found in corpus!"); int positionDoc = corpus.indexOf(document); // first document in the corpus if(positionDoc == 0) { System.out.println("\n\n------------ new session starts ------------\n"); System.out.println("Copy the Annotation Set "+inputASName +" from the files in" +sourceFilesURL.getPath() + " to the files in the corpus as AS "+ outputASName); //collect all the file names in the source dir ExtensionFileFilter fileFilter = null; xmlFiles = new File(this.sourceFilesURL.getPath()) .listFiles(fileFilter); Arrays.sort(xmlFiles, new Comparator<File>() { public int compare(File a, File b) { return a.getName().compareTo(b.getName()); } }); fileNotUsedYet = new boolean[xmlFiles.length]; for(int i=0; i<fileNotUsedYet.length; ++i) fileNotUsedYet[i] = true; } //for current document in the corpus, find the corresponding document in the source dir int filePos = findCorresFile(document.getName(), xmlFiles, fileNotUsedYet); if(filePos<0) { System.out.println("Cannot find a corresponding file in the source dir for" + " the current document "+document.getName()); return; } else { fileNotUsedYet[filePos] = false; } //load the corresponding file to GATE Document docCorres; try { docCorres = Factory.newDocument(xmlFiles[filePos].toURI().toURL(), "UTF-8"); AnnotationSet sourceAS = null; if(inputASName == null || inputASName.length()==0) sourceAS = docCorres.getAnnotations(); else sourceAS = docCorres.getAnnotations(inputASName); AnnotationSet asToCopy = null; if (annotationTypes != null && annotationTypes.size() > 0) { //String [] annTypes = annotationTypes.split(";"); asToCopy = sourceAS.get(new HashSet(annotationTypes)); //asToCopy = sourceAS.get(); } else { // transfer everything asToCopy = sourceAS.get(); } System.out.println("Copying from "+xmlFiles[filePos].getName() +" to "+ document.getName()); //get the target annotation set AnnotationSet targetAS = null; if(outputASName == null || outputASName.length()==0) targetAS = document.getAnnotations(); else targetAS = document.getAnnotations(outputASName); //copy the annotations from source file to target file for(Object obj:asToCopy) { Annotation oneAnn = (Annotation)obj; targetAS.add(oneAnn.getStartNode().getOffset(), oneAnn.getEndNode().getOffset(), oneAnn.getType(), oneAnn.getFeatures()); } Factory.deleteResource(docCorres); } catch(ResourceInstantiationException e) { e.printStackTrace(); } catch(MalformedURLException e) { e.printStackTrace(); } catch(InvalidOffsetException e) { e.printStackTrace(); } } private int findCorresFile(String fileName, File [] xmlFiles, boolean [] notUsedYet) { int fpos =-1; int sameLenMax = 0; char [] fnChars = fileName.toCharArray(); for(int i=0; i<xmlFiles.length; ++i) { //System.out.println(i+", "+xmlFiles[i].getName()+", notusedYet="+notUsedYet[i]+"."); if(!notUsedYet[i]) continue; char [] fnXmlFChars = xmlFiles[i].getName().toCharArray(); int lenStr = fnChars.length; if(lenStr>fnXmlFChars.length) lenStr = fnXmlFChars.length; int sameLen = lenStr; for(int j=0; j<lenStr; ++j) if(fnChars[j] != fnXmlFChars[j]) { sameLen = j; break; } if(sameLenMax <sameLen) { fpos = i; sameLenMax = sameLen; } } //System.out.println("source file="+xmlFiles[fpos].getName()+", target="+fileName); return fpos; } public void setInputASName(String iasn) { this.inputASName = iasn; } public String getInputASName() { return this.inputASName; } public void setOutputASName(String iasn) { this.outputASName = iasn; } public String getOutputASName() { return this.outputASName; } public void setSourceFilesURL(URL modelU) { this.sourceFilesURL = modelU; } public URL getSourceFilesURL() { return this.sourceFilesURL; } public List getAnnotationTypes() { return this.annotationTypes; } public void setAnnotationTypes(List newTypes) { annotationTypes = newTypes; } }