GATE.ac.uk - releases/gate-5.1-beta2-build3402-ALL/plugins/Copy_Annots_Between_Docs/src/gate/copyAS2AnoDoc/CopyAS2AnoDocMain.java

/*
 *  CopyAS2AnoDocMain.java
 * 
 *  Yaoyong Li 08/10/2007
 *
 *  $Id: CopyAS2AnoDocMain.java, v 1.0 2009-05-10 11:44:16 +0000 yaoyong $
 */
package gate.copyAS2AnoDoc;

import gate.Annotation;
import gate.AnnotationSet;
import gate.Document;
import gate.Factory;
import gate.ProcessingResource;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.util.ExtensionFileFilter;
import gate.util.InvalidOffsetException;

import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;

/**
 * Processing resource that copies annotations from files stored in a source
 * directory into the documents of the corpus being processed.
 * <p>
 * For every document handed to {@link #execute()}, the not-yet-used file in
 * the source directory whose name shares the longest leading run of
 * characters with the document name is loaded as a GATE document, the
 * requested annotations are read from its input annotation set and added to
 * the output annotation set of the current document.
 * <p>
 * The list of source files and the "already used" flags are kept in instance
 * fields between calls to {@link #execute()}; they are rebuilt whenever the
 * first document of the corpus is processed.
 */
public class CopyAS2AnoDocMain extends AbstractLanguageAnalyser implements
ProcessingResource {

  /** Location of the directory holding the files to copy annotations from. */
  URL sourceFilesURL = null;
  /** Name of the annotation set to read in the source files (null/empty = default set). */
  private String inputASName;
  /** Name of the annotation set to write in the corpus documents (null/empty = default set). */
  private String outputASName;
  /** Annotation types to copy; null or empty means every annotation is copied. */
  private List annotationTypes;

  /** Entries of the source directory, sorted by name; built at the start of a session. */
  File[] xmlFiles = null;
  /** fileNotUsedYet[i] is true while xmlFiles[i] has not been paired with a document. */
  boolean [] fileNotUsedYet = null;

  /** Initialise this resource, and return it. */
  public gate.Resource init() throws ResourceInstantiationException {
    return this;
  } // init()

  /**
   * Run the resource on the current document: find the matching file in the
   * source directory and copy its annotations into the current document.
   * <p>
   * Problems with an individual source file (it cannot be loaded, or one of
   * its annotations does not fit the current document) are reported on the
   * console and the method returns normally, so the remaining documents are
   * still processed.
   * 
   * @throws ExecutionException if the corpus or the document is missing, or
   *           if the source directory is not set or cannot be listed
   */
  public void execute() throws ExecutionException {
    
    //  now we need to see if the corpus is provided
    if(corpus == null)
      throw new ExecutionException("Provided corpus is null!");

    if(corpus.size() == 0)
      throw new ExecutionException("No Document found in corpus!");

    if(document == null)
      throw new ExecutionException("No document provided!");
    
    int positionDoc = corpus.indexOf(document);

    // A new session starts with the first document of the corpus. The file
    // list is also built when it is still missing, e.g. because the first
    // document seen by this resource is not at position 0 of the corpus.
    if(positionDoc == 0 || xmlFiles == null || fileNotUsedYet == null) {
      startSession();
    }
    
    //for current document in the corpus, find the corresponding document in the source dir
    int filePos = findCorresFile(document.getName(), xmlFiles, fileNotUsedYet);
    if(filePos<0) {
      System.out.println("Cannot find a corresponding file in the source dir for"
      + " the current document "+document.getName());
      return;
    }
    // The file is marked as used before it is loaded, so a file that fails to
    // load is not offered to a later document either.
    fileNotUsedYet[filePos] = false;
    
    copyAnnotationsFrom(xmlFiles[filePos]);
  }

  /**
   * Starts a new copying session: announces it on the console, lists the
   * source directory and marks every listed entry as not used yet.
   * 
   * @throws ExecutionException if no source URL is set or the directory
   *           cannot be listed
   */
  private void startSession() throws ExecutionException {
    // Drop the state of a previous session first so that a failure below
    // cannot leave a stale file list behind.
    xmlFiles = null;
    fileNotUsedYet = null;

    if(sourceFilesURL == null)
      throw new ExecutionException("No source directory URL provided!");

    System.out.println("\n\n------------ new session starts ------------\n");
    System.out.println("Copy the Annotation Set "+inputASName +" from the files in"
      +sourceFilesURL.getPath() + " to the files in the corpus as AS "+ outputASName);

    //collect all the file names in the source dir
    File sourceDir = sourceDirectory();
    File[] listed = sourceDir.listFiles();
    // listFiles() returns null when the path is not a directory or an I/O
    // error occurs.
    if(listed == null)
      throw new ExecutionException("Cannot list the source directory "
        + sourceDir.getAbsolutePath());

    Arrays.sort(listed, new Comparator<File>() {
      public int compare(File a, File b) {
        return a.getName().compareTo(b.getName());
      }
    });
    boolean [] unused = new boolean[listed.length];
    Arrays.fill(unused, true);

    xmlFiles = listed;
    fileNotUsedYet = unused;
  }

  /**
   * Converts the source URL into a File. Going through a URI decodes
   * percent-escaped characters (such as %20 for a space), which
   * URL.getPath() leaves untouched; the raw path is used as a fall-back when
   * the URL cannot be converted that way.
   */
  private File sourceDirectory() {
    try {
      return new File(sourceFilesURL.toURI());
    }
    catch(java.net.URISyntaxException e) {
      // e.g. the URL contains a character that is illegal in a URI
      return new File(sourceFilesURL.getPath());
    }
    catch(IllegalArgumentException e) {
      // the URI is not one that File accepts, e.g. not a plain file: URI
      return new File(sourceFilesURL.getPath());
    }
  }

  /**
   * Loads the given source file as a GATE document and adds the selected
   * annotations to the output annotation set of the current document. The
   * temporary document is always released again, also when copying fails.
   */
  private void copyAnnotationsFrom(File sourceFile) {
    //load the corresponding file to GATE
    Document docCorres = null;
    try {
      docCorres = Factory.newDocument(sourceFile.toURI().toURL(), "UTF-8");
      AnnotationSet sourceAS = null;
      if(inputASName == null || inputASName.length()==0)
        sourceAS = docCorres.getAnnotations();
      else 
        sourceAS = docCorres.getAnnotations(inputASName);
      AnnotationSet asToCopy = null;
      if (annotationTypes != null && annotationTypes.size() > 0) {
        // only the requested annotation types
        asToCopy = sourceAS.get(new HashSet(annotationTypes));
      } else {
        // transfer everything
        asToCopy = sourceAS.get();
      }
      System.out.println("Copying from "+sourceFile.getName() +" to "+ 
        document.getName());
      
      //get the target annotation set
      AnnotationSet targetAS = null;
      if(outputASName == null || outputASName.length()==0)
        targetAS = document.getAnnotations();
      else
        targetAS = document.getAnnotations(outputASName);
      
      //copy the annotations from source file to target file
      // NOTE: the feature map of each source annotation is handed over as it
      // is, it is not duplicated.
      for(Object obj:asToCopy) {
        Annotation oneAnn = (Annotation)obj;
        targetAS.add(oneAnn.getStartNode().getOffset(), oneAnn.getEndNode().getOffset(),
          oneAnn.getType(), oneAnn.getFeatures());
      }
    }
    catch(ResourceInstantiationException e) {
      e.printStackTrace();
    }
    catch(MalformedURLException e) {
      e.printStackTrace();
    }
    catch(InvalidOffsetException e) {
      e.printStackTrace();
    }
    finally {
      // release the temporary document on every path, not only on success
      if(docCorres != null)
        Factory.deleteResource(docCorres);
    }
  }
  
  /**
   * Finds the not-yet-used file whose name has the longest common prefix with
   * the given name. At least one leading character has to match; when several
   * files share the same prefix length, the one with the lowest index wins.
   * 
   * @param fileName name to find a counterpart for
   * @param xmlFiles candidate files
   * @param notUsedYet flags parallel to xmlFiles; entries that are false are skipped
   * @return index of the best matching file in xmlFiles, or -1 if there is none
   */
  private int findCorresFile(String fileName, File [] xmlFiles, boolean [] notUsedYet) {
    int bestPos = -1;
    int bestPrefixLen = 0;
    for(int i=0; i<xmlFiles.length; ++i) {
      if(!notUsedYet[i]) continue;
      String candidate = xmlFiles[i].getName();
      int limit = Math.min(fileName.length(), candidate.length());
      // length of the common prefix of the two names
      int prefixLen = 0;
      while(prefixLen < limit
        && fileName.charAt(prefixLen) == candidate.charAt(prefixLen))
        ++prefixLen;
      // strictly longer only, so the first of several equal matches is kept
      if(prefixLen > bestPrefixLen) {
        bestPos = i;
        bestPrefixLen = prefixLen;
      }
    }
    return bestPos;
  }

  /** Sets the name of the annotation set to read in the source files. */
  public void setInputASName(String iasn) {
    this.inputASName = iasn;
  }

  /** Returns the name of the annotation set to read in the source files. */
  public String getInputASName() {
    return this.inputASName;
  }

  /** Sets the name of the annotation set to write in the corpus documents. */
  public void setOutputASName(String iasn) {
    this.outputASName = iasn;
  }

  /** Returns the name of the annotation set to write in the corpus documents. */
  public String getOutputASName() {
    return this.outputASName;
  }

  /** Sets the URL of the directory holding the source files. */
  public void setSourceFilesURL(URL modelU) {
    this.sourceFilesURL = modelU;
  }

  /** Returns the URL of the directory holding the source files. */
  public URL getSourceFilesURL() {
    return this.sourceFilesURL;
  }
  
  /** Returns the annotation types to copy; null or empty means all of them. */
  public List getAnnotationTypes() {
    return this.annotationTypes;
  }

  /** Sets the annotation types to copy; null or empty means all of them. */
  public void setAnnotationTypes(List newTypes) {
    annotationTypes = newTypes;
  }

}