GATE.ac.uk - releases/gate-5.1-beta2-build3402-ALL/plugins/Alignment/src/gate/compound/impl/AbstractCompoundDocument.java

package gate.compound.impl;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.StringReader;
import java.io.StringWriter;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;

import gate.AnnotationSet;
import gate.DataStore;
import gate.Document;
import gate.DocumentContent;
import gate.Factory;
import gate.FeatureMap;
import gate.Gate;
import gate.Resource;
import gate.alignment.Alignment;
import gate.annotation.AnnotationSetImpl;
import gate.compound.CompoundDocument;
import gate.corpora.DocumentContentImpl;
import gate.corpora.DocumentImpl;
import gate.creole.ResourceInstantiationException;
import gate.event.CreoleEvent;
import gate.event.DatastoreEvent;
import gate.event.DocumentListener;
import gate.util.GateRuntimeException;
import gate.util.InvalidOffsetException;
import gate.util.SimpleFeatureMapImpl;
import gate.util.Strings;

/**
 * This is an abstract implementation of the AbstractAlignedDocument
 * This class overrides the methods of DocumentImpl and provide generic
 * implementation of some of the methods of AlignedDocument
 * 
 * @author niraj
 */
public abstract class AbstractCompoundDocument extends DocumentImpl implements
                                                                   CompoundDocument {

  /** The encoding of the source of the document content */
  protected String encoding = null;

  /**
   * If you set this flag to true the repositioning information for the
   * document will be kept in the document feature. <br>
   * Default value is false to avoid the unnecessary waste of time and
   * memory
   */
  protected Boolean collectRepositioningInfo = new Boolean(false);

  /**
   * If you set this flag to true the original content of the document
   * will be kept in the document feature. <br>
   * Default value is false to avoid the unnecessary waste of memory
   */
  protected Boolean preserveOriginalContent = new Boolean(false);

  /**
   * Languages
   */
  protected List<String> documentIDs;

  /** The source URL */
  protected URL sourceUrl;

  /**
   * Curent Document
   */
  protected Document currentDocument;

  /**
   * Available documents
   */
  protected Map<String, Document> documents;

  /** Freeze the serialization UID. */
  static final long serialVersionUID = -8456893608311510260L;

  private transient Vector<DocumentListener> documentListeners;

  /**
   * The start of the range that the content comes from at the source
   * URL (or null if none).
   */
  protected Long sourceUrlStartOffset;

  /**
   * The end of the range that the content comes from at the source URL
   * (or null if none).
   */
  protected Long sourceUrlEndOffset;

  /** Is the document markup-aware? */
  protected Boolean markupAware = new Boolean(false);

  /** Clear all the data members of the object. */
  public void cleanup() {
    Iterator<Document> iter = documents.values().iterator();
    while(iter.hasNext()) {
      Document doc = iter.next();
      doc.cleanup();
    }
  } // cleanup()

  /** Cover unpredictable Features creation */
  public FeatureMap getFeatures() {
    if(currentDocument == null) {
      if(features == null) {
        features = new SimpleFeatureMapImpl();
      }
      return features;
    }
    else {
      return currentDocument.getFeatures();
    }
  }

  /** Documents are identified by URLs */
  public URL getSourceUrl() {
    if(currentDocument == null) {
      return sourceUrl;
    }
    return currentDocument.getSourceUrl();
  }

  /** Set method for the document's URL */
  public void setSourceUrl(URL sourceUrl) {
    if(currentDocument == null) {
      this.sourceUrl = sourceUrl;
    }
    else {
      currentDocument.setSourceUrl(sourceUrl);
    }
  } // setSourceUrl

  /**
   * Documents may be packed within files; in this case an optional pair
   * of offsets refer to the location of the document.
   */
  public Long[] getSourceUrlOffsets() {
    if(currentDocument == null) {
      return new Long[] {sourceUrlStartOffset, sourceUrlEndOffset};
    }
    return currentDocument.getSourceUrlOffsets();
  } // getSourceUrlOffsets

  /**
   * Allow/disallow preserving of the original document content. If is
   * <B>true</B> the original content will be retrieved from the
   * DocumentContent object and preserved as document feature.
   */
  public void setPreserveOriginalContent(Boolean b) {
    if(currentDocument == null) {
      this.preserveOriginalContent = b;
    }
    else {
      currentDocument.setPreserveOriginalContent(b);
    }
  } // setPreserveOriginalContent

  /**
   * Get the preserving of content status of the Document.
   * 
   * @return whether the Document should preserve it's original content.
   */
  public Boolean getPreserveOriginalContent() {
    if(currentDocument == null) {
      return preserveOriginalContent;
    }
    else {
      return currentDocument.getPreserveOriginalContent();
    }
  } // getPreserveOriginalContent

  /**
   * Allow/disallow collecting of repositioning information. If is
   * <B>true</B> information will be retrieved and preserved as
   * document feature.<BR>
   * Preserving of repositioning information give the possibilities for
   * converting of coordinates between the original document content and
   * extracted from the document text.
   */
  public void setCollectRepositioningInfo(Boolean b) {
    if(currentDocument == null) {
      collectRepositioningInfo = b;
    }
    else {
      currentDocument.setCollectRepositioningInfo(b);
    }
  } // setCollectRepositioningInfo

  /**
   * Get the collectiong and preserving of repositioning information for
   * the Document. <BR>
   * Preserving of repositioning information give the possibilities for
   * converting of coordinates between the original document content and
   * extracted from the document text.
   * 
   * @return whether the Document should collect and preserve
   *         information.
   */
  public Boolean getCollectRepositioningInfo() {
    if(currentDocument == null) {
      return collectRepositioningInfo;
    }
    else {
      return currentDocument.getCollectRepositioningInfo();
    }
  } // getCollectRepositioningInfo

  /**
   * Documents may be packed within files; in this case an optional pair
   * of offsets refer to the location of the document. This method gets
   * the start offset.
   */
  public Long getSourceUrlStartOffset() {
    if(currentDocument == null) {
      return sourceUrlStartOffset;
    }
    else {
      return currentDocument.getSourceUrlStartOffset();
    }
  }

  /**
   * Documents may be packed within files; in this case an optional pair
   * of offsets refer to the location of the document. This method sets
   * the start offset.
   */
  public void setSourceUrlStartOffset(Long sourceUrlStartOffset) {
    if(currentDocument == null) {
      this.sourceUrlStartOffset = sourceUrlStartOffset;
    }
    else {
      currentDocument.setSourceUrlStartOffset(sourceUrlStartOffset);
    }

  } // setSourceUrlStartOffset

  /**
   * Documents may be packed within files; in this case an optional pair
   * of offsets refer to the location of the document. This method gets
   * the end offset.
   */
  public Long getSourceUrlEndOffset() {
    if(currentDocument == null) {
      return sourceUrlEndOffset;
    }
    else {
      return currentDocument.getSourceUrlEndOffset();
    }
  }

  /**
   * Documents may be packed within files; in this case an optional pair
   * of offsets refer to the location of the document. This method sets
   * the end offset.
   */
  public void setSourceUrlEndOffset(Long sourceUrlEndOffset) {
    if(currentDocument == null) {
      this.sourceUrlEndOffset = sourceUrlEndOffset;
    }
    else {
      currentDocument.setSourceUrlEndOffset(sourceUrlEndOffset);
    }
  } // setSourceUrlStartOffset

  /**
   * The content of the document: a String for text; MPEG for video;
   * etc.
   */
  public DocumentContent getContent() {
    if(currentDocument == null) {
      // throw new RuntimeException(
      // "CompoundDocumentImpl does not contain any text but its member
      // does!"
      // + " Please use the setDocument(String documentID) to set a
      // specific document!");
      return new DocumentContentImpl("");
    }
    else {
      return currentDocument.getContent();
    }
  }

  /** Set method for the document content */
  public void setContent(DocumentContent content) {
    if(currentDocument == null) {
      System.err
              .println("CompoundDocumentImpl does not have any content but its member does!"
                      + " Please use the setDocument(String documentID) to set a specific document!");
    }
    else {
      currentDocument.setContent(content);
    }
  }

  /** Get the encoding of the document content source */
  public String getEncoding() {
    if(currentDocument == null) {
      return this.encoding;
    }
    else {
      return ((DocumentImpl)currentDocument).getEncoding();
    }
  }

  /** Set the encoding of the document content source */
  public void setEncoding(String encoding) {
    if(currentDocument == null) {
      this.encoding = encoding;
    }
    else {
      ((DocumentImpl)currentDocument).setEncoding(encoding);
    }
  }

  /**
   * Get the default set of annotations. The set is created if it
   * doesn't exist yet.
   */
  public AnnotationSet getAnnotations() {
    if(currentDocument == null) {
      System.err
              .println("CompoundDocumentImpl does not have any annotationSet but its member does!"
                      + " Please use the setDocument(String documentID) to set a specific document!");
      return new AnnotationSetImpl(this);
    }
    else {
      return currentDocument.getAnnotations();
    }
  } // getAnnotations()

  /**
   * Get a named set of annotations. Creates a new set if one with this
   * name doesn't exist yet. If the provided name is null then it
   * returns the default annotation set.
   */
  public AnnotationSet getAnnotations(String name) {
    if(currentDocument == null) {
      System.err
              .println("CompoundDocumentImpl does not have any annotationSet but its member does!"
                      + " Please use the setDocument(String documentID) to set a specific document!");
      return new AnnotationSetImpl(this);
    }
    else {
      return currentDocument.getAnnotations(name);
    }
  } // getAnnotations(name)

  /**
   * Make the document markup-aware. This will trigger the creation of a
   * DocumentFormat object at Document initialisation time; the
   * DocumentFormat object will unpack the markup in the Document and
   * add it as annotations. Documents are <B>not</B> markup-aware by
   * default.
   * 
   * @param newMarkupAware markup awareness status.
   */
  public void setMarkupAware(Boolean newMarkupAware) {
    if(currentDocument == null) {
      this.markupAware = newMarkupAware;
    }
    else {
      currentDocument.setMarkupAware(newMarkupAware);
    }
  }

  /**
   * Get the markup awareness status of the Document. <B>Documents are
   * markup-aware by default.</B>
   * 
   * @return whether the Document is markup aware.
   */
  public Boolean getMarkupAware() {
    if(currentDocument == null) {
      return this.markupAware;
    }
    else {
      return currentDocument.getMarkupAware();
    }
  }

  /**
   * Returns an XML document aming to preserve the original markups( the
   * original markup will be in the same place and format as it was
   * before processing the document) and include (if possible) the
   * annotations specified in the aSourceAnnotationSet. It is equivalent
   * to toXml(aSourceAnnotationSet, true).
   */
  public String toXml(Set aSourceAnnotationSet) {
    if(currentDocument == null) {
      System.err
              .println("CompoundDocumentImpl does not implement toXml(Set) but its member does!"
                      + " Please use the setDocument(String documentID) to set a specific document!");
      return currentDocument.toXml(new AnnotationSetImpl(this));
    }
    else {
      return currentDocument.toXml(aSourceAnnotationSet);
    }
  }

  /**
   * Returns an XML document aming to preserve the original markups( the
   * original markup will be in the same place and format as it was
   * before processing the document) and include (if possible) the
   * annotations specified in the aSourceAnnotationSet. <b>Warning:</b>
   * Annotations from the aSourceAnnotationSet will be lost if they will
   * cause a crosed over situation.
   * 
   * @param aSourceAnnotationSet is an annotation set containing all the
   *          annotations that will be combined with the original marup
   *          set. If the param is <code>null</code> it will only dump
   *          the original markups.
   * @param includeFeatures is a boolean that controls whether the
   *          annotation features should be included or not. If false,
   *          only the annotation type is included in the tag.
   * @return a string representing an XML document containing the
   *         original markup + dumped annotations form the
   *         aSourceAnnotationSet
   */
  public String toXml(Set aSourceAnnotationSet, boolean includeFeatures) {
    if(currentDocument == null) {
      System.err
              .println("CompoundDocumentImpl does not implement toXml(Set, boolean) but its member does!"
                      + " Please use the setDocument(String documentID) to set a specific document!");
      return currentDocument.toXml(new AnnotationSetImpl(this));
    }
    else {
      return currentDocument.toXml(aSourceAnnotationSet, includeFeatures);
    }
  }// End toXml()

  /**
   * Returns a GateXml document that is a custom XML format for wich
   * there is a reader inside GATE called gate.xml.GateFormatXmlHandler.
   * What it does is to serialize a GATE document in an XML format.
   * 
   * @return a string representing a Gate Xml document.
   */
  public String toXml() {
    if(currentDocument == null) {
      System.err
              .println("CompoundDocumentImpl does not implement toXml() but its member does!"
                      + " Please use the setDocument(String documentID) to set a specific document!");
      return currentDocument.toXml(new AnnotationSetImpl(this));
    }
    else {
      return currentDocument.toXml();
    }
  }// toXml

  /**
   * Gives a single XML representation for the entire document.
   * 
   * @param aCompoundDoc
   * @return
   */
  public static String toXmlAsASingleDocument(CompoundDocument aCompoundDoc) {
    Map<String, String> docXmls = new HashMap<String, String>();
    Map<String, Object> globalMap = new HashMap<String, Object>();

    for(String id : aCompoundDoc.getDocumentIDs()) {
      docXmls.put(id, aCompoundDoc.getDocument(id).toXml());
    }

    // add document xmls
    globalMap.put("docXmls", docXmls);

    // we would use XStream library to store annic patterns
    com.thoughtworks.xstream.XStream xstream = new com.thoughtworks.xstream.XStream();

    // Saving is accomplished just using XML serialization of the map.
    StringWriter stringToReturn = new StringWriter();

    // other features
    Map<String, Object> features = new HashMap<String, Object>();
    features.put("encoding", aCompoundDoc.getEncoding());
    features.put("collectRepositioningInfo", aCompoundDoc
            .getCollectRepositioningInfo());
    features.put("preserveOriginalContent", aCompoundDoc
            .getPreserveOriginalContent());
    features.put("documentIDs", aCompoundDoc.getDocumentIDs());
    features.put("markupAware", new Boolean(true));
    features.put("name", aCompoundDoc.getName());
    globalMap.put("feats", features);

    Document aDoc = aCompoundDoc.getCurrentDocument();
    aCompoundDoc.setCurrentDocument(null);
    globalMap.put("docFeats", aCompoundDoc.getFeatures());
    if(aDoc != null) aCompoundDoc.setCurrentDocument(aDoc.getName());

    xstream.toXML(globalMap, stringToReturn);
    return stringToReturn.toString();
  }

  /**
   * Loads the compound document with given xmlString. Please note that
   * the string should have been generated with the
   * toXmlAsASingleDocument method.
   * 
   * @param xmlString
   * @return
   */
  public static CompoundDocument fromXml(String xmlString) {
    StringReader reader = new StringReader(xmlString);
    com.thoughtworks.xstream.XStream xstream = new com.thoughtworks.xstream.XStream(
            new com.thoughtworks.xstream.io.xml.StaxDriver());

    // asking the xstream library to use gate class loader
    xstream.setClassLoader(Gate.getClassLoader());

    // reading the xml object
    Map<String, Object> globalMap = (HashMap<String, Object>)xstream
            .fromXML(reader);

    // now we read individual information
    Map<String, String> docXmls = (HashMap<String, String>)globalMap
            .get("docXmls");
    Map<String, Object> features = (Map<String, Object>)globalMap.get("feats");
    String encoding = (String)features.get("encoding");

    try {
      File tempFile = File.createTempFile("example", ".xml");
      File tempFolder = new File(tempFile.getParentFile(), "temp"
              + Gate.genSym());
      
      if(!tempFolder.exists() && !tempFolder.mkdirs()) {
        throw new GateRuntimeException("Temporary folder "
                + tempFolder.getAbsolutePath() + " could not be created");
      }
      tempFile.deleteOnExit();
      tempFolder.deleteOnExit();

      URL sourceUrl = null;
      List<String> docIDs = new ArrayList<String>();
      for(String id : docXmls.keySet()) {
        docIDs.add(id);
        File newFile = new File("X." + id + ".xml");
        newFile.deleteOnExit();
        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(newFile), encoding));
        bw.write(docXmls.get(id));
        bw.flush();
        bw.close();
        sourceUrl = newFile.toURI().toURL();
      }

      features.put("sourceUrl", sourceUrl);
      String name = (String)features.get("name");
      features.remove("name");
      FeatureMap fets = Factory.newFeatureMap();
      for(String s : features.keySet()) {
        fets.put(s, features.get(s));
      }

      FeatureMap hideFeats = Factory.newFeatureMap();
      //Gate.setHiddenAttribute(hideFeats, true);
//      CompoundDocument cd = new gate.compound.impl.CompoundDocumentImpl();
//      cd.setName(name);
//      cd.setSourceUrl(sourceUrl);
//      ((CompoundDocumentImpl) cd).setEncoding(encoding);
//      ((CompoundDocumentImpl) cd).setDocumentIDs(docIDs);
//      cd.init();
      CompoundDocument cd = (CompoundDocument)Factory.createResource(
              "gate.compound.impl.CompoundDocumentImpl", fets, hideFeats);
      cd.setName(name);
      Document aDoc = cd.getCurrentDocument();
      cd.setCurrentDocument(null);
      FeatureMap docFeatures = (FeatureMap)globalMap.get("docFeats");
      for(Object key : docFeatures.keySet()) {
        Object value = docFeatures.get(key);
        if(value instanceof Alignment) {
          ((Alignment)value).setSourceDocument(cd);
        }
      }
      
      cd.setFeatures(docFeatures);
      
      if(aDoc != null) cd.setCurrentDocument(aDoc.getName());

      
      return cd;
    }
    catch(IOException ioe) {
      throw new GateRuntimeException(ioe);
    }
    catch(ResourceInstantiationException rie) {
      throw new GateRuntimeException(rie);
    } finally {
      if(reader != null)
        reader.close();
    }

  }

  /**
   * Returns a map with the named annotation sets. It returns
   * <code>null</code> if no named annotaton set exists.
   */
  public Map<String, AnnotationSet> getNamedAnnotationSets() {
    if(currentDocument == null) {
      System.err
              .println("CompoundDocumentImpl does not implement getNamedAnnotationSets() but its member does!"
                      + " Please use the setDocument(String documentID) to set a specific document!");
      return new HashMap<String, AnnotationSet>();
    }
    else {
      return currentDocument.getNamedAnnotationSets();
    }
  } // getNamedAnnotationSets

  public Set<String> getAnnotationSetNames() {
    if(currentDocument == null) {
      System.err
              .println("CompoundDocumentImpl does not implement getAnnotationSetNames() but its member does!"
                      + " Please use the setDocument(String documentID) to set a specific document!");
      return new HashSet<String>();
    }
    else {
      return currentDocument.getAnnotationSetNames();
    }
  }

  /**
   * Removes one of the named annotation sets. Note that the default
   * annotation set cannot be removed.
   * 
   * @param name the name of the annotation set to be removed
   */
  public void removeAnnotationSet(String name) {
    if(currentDocument == null) {
      System.err
              .println("CompoundDocumentImpl does not have any annotationSets!"
                      + " Please use the setDocument(String documentID) to set a specific document!");
    }
    else {
      currentDocument.removeAnnotationSet(name);
    }
  }

  /** Propagate edit changes to the document content and annotations. */
  public void edit(Long start, Long end, DocumentContent replacement)
          throws InvalidOffsetException {
    if(currentDocument == null) {
      System.err
              .println("CompoundDocumentImpl does not have any content!"
                      + " Please use the setDocument(String documentID) to set a specific document!");
    }
    else {
      currentDocument.edit(start, end, replacement);
    }
  } // edit(start,end,replacement)

  /** Ordering based on URL.toString() and the URL offsets (if any) */
  public int compareTo(Object o) throws ClassCastException {
    CompoundDocument other = (CompoundDocument)o;
    return toString().compareTo(other.toString());
  } // compareTo

  /** String respresentation */
  public String toString() {
    if(currentDocument == null) {
      String n = Strings.getNl();
      StringBuffer s = new StringBuffer("CompoundDocumentImpl: " + n);
      s.append("  encoding:" + encoding + n);
      s.append("  features:" + features + n);
      s.append("  markupAware:" + markupAware + n);
      s.append("  sourceUrl:" + sourceUrl + n);
      s.append(n);
      return s.toString();
    }
    else {
      return currentDocument.toString();
    }
  } // toString

  public void removeDocument(String documentID) {
    Document doc = (Document)documents.get(documentID);
    if(doc == null) return;
    Factory.deleteResource(doc);
  }

  public synchronized void removeDocumentListener(DocumentListener l) {
    if(currentDocument != null) {
      currentDocument.removeDocumentListener(l);
    }
    else {
      if(documentListeners != null && documentListeners.contains(l)) {
        Vector v = (Vector)documentListeners.clone();
        v.removeElement(l);
        documentListeners = v;
      }
    }
  }

  public synchronized void addDocumentListener(DocumentListener l) {
    if(currentDocument != null) {
      currentDocument.addDocumentListener(l);
    }
    else {
      Vector v = documentListeners == null
              ? new Vector<DocumentListener>(2)
              : (Vector)documentListeners.clone();
      if(!v.contains(l)) {
        v.addElement(l);
        documentListeners = v;
      }
    }
  }

  public void resourceLoaded(CreoleEvent e) {
  }

  public void resourceUnloaded(CreoleEvent e) {
  }

  public void datastoreOpened(CreoleEvent e) {
  }

  public void datastoreCreated(CreoleEvent e) {
  }

  public void resourceRenamed(Resource resource, String oldName, String newName) {
  }

  private void deleteAllDocs() {
    Set keys = documents.keySet();
    Iterator iter = keys.iterator();
    while(iter.hasNext()) {
      Object key = iter.next();
      Document doc = (Document)documents.get(key);
      Factory.deleteResource(doc);
    }
  }

  public void datastoreClosed(CreoleEvent e) {
    if(!e.getDatastore().equals(this.getDataStore())) return;
    // we also remove other documents
    deleteAllDocs();
    // close this lr, since it cannot stay open when the DS it comes
    // from
    // is closed
    Factory.deleteResource(this);
  }

  public void setLRPersistenceId(Object lrID) {
    super.setLRPersistenceId(lrID);
    // make persistent documents listen to the creole register
    // for events about their DS
    Gate.getCreoleRegister().addCreoleListener(this);
  }

  public void resourceAdopted(DatastoreEvent evt) {
  }

  public void resourceDeleted(DatastoreEvent evt) {
    if(!evt.getSource().equals(this.getDataStore())) return;
    // if an open document is deleted from a DS, then
    // it must close itself immediately, as is no longer valid
    if(evt.getResourceID().equals(this.getLRPersistenceId())) {
      deleteAllDocs();
      Factory.deleteResource(this);
    }
  }

  public void resourceWritten(DatastoreEvent evt) {
  }

  public void setDataStore(DataStore dataStore)
          throws gate.persist.PersistenceException {
    super.setDataStore(dataStore);
    if(this.dataStore != null) this.dataStore.addDatastoreListener(this);
  }

  public Document getCurrentDocument() {
    return currentDocument;
  }

  public Document getDocument(String documentID) {
    Object obj = documents.get(documentID);
    if(obj == null) {
      return this;
    }
    else {
      return (Document)obj;
    }
  }

  public void setCurrentDocument(String documentID) {
    if(documentID == null) {
      currentDocument = null;
      return;
    }

    Object obj = documents.get(documentID);
    if(obj == null) {
      currentDocument = null;
    }
    else {
      currentDocument = (Document)obj;
    }
  }

  public Map getDocuments() {
    return documents;
  }

  public List<String> getDocumentIDs() {
    return documentIDs;
  }

  public void setDocumentIDs(List<String> docIDs) {
    if(docIDs != null) {
      this.documentIDs = new ArrayList<String>();
      this.documentIDs.addAll(docIDs);
    }
    else {
      this.documentIDs = null;
    }
  }
}