package gate.composite.impl;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import gate.Annotation;
import gate.AnnotationSet;
import gate.Document;
import gate.Factory;
import gate.FeatureMap;
import gate.Gate;
import gate.composite.CombiningMethod;
import gate.composite.CombiningMethodException;
import gate.composite.CompositeDocument;
import gate.composite.OffsetDetails;
import gate.compound.CompoundDocument;
import gate.creole.ResourceInstantiationException;
import gate.util.InvalidOffsetException;
/**
* Abstract implementation of the combining method. Classes extending this class
* must call startDocument() before adding any content (i.e. before calling
* addContent()) and must call finalizeDocument() once all content has been added.
*
* @author niraj
*/
public abstract class AbstractCombiningMethod implements CombiningMethod {
protected HashMap<String, List<OffsetDetails>> offsetMappings;
protected StringBuffer documentContent;
protected String toAdd;
protected CompoundDocument containerDocument;
protected List<OffsetDetails> annotations;
protected List<OffsetDetails> offsets;
protected Set<String> annotationTypesToCopy;
private boolean startDocumentCalled = false;
/**
* User must call this method to start a composite document
*
* @param containerDocument
* - instance of compound document that the new composite is going to
* become member of.
* @param annotationTypesToCopy
* - list of types of annotations to copy underlying the unit
* annotation. Supply null to copy all the annotations. Supply an
* empty set to copy nothing.
*/
protected void startDocument(CompoundDocument containerDocument, Set<String> annotationTypesToCopy)
throws CombiningMethodException {
offsetMappings = new HashMap<String, List<OffsetDetails>>();
this.containerDocument = containerDocument;
this.annotationTypesToCopy = annotationTypesToCopy;
this.annotations = new ArrayList<OffsetDetails>();
this.offsets = new ArrayList<OffsetDetails>();
documentContent = new StringBuffer();
toAdd = "<?xml version=\"1.0\"?><composite>";
startDocumentCalled = true;
}
protected CompositeDocument finalizeDocument()
throws CombiningMethodException {
if(!startDocumentCalled)
throw new CombiningMethodException(
"CompositeDocument is not initialized - please "
+ "call the startDocument() method to initialize the "
+ "composite document");
documentContent = documentContent.insert(0, toAdd);
documentContent.append("</composite>");
FeatureMap features = Factory.newFeatureMap();
features.put("collectRepositioningInfo", containerDocument
.getCollectRepositioningInfo());
features.put("encoding", containerDocument.getEncoding());
features.put("markupAware", new Boolean(true));
features.put("preserveOriginalContent", containerDocument
.getPreserveOriginalContent());
features.put("stringContent", documentContent.toString());
FeatureMap subFeatures = Factory.newFeatureMap();
Gate.setHiddenAttribute(subFeatures, true);
CompositeDocument doc = null;
try {
doc =
(CompositeDocument)Factory.createResource(
"gate.composite.impl.CompositeDocumentImpl", features, subFeatures);
}
catch(ResourceInstantiationException e1) {
throw new CombiningMethodException(e1);
}
((gate.composite.impl.CompositeDocumentImpl)doc).disableListener = true;
// lets add all annotations now
for(OffsetDetails od : annotations) {
// obtain annotation set to add annotations to
AnnotationSet aSet =
od.getAsName() == null || od.getAsName().trim().length() == 0
? doc.getAnnotations()
: doc.getAnnotations(od.getAsName());
String type = od.getOriginalAnnotation().getType();
gate.FeatureMap f = od.getOriginalAnnotation().getFeatures();
Integer id;
try {
id =
aSet.add(new Long(od.getNewStartOffset()), new Long(od
.getNewEndOffset()), type, f);
od.setNewAnnotation(aSet.get(id));
}
catch(InvalidOffsetException e) {
throw new CombiningMethodException(e);
}
}
((gate.composite.impl.CompositeDocumentImpl)doc).disableListener = false;
doc.setName(CompositeDocument.COMPOSITE_DOC_NAME);
doc.setCombiningMethod(this);
doc.setOffsetMappingInformation(offsetMappings);
doc.setCombinedDocumentsIds(new HashSet<String>(containerDocument
.getDocumentIDs()));
doc.setCompoundDocument(containerDocument);
return doc;
}
/**
* Returns the Ids of combined documents
*
* @return
*/
public Set<String> getCombinedDocumentsIds() {
return offsetMappings.keySet();
}
/**
* This method returns the new offset for where the content was added
*
* @param srcDocument
* @param inputAS
* @param unitAnnotation
* @param copyUnderlyingAnnotations
* @return
*/
protected long[] addContent(Document srcDocument, Annotation unitAnnotation)
throws CombiningMethodException {
if(!startDocumentCalled)
throw new CombiningMethodException(
"CompositeDocument is not initialized - please "
+ "call the startDocument() method to initialize the "
+ "composite document");
String documentID = srcDocument.getName();
offsets = offsetMappings.get(documentID);
if(offsets == null) {
offsets = new ArrayList<OffsetDetails>();
offsetMappings.put(documentID, offsets);
}
OffsetDetails offset = new OffsetDetails();
offset.setOldStartOffset(unitAnnotation.getStartNode().getOffset()
.longValue());
offset.setOldEndOffset(unitAnnotation.getEndNode().getOffset().longValue());
offset.setNewStartOffset(documentContent.length());
try {
documentContent.append(srcDocument.getContent().getContent(
unitAnnotation.getStartNode().getOffset(),
unitAnnotation.getEndNode().getOffset()));
}
catch(InvalidOffsetException e2) {
throw new CombiningMethodException(e2);
}
offset.setNewEndOffset(documentContent.length());
offset.setOriginalAnnotation(unitAnnotation);
offsets.add(offset);
annotations.add(offset);
OffsetDetails unitAnnotDetails = new OffsetDetails();
unitAnnotDetails.setOldStartOffset(offset.getOldStartOffset());
unitAnnotDetails.setOldEndOffset(offset.getOldEndOffset());
unitAnnotDetails.setNewStartOffset(offset.getNewStartOffset());
unitAnnotDetails.setNewEndOffset(offset.getNewEndOffset());
offsets.add(unitAnnotDetails);
if(annotationTypesToCopy == null || !annotationTypesToCopy.isEmpty()) {
// copy annotations under the default annotation set
copyAnnotations(srcDocument.getAnnotations(), unitAnnotation, offset);
// copy annotations from all the named annotation set
Map<String, AnnotationSet> annotationSets =
srcDocument.getNamedAnnotationSets();
if(annotationSets != null) {
for(String asName : annotationSets.keySet()) {
copyAnnotations(srcDocument.getAnnotations(asName), unitAnnotation,
offset);
}
}
}
documentContent.append("\n");
offsetMappings.put(documentID, offsets);
return new long[]{unitAnnotDetails.getNewStartOffset(),
unitAnnotDetails.getNewEndOffset()};
}
private void copyAnnotations(AnnotationSet inputAS,
Annotation unitAnnotation, OffsetDetails boundaries) {
AnnotationSet tempSet =
inputAS.getContained(unitAnnotation.getStartNode().getOffset(),
unitAnnotation.getEndNode().getOffset());
if(annotationTypesToCopy != null && !annotationTypesToCopy.isEmpty()) {
tempSet = tempSet.get(annotationTypesToCopy);
}
Iterator<Annotation> iter = tempSet.iterator();
while(iter.hasNext()) {
Annotation anAnnot = iter.next();
OffsetDetails anOffset = new OffsetDetails();
anOffset
.setOldStartOffset(anAnnot.getStartNode().getOffset().longValue());
anOffset.setOldEndOffset(anAnnot.getEndNode().getOffset().longValue());
long stDiff =
anOffset.getOldStartOffset() - boundaries.getOldStartOffset();
long enDiff = anOffset.getOldEndOffset() - boundaries.getOldEndOffset();
anOffset.setNewStartOffset(boundaries.getNewStartOffset() + stDiff);
anOffset.setNewEndOffset(boundaries.getNewEndOffset() + enDiff);
anOffset.setOriginalAnnotation(anAnnot);
// this will be interned - making it easier to store and less expensive
anOffset.setAsName(inputAS.getName());
offsets.add(anOffset);
annotations.add(anOffset);
}
}
}