1   /*
2    *  DocumentImpl.java
3    *
4    *  Copyright (c) 1998-2001, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Hamish Cunningham, 11/Feb/2000
12   *
13   *  $Id: DocumentImpl.java,v 1.124 2003/07/29 11:36:02 valyt Exp $
14   */
15  
16  package gate.corpora;
17  
18  import java.util.*;
19  import java.net.*;
20  import java.io.*;
21  
22  import gate.*;
23  import gate.annotation.*;
24  import gate.util.*;
25  import gate.creole.*;
26  import gate.gui.*;
27  import gate.event.*;
28  
29  /** Represents the commonalities between all sorts of documents.
30    *
31    * <H2>Editing</H2>
32    *
33    * <P>
34    * The DocumentImpl class implements the Document interface.
35    * The DocumentContentImpl class models the textual or audio-visual
36    * materials which are the source and content of Documents.
37    * The AnnotationSetImpl class supplies annotations on Documents.
38    *
39    * <P>
40    * Abbreviations:
41    *
42    * <UL>
43    * <LI>
44    * DC = DocumentContent
45    * <LI>
46    * D = Document
47    * <LI>
48    * AS = AnnotationSet
49    * </UL>
50    *
51    * <P>
52    * We add an edit method to each of these classes; for DC and AS
53    * the methods are package private; D has the public method.
54    *
55    * <PRE>
56    *   void edit(Long start, Long end, DocumentContent replacement)
57    *   throws InvalidOffsetException;
58    * </PRE>
59    *
60    * <P>
61    * D receives edit requests and forwards them to DC and AS.
62    * On DC, this method makes a change to the content - e.g. replacing
63    * a String range from start to end with replacement. (Deletions
64    * are catered for by having replacement = null.) D then calls
65    * AS.edit on each of its annotation sets.
66    *
67    * <P>
68    * On AS, edit calls replacement.size() (i.e. DC.size()) to
69    * figure out how long the replacement is (0 for null). It then
70    * considers annotations that terminate (start or end) in
71    * the altered or deleted range as invalid; annotations that
72    * terminate after the range have their offsets adjusted.
73    * I.e.:
74    * <UL>
75    * <LI>
76    * the nodes that pointed inside the old modified area are invalid now and
77    * will be deleted along with the connected annotations;
78    * <LI>
79    * the nodes that are before the start of the modified area remain
80    * untouched;
81    * <LI>
82    * the nodes that are after the end of the affected area will have the
83    * offset changed according to the formula below.
84    * </UL>
85    *
86    * <P>
87    * A note re. AS and annotations: annotations no longer have
88    * offsets as in the old model, they now have nodes, and nodes
89    * have offsets.
90    *
91    * <P>
92    * To implement AS.edit, we have several indices:
93    * <PRE>
94    *   HashMap annotsByStartNode, annotsByEndNode;
95    * </PRE>
96    * which map node ids to annotations;
97    * <PRE>
98    *   RBTreeMap nodesByOffset;
99    * </PRE>
100   * which maps offset to Nodes.
101   *
102   * <P>
103   * When we get an edit request, we traverse that part of the
104   * nodesByOffset tree representing the altered or deleted
105   * range of the DC. For each node found, we delete any annotations
106   * that terminate on the node, and then delete the node itself.
107   * We then traverse the rest of the tree, changing the offset
108   * on all remaining nodes by:
109   * <PRE>
110   *   newOffset =
111   *     oldOffset -
112   *     (
113   *       (end - start) -                                     // size of mod
114   *       ( (replacement == null) ? 0 : replacement.size() )  // size of repl
115   *     );
116   * </PRE>
117   * Note that we use the same convention as e.g. java.lang.String: start
118   * offsets are inclusive; end offsets are exclusive. I.e. for string "abcd"
119   * range 1-3 = "bc". Examples, for a node with offset 4:
120   * <PRE>
121   * edit(1, 3, "BC");
122   * newOffset = 4 - ( (3 - 1) - 2 ) = 4
123   *
124   * edit(1, 3, null);
125   * newOffset = 4 - ( (3 - 1) - 0 ) = 2
126   *
127   * edit(1, 3, "BBCC");
128   * newOffset = 4 - ( (3 - 1) - 4 ) = 6
129   * </PRE>
130   */
131 public class DocumentImpl
132 extends AbstractLanguageResource implements TextualDocument, CreoleListener,
133                                             DatastoreListener {
134   /** Debug flag */
135   private static final boolean DEBUG = false;
136 
137   /** If you set this flag to true the original content of the document will
138    *  be kept in the document feature. <br>
139    *  Default value is false to avoid the unnecessary waste of memory */
140   private Boolean preserveOriginalContent = new Boolean(false);
141 
142   /** If you set this flag to true the repositioning information for
143    *  the document will be kept in the document feature. <br>
144    *  Default value is false to avoid the unnecessary waste of time and memory
145    */
146   private Boolean collectRepositioningInfo = new Boolean(false);
147 
148   /**
149    * This is a variable which contains the latest crossed over annotation
150    * found during export with preserving format, i.e., toXml(annotations)
151    * method.
152    */
153   private Annotation crossedOverAnnotation = null;
154 
155   /** Default construction. Content left empty. */
156   public DocumentImpl() {
157     content = new DocumentContentImpl();
158   } // default construction
159 
160   /** Initialise this resource, and return it. */
161   public Resource init() throws ResourceInstantiationException {
162     // set up the source URL and create the content
163     if(sourceUrl == null) {
164       if(stringContent == null) {
165         throw new ResourceInstantiationException(
166           "The sourceURL and document's content were null."
167         );
168       }
169 
170       content = new DocumentContentImpl(stringContent);
171       getFeatures().put("gate.SourceURL", "created from String");
172     } else {
173       try {
174         content = new DocumentContentImpl(
175           sourceUrl, getEncoding(), sourceUrlStartOffset, sourceUrlEndOffset);
176         getFeatures().put("gate.SourceURL", sourceUrl.toExternalForm());
177       } catch(IOException e) {
178         e.printStackTrace();
179         throw new ResourceInstantiationException("DocumentImpl.init: " + e);
180       }
181 
182       if(preserveOriginalContent.booleanValue() && content != null) {
183         String originalContent = new String(
184           ((DocumentContentImpl) content).getOriginalContent());
185         getFeatures().put(GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME,
186                       originalContent);
187       } // if
188     }
189 
190     // set up a DocumentFormat if markup unpacking required
191     if(getMarkupAware().booleanValue()) {
192       DocumentFormat docFormat =
193         DocumentFormat.getDocumentFormat(this, sourceUrl);
194       try {
195         if(docFormat != null){
196           StatusListener sListener = (StatusListener)
197                                       gate.gui.MainFrame.getListeners().
198                                       get("gate.event.StatusListener");
199           if(sListener != null) docFormat.addStatusListener(sListener);
200 
201           // set the flag if true and if the document format support collecting
202           docFormat.setShouldCollectRepositioning(collectRepositioningInfo);
203 
204           if(docFormat.getShouldCollectRepositioning().booleanValue()) {
205             // unpack with collectiong of repositioning information
206             RepositioningInfo info = new RepositioningInfo();
207 
208             String origContent = (String) getFeatures().get(
209                 GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME);
210 
211             RepositioningInfo ampCodingInfo = new RepositioningInfo();
212             if(origContent != null) {
213               boolean shouldCorrectCR = docFormat instanceof XmlDocumentFormat;
214               collectInformationForAmpCodding(origContent, ampCodingInfo,
215                                               shouldCorrectCR);
216               if(docFormat instanceof HtmlDocumentFormat) {
217                 collectInformationForWS(origContent, ampCodingInfo);
218               } // if
219             } // if
220 
221             docFormat.unpackMarkup(this, info, ampCodingInfo);
222 
223             if(origContent != null
224                 && docFormat instanceof XmlDocumentFormat) {
225               // CRLF correction of RepositioningInfo
226               correctRepositioningForCRLFInXML(origContent, info);
227             } // if
228 
229             getFeatures().put(
230                 GateConstants.DOCUMENT_REPOSITIONING_INFO_FEATURE_NAME, info);
231           }
232           else {
233             // normal old fashioned unpack
234             docFormat.unpackMarkup(this);
235           }
236           docFormat.removeStatusListener(sListener);
237        } //if format != null
238       } catch(DocumentFormatException e) {
239         throw new ResourceInstantiationException(
240           "Couldn't unpack markup in document " + sourceUrl.toExternalForm() +
241           " " + e
242         );
243       }
244     } // if markup aware
245 
246 //try{
247 //  FileWriter fw = new FileWriter("d:/temp/doccontent.txt");
248 //  fw.write(getContent().toString());
249 //  fw.flush();
250 //  fw.close();
251 //}catch(IOException ioe){
252 //  ioe.printStackTrace();
253 //}
254 
255     return this;
256   } // init()
257 
258   /**
259    * Correct repositioning information for substitution of "\r\n" with "\n"
260    */
261   private void correctRepositioningForCRLFInXML(String content,
262                                             RepositioningInfo info) {
263     int index = -1;
264 
265     do {
266       index = content.indexOf("\r\n", index+1);
267       if(index != -1) {
268         info.correctInformationOriginalMove(index, 1);
269       } // if
270     } while(index != -1);
271   } // correctRepositioningForCRLF
272 
273   /**
274    * Collect information for substitution of "&xxx;" with "y"
275    *
276    * It couldn't be collected a position information about
277    * some unicode and &-coded symbols during parsing. The parser "hide" the
278    * information about the position of such kind of parsed text.
279    * So, there is minimal chance to have &-coded symbol inside the covered by
280    * repositioning records area. The new record should be created for every
281    * coded symbol outside the existing records.
282    * <BR>
283    * If <code>shouldCorrectCR</code> flag is <code>true</code> the correction
284    * for CRLF substitution is performed.
285    */
286   private void collectInformationForAmpCodding(String content,
287                                             RepositioningInfo info,
288                                             boolean shouldCorrectCR) {
289 
290     if(content == null || info == null) return;
291 
292     int ampIndex = -1;
293     int semiIndex;
294 
295     do {
296       ampIndex = content.indexOf('&', ampIndex+1);
297       if(ampIndex != -1) {
298         semiIndex = content.indexOf(';', ampIndex+1);
299         // have semicolon and it is near enough for amp codding
300         if(semiIndex != -1 && (semiIndex-ampIndex) < 8) {
301           info.addPositionInfo(ampIndex, semiIndex-ampIndex+1, 0, 1);
302         }
303         else {
304           // no semicolon or it is too far
305           // analyse for amp codding without semicolon
306           int maxEnd = Math.min(ampIndex+8, content.length());
307           String ampCandidate = content.substring(ampIndex, maxEnd);
308           int ampCodingSize = analyseAmpCodding(ampCandidate);
309 
310           if(ampCodingSize != -1) {
311             info.addPositionInfo(ampIndex, ampCodingSize, 0, 1);
312           } // if
313 
314         } // if - semicolon found
315       } // if - ampersand found
316     } while (ampIndex != -1);
317 
318     // correct the collected information to adjust it's positions
319     // with reported by the parser
320     int index = -1;
321 
322     if(shouldCorrectCR) {
323       do {
324         index = content.indexOf("\r\n", index+1);
325         if(index != -1) {
326           info.correctInformationOriginalMove(index, -1);
327         } // if
328       } while(index != -1);
329     } // if
330   } // collectInformationForAmpCodding
331 
332   /**
333    * This function compute size of the ampersand codded sequence when
334    * semicolin is not present.
335    */
336   private int analyseAmpCodding(String content) {
337     int result = -1;
338 
339     try {
340       char ch = content.charAt(1);
341 
342       switch(ch) {
343         case 'l' : // &lt
344         case 'L' : // &lt
345           if(content.charAt(2) == 't' || content.charAt(2) == 'T') {
346             result = 3;
347           } // if
348           break;
349         case 'g' : // &gt
350         case 'G' : // &gt
351           if(content.charAt(2) == 't' || content.charAt(2) == 'T') {
352             result = 3;
353           } // if
354           break;
355         case 'a' : // &amp
356         case 'A' : // &amp
357           if(content.substring(2, 4).equalsIgnoreCase("mp")) {
358             result = 4;
359           } // if
360           break;
361         case 'q' : // &quot
362         case 'Q' : // &quot
363           if(content.substring(2, 5).equalsIgnoreCase("uot")) {
364             result = 5;
365           } // if
366           break;
367         case '#' : // #number (example &#145, &#x4C38)
368           int endIndex = 2;
369           boolean hexCoded = false;
370           if(content.charAt(2) == 'x' || content.charAt(2) == 'X') {
371             // Hex codding
372             ++endIndex;
373             hexCoded = true;
374           } // if
375 
376           while (endIndex < 8
377                   && isNumber(content.charAt(endIndex), hexCoded) ) {
378             ++endIndex;
379           } // while
380           result = endIndex;
381           break;
382       } // switch
383     } catch (StringIndexOutOfBoundsException ex) {
384       // do nothing
385     } // catch
386 
387     return result;
388   } // analyseAmpCodding
389 
390   /** Check for numeric range. If hex is true the A..F range is included */
391   private boolean isNumber(char ch, boolean hex) {
392     if(ch >= '0' && ch <= '9') return true;
393 
394     if(hex) {
395       if(ch >= 'A' && ch <= 'F') return true;
396       if(ch >= 'a' && ch <= 'f') return true;
397     } // if
398 
399     return false;
400   } // isNumber
401 
402   /** HTML parser perform substitution of multiple whitespaces (WS) with
403    *  a single WS. To create correct repositioning information structure we
404    *  should keep the information for such multiple WS.
405    *  <BR>
406    *  The criteria for WS is <code>(ch <= ' ')</code>.
407    */
408   private void collectInformationForWS(String content, RepositioningInfo info) {
409 
410     if(content == null || info == null) return;
411 
412     // analyse the content and correct the repositioning information
413     char ch;
414     int startWS, endWS;
415 
416     startWS = endWS = -1;
417     int contentLength = content.length();
418 
419     for(int i=0; i<contentLength; ++i) {
420       ch = content.charAt(i);
421 
422       // is whitespace
423       if(ch <= ' ') {
424         if(startWS == -1) {
425           startWS = i;
426         } // if
427         endWS = i;
428       }
429       else {
430         if(endWS - startWS > 0) {
431           // put the repositioning information about the WS substitution
432           info.addPositionInfo(
433             (long)startWS, (long)(endWS - startWS + 1), 0, 1);
434         } // if
435         // clear positions
436         startWS = endWS = -1;
437       }// if
438     } // for
439   } // collectInformationForWS
440 
441   /** Clear all the data members of the object. */
442   public void cleanup() {
443 
444     defaultAnnots = null;
445     if ( (namedAnnotSets != null) && (!namedAnnotSets.isEmpty()))
446         namedAnnotSets.clear();
447     if (DEBUG) Out.prln("Document cleanup called");
448     if (this.lrPersistentId != null)
449       Gate.getCreoleRegister().removeCreoleListener(this);
450     if(this.getDataStore() != null)
451       this.getDataStore().removeDatastoreListener(this);
452   } // cleanup()
453 
454 
455   /** Documents are identified by URLs */
456   public URL getSourceUrl() { return sourceUrl; }
457 
458   /** Set method for the document's URL */
459   public void setSourceUrl(URL sourceUrl) {
460     this.sourceUrl = sourceUrl;
461   } // setSourceUrl
462 
463   /** Documents may be packed within files; in this case an optional pair of
464     * offsets refer to the location of the document.
465     */
466   public Long[] getSourceUrlOffsets() {
467     Long[] sourceUrlOffsets = new Long[2];
468     sourceUrlOffsets[0] = sourceUrlStartOffset;
469     sourceUrlOffsets[1] = sourceUrlEndOffset;
470     return sourceUrlOffsets;
471   } // getSourceUrlOffsets
472 
473   /**
474    * Allow/disallow preserving of the original document content.
475    * If is <B>true</B> the original content will be retrieved from
476    * the DocumentContent object and preserved as document feature.
477    */
478   public void setPreserveOriginalContent(Boolean b) {
479     preserveOriginalContent = b;
480   } // setPreserveOriginalContent
481 
482   /** Get the preserving of content status of the Document.
483    *
484    *  @return whether the Document should preserve it's original content.
485    */
486   public Boolean getPreserveOriginalContent() {
487     return preserveOriginalContent;
488   } // getPreserveOriginalContent
489 
490   /**
491    *  Allow/disallow collecting of repositioning information.
492    *  If is <B>true</B> information will be retrieved and preserved
493    *  as document feature.<BR>
494    *  Preserving of repositioning information give the possibilities
495    *  for converting of coordinates between the original document content and
496    *  extracted from the document text.
497    */
498   public void setCollectRepositioningInfo(Boolean b) {
499     collectRepositioningInfo = b;
500   } // setCollectRepositioningInfo
501 
502   /** Get the collectiong and preserving of repositioning information
503    *  for the Document. <BR>
504    *  Preserving of repositioning information give the possibilities
505    *  for converting of coordinates between the original document content and
506    *  extracted from the document text.
507    *
508    *  @return whether the Document should collect and preserve information.
509    */
510   public Boolean getCollectRepositioningInfo() {
511     return collectRepositioningInfo;
512   } // getCollectRepositioningInfo
513 
514   /** Documents may be packed within files; in this case an optional pair of
515     * offsets refer to the location of the document. This method gets the
516     * start offset.
517     */
518   public Long getSourceUrlStartOffset() { return sourceUrlStartOffset; }
519 
520   /** Documents may be packed within files; in this case an optional pair of
521     * offsets refer to the location of the document. This method sets the
522     * start offset.
523     */
524   public void setSourceUrlStartOffset(Long sourceUrlStartOffset) {
525     this.sourceUrlStartOffset = sourceUrlStartOffset;
526   } // setSourceUrlStartOffset
527 
528   /** Documents may be packed within files; in this case an optional pair of
529     * offsets refer to the location of the document. This method gets the
530     * end offset.
531     */
532   public Long getSourceUrlEndOffset() { return sourceUrlEndOffset; }
533 
534   /** Documents may be packed within files; in this case an optional pair of
535     * offsets refer to the location of the document. This method sets the
536     * end offset.
537     */
538   public void setSourceUrlEndOffset(Long sourceUrlEndOffset) {
539     this.sourceUrlEndOffset = sourceUrlEndOffset;
540   } // setSourceUrlStartOffset
541 
542   /** The content of the document: a String for text; MPEG for video; etc. */
543   public DocumentContent getContent() { return content; }
544 
545   /** Set method for the document content */
546   public void setContent(DocumentContent content) { this.content = content; }
547 
548   /** Get the encoding of the document content source */
549   public String getEncoding() {
550     //we need to make sure we ALWAYS have an encoding
551     if(encoding == null || encoding.trim().length() == 0){
552       //no encoding definded: use the platform default
553       encoding = java.nio.charset.Charset.forName(
554           System.getProperty("file.encoding")).name();
555     }
556     return encoding;
557   }
558 
559   /** Set the encoding of the document content source */
560   public void setEncoding(String encoding) { this.encoding = encoding; }
561 
562   /** Get the default set of annotations. The set is created if it
563     * doesn't exist yet.
564     */
565   public AnnotationSet getAnnotations() {
566     if(defaultAnnots == null){
567       defaultAnnots = new AnnotationSetImpl(this);
568       fireAnnotationSetAdded(new DocumentEvent(
569            this, DocumentEvent.ANNOTATION_SET_ADDED, null));
570     }//if
571     return defaultAnnots;
572   } // getAnnotations()
573 
574   /** Get a named set of annotations. Creates a new set if one with this
575     * name doesn't exist yet.
576     * If the provided name is null then it returns the default annotation set.
577     */
578   public AnnotationSet getAnnotations(String name) {
579     if(name == null) return getAnnotations();
580     if(namedAnnotSets == null)
581       namedAnnotSets = new HashMap();
582     AnnotationSet namedSet = (AnnotationSet) namedAnnotSets.get(name);
583 
584     if(namedSet == null) {
585       namedSet = new AnnotationSetImpl(this, name);
586       namedAnnotSets.put(name, namedSet);
587 
588       DocumentEvent evt = new DocumentEvent(
589         this, DocumentEvent.ANNOTATION_SET_ADDED, name
590       );
591       fireAnnotationSetAdded(evt);
592     }
593     return namedSet;
594   } // getAnnotations(name)
595 
596   /** Make the document markup-aware. This will trigger the creation
597    *  of a DocumentFormat object at Document initialisation time; the
598    *  DocumentFormat object will unpack the markup in the Document and
599    *  add it as annotations. Documents are <B>not</B> markup-aware by default.
600    *
601    *  @param b markup awareness status.
602    */
603   public void setMarkupAware(Boolean newMarkupAware) {
604       this.markupAware = newMarkupAware;
605   }
606 
607   /** Get the markup awareness status of the Document.
608    *  <B>Documents are markup-aware by default.</B>
609    *  @return whether the Document is markup aware.
610    */
611   public Boolean getMarkupAware() { return markupAware; }
612 
613   /** Returns an XML document aming to preserve the original markups(
614     * the original markup will be in the same place and format as it was
615     * before processing the document) and include (if possible)
616     * the annotations specified in the aSourceAnnotationSet.
617     * It is equivalent to toXml(aSourceAnnotationSet, true).
618     */
619   public String toXml(Set aSourceAnnotationSet){
620     return toXml(aSourceAnnotationSet, true);
621   }
622 
623   /** Returns an XML document aming to preserve the original markups(
624     * the original markup will be in the same place and format as it was
625     * before processing the document) and include (if possible)
626     * the annotations specified in the aSourceAnnotationSet.
627     * <b>Warning:</b> Annotations from the aSourceAnnotationSet will be lost
628     * if they will cause a crosed over situation.
629     * @param aSourceAnnotationSet is an annotation set containing all the
630     * annotations that will be combined with the original marup set. If the
631     * param is <code>null</code> it will only dump the original markups.
632     * @param includeFeatures is a boolean that controls whether the annotation
633     * features should be included or not. If false, only the annotation type
634     * is included in the tag.
635     * @return a string representing an XML document containing the original
636     * markup + dumped annotations form the aSourceAnnotationSet
637     */
638   public String toXml(Set aSourceAnnotationSet, boolean includeFeatures){
639 
640     if(hasOriginalContentFeatures()) {
641       return saveAnnotationSetAsXmlInOrig(aSourceAnnotationSet,includeFeatures);
642     } // if
643 
644     AnnotationSet originalMarkupsAnnotSet =
645             this.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
646 
647     // Create a dumping annotation set on the document. It will be used for
648     // dumping annotations...
649 //    AnnotationSet dumpingSet = new AnnotationSetImpl((Document) this);
650     List dumpingList = new ArrayList(originalMarkupsAnnotSet.size());
651 
652     // This set will be constructed inside this method. If is not empty, the
653     // annotation contained will be lost.
654 /*    if (!dumpingSet.isEmpty()){
655       Out.prln("WARNING: The dumping annotation set was not empty."+
656       "All annotation it contained were lost.");
657       dumpingSet.clear();
658     }// End if
659 */
660     StatusListener sListener = (StatusListener)
661                                gate.gui.MainFrame.getListeners().
662                                get("gate.event.StatusListener");
663     // Construct the dumping set in that way that all annotations will verify
664     // the condition that there are not annotations which are crossed.
665     // First add all annotation from the original markups
666     if(sListener != null)
667       sListener.statusChanged("Constructing the dumping annotation set.");
668 //    dumpingSet.addAll(originalMarkupsAnnotSet);
669     dumpingList.addAll(originalMarkupsAnnotSet);
670     // Then take all the annotations from aSourceAnnotationSet and verify if
671     // they can be inserted safely into the dumpingSet. Where not possible,
672     // report.
673     if (aSourceAnnotationSet != null){
674       Iterator iter = aSourceAnnotationSet.iterator();
675       while (iter.hasNext()){
676         Annotation currentAnnot = (Annotation) iter.next();
677         if(insertsSafety(dumpingList,currentAnnot)){
678 //          dumpingSet.add(currentAnnot);
679           dumpingList.add(currentAnnot);
680         }else if (crossedOverAnnotation != null && DEBUG){
681           try {
682             Out.prln("Warning: Annotations were found to violate the " +
683             "crossed over condition: \n" +
684             "1. [" +
685             getContent().getContent(
686                            crossedOverAnnotation.getStartNode().getOffset(),
687                            crossedOverAnnotation.getEndNode().getOffset()) +
688             " (" + crossedOverAnnotation.getType() + ": " +
689             crossedOverAnnotation.getStartNode().getOffset() +
690             ";" + crossedOverAnnotation.getEndNode().getOffset() +
691             ")]\n" +
692             "2. [" +
693             getContent().getContent(
694                            currentAnnot.getStartNode().getOffset(),
695                            currentAnnot.getEndNode().getOffset()) +
696             " (" + currentAnnot.getType() + ": " +
697             currentAnnot.getStartNode().getOffset() +
698             ";" + currentAnnot.getEndNode().getOffset() +
699             ")]\nThe second one will be discarded.\n"  );
700           } catch (gate.util.InvalidOffsetException ex) {
701             throw new GateRuntimeException(ex.getMessage());
702           }
703         }// End if
704       }// End while
705     }// End if
706 
707     //kalina: order the dumping list by start offset
708     Collections.sort(dumpingList, new gate.util.OffsetComparator());
709 
710     // The dumpingSet is ready to be exported as XML
711     // Here we go.
712     if(sListener != null) sListener.statusChanged("Dumping annotations as XML");
713     StringBuffer xmlDoc = new StringBuffer(
714           DOC_SIZE_MULTIPLICATION_FACTOR*(this.getContent().size().intValue()));
715 
716     // Add xml header if original format was xml
717     String mimeType = getFeatures() == null ?
718                       null :
719                       (String)getFeatures().get("MimeType");
720     boolean wasXML = mimeType != null && mimeType.equalsIgnoreCase("text/xml");
721 
722     if(wasXML){
723       xmlDoc.append("<?xml version=\"1.0\" encoding=\"");
724       xmlDoc.append(getEncoding());
725       xmlDoc.append("\" ?>");
726       xmlDoc.append(Strings.getNl());
727     }// ENd if
728     // Identify and extract the root annotation from the dumpingSet.
729     theRootAnnotation = identifyTheRootAnnotation(dumpingList);
730     // If a root annotation has been identified then add it eplicitley at the
731     // beginning of the document
732     if (theRootAnnotation != null){
733       dumpingList.remove(theRootAnnotation);
734       xmlDoc.append(writeStartTag(theRootAnnotation,includeFeatures));
735     }// End if
736     // Construct and append the rest of the document
737     xmlDoc.append(saveAnnotationSetAsXml(dumpingList, includeFeatures));
738     // If a root annotation has been identified then add it eplicitley at the
739     // end of the document
740     if (theRootAnnotation != null){
741       xmlDoc.append(writeEndTag(theRootAnnotation));
742     }// End if
743 
744     if(sListener != null) sListener.statusChanged("Done.");
745     return xmlDoc.toString();
746   }//End toXml()
747 
748   /** This method verifies if aSourceAnnotation can ve inserted safety into the
749     * aTargetAnnotSet. Safety means that it doesn't violate the crossed over
750     * contition with any annotation from the aTargetAnnotSet.
751     * @param aTargetAnnotSet the annotation set to include the aSourceAnnotation
752     * @param aSourceAnnotation the annotation to be inserted into the
753     * aTargetAnnotSet
754     * @return true if the annotation inserts safety, or false otherwise.
755     */
756   private boolean insertsSafety(AnnotationSet aTargetAnnotSet,
757                                                 Annotation aSourceAnnotation){
758 
759     if (aTargetAnnotSet == null || aSourceAnnotation == null) {
760       this.crossedOverAnnotation = null;
761       return false;
762     }
763     if (aSourceAnnotation.getStartNode() == null ||
764         aSourceAnnotation.getStartNode().getOffset()== null) {
765       this.crossedOverAnnotation = null;
766       return false;
767     }
768     if (aSourceAnnotation.getEndNode() == null ||
769         aSourceAnnotation.getEndNode().getOffset()== null) {
770       this.crossedOverAnnotation = null;
771       return false;
772     }
773 
774     // Get the start and end offsets
775     Long start = aSourceAnnotation.getStartNode().getOffset();
776     Long end =   aSourceAnnotation.getEndNode().getOffset();
777     // Read aSourceAnnotation offsets long
778     long s2 = start.longValue();
779     long e2 = end.longValue();
780 
781     // Obtain a set with all annotations annotations that overlap
782     // totaly or partially with the interval defined by the two provided offsets
783     AnnotationSet as = aTargetAnnotSet.get(start,end);
784 
785     // Investigate all the annotations from as to see if there is one that
786     // comes in conflict with aSourceAnnotation
787     Iterator it = as.iterator();
788     while(it.hasNext()){
789       Annotation ann = (Annotation) it.next();
790       // Read ann offsets
791       long s1 = ann.getStartNode().getOffset().longValue();
792       long e1 = ann.getEndNode().getOffset().longValue();
793 
794       if (s1<s2 && s2<e1 && e1<e2) {
795         this.crossedOverAnnotation = ann;
796         return false;
797       }
798       if (s2<s1 && s1<e2 && e2<e1) {
799         this.crossedOverAnnotation = ann;
800         return false;
801       }
802     }// End while
803     return true;
804   }// insertsSafety()
805 
806   private boolean insertsSafety(List aTargetAnnotList,
807                                                 Annotation aSourceAnnotation){
808 
809     if (aTargetAnnotList == null || aSourceAnnotation == null) {
810       this.crossedOverAnnotation = null;
811       return false;
812     }
813     if (aSourceAnnotation.getStartNode() == null ||
814         aSourceAnnotation.getStartNode().getOffset()== null) {
815       this.crossedOverAnnotation = null;
816       return false;
817     }
818     if (aSourceAnnotation.getEndNode() == null ||
819         aSourceAnnotation.getEndNode().getOffset()== null) {
820       this.crossedOverAnnotation = null;
821       return false;
822     }
823 
824     // Get the start and end offsets
825     Long start = aSourceAnnotation.getStartNode().getOffset();
826     Long end =   aSourceAnnotation.getEndNode().getOffset();
827     // Read aSourceAnnotation offsets long
828     long s2 = start.longValue();
829     long e2 = end.longValue();
830 
831     // Obtain a set with all annotations annotations that overlap
832     // totaly or partially with the interval defined by the two provided offsets
833     List as = new ArrayList();
834     for (int i=0; i < aTargetAnnotList.size(); i++) {
835       Annotation annot = (Annotation) aTargetAnnotList.get(i);
836       if (annot.getStartNode().getOffset().longValue() >= s2
837           &&
838           annot.getStartNode().getOffset().longValue() <= e2)
839         as.add(annot);
840       else if (annot.getEndNode().getOffset().longValue() >= s2
841           &&
842           annot.getEndNode().getOffset().longValue() <= e2)
843         as.add(annot);
844     }
845 
846     // Investigate all the annotations from as to see if there is one that
847     // comes in conflict with aSourceAnnotation
848     Iterator it = as.iterator();
849     while(it.hasNext()){
850       Annotation ann = (Annotation) it.next();
851       // Read ann offsets
852       long s1 = ann.getStartNode().getOffset().longValue();
853       long e1 = ann.getEndNode().getOffset().longValue();
854 
855       if (s1<s2 && s2<e1 && e1<e2) {
856         this.crossedOverAnnotation = ann;
857         return false;
858       }
859       if (s2<s1 && s1<e2 && e2<e1) {
860         this.crossedOverAnnotation = ann;
861         return false;
862       }
863     }// End while
864     return true;
865   }// insertsSafety()
866 
867   /** This method saves all the annotations from aDumpAnnotSet and combines
868     * them with the document content.
869     * @param aDumpAnnotationSet is a GATE annotation set prepared to be used
870     * on the raw text from document content. If aDumpAnnotSet is <b>null<b>
871     * then an empty string will be returned.
872     * @param includeFeatures is a boolean, which controls whether the annotation
873     * features and gate ID are included or not.
874     * @return The XML document obtained from raw text + the information from
875     * the dump annotation set.
876     */
877   private String saveAnnotationSetAsXml(AnnotationSet aDumpAnnotSet,
878                                         boolean includeFeatures){
879     String content = null;
880     if (this.getContent()== null)
881       content = new String("");
882     else
883       content = this.getContent().toString();
884     StringBuffer docContStrBuff = filterNonXmlChars(new StringBuffer(content));
885     if (aDumpAnnotSet == null)   return docContStrBuff.toString();
886 
887     TreeMap offsets2CharsMap = new TreeMap();
888     if (this.getContent().size().longValue() != 0){
889       // Fill the offsets2CharsMap with all the indices where
890       // special chars appear
891       buildEntityMapFromString(content,offsets2CharsMap);
892     }//End if
893     // The saving alghorithm is as follows:
894     ///////////////////////////////////////////
895     // Construct a set of annot with all IDs in asc order.
896     // All annotations that end at that offset swap their place in descending
897     // order. For each node write all the tags from left to right.
898 
899     // Construct the node set
900     TreeSet offsets = new TreeSet();
901     Iterator iter = aDumpAnnotSet.iterator();
902     while (iter.hasNext()){
903       Annotation annot = (Annotation) iter.next();
904       offsets.add(annot.getStartNode().getOffset());
905       offsets.add(annot.getEndNode().getOffset());
906     }// End while
907 
908     // ofsets is sorted in ascending order.
909     // Iterate this set in descending order and remove an offset at each
910     // iteration
911     while (!offsets.isEmpty()){
912       Long offset = (Long)offsets.last();
913       // Remove the offset from the set
914       offsets.remove(offset);
915       // Now, use it.
916       // Returns a list with annotations that needs to be serialized in that
917       // offset.
918       List annotations = getAnnotationsForOffset(aDumpAnnotSet,offset);
919       // Attention: the annotation are serialized from left to right
920 //      StringBuffer tmpBuff = new StringBuffer("");
921       StringBuffer tmpBuff = new StringBuffer(
922           DOC_SIZE_MULTIPLICATION_FACTOR*(this.getContent().size().intValue()));
923       Stack stack = new Stack();
924       // Iterate through all these annotations and serialize them
925       Iterator it = annotations.iterator();
926       while(it.hasNext()){
927         Annotation a = (Annotation) it.next();
928         it.remove();
929         // Test if a Ends at offset
930         if ( offset.equals(a.getEndNode().getOffset()) ){
931           // Test if a Starts at offset
932           if ( offset.equals(a.getStartNode().getOffset()) ){
933             // Here, the annotation a Starts and Ends at the offset
934             if ( null != a.getFeatures().get("isEmptyAndSpan") &&
935                  "true".equals((String)a.getFeatures().get("isEmptyAndSpan"))){
936 
937               // Assert: annotation a with start == end and isEmptyAndSpan
938               tmpBuff.append(writeStartTag(a, includeFeatures));
939               stack.push(a);
940             }else{
941               // Assert annotation a with start == end and an empty tag
942               tmpBuff.append(writeEmptyTag(a));
943               // The annotation is removed from dumped set
944               aDumpAnnotSet.remove(a);
945             }// End if
946           }else{
947             // Here the annotation a Ends at the offset.
948             // In this case empty the stack and write the end tag
949             if (!stack.isEmpty()){
950               while(!stack.isEmpty()){
951                 Annotation a1 = (Annotation)stack.pop();
952                 tmpBuff.append(writeEndTag(a1));
953               }// End while
954             }// End if
955             tmpBuff.append(writeEndTag(a));
956           }// End if
957         }else{
958           // The annotation a does NOT end at the offset. Let's see if it starts
959           // at the offset
960           if ( offset.equals(a.getStartNode().getOffset()) ){
961             // The annotation a starts at the offset.
962             // In this case empty the stack and write the end tag
963             if (!stack.isEmpty()){
964               while(!stack.isEmpty()){
965                 Annotation a1 = (Annotation)stack.pop();
966                 tmpBuff.append(writeEndTag(a1));
967               }// End while
968             }// End if
969             tmpBuff.append(writeStartTag(a, includeFeatures));
970             // The annotation is removed from dumped set
971             aDumpAnnotSet.remove(a);
972           }// End if ( offset.equals(a.getStartNode().getOffset()) )
973         }// End if ( offset.equals(a.getEndNode().getOffset()) )
974       }// End while(it.hasNext()){
975 
976       // In this case empty the stack and write the end tag
977       if (!stack.isEmpty()){
978         while(!stack.isEmpty()){
979           Annotation a1 = (Annotation)stack.pop();
980           tmpBuff.append(writeEndTag(a1));
981         }// End while
982       }// End if
983 
984       // Before inserting tmpBuff into docContStrBuff we need to check
985       // if there are chars to be replaced and if there are, they would be
986       // replaced.
987       if (!offsets2CharsMap.isEmpty()){
988         Long offsChar = (Long) offsets2CharsMap.lastKey();
989         while( !offsets2CharsMap.isEmpty() &&
990                        offsChar.intValue() >= offset.intValue()){
991           // Replace the char at offsChar with its corresponding entity form
992           // the entitiesMap.
993           docContStrBuff.replace(offsChar.intValue(),offsChar.intValue()+1,
994           (String)entitiesMap.get((Character)offsets2CharsMap.get(offsChar)));
995           // Discard the offsChar after it was used.
996           offsets2CharsMap.remove(offsChar);
997           // Investigate next offsChar
998           if (!offsets2CharsMap.isEmpty())
999             offsChar = (Long) offsets2CharsMap.lastKey();
1000        }// End while
1001      }// End if
1002      // Insert tmpBuff to the location where it belongs in docContStrBuff
1003      docContStrBuff.insert(offset.intValue(),tmpBuff.toString());
1004    }// End while(!offsets.isEmpty())
1005    // Need to replace the entities in the remaining text, if there is any text
1006    // So, if there are any more items in offsets2CharsMap they need to be
1007    // replaced
1008    while (!offsets2CharsMap.isEmpty()){
1009      Long offsChar = (Long) offsets2CharsMap.lastKey();
1010      // Replace the char with its entity
1011      docContStrBuff.replace(offsChar.intValue(),offsChar.intValue()+1,
1012      (String)entitiesMap.get((Character)offsets2CharsMap.get(offsChar)));
1013      // remove the offset from the map
1014      offsets2CharsMap.remove(offsChar);
1015    }// End while
1016    return docContStrBuff.toString();
1017  }// saveAnnotationSetAsXml()
1018
1019  private String saveAnnotationSetAsXml(List aDumpAnnotList,
1020                                        boolean includeFeatures){
1021    String content = null;
1022    if (this.getContent()== null)
1023      content = new String("");
1024    else
1025      content = this.getContent().toString();
1026    StringBuffer docContStrBuff = filterNonXmlChars(new StringBuffer(content));
1027    if (aDumpAnnotList == null)   return docContStrBuff.toString();
1028
1029    StringBuffer resultStrBuff = new StringBuffer(
1030        DOC_SIZE_MULTIPLICATION_FACTOR*(this.getContent().size().intValue()));
1031    // last offset position used to extract portions of text
1032    Long lastOffset = new Long(0);
1033
1034    TreeMap offsets2CharsMap = new TreeMap();
1035    HashMap annotsForOffset = new HashMap(100);
1036    if (this.getContent().size().longValue() != 0){
1037      // Fill the offsets2CharsMap with all the indices where
1038      // special chars appear
1039      buildEntityMapFromString(content,offsets2CharsMap);
1040    }//End if
1041    // The saving alghorithm is as follows:
1042    ///////////////////////////////////////////
1043    // Construct a set of annot with all IDs in asc order.
1044    // All annotations that end at that offset swap their place in descending
1045    // order. For each node write all the tags from left to right.
1046
1047    // Construct the node set
1048    TreeSet offsets = new TreeSet();
1049    Iterator iter = aDumpAnnotList.iterator();
1050    Annotation annot;
1051    Long start;
1052    Long end;
1053    while (iter.hasNext()){
1054      annot = (Annotation) iter.next();
1055      start = annot.getStartNode().getOffset();
1056      end = annot.getEndNode().getOffset();
1057      offsets.add(start);
1058      offsets.add(end);
1059      if (annotsForOffset.containsKey(start)) {
1060        ((List) annotsForOffset.get(start)).add(annot);
1061      } else {
1062        List newList = new ArrayList(10);
1063        newList.add(annot);
1064        annotsForOffset.put(start, newList);
1065      }
1066      if (annotsForOffset.containsKey(end)) {
1067        ((List) annotsForOffset.get(end)).add(annot);
1068      } else {
1069        List newList = new ArrayList(10);
1070        newList.add(annot);
1071        annotsForOffset.put(end, newList);
1072      }
1073    }// End while
1074
1075    // ofsets is sorted in ascending order.
1076    // Iterate this set in descending order and remove an offset at each
1077    // iteration
1078    Iterator offsetIt = offsets.iterator();
1079    Long offset;
1080    List annotations;
1081    // This don't have to be a large buffer - just for tags
1082    StringBuffer tmpBuff = new StringBuffer(255);
1083    Stack stack = new Stack();
1084    while (offsetIt.hasNext()){
1085      offset = (Long)offsetIt.next();
1086      // Now, use it.
1087      // Returns a list with annotations that needs to be serialized in that
1088      // offset.
1089      annotations = (List) annotsForOffset.get(offset);
1090      // order annotations in list for offset to print tags in correct order
1091      annotations = getAnnotationsForOffset(annotations, offset);
1092      // clear structures
1093      tmpBuff.setLength(0);
1094      stack.clear();
1095
1096      // Iterate through all these annotations and serialize them
1097      Iterator it = annotations.iterator();
1098      Annotation a;
1099      Annotation annStack;
1100      while(it.hasNext()){
1101        a = (Annotation) it.next();
1102        // Test if a Ends at offset
1103        if ( offset.equals(a.getEndNode().getOffset()) ){
1104          // Test if a Starts at offset
1105          if ( offset.equals(a.getStartNode().getOffset()) ){
1106            // Here, the annotation a Starts and Ends at the offset
1107            if ( null != a.getFeatures().get("isEmptyAndSpan") &&
1108                 "true".equals((String)a.getFeatures().get("isEmptyAndSpan"))){
1109
1110              // Assert: annotation a with start == end and isEmptyAndSpan
1111              tmpBuff.append(writeStartTag(a, includeFeatures));
1112              stack.push(a);
1113            }else{
1114              // Assert annotation a with start == end and an empty tag
1115              tmpBuff.append(writeEmptyTag(a));
1116              // The annotation is removed from dumped set
1117              aDumpAnnotList.remove(a);
1118            }// End if
1119          }else{
1120            // Here the annotation a Ends at the offset.
1121            // In this case empty the stack and write the end tag
1122            if (!stack.isEmpty()){
1123              while(!stack.isEmpty()){
1124                annStack = (Annotation)stack.pop();
1125                tmpBuff.append(writeEndTag(annStack));
1126              }// End while
1127            }// End if
1128            tmpBuff.append(writeEndTag(a));
1129          }// End if
1130        }else{
1131          // The annotation a does NOT end at the offset. Let's see if it starts
1132          // at the offset
1133          if ( offset.equals(a.getStartNode().getOffset()) ){
1134            // The annotation a starts at the offset.
1135            // In this case empty the stack and write the end tag
1136            if (!stack.isEmpty()){
1137              while(!stack.isEmpty()){
1138                annStack = (Annotation)stack.pop();
1139                tmpBuff.append(writeEndTag(annStack));
1140              }// End while
1141            }// End if
1142            tmpBuff.append(writeStartTag(a, includeFeatures));
1143            // The annotation is removed from dumped set
1144          }// End if ( offset.equals(a.getStartNode().getOffset()) )
1145        }// End if ( offset.equals(a.getEndNode().getOffset()) )
1146      }// End while(it.hasNext()){
1147
1148      // In this case empty the stack and write the end tag
1149      if (!stack.isEmpty()){
1150        while(!stack.isEmpty()){
1151          annStack = (Annotation)stack.pop();
1152          tmpBuff.append(writeEndTag(annStack));
1153        }// End while
1154      }// End if
1155
1156      // extract text from content and replace spec chars
1157      StringBuffer partText = new StringBuffer();
1158      SortedMap offsetsInRange =
1159          offsets2CharsMap.subMap(lastOffset, offset);
1160      Long tmpOffset;
1161      Long tmpLastOffset = lastOffset;
1162      String replacement;
1163
1164      // Before inserting tmpBuff into the buffer we need to check
1165      // if there are chars to be replaced in range
1166      if(!offsetsInRange.isEmpty()) {
1167        tmpOffset = (Long) offsetsInRange.firstKey();
1168        replacement =
1169            (String)entitiesMap.get((Character)offsets2CharsMap.get(tmpOffset));
1170        partText.append(docContStrBuff.substring(tmpLastOffset.intValue(),
1171                                               tmpOffset.intValue()));
1172        partText.append(replacement);
1173        tmpLastOffset = new Long(tmpOffset.longValue()+1);
1174      }
1175      partText.append(docContStrBuff.substring(tmpLastOffset.intValue(),
1176                                               offset.intValue()));
1177      resultStrBuff.append(partText);
1178      // Insert tmpBuff to the result string
1179      resultStrBuff.append(tmpBuff.toString());
1180      lastOffset = offset;
1181    }// End while(!offsets.isEmpty())
1182
1183    // get text to the end of content
1184    // extract text from content and replace spec chars
1185    StringBuffer partText = new StringBuffer();
1186    SortedMap offsetsInRange =
1187        offsets2CharsMap.subMap(lastOffset, new Long(docContStrBuff.length()));
1188    Long tmpOffset;
1189    Long tmpLastOffset = lastOffset;
1190    String replacement;
1191
1192    // Need to replace the entities in the remaining text, if there is any text
1193    // So, if there are any more items in offsets2CharsMap for remaining text
1194    // they need to be replaced
1195    if(!offsetsInRange.isEmpty()) {
1196      tmpOffset = (Long) offsetsInRange.firstKey();
1197      replacement =
1198          (String)entitiesMap.get((Character)offsets2CharsMap.get(tmpOffset));
1199      partText.append(docContStrBuff.substring(tmpLastOffset.intValue(),
1200                                             tmpOffset.intValue()));
1201      partText.append(replacement);
1202      tmpLastOffset = new Long(tmpOffset.longValue()+1);
1203    }
1204    partText.append(docContStrBuff.substring(tmpLastOffset.intValue(),
1205                                             docContStrBuff.length()));
1206    resultStrBuff.append(partText);
1207
1208    return resultStrBuff.toString();
1209  }// saveAnnotationSetAsXml()
1210
/* Old method created by Cristian. It creates the content backwards, from the
   last offset to the first one.
1212
1213    private String saveAnnotationSetAsXml(List aDumpAnnotList,
1214                                          boolean includeFeatures){
1215      String content = null;
1216      if (this.getContent()== null)
1217        content = new String("");
1218      else
1219        content = this.getContent().toString();
1220      StringBuffer docContStrBuff = filterNonXmlChars(new StringBuffer(content));
1221      if (aDumpAnnotList == null)   return docContStrBuff.toString();
1222
1223      TreeMap offsets2CharsMap = new TreeMap();
1224      HashMap annotsForOffset = new HashMap(100);
1225      if (this.getContent().size().longValue() != 0){
1226        // Fill the offsets2CharsMap with all the indices where
1227        // special chars appear
1228        buildEntityMapFromString(content,offsets2CharsMap);
1229      }//End if
1230      // The saving alghorithm is as follows:
1231      ///////////////////////////////////////////
1232      // Construct a set of annot with all IDs in asc order.
1233      // All annotations that end at that offset swap their place in descending
1234      // order. For each node write all the tags from left to right.
1235
1236      // Construct the node set
1237      TreeSet offsets = new TreeSet();
1238      Iterator iter = aDumpAnnotList.iterator();
1239      while (iter.hasNext()){
1240        Annotation annot = (Annotation) iter.next();
1241        offsets.add(annot.getStartNode().getOffset());
1242        offsets.add(annot.getEndNode().getOffset());
1243        if (annotsForOffset.containsKey(annot.getStartNode().getOffset())) {
1244          ((List) annotsForOffset.get(annot.getStartNode().getOffset())).add(annot);
1245        } else {
1246          List newList = new ArrayList(10);
1247          newList.add(annot);
1248          annotsForOffset.put(annot.getStartNode().getOffset(), newList);
1249        }
1250        if (annotsForOffset.containsKey(annot.getEndNode().getOffset())) {
1251          ((List) annotsForOffset.get(annot.getEndNode().getOffset())).add(annot);
1252        } else {
1253          List newList = new ArrayList(10);
1254          newList.add(annot);
1255          annotsForOffset.put(annot.getEndNode().getOffset(), newList);
1256        }
1257      }// End while
1258
1259      // ofsets is sorted in ascending order.
1260      // Iterate this set in descending order and remove an offset at each
1261      // iteration
1262      while (!offsets.isEmpty()){
1263        Long offset = (Long)offsets.last();
1264        // Remove the offset from the set
1265        offsets.remove(offset);
1266        // Now, use it.
1267        // Returns a list with annotations that needs to be serialized in that
1268        // offset.
1269//      List annotations = getAnnotationsForOffset(aDumpAnnotList,offset);
1270        List annotations = (List) annotsForOffset.get(offset);
1271        annotations = getAnnotationsForOffset(annotations,offset);
1272        // Attention: the annotation are serialized from left to right
1273//      StringBuffer tmpBuff = new StringBuffer("");
1274        StringBuffer tmpBuff = new StringBuffer(
1275            DOC_SIZE_MULTIPLICATION_FACTOR*(this.getContent().size().intValue()));
1276        Stack stack = new Stack();
1277        // Iterate through all these annotations and serialize them
1278        Iterator it = annotations.iterator();
1279        while(it.hasNext()){
1280          Annotation a = (Annotation) it.next();
1281          it.remove();
1282          // Test if a Ends at offset
1283          if ( offset.equals(a.getEndNode().getOffset()) ){
1284            // Test if a Starts at offset
1285            if ( offset.equals(a.getStartNode().getOffset()) ){
1286              // Here, the annotation a Starts and Ends at the offset
1287              if ( null != a.getFeatures().get("isEmptyAndSpan") &&
1288                   "true".equals((String)a.getFeatures().get("isEmptyAndSpan"))){
1289
1290                // Assert: annotation a with start == end and isEmptyAndSpan
1291                tmpBuff.append(writeStartTag(a, includeFeatures));
1292                stack.push(a);
1293              }else{
1294                // Assert annotation a with start == end and an empty tag
1295                tmpBuff.append(writeEmptyTag(a));
1296                // The annotation is removed from dumped set
1297                aDumpAnnotList.remove(a);
1298              }// End if
1299            }else{
1300              // Here the annotation a Ends at the offset.
1301              // In this case empty the stack and write the end tag
1302              if (!stack.isEmpty()){
1303                while(!stack.isEmpty()){
1304                  Annotation a1 = (Annotation)stack.pop();
1305                  tmpBuff.append(writeEndTag(a1));
1306                }// End while
1307              }// End if
1308              tmpBuff.append(writeEndTag(a));
1309            }// End if
1310          }else{
1311            // The annotation a does NOT end at the offset. Let's see if it starts
1312            // at the offset
1313            if ( offset.equals(a.getStartNode().getOffset()) ){
1314              // The annotation a starts at the offset.
1315              // In this case empty the stack and write the end tag
1316              if (!stack.isEmpty()){
1317                while(!stack.isEmpty()){
1318                  Annotation a1 = (Annotation)stack.pop();
1319                  tmpBuff.append(writeEndTag(a1));
1320                }// End while
1321              }// End if
1322              tmpBuff.append(writeStartTag(a, includeFeatures));
1323              // The annotation is removed from dumped set
1324              aDumpAnnotList.remove(a);
1325            }// End if ( offset.equals(a.getStartNode().getOffset()) )
1326          }// End if ( offset.equals(a.getEndNode().getOffset()) )
1327        }// End while(it.hasNext()){
1328
1329        // In this case empty the stack and write the end tag
1330        if (!stack.isEmpty()){
1331          while(!stack.isEmpty()){
1332            Annotation a1 = (Annotation)stack.pop();
1333            tmpBuff.append(writeEndTag(a1));
1334          }// End while
1335        }// End if
1336
1337        // Before inserting tmpBuff into docContStrBuff we need to check
1338        // if there are chars to be replaced and if there are, they would be
1339        // replaced.
1340        if (!offsets2CharsMap.isEmpty()){
1341          Long offsChar = (Long) offsets2CharsMap.lastKey();
1342          while( !offsets2CharsMap.isEmpty() &&
1343                         offsChar.intValue() >= offset.intValue()){
1344            // Replace the char at offsChar with its corresponding entity form
1345            // the entitiesMap.
1346            docContStrBuff.replace(offsChar.intValue(),offsChar.intValue()+1,
1347            (String)entitiesMap.get((Character)offsets2CharsMap.get(offsChar)));
1348            // Discard the offsChar after it was used.
1349            offsets2CharsMap.remove(offsChar);
1350            // Investigate next offsChar
1351            if (!offsets2CharsMap.isEmpty())
1352              offsChar = (Long) offsets2CharsMap.lastKey();
1353          }// End while
1354        }// End if
1355        // Insert tmpBuff to the location where it belongs in docContStrBuff
1356        docContStrBuff.insert(offset.intValue(),tmpBuff.toString());
1357      }// End while(!offsets.isEmpty())
1358      // Need to replace the entities in the remaining text, if there is any text
1359      // So, if there are any more items in offsets2CharsMap they need to be
1360      // replaced
1361      while (!offsets2CharsMap.isEmpty()){
1362        Long offsChar = (Long) offsets2CharsMap.lastKey();
1363        // Replace the char with its entity
1364        docContStrBuff.replace(offsChar.intValue(),offsChar.intValue()+1,
1365        (String)entitiesMap.get((Character)offsets2CharsMap.get(offsChar)));
1366        // remove the offset from the map
1367        offsets2CharsMap.remove(offsChar);
1368      }// End while
1369      return docContStrBuff.toString();
1370    }// saveAnnotationSetAsXml()
1371*/
1372
1373  /**
1374   *  Return true only if the document has features for original content and
1375   *  repositioning information.
1376   */
1377  private boolean hasOriginalContentFeatures() {
1378    FeatureMap features = getFeatures();
1379    boolean result = false;
1380
1381    result =
1382    (features.get(GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME) != null)
1383      &&
1384    (features.get(GateConstants.DOCUMENT_REPOSITIONING_INFO_FEATURE_NAME)
1385      != null);
1386
1387    return result;
1388  } // hasOriginalContentFeatures
1389
  /** This method saves all the annotations from aSourceAnnotationSet and
    * combines them with the original document content, if preserved as
    * feature.
    * @param aSourceAnnotationSet is a set of GATE annotations prepared to be
    * used on the original document content. If aSourceAnnotationSet is
    * <b>null</b> then the original content is returned as it is (an empty
    * string if the original content feature is missing).
    * @param includeFeatures is a boolean, which controls whether the annotation
    * features and gate ID are included or not.
    * @return The XML document obtained from the original content + the
    * information from the source annotation set.
    */
1400  private String saveAnnotationSetAsXmlInOrig(Set aSourceAnnotationSet,
1401                                        boolean includeFeatures){
1402    StringBuffer docContStrBuff;
1403
1404    String origContent;
1405
1406    origContent =
1407     (String)features.get(GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME);
1408    if(origContent == null) {
1409      origContent = "";
1410    } // if
1411
1412    long originalContentSize = origContent.length();
1413
1414    RepositioningInfo repositioning = (RepositioningInfo)
1415      getFeatures().get(GateConstants.DOCUMENT_REPOSITIONING_INFO_FEATURE_NAME);
1416
1417    docContStrBuff = new StringBuffer(origContent);
1418    if (aSourceAnnotationSet == null) return docContStrBuff.toString();
1419
1420    StatusListener sListener = (StatusListener)
1421                               gate.gui.MainFrame.getListeners().
1422                               get("gate.event.StatusListener");
1423
1424    AnnotationSet originalMarkupsAnnotSet =
1425            this.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
1426    // Create a dumping annotation set on the document. It will be used for
1427    // dumping annotations...
1428    AnnotationSet dumpingSet = new AnnotationSetImpl((Document) this);
1429    if(sListener != null)
1430      sListener.statusChanged("Constructing the dumping annotation set.");
1431    // Then take all the annotations from aSourceAnnotationSet and verify if
1432    // they can be inserted safely into the dumpingSet. Where not possible,
1433    // report.
1434    if (aSourceAnnotationSet != null){
1435      Iterator iter = aSourceAnnotationSet.iterator();
1436      Annotation currentAnnot;
1437      while (iter.hasNext()){
1438        currentAnnot = (Annotation) iter.next();
1439        if(insertsSafety(originalMarkupsAnnotSet, currentAnnot)
1440            && insertsSafety(dumpingSet, currentAnnot)){
1441          dumpingSet.add(currentAnnot);
1442        }else{
1443          Out.prln("Warning: Annotation with ID=" + currentAnnot.getId() +
1444          ", startOffset=" + currentAnnot.getStartNode().getOffset() +
1445          ", endOffset=" + currentAnnot.getEndNode().getOffset() +
1446          ", type=" + currentAnnot.getType()+ " was found to violate the" +
1447          " crossed over condition. It will be discarded");
1448        }// End if
1449      }// End while
1450    }// End if
1451
1452    // The dumpingSet is ready to be exported as XML
1453    // Here we go.
1454    if(sListener != null) sListener.statusChanged("Dumping annotations as XML");
1455
1456    ///////////////////////////////////////////
1457    // Construct a set of annot with all IDs in asc order.
1458    // All annotations that end at that offset swap their place in descending
1459    // order. For each node write all the tags from left to right.
1460
1461    // Construct the node set
1462    TreeSet offsets = new TreeSet();
1463    Iterator iter = aSourceAnnotationSet.iterator();
1464    while (iter.hasNext()){
1465      Annotation annot = (Annotation) iter.next();
1466      offsets.add(annot.getStartNode().getOffset());
1467      offsets.add(annot.getEndNode().getOffset());
1468    }// End while
1469
1470    // ofsets is sorted in ascending order.
1471    // Iterate this set in descending order and remove an offset at each
1472    // iteration
1473    while (!offsets.isEmpty()){
1474      Long offset = (Long)offsets.last();
1475      // Remove the offset from the set
1476      offsets.remove(offset);
1477      // Now, use it.
1478      // Returns a list with annotations that needs to be serialized in that
1479      // offset.
1480      List annotations = getAnnotationsForOffset(aSourceAnnotationSet,offset);
1481      // Attention: the annotation are serialized from left to right
1482      StringBuffer tmpBuff = new StringBuffer("");
1483      Stack stack = new Stack();
1484      // Iterate through all these annotations and serialize them
1485      Iterator it = annotations.iterator();
1486      Annotation a = null;
1487      while(it.hasNext()) {
1488        a = (Annotation) it.next();
1489        it.remove();
1490        // Test if a Ends at offset
1491        if ( offset.equals(a.getEndNode().getOffset()) ){
1492          // Test if a Starts at offset
1493          if ( offset.equals(a.getStartNode().getOffset()) ){
1494            // Here, the annotation a Starts and Ends at the offset
1495            if ( null != a.getFeatures().get("isEmptyAndSpan") &&
1496                 "true".equals((String)a.getFeatures().get("isEmptyAndSpan"))){
1497
1498              // Assert: annotation a with start == end and isEmptyAndSpan
1499              tmpBuff.append(writeStartTag(a, includeFeatures, false));
1500              stack.push(a);
1501            }else{
1502              // Assert annotation a with start == end and an empty tag
1503              tmpBuff.append(writeEmptyTag(a, false));
1504              // The annotation is removed from dumped set
1505              aSourceAnnotationSet.remove(a);
1506            }// End if
1507          }else{
1508            // Here the annotation a Ends at the offset.
1509            // In this case empty the stack and write the end tag
1510            while(!stack.isEmpty()){
1511              Annotation a1 = (Annotation)stack.pop();
1512              tmpBuff.append(writeEndTag(a1));
1513            }// End while
1514            tmpBuff.append(writeEndTag(a));
1515          }// End if
1516        }else{
1517          // The annotation a does NOT end at the offset. Let's see if it starts
1518          // at the offset
1519          if ( offset.equals(a.getStartNode().getOffset()) ){
1520            // The annotation a starts at the offset.
1521            // In this case empty the stack and write the end tag
1522            while(!stack.isEmpty()){
1523              Annotation a1 = (Annotation)stack.pop();
1524              tmpBuff.append(writeEndTag(a1));
1525            }// End while
1526
1527            tmpBuff.append(writeStartTag(a, includeFeatures, false));
1528            // The annotation is removed from dumped set
1529            aSourceAnnotationSet.remove(a);
1530          }// End if ( offset.equals(a.getStartNode().getOffset()) )
1531        }// End if ( offset.equals(a.getEndNode().getOffset()) )
1532      }// End while(it.hasNext()){
1533
1534      // In this case empty the stack and write the end tag
1535      while(!stack.isEmpty()){
1536        Annotation a1 = (Annotation)stack.pop();
1537        tmpBuff.append(writeEndTag(a1));
1538      }// End while
1539
1540      long originalPosition = -1;
1541      boolean backPositioning =
1542        a != null && offset.equals(a.getEndNode().getOffset());
1543      if ( backPositioning ) {
1544        // end of the annotation correction
1545        originalPosition =
1546          repositioning.getOriginalPos(offset.intValue(), true);
1547      } // if
1548
1549      if(originalPosition == -1) {
1550        originalPosition = repositioning.getOriginalPos(offset.intValue());
1551      } // if
1552
1553      // Insert tmpBuff to the location where it belongs in docContStrBuff
1554      if(originalPosition != -1 && originalPosition <= originalContentSize ) {
1555        docContStrBuff.insert((int) originalPosition, tmpBuff.toString());
1556      }
1557      else {
1558        Out.prln("Error in the repositioning. The offset ("+offset.intValue()
1559        +") could not be positioned in the original document. \n"
1560        +"Calculated position is: "+originalPosition
1561        +" placed back: "+backPositioning);
1562      } // if
1563
1564    }// End while(!offsets.isEmpty())
1565    if (theRootAnnotation != null)
1566      docContStrBuff.append(writeEndTag(theRootAnnotation));
1567    return docContStrBuff.toString();
1568  } // saveAnnotationSetAsXmlInOrig()
1569
1570  /** This method returns a list with annotations ordered that way that
1571    * they can be serialized from left to right, at the offset. If one of the
1572    * params is null then an empty list will be returned.
1573    * @param aDumpAnnotSet is a set containing all annotations that will be
1574    * dumped.
1575    * @param offset represent the offset at witch the annotation must start
1576    * AND/OR end.
1577    * @return a list with those annotations that need to be serialized.
1578    */
1579  private List getAnnotationsForOffset(Set aDumpAnnotSet, Long offset){
1580    List annotationList = new LinkedList();
1581    if (aDumpAnnotSet == null || offset == null) return annotationList;
1582    Set annotThatStartAtOffset = new TreeSet(
1583                          new AnnotationComparator(ORDER_ON_END_OFFSET,DESC));
1584    Set annotThatEndAtOffset = new TreeSet(
1585                          new AnnotationComparator(ORDER_ON_START_OFFSET,DESC));
1586    Set annotThatStartAndEndAtOffset = new TreeSet(
1587                          new AnnotationComparator(ORDER_ON_ANNOT_ID,ASC));
1588
1589    // Fill these tree lists with annotation tat start, end or start and
1590    // end at the offset.
1591    Iterator iter = aDumpAnnotSet.iterator();
1592    while(iter.hasNext()){
1593      Annotation ann = (Annotation) iter.next();
1594      if (offset.equals(ann.getStartNode().getOffset())){
1595        if (offset.equals(ann.getEndNode().getOffset()))
1596          annotThatStartAndEndAtOffset.add(ann);
1597        else
1598          annotThatStartAtOffset.add(ann);
1599      }else{
1600        if (offset.equals(ann.getEndNode().getOffset()))
1601          annotThatEndAtOffset.add(ann);
1602      }// End if
1603    }// End while
1604    annotationList.addAll(annotThatEndAtOffset);
1605    annotThatEndAtOffset = null;
1606    annotationList.addAll(annotThatStartAtOffset);
1607    annotThatStartAtOffset = null;
1608    iter = annotThatStartAndEndAtOffset.iterator();
1609    while(iter.hasNext()){
1610      Annotation ann = (Annotation) iter.next();
1611      Iterator it = annotationList.iterator();
1612      boolean breaked = false;
1613      while (it.hasNext()){
1614        Annotation annFromList = (Annotation) it.next();
1615        if (annFromList.getId().intValue() > ann.getId().intValue()){
1616          annotationList.add(annotationList.indexOf(annFromList),ann);
1617          breaked = true;
1618          break;
1619        }// End if
1620      }// End while
1621      if (!breaked)
1622        annotationList.add(ann);
1623      iter.remove();
1624    }// End while
1625    return annotationList;
1626  }// getAnnotationsForOffset()
1627
1628  private List getAnnotationsForOffset(List aDumpAnnotList, Long offset){
1629    List annotationList = new ArrayList();
1630    if (aDumpAnnotList == null || offset == null) return annotationList;
1631    Set annotThatStartAtOffset;
1632    Set annotThatEndAtOffset;
1633    Set annotThatStartAndEndAtOffset;
1634    annotThatStartAtOffset = new TreeSet(
1635        new AnnotationComparator(ORDER_ON_END_OFFSET, DESC));
1636    annotThatEndAtOffset = new TreeSet(
1637        new AnnotationComparator(ORDER_ON_START_OFFSET, DESC));
1638    annotThatStartAndEndAtOffset = new TreeSet(
1639        new AnnotationComparator(ORDER_ON_ANNOT_ID, ASC));
1640
1641    // Fill these tree lists with annotation tat start, end or start and
1642    // end at the offset.
1643    Iterator iter = aDumpAnnotList.iterator();
1644    while(iter.hasNext()){
1645      Annotation ann = (Annotation) iter.next();
1646      if (offset.equals(ann.getStartNode().getOffset())){
1647        if (offset.equals(ann.getEndNode().getOffset()))
1648          annotThatStartAndEndAtOffset.add(ann);
1649        else
1650          annotThatStartAtOffset.add(ann);
1651      }else{
1652        if (offset.equals(ann.getEndNode().getOffset()))
1653          annotThatEndAtOffset.add(ann);
1654      }// End if
1655    }// End while
1656
1657    annotationList.addAll(annotThatEndAtOffset);
1658    annotationList.addAll(annotThatStartAtOffset);
1659    annotThatEndAtOffset = null;
1660    annotThatStartAtOffset = null;
1661
1662    iter = annotThatStartAndEndAtOffset.iterator();
1663    while(iter.hasNext()){
1664      Annotation ann = (Annotation) iter.next();
1665      Iterator it = annotationList.iterator();
1666      boolean breaked = false;
1667      while (it.hasNext()){
1668        Annotation annFromList = (Annotation) it.next();
1669        if (annFromList.getId().intValue() > ann.getId().intValue()){
1670          annotationList.add(annotationList.indexOf(annFromList),ann);
1671          breaked = true;
1672          break;
1673        }// End if
1674      }// End while
1675      if (!breaked)
1676        annotationList.add(ann);
1677      iter.remove();
1678    }// End while
1679    return annotationList;
1680  }// getAnnotationsForOffset()
1681
1682  private String writeStartTag(Annotation annot, boolean includeFeatures){
1683    return writeStartTag(annot, includeFeatures, true);
1684  } // writeStartTag
1685
1686  /** Returns a string representing a start tag based on the input annot*/
1687  private String writeStartTag(Annotation annot, boolean includeFeatures,
1688                                boolean includeNamespace){
1689    AnnotationSet originalMarkupsAnnotSet =
1690            this.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
1691
1692    StringBuffer strBuff = new StringBuffer("");
1693    if (annot == null) return strBuff.toString();
1694//    if (!addGatePreserveFormatTag && isRootTag){
1695      if (theRootAnnotation != null && annot.getId().equals(theRootAnnotation.getId())){
1696      //the features are included either if desired or if that's an annotation
1697      //from the original markup of the document. We don't want for example to
1698      //spoil all links in an HTML file!
1699      if (includeFeatures) {
1700        strBuff.append("<");
1701        strBuff.append(annot.getType());
1702        strBuff.append(" ");
1703        if(includeNamespace) {
1704          strBuff.append(" xmlns:gate=\"http://www.gate.ac.uk\"");
1705          strBuff.append(" gate:");
1706        }
1707        strBuff.append("gateId=\"");
1708        strBuff.append(annot.getId());
1709        strBuff.append("\"");
1710        strBuff.append(" ");
1711        if(includeNamespace) {
1712          strBuff.append("gate:");
1713        }
1714        strBuff.append("annotMaxId=\"");
1715        strBuff.append(nextAnnotationId);
1716        strBuff.append("\"");
1717        strBuff.append(writeFeatures(annot.getFeatures(), includeNamespace));
1718        strBuff.append(">");
1719      }
1720      else if (originalMarkupsAnnotSet.contains(annot)) {
1721          strBuff.append("<");
1722          strBuff.append(annot.getType());
1723          strBuff.append(writeFeatures(annot.getFeatures(), includeNamespace));
1724          strBuff.append(">");
1725        }
1726      else {
1727        strBuff.append("<");
1728        strBuff.append(annot.getType());
1729        strBuff.append(">");
1730      }
1731
1732    }else{
1733      //the features are included either if desired or if that's an annotation
1734      //from the original markup of the document. We don't want for example to
1735      //spoil all links in an HTML file!
1736      if (includeFeatures) {
1737        strBuff.append("<");
1738        strBuff.append(annot.getType());
1739        strBuff.append(" ");
1740        if(includeNamespace) {
1741          strBuff.append("gate:");
1742        } // if includeNamespaces
1743        strBuff.append("gateId=\"");
1744        strBuff.append(annot.getId());
1745        strBuff.append("\"");
1746        strBuff.append(writeFeatures(annot.getFeatures(), includeNamespace));
1747        strBuff.append(">");
1748      }
1749      else if (originalMarkupsAnnotSet.contains(annot)) {
1750        strBuff.append("<");
1751        strBuff.append(annot.getType());
1752        strBuff.append(writeFeatures(annot.getFeatures(), includeNamespace));
1753        strBuff.append(">");
1754      }
1755      else {
1756        strBuff.append("<");
1757        strBuff.append(annot.getType());
1758        strBuff.append(">");
1759      }
1760    }// End if
1761    return strBuff.toString();
1762  }// writeStartTag()
1763
1764  /**
1765   * Identifies the root annotations inside an annotation set.
1766   * The root annotation is the one that starts at offset 0, and has the
1767   * greatest span. If there are more than one with this function, then the
1768   * annotation with the smalled ID wil be selected as root.
1769   * If none is identified it will return null.
1770   * @param anAnnotationSet The annotation set possibly containing
1771   *  the root annotation.
1772   * @return The root annotation or null is it fails
1773   */
1774  private Annotation identifyTheRootAnnotation(AnnotationSet anAnnotationSet){
1775    if (anAnnotationSet == null) return null;
1776    // If the starting node of this annotation is not null, then the annotation
1777    // set will not have a root annotation.
1778    Node startNode = anAnnotationSet.firstNode();
1779    Node endNode = anAnnotationSet.lastNode();
1780    // This is placed here just to speed things up. The alghorithm bellow can
1781    // can identity the annotation that span over the entire set and with the
1782    // smallest ID. However the root annotation will have to have the start
1783    // offset equal to 0.
1784    if (startNode.getOffset().longValue() != 0) return null;
1785    // Go anf find the annotation.
1786    Annotation theRootAnnotation = null;
1787    // Check if there are annotations starting at offset 0. If there are, then
1788    // check all of them to see which one has the greatest span. Basically its
1789    // END offset should be the bigest offset from the input annotation set.
1790    long start = startNode.getOffset().longValue();
1791    long end = endNode.getOffset().longValue();
1792    for(Iterator it = anAnnotationSet.iterator(); it.hasNext();){
1793      Annotation currentAnnot = (Annotation) it.next();
1794      // If the currentAnnot has both its Start and End equals to the Start and
1795      // end of the AnnotationSet then check to see if its ID is the smallest.
1796      if (
1797          (start == currentAnnot.getStartNode().getOffset().longValue()) &&
1798          (end   == currentAnnot.getEndNode().getOffset().longValue())
1799         ){
1800          // The currentAnnotation has is a potencial root one.
1801          if (theRootAnnotation == null)
1802            theRootAnnotation = currentAnnot;
1803          else{
1804            // If its ID is greater that the currentAnnot then update the root
1805            if ( theRootAnnotation.getId().intValue() > currentAnnot.getId().intValue())
1806              theRootAnnotation = currentAnnot;
1807          }// End if
1808      }// End if
1809    }// End for
1810    return theRootAnnotation;
1811  }// End identifyTheRootAnnotation()
1812
1813  private Annotation identifyTheRootAnnotation(List anAnnotationList){
1814    if (anAnnotationList == null || anAnnotationList.isEmpty()) return null;
1815    // If the first annotation in the list (which is sorted by start offset)
1816    //does not have an offset = 0, then there's no root tag.
1817    if(((Annotation)anAnnotationList.get(0)).
1818       getStartNode().getOffset().longValue() > 0) return null;
1819
1820    //find the limits
1821    long start = 0; //we know this already
1822    long end = 0; //end = 0  will be improved by the next loop
1823    for(int i = 0; i < anAnnotationList.size(); i++){
1824      Annotation anAnnotation = (Annotation)anAnnotationList.get(i);
1825      long localEnd = anAnnotation.getEndNode().getOffset().longValue();
1826      if(localEnd > end) end = localEnd;
1827    }
1828
1829    // Go and find the annotation.
1830    //look at all annotations that start at 0 and end at end
1831    //if there are several, choose the one with the smallest ID
1832    Annotation theRootAnnotation = null;
1833    for(int i = 0; i < anAnnotationList.size(); i++){
1834      Annotation currentAnnot = (Annotation) anAnnotationList.get(i);
1835      long localStart = currentAnnot.getStartNode().getOffset().longValue();
1836      long localEnd = currentAnnot.getEndNode().getOffset().longValue();
1837      // If the currentAnnot has both its Start and End equals to the Start and
1838      // end of the AnnotationSet then check to see if its ID is the smallest.
1839      if (
1840          (start == localStart) && (end == localEnd)){
1841          // The currentAnnotation has is a potential root one.
1842          if (theRootAnnotation == null) theRootAnnotation = currentAnnot;
1843          else{
1844            // If root's ID is greater that the currentAnnot then update the root
1845            if (theRootAnnotation.getId().intValue() > currentAnnot.getId().intValue())
1846              theRootAnnotation = currentAnnot;
1847          }// End if
1848      }// End if
1849    }// End for
1850    return theRootAnnotation;
1851  }// End identifyTheRootAnnotation()
1852
1853
1854  /** This method takes aScanString and searches for those chars from
1855    * entitiesMap that appear in the string. A tree map(offset2Char) is filled
1856    * using as key the offsets where those Chars appear and the Char.
1857    * If one of the params is null the method simply returns.
1858    */
1859  private void buildEntityMapFromString(String aScanString, TreeMap aMapToFill){
1860    if (aScanString == null || aMapToFill == null) return;
1861    if (entitiesMap == null || entitiesMap.isEmpty()){
1862      Err.prln("WARNING: Entities map was not initialised !");
1863      return;
1864    }// End if
1865    // Fill the Map with the offsets of the special chars
1866    Iterator entitiesMapIterator = entitiesMap.keySet().iterator();
1867    Character c;
1868    int fromIndex;
1869    while(entitiesMapIterator.hasNext()){
1870      c = (Character) entitiesMapIterator.next();
1871      fromIndex = 0;
1872      while (-1 != fromIndex){
1873        fromIndex = aScanString.indexOf(c.charValue(),fromIndex);
1874        if (-1 != fromIndex){
1875          aMapToFill.put(new Long(fromIndex),c);
1876          fromIndex ++;
1877        }// End if
1878      }// End while
1879    }// End while
1880  }//buildEntityMapFromString();
1881
1882  private String writeEmptyTag(Annotation annot){
1883    return writeEmptyTag(annot, true);
1884  } // writeEmptyTag
1885
1886  /** Returns a string representing an empty tag based on the input annot*/
1887  private String writeEmptyTag(Annotation annot, boolean includeNamespace){
1888    StringBuffer strBuff = new StringBuffer("");
1889    if (annot == null) return strBuff.toString();
1890
1891    strBuff.append("<");
1892    strBuff.append(annot.getType());
1893
1894    AnnotationSet originalMarkupsAnnotSet =
1895            this.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
1896    if (! originalMarkupsAnnotSet.contains(annot)) {
1897      strBuff.append(" gateId=\"");
1898      strBuff.append(annot.getId());
1899      strBuff.append("\"");
1900    }
1901    strBuff.append(writeFeatures(annot.getFeatures(),includeNamespace));
1902    strBuff.append("/>");
1903
1904    return strBuff.toString();
1905  }// writeEmptyTag()
1906
1907  /** Returns a string representing an end tag based on the input annot*/
1908  private String writeEndTag(Annotation annot){
1909    StringBuffer strBuff = new StringBuffer("");
1910    if (annot == null) return strBuff.toString();
1911/*
1912    if (annot.getType().indexOf(" ") != -1)
1913      Out.prln("Warning: Truncating end tag to first word for annot type \""
1914      +annot.getType()+ "\". ");
1915*/
1916    strBuff.append("</"+annot.getType()+">");
1917
1918    return strBuff.toString();
1919  }// writeEndTag()
1920
1921  /** Returns a string representing a FeatureMap serialized as XML attributes*/
1922  private String writeFeatures(FeatureMap feat, boolean includeNamespace){
1923    StringBuffer strBuff = new StringBuffer("");
1924    if (feat == null) return strBuff.toString();
1925    Iterator it = feat.keySet().iterator();
1926    while (it.hasNext()){
1927      Object key = it.next();
1928      Object value = feat.get(key);
1929      if ( (key != null) && (value != null) ){
1930        // Eliminate a feature inserted at reading time and which help to
1931        // take some decissions at saving time
1932        if ("isEmptyAndSpan".equals(key.toString()))
1933          continue;
1934        if( !(String.class.isAssignableFrom(key.getClass()) ||
1935              Number.class.isAssignableFrom(key.getClass()))){
1936
1937            Out.prln("Warning:Found a feature NAME("+key+") that doesn't came"+
1938                             " from String or Number.(feature discarded)");
1939            continue;
1940        }// End if
1941        if ( !(String.class.isAssignableFrom(value.getClass()) ||
1942               Number.class.isAssignableFrom(value.getClass()) ||
1943               java.util.Collection.class.isAssignableFrom(value.getClass()))){
1944
1945            Out.prln("Warning:Found a feature VALUE("+value+") that doesn't came"+
1946                       " from String, Number or Collection.(feature discarded)");
1947            continue;
1948        }// End if
1949        if ("matches".equals(key)) {
1950          strBuff.append(" ");
1951          if(includeNamespace) {
1952            strBuff.append("gate:");
1953          }
1954//          strBuff.append(key);
1955          // replace non XML chars in attribute name
1956          strBuff.append(
1957            filterNonXmlChars(replaceCharsWithEntities(key.toString())));
1958          strBuff.append("=\"");
1959        }
1960        else {
1961          strBuff.append(" ");
1962//          strBuff.append(key);
1963          // replace non XML chars in attribute name
1964          strBuff.append(
1965            filterNonXmlChars(replaceCharsWithEntities(key.toString())));
1966          strBuff.append("=\"");
1967        }
1968        if (java.util.Collection.class.isAssignableFrom(value.getClass())){
1969          Iterator valueIter = ((Collection)value).iterator();
1970          while(valueIter.hasNext()){
1971            Object item = valueIter.next();
1972            if (!(String.class.isAssignableFrom(item.getClass()) ||
1973                  Number.class.isAssignableFrom(item.getClass())))
1974                  continue;
1975//            strBuff.append(item);
1976            // replace non XML chars in collection item
1977            strBuff.append(
1978              filterNonXmlChars(replaceCharsWithEntities(item.toString())));
1979            strBuff.append(";");
1980          }// End while
1981          if (strBuff.charAt(strBuff.length()-1) == ';')
1982            strBuff.deleteCharAt(strBuff.length()-1);
1983        }else{
1984//          strBuff.append(value);
1985          // replace non XML chars in attribute value
1986          strBuff.append(
1987            filterNonXmlChars(replaceCharsWithEntities(value.toString())));
1988        }// End if
1989        strBuff.append("\"");
1990      }// End if
1991    }// End while
1992    return strBuff.toString();
1993  }// writeFeatures()
1994
1995  /** Returns a GateXml document that is a custom XML format for wich there is
1996    * a reader inside GATE called gate.xml.GateFormatXmlHandler.
1997    * What it does is to serialize a GATE document in an XML format.
1998    * @return a string representing a Gate Xml document.
1999    */
2000  public String toXml(){
2001    // Initialize the xmlContent with 3 time the size of the current document.
2002    // This is because of the tags size. This measure is made to increase the
2003    // performance of StringBuffer.
2004    StringBuffer xmlContent = new StringBuffer(
2005         DOC_SIZE_MULTIPLICATION_FACTOR*(getContent().size().intValue()));
2006    // Add xml header
2007    xmlContent.append("<?xml version=\"1.0\" encoding=\"");
2008    xmlContent.append(getEncoding());
2009    xmlContent.append("\" ?>");
2010    xmlContent.append(Strings.getNl());
2011
2012    // Add the root element
2013    xmlContent.append("<GateDocument>\n");
2014    xmlContent.append("<!-- The document's features-->\n\n");
2015    xmlContent.append("<GateDocumentFeatures>\n");
2016
2017    xmlContent.append(featuresToXml(this.getFeatures()));
2018    xmlContent.append("</GateDocumentFeatures>\n");
2019    xmlContent.append("<!-- The document content area with serialized"+
2020                      " nodes -->\n\n");
2021    // Add plain text element
2022    xmlContent.append("<TextWithNodes>");
2023    xmlContent.append(textWithNodes(this.getContent().toString()));
2024    xmlContent.append("</TextWithNodes>\n");
2025    // Serialize as XML all document's annotation sets
2026    // Serialize the default AnnotationSet
2027    StatusListener sListener = (StatusListener)
2028                               gate.gui.MainFrame.getListeners().
2029                               get("gate.event.StatusListener");
2030    if(sListener != null)
2031      sListener.statusChanged("Saving the default annotation set ");
2032    xmlContent.append("<!-- The default annotation set -->\n\n");
2033    xmlContent.append(annotationSetToXml(this.getAnnotations()));
2034    // Serialize all others AnnotationSets
2035    // namedAnnotSets is a Map containing all other named Annotation Sets.
2036    if (namedAnnotSets != null){
2037      Iterator iter = namedAnnotSets.values().iterator();
2038      while(iter.hasNext()){
2039        AnnotationSet annotSet = (AnnotationSet) iter.next();
2040        xmlContent.append("<!-- Named annotation set -->\n\n");
2041        // Serialize it as XML
2042        if(sListener != null) sListener.statusChanged("Saving " +
2043                                                      annotSet.getName()+
2044                                                      " annotation set ");
2045        xmlContent.append(annotationSetToXml(annotSet));
2046      }// End while
2047    }// End if
2048    // Add the end of GateDocument
2049    xmlContent.append("</GateDocument>");
2050    if(sListener != null) sListener.statusChanged("Done !");
2051    // return the XmlGateDocument
2052    return xmlContent.toString();
2053  }// toXml
2054
2055  /** This method filters any non XML char
2056    * see: http://www.w3c.org/TR/2000/REC-xml-20001006#charsets
2057    * All non XML chars will be replaced with 0x20 (space char) This assures
2058    * that the next time the document is loaded there won't be any problems.
2059    * @param aStrBuffer represents the input String that is filtred. If the
2060    * aStrBuffer is null then an empty string will be returend
2061    * @return the "purified" StringBuffer version of the aStrBuffer
2062    */
2063  private StringBuffer filterNonXmlChars(StringBuffer aStrBuffer){
2064    if (aStrBuffer == null) return new StringBuffer("");
2065//    String space = new String(" ");
2066    char space = ' ';
2067    for (int i=aStrBuffer.length()-1;i>=0; i--){
2068      if (!isXmlChar(aStrBuffer.charAt(i)))
2069        aStrBuffer.setCharAt(i, space);
2070    }// End for
2071    return aStrBuffer;
2072  }// filterNonXmlChars()
2073
2074  /** This method decide if a char is a valid XML one or not
2075    * @param ch the char to be tested
2076    * @return true if is a valid XML char and fals if is not.
2077    */
2078  public static boolean isXmlChar(char ch){
2079    if (ch == 0x9 || ch == 0xA || ch ==0xD) return true;
2080    if ((0x20 <= ch) && (ch <= 0xD7FF)) return true;
2081    if ((0xE000 <= ch) && (ch <= 0xFFFD)) return true;
2082    if ((0x10000 <= ch) && (ch <= 0x10FFFF)) return true;
2083    return false;
2084  }// End isXmlChar()
2085
2086  /** This method saves a FeatureMap as XML elements.
2087    * @ param aFeatureMap the feature map that has to be saved as XML.
2088    * @ return a String like this: <Feature><Name>...</Name>
2089    * <Value>...</Value></Feature><Feature>...</Feature>
2090    */
2091  private String featuresToXml(FeatureMap aFeatureMap){
2092    StringBuffer str = new StringBuffer("");
2093
2094    if (aFeatureMap == null) return str.toString();
2095
2096    Set keySet = aFeatureMap.keySet();
2097    Iterator keyIterator = keySet.iterator();
2098    while(keyIterator.hasNext()){
2099      Object key = keyIterator.next();
2100      Object value = aFeatureMap.get(key);
2101      if ((key != null) && (value != null)){
2102        String keyClassName = null;
2103        String keyItemClassName = null;
2104        String valueClassName = null;
2105        String valueItemClassName = null;
2106        String key2String = key.toString();
2107        String value2String = value.toString();
2108
2109        Object item = null;
2110        // Test key if it is String, Number or Collection
2111        if (key instanceof java.lang.String ||
2112            key instanceof java.lang.Number ||
2113            key instanceof java.util.Collection)
2114          keyClassName = key.getClass().getName();
2115
2116        // Test value if it is String, Number or Collection
2117        if (value instanceof java.lang.String ||
2118            value instanceof java.lang.Number ||
2119            value instanceof java.util.Collection)
2120          valueClassName = value.getClass().getName();
2121
2122        // Features and values that are not Strings, Numbers or collections
2123        // will be discarded.
2124        if (keyClassName == null || valueClassName == null) continue;
2125
2126        // If key is collection serialize the colection in a specific format
2127        if (key instanceof java.util.Collection){
2128          StringBuffer keyStrBuff = new StringBuffer("");
2129          Iterator iter = ((Collection) key).iterator();
2130          if (iter.hasNext()){
2131            item = iter.next();
2132            if (item instanceof java.lang.Number)
2133              keyItemClassName = item.getClass().getName();
2134            else
2135              keyItemClassName = String.class.getName();
2136            keyStrBuff.append(item.toString());
2137          }// End if
2138          while (iter.hasNext()){
2139            item = iter.next();
2140            keyStrBuff.append(";" + item.toString());
2141          }// End while
2142          key2String = keyStrBuff.toString();
2143        }// End if
2144        // If key is collection serialize the colection in a specific format
2145        if (value instanceof java.util.Collection){
2146          StringBuffer valueStrBuff = new StringBuffer("");
2147          Iterator iter = ((Collection) value).iterator();
2148          if (iter.hasNext()){
2149            item = iter.next();
2150            if (item instanceof java.lang.Number)
2151              valueItemClassName = item.getClass().getName();
2152            else
2153              valueItemClassName = String.class.getName();
2154            valueStrBuff.append(item.toString());
2155          }// End if
2156          while (iter.hasNext()){
2157            item = iter.next();
2158            valueStrBuff.append(";" + item.toString());
2159          }// End while
2160          value2String = valueStrBuff.toString();
2161        }// End if
2162        str.append("<Feature>\n  <Name");
2163        if (keyClassName != null)
2164          str.append(" className=\""+keyClassName+"\"");
2165        if (keyItemClassName != null)
2166          str.append(" itemClassName=\""+keyItemClassName+"\"");
2167        str.append(">");
2168        str.append(filterNonXmlChars(replaceCharsWithEntities(key2String)));
2169        str.append("</Name>\n  <Value");
2170        if (valueClassName != null)
2171          str.append(" className=\"" + valueClassName + "\"");
2172        if (valueItemClassName != null)
2173          str.append(" itemClassName=\"" + valueItemClassName + "\"");
2174        str.append(">");
2175        str.append(filterNonXmlChars(replaceCharsWithEntities(value2String)));
2176        str.append("</Value>\n</Feature>\n");
2177      }// End if
2178    }// end While
2179    return str.toString();
2180  }//featuresToXml
2181
2182  /** This method replace all chars that appears in the anInputString and also
2183    * that are in the entitiesMap with their corresponding entity
2184    * @param anInputString the string analyzed. If it is null then returns the
2185    *  empty string
2186    * @return a string representing the input string with chars replaced with
2187    *  entities
2188    */
2189  private StringBuffer replaceCharsWithEntities(String anInputString){
2190    if (anInputString == null) return new StringBuffer("");
2191    StringBuffer strBuff = new StringBuffer(anInputString);
2192    for (int i=strBuff.length()-1; i>=0; i--){
2193      Character ch = new Character(strBuff.charAt(i));
2194      if (entitiesMap.keySet().contains(ch)){
2195        strBuff.replace(i,i+1,(String) entitiesMap.get(ch));
2196      }// End if
2197    }// End for
2198    return strBuff;
2199  }//replaceCharsWithEntities()
2200
2201  /** This method creates Node XML elements and inserts them at the
2202    * corresponding offset inside the text. Nodes are created from the default
2203    * annotation set, as well as from all existing named annotation sets.
2204    * @param aText The text representing the document's plain text.
2205    * @return The text with empty <Node id="NodeId"/> elements.
2206    */
2207  private String textWithNodes(String aText){
2208    if (aText == null) return new String("");
2209    StringBuffer textWithNodes = filterNonXmlChars(new StringBuffer(aText));
2210
2211    // Construct a map from offsets to Chars
2212    TreeMap offsets2CharsMap = new TreeMap();
2213    if (aText.length()!= 0){
2214      // Fill the offsets2CharsMap with all the indices where special chars appear
2215      buildEntityMapFromString(aText,offsets2CharsMap);
2216    }//End if
2217    // Construct the offsetsSet for all nodes belonging to this document
2218    TreeSet offsetsSet = new TreeSet();
2219    Iterator annotSetIter = this.getAnnotations().iterator();
2220    while (annotSetIter.hasNext()){
2221      Annotation annot = (Annotation) annotSetIter.next();
2222      offsetsSet.add(annot.getStartNode().getOffset());
2223      offsetsSet.add(annot.getEndNode().getOffset());
2224    }// end While
2225    // Get the nodes from all other named annotation sets.
2226    if (namedAnnotSets != null){
2227      Iterator iter = namedAnnotSets.values().iterator();
2228      while(iter.hasNext()){
2229        AnnotationSet annotSet = (AnnotationSet) iter.next();
2230        Iterator iter2 = annotSet.iterator();
2231        while(iter2.hasNext()){
2232          Annotation annotTmp = (Annotation) iter2.next();
2233          offsetsSet.add(annotTmp.getStartNode().getOffset());
2234          offsetsSet.add(annotTmp.getEndNode().getOffset());
2235        }// End while
2236      }// End while
2237    }// End if
2238    // offsetsSet is ordered in ascending order because the structure
2239    // is a TreeSet
2240
2241    if (offsetsSet.isEmpty()){
2242      return replaceCharsWithEntities(aText).toString();
2243    }// End if
2244    // Iterate through all nodes from anAnnotSet and transform them to
2245    // XML elements. Then insert those elements at the node's offset into the
2246    // textWithNodes .
2247    while (!offsetsSet.isEmpty()){
2248      Long offset = (Long) offsetsSet.last();
2249      // Eliminate the offset from the list in order to create more memory space
2250      offsetsSet.remove(offset);
2251      // Use offset
2252      int offsetValue = offset.intValue();
2253      String strNode = "<Node id=\"" + offsetValue + "\"/>";
2254      // Before inserting this string into the textWithNodes, check to see if
2255      // there are any chars to be replaced with their corresponding entities
2256      if (!offsets2CharsMap.isEmpty()){
2257        Long offsChar = (Long) offsets2CharsMap.lastKey();
2258        while( !offsets2CharsMap.isEmpty() &&
2259                       offsChar.intValue() >= offset.intValue()){
2260          // Replace the char at offsChar with its corresponding entity form
2261          // the entitiesMap.
2262          textWithNodes.replace(offsChar.intValue(),offsChar.intValue()+1,
2263          (String)entitiesMap.get((Character)offsets2CharsMap.get(offsChar)));
2264          // Discard the offsChar after it was used because this offset will
2265          // never appear again
2266          offsets2CharsMap.remove(offsChar);
2267          // Investigate next offsChar
2268          if (!offsets2CharsMap.isEmpty())
2269            offsChar = (Long) offsets2CharsMap.lastKey();
2270        }// End while
2271      }// End if
2272      // Now it is safe to insert the node
2273      textWithNodes.insert(offsetValue,strNode);
2274    }// end while
2275    // Need to replace the entities in the remaining text, if there is any text
2276    // So, if there are any more items in offsets2CharsMap they need to be
2277    // replaced
2278    while (!offsets2CharsMap.isEmpty()){
2279      Long offsChar = (Long) offsets2CharsMap.lastKey();
2280      // Replace the char with its entity
2281      textWithNodes.replace(offsChar.intValue(),offsChar.intValue()+1,
2282      (String)entitiesMap.get((Character)offsets2CharsMap.get(offsChar)));
2283      // remove the offset from the map
2284      offsets2CharsMap.remove(offsChar);
2285    }// End while
2286    return textWithNodes.toString();
2287  }//textWithNodes()
2288
2289  /** This method saves an AnnotationSet as XML.
2290    * @param anAnnotationSet The annotation set that has to be saved as XML.
2291    * @return a String like this: <AnnotationSet> <Annotation>....
2292    * </AnnotationSet>
2293    */
2294  private String annotationSetToXml(AnnotationSet anAnnotationSet){
2295    StringBuffer str = new StringBuffer("");
2296
2297    if (anAnnotationSet == null){
2298      str.append("<AnnotationSet>\n");
2299      str.append("</AnnotationSet>\n");
2300      return str.toString();
2301    }// End if
2302    if (anAnnotationSet.getName() == null)
2303      str.append("<AnnotationSet>\n");
2304    else str.append("<AnnotationSet Name=\"" + anAnnotationSet.getName()+
2305                                                                    "\" >\n");
2306    // Iterate through AnnotationSet and save each Annotation as XML
2307    Iterator iterator = anAnnotationSet.iterator();
2308    while (iterator.hasNext()){
2309      Annotation annot = (Annotation) iterator.next();
2310      str.append("<Annotation " + "Type=\"" + annot.getType() +
2311                  "\" StartNode=\"" + annot.getStartNode().getOffset() +
2312                   "\" EndNode=\"" + annot.getEndNode().getOffset() + "\">\n");
2313      str.append(featuresToXml(annot.getFeatures()));
2314      str.append("</Annotation>\n");
2315    }// End while
2316
2317    str.append("</AnnotationSet>\n");
2318    return str.toString();
2319  }// annotationSetToXml
2320
2321  /** Returns a map with the named annotation sets. It returns <code>null</code>
2322   *  if no named annotaton set exists. */
2323  public Map getNamedAnnotationSets() {
2324    return namedAnnotSets;
2325  } // getNamedAnnotationSets
2326
2327  /**
2328   * Removes one of the named annotation sets.
2329   * Note that the default annotation set cannot be removed.
2330   * @param name the name of the annotation set to be removed
2331   */
2332  public void removeAnnotationSet(String name){
2333    Object removed = namedAnnotSets.remove(name);
2334    if(removed != null){
2335      fireAnnotationSetRemoved(
2336        new DocumentEvent(this, DocumentEvent.ANNOTATION_SET_REMOVED, name));
2337    }
2338  }
2339
2340  /** Propagate edit changes to the document content and annotations. */
2341  public void edit(Long start, Long end, DocumentContent replacement)
2342    throws InvalidOffsetException
2343  {
2344    if(! isValidOffsetRange(start, end))
2345      throw new InvalidOffsetException();
2346
2347    if(content != null)
2348      ((DocumentContentImpl) content).edit(start, end, replacement);
2349
2350    if(defaultAnnots != null)
2351      ((AnnotationSetImpl) defaultAnnots).edit(start, end, replacement);
2352
2353    if(namedAnnotSets != null) {
2354      Iterator iter = namedAnnotSets.values().iterator();
2355      while(iter.hasNext())
2356        ((AnnotationSetImpl) iter.next()).edit(start, end, replacement);
2357    }
2358
2359  } // edit(start,end,replacement)
2360
2361  /** Check that an offset is valid, i.e. it is non-null, greater than
2362    * or equal to 0 and less than the size of the document content.
2363    */
2364  public boolean isValidOffset(Long offset) {
2365    if(offset == null)
2366      return false;
2367
2368    long o = offset.longValue();
2369    if(o > getContent().size().longValue() || o < 0)
2370      return false;
2371
2372    return true;
2373  } // isValidOffset
2374
2375  /** Check that both start and end are valid offsets and that
2376    * they constitute a valid offset range, i.e. start is greater
2377    * than or equal to long.
2378    */
2379  public boolean isValidOffsetRange(Long start, Long end) {
2380    return
2381      isValidOffset(start) && isValidOffset(end) &&
2382      start.longValue() <= end.longValue();
2383  } // isValidOffsetRange(start,end)
2384
2385  /** Sets the nextAnnotationId */
2386  public void setNextAnnotationId(int aNextAnnotationId){
2387    nextAnnotationId = aNextAnnotationId;
2388  }// setNextAnnotationId();
2389
2390  /** Generate and return the next annotation ID */
2391  public Integer getNextAnnotationId() {
2392    return new Integer(nextAnnotationId++);
2393  } // getNextAnnotationId
2394
2395  /** Generate and return the next node ID */
2396  public Integer getNextNodeId() { return new Integer(nextNodeId++); }
2397
2398  /** Ordering based on URL.toString() and the URL offsets (if any) */
2399  public int compareTo(Object o) throws ClassCastException {
2400    DocumentImpl other = (DocumentImpl) o;
2401    return getOrderingString().compareTo(other.getOrderingString());
2402  } // compareTo
2403
2404  /** Utility method to produce a string for comparison in ordering.
2405    * String is based on the source URL and offsets.
2406    */
2407  protected String getOrderingString() {
2408    if(sourceUrl == null) return toString();
2409
2410    StringBuffer orderingString = new StringBuffer(sourceUrl.toString());
2411    if(sourceUrlStartOffset != null && sourceUrlEndOffset != null) {
2412      orderingString.append(sourceUrlStartOffset.toString());
2413      orderingString.append(sourceUrlEndOffset.toString());
2414    }
2415
2416    return orderingString.toString();
2417  } // getOrderingString()
2418
  /** The id of the next new annotation; handed out and then incremented by
    * getNextAnnotationId() */
  protected int nextAnnotationId = 0;

  /** The id of the next new node; handed out and then incremented by
    * getNextNodeId() */
  protected int nextNodeId = 0;
  /** The source URL; may be null, in which case getOrderingString() falls
    * back to toString() */
  protected URL sourceUrl;

  // NOTE(review): an orphaned doc comment ("The document's URL name.") used
  // to sit here with no field attached to it.

  /** The content of the document; may be null (edit() checks for that) */
  protected DocumentContent content;

  /** The encoding of the source of the document content; null by default */
  protected String encoding = null;
2434
  // Data needed in the toXml(AnnotationSet) methods
2436
2437  /** This field indicates whether or not to add the tag
2438    * called GatePreserveFormat to the document. HTML, XML, SGML docs won't
2439    * have this tag added
2440    */
2441//  private boolean addGatePreserveFormatTag = false;
2442
2443  /**
2444   * Used by the XML dump preserving format method
2445   */
2446  private Annotation theRootAnnotation = null;
2447
  /** This field is used when creating StringBuffers for toXml() methods.
    * The size of the StringBuffer will be docContent.size() multiplied by this
    * value. It is aimed at improving the performance of StringBuffer by
    * sizing it up front.
    */
  private final int DOC_SIZE_MULTIPLICATION_FACTOR = 2;
2453
  /** Constant used in the inner class AnnotationComparator to order
    * annotations on their start offset
    */
  private final int ORDER_ON_START_OFFSET = 0;
  /** Constant used in the inner class AnnotationComparator to order
    * annotations on their end offset
    */
  private final int ORDER_ON_END_OFFSET = 1;
  /** Constant used in the inner class AnnotationComparator to order
    * annotations on their ID
    */
  private final int ORDER_ON_ANNOT_ID = 2;
  /** Constant used in the inner class AnnotationComparator to order
    * annotations ascending. It is also the comparator's default order type.
    */
  private final int ASC = 3;
  /** Constant used in the inner class AnnotationComparator to order
    * annotations descending. The comparator treats any order type other
    * than ASC as descending.
    */
  private final int DESC = -3;
2474
  /** A map from special characters (Character) to the XML entity (String)
    * that replaces them when text is saved as XML. It is filled in by the
    * static initialiser below.
    */
  private static Map entitiesMap = null;
  // Initialize the entities map used when saving as xml
  static{
    entitiesMap = new HashMap();
    // The five characters with predefined XML entities
    entitiesMap.put(new Character('<'),"&lt;");
    entitiesMap.put(new Character('>'),"&gt;");
    entitiesMap.put(new Character('&'),"&amp;");
    entitiesMap.put(new Character('\''),"&apos;");
    entitiesMap.put(new Character('"'),"&quot;");
    // Code point 160 (no-break space) and 169 (copyright sign) are written
    // as numeric character references
    entitiesMap.put(new Character((char)160),"&#160;");
    entitiesMap.put(new Character((char)169),"&#169;");
  }//static
2490
2491  /** The range that the content comes from at the source URL
2492    * (or null if none).
2493    */
2494  //protected Long[] sourceUrlOffsets;
2495
  /** The start of the range that the content comes from at the source URL
    * (or null if none). Appended to the ordering string by
    * getOrderingString() when both offsets are set.
    */
  protected Long sourceUrlStartOffset;

  /** The end of the range that the content comes from at the source URL
    * (or null if none). Appended to the ordering string by
    * getOrderingString() when both offsets are set.
    */
  protected Long sourceUrlEndOffset;
2505
  /** The default annotation set; may be null (edit() checks for that) */
  protected AnnotationSet defaultAnnots;

  /** Named sets of annotations; null when no named annotation set exists.
    * The values are AnnotationSet objects. */
  protected Map namedAnnotSets;
2511
2512  /**
2513   * A property of the document that will be set when the user
2514   * wants to create the document from a string, as opposed to from
2515   * a URL.
2516   */
2517  private String stringContent;
2518
2519  /**
2520   * The stringContent of a document is
2521   * a property of the document that will be set when the user
2522   * wants to create the document from a string, as opposed to from
2523   * a URL.
2524   * <B>Use the <TT>getContent</TT> method instead to get the actual document
2525   * content.</B>
2526   */
2527  public String getStringContent() { return stringContent; }
2528
2529  /**
2530   * The stringContent of a document is
2531   * a property of the document that will be set when the user
2532   * wants to create the document from a string, as opposed to from
2533   * a URL.
2534   * <B>Use the <TT>setContent</TT> method instead to update the actual
2535   * document content.</B>
2536   */
2537  public void setStringContent(String stringContent) {
2538    this.stringContent = stringContent;
2539  } // set StringContent
2540
2541  /** Is the document markup-aware? */
2542  protected Boolean markupAware = new Boolean(false);
2543//  /** Hash code */
2544//  public int hashCode() {
2545//    int code = getContent().hashCode();
2546//    int memberCode = (defaultAnnots == null) ? 0 : defaultAnnots.hashCode();
2547//    code += memberCode;
2548//    memberCode = (encoding == null) ? 0 : encoding.hashCode();
2549//    code += memberCode;
2550//    memberCode = (features == null) ? 0 : features.hashCode();
2551//    code += memberCode;
2552//    code += (markupAware.booleanValue()) ? 0 : 1;
2553//    memberCode = (namedAnnotSets == null) ? 0 : namedAnnotSets.hashCode();
2554//    code += memberCode;
2555//    code += nextAnnotationId;
2556//    code += nextNodeId;
2557//    memberCode = (sourceUrl == null) ? 0 : sourceUrl.hashCode();
2558//    code += memberCode;
2559//    memberCode =
2560//      (sourceUrlStartOffset == null) ? 0 : sourceUrlStartOffset.hashCode();
2561//    code += memberCode;
2562//    memberCode =
2563//      (sourceUrlEndOffset == null) ? 0 : sourceUrlEndOffset.hashCode();
2564//    code += memberCode;
2565//    return code;
2566//  } // hashcode
2567
2568  /** String respresentation */
2569  public String toString() {
2570    String n = Strings.getNl();
2571    StringBuffer s = new StringBuffer("DocumentImpl: " + n);
2572    s.append("  content:" + content + n);
2573    s.append("  defaultAnnots:" + defaultAnnots + n);
2574    s.append("  encoding:" + encoding + n);
2575    s.append("  features:" + features + n);
2576    s.append("  markupAware:" + markupAware + n);
2577    s.append("  namedAnnotSets:" + namedAnnotSets + n);
2578    s.append("  nextAnnotationId:" + nextAnnotationId + n);
2579    s.append("  nextNodeId:" + nextNodeId + n);
2580    s.append("  sourceUrl:" + sourceUrl + n);
2581    s.append("  sourceUrlStartOffset:" + sourceUrlStartOffset + n);
2582    s.append("  sourceUrlEndOffset:" + sourceUrlEndOffset + n);
2583    s.append(n);
2584
2585    return s.toString();
2586  } // toString
2587
   /** Freeze the serialization UID, so that it is a fixed value instead of
     * one computed from the shape of the class. */
  static final long serialVersionUID = -8456893608311510260L;
2590
2591  /** Inner class needed to compare annotations*/
2592  class AnnotationComparator implements java.util.Comparator {
2593    int orderOn = -1;
2594    int orderType = ASC;
2595    /** Constructs a comparator according to one of three sorter types:
2596      * ORDER_ON_ANNOT_TYPE, ORDER_ON_END_OFFSET, ORDER_ON_START_OFFSET
2597      */
2598      public AnnotationComparator(int anOrderOn, int anOrderType){
2599        orderOn = anOrderOn;
2600        orderType = anOrderType;
2601      }// AnnotationComparator()
2602
2603      /**This method must be implemented according to Comparator interface */
2604      public int compare(Object o1, Object o2){
2605        Annotation a1 = (Annotation) o1;
2606        Annotation a2 = (Annotation) o2;
2607        // ORDER_ON_START_OFFSET ?
2608        if (orderOn == ORDER_ON_START_OFFSET){
2609          int result = a1.getStartNode().getOffset().compareTo(
2610                                                a2.getStartNode().getOffset());
2611          if (orderType == ASC){
2612            // ASC
2613            // If they are equal then their ID will decide.
2614            if (result == 0)
2615              return a1.getId().compareTo(a2.getId());
2616            return result;
2617          }else{
2618            // DESC
2619            if (result == 0)
2620              return - (a1.getId().compareTo(a2.getId()));
2621            return -result;
2622          }// End if (orderType == ASC)
2623        }// End if (orderOn == ORDER_ON_START_OFFSET)
2624
2625        // ORDER_ON_END_OFFSET ?
2626        if (orderOn == ORDER_ON_END_OFFSET){
2627          int result = a1.getEndNode().getOffset().compareTo(
2628                                                a2.getEndNode().getOffset());
2629          if (orderType == ASC){
2630            // ASC
2631            // If they are equal then their ID will decide.
2632            if (result == 0)
2633              return - (a1.getId().compareTo(a2.getId()));
2634            return result;
2635          }else{
2636            // DESC
2637            // If they are equal then their ID will decide.
2638            if (result == 0)
2639              return a1.getId().compareTo(a2.getId());
2640            return - result;
2641          }// End if (orderType == ASC)
2642        }// End if (orderOn == ORDER_ON_END_OFFSET)
2643
2644        // ORDER_ON_ANNOT_ID ?
2645        if (orderOn == ORDER_ON_ANNOT_ID){
2646          if (orderType == ASC)
2647            return a1.getId().compareTo(a2.getId());
2648          else
2649            return -(a1.getId().compareTo(a2.getId()));
2650        }// End if
2651        return 0;
2652      }//compare()
2653  } // End inner class AnnotationComparator
2654
2655
  /** The registered DocumentListeners, or null while none has been added.
    * addDocumentListener and removeDocumentListener never modify the
    * current Vector; they replace it with an updated copy. Transient, so it
    * is not serialized with the document. */
  private transient Vector documentListeners;
  /** NOTE(review): not referenced in the visible part of this class;
    * presumably a list of gate listeners - confirm before relying on it. */
  private transient Vector gateListeners;
2658
2659  public synchronized void removeDocumentListener(DocumentListener l) {
2660    if (documentListeners != null && documentListeners.contains(l)) {
2661      Vector v = (Vector) documentListeners.clone();
2662      v.removeElement(l);
2663      documentListeners = v;
2664    }
2665  }
2666  public synchronized void addDocumentListener(DocumentListener l) {
2667    Vector v = documentListeners == null ? new Vector(2) : (Vector) documentListeners.clone();
2668    if (!v.contains(l)) {
2669      v.addElement(l);
2670      documentListeners = v;
2671    }
2672  }
2673
2674  protected void fireAnnotationSetAdded(DocumentEvent e) {
2675    if (documentListeners != null) {
2676      Vector listeners = documentListeners;
2677      int count = listeners.size();
2678      for (int i = 0; i < count; i++) {
2679        ((DocumentListener) listeners.elementAt(i)).annotationSetAdded(e);
2680      }
2681    }
2682  }
2683
2684  protected void fireAnnotationSetRemoved(DocumentEvent e) {
2685    if (documentListeners != null) {
2686      Vector listeners = documentListeners;
2687      int count = listeners.size();
2688      for (int i = 0; i < count; i++) {
2689        ((DocumentListener) listeners.elementAt(i)).annotationSetRemoved(e);
2690      }
2691    }
2692  }
  /** Creole event callback; intentionally empty - the document takes no
    * action when a resource is loaded.
    * @param e the event (ignored)
    */
  public void resourceLoaded(CreoleEvent e) {
  }
  /** Creole event callback; intentionally empty - the document takes no
    * action when a resource is unloaded.
    * @param e the event (ignored)
    */
  public void resourceUnloaded(CreoleEvent e) {
  }
  /** Creole event callback; intentionally empty - the document takes no
    * action when a datastore is opened.
    * @param e the event (ignored)
    */
  public void datastoreOpened(CreoleEvent e) {
  }
  /** Creole event callback; intentionally empty - the document takes no
    * action when a datastore is created.
    * @param e the event (ignored)
    */
  public void datastoreCreated(CreoleEvent e) {
  }
  /** Rename callback; intentionally empty - the document takes no action
    * when a resource is renamed.
    * @param resource the renamed resource (ignored)
    * @param oldName the previous name (ignored)
    * @param newName the new name (ignored)
    */
  public void resourceRenamed(Resource resource, String oldName,
                              String newName){
  }
2704  public void datastoreClosed(CreoleEvent e) {
2705    if (! e.getDatastore().equals(this.getDataStore()))
2706      return;
2707    //close this lr, since it cannot stay open when the DS it comes from
2708    //is closed
2709    Factory.deleteResource(this);
2710  }
  /** Sets the persistence ID through the superclass and then registers this
    * document as a listener on the creole register.
    * @param lrID the persistence ID, passed on to the superclass unchanged
    */
  public void setLRPersistenceId(Object lrID) {
    super.setLRPersistenceId( lrID);
    //make persistent documents listen to the creole register
    //for events about their DS
    Gate.getCreoleRegister().addCreoleListener(this);
  }
  /** Datastore event callback; intentionally empty - the document takes no
    * action when a resource is adopted.
    * @param evt the event (ignored)
    */
  public void resourceAdopted(DatastoreEvent evt) {
  }
2719  public void resourceDeleted(DatastoreEvent evt) {
2720    if(! evt.getSource().equals(this.getDataStore()))
2721      return;
2722    //if an open document is deleted from a DS, then
2723    //it must close itself immediately, as is no longer valid
2724    if(evt.getResourceID().equals(this.getLRPersistenceId()))
2725      Factory.deleteResource(this);
2726  }
  /** Datastore event callback; intentionally empty - the document takes no
    * action when a resource is written.
    * @param evt the event (ignored)
    */
  public void resourceWritten(DatastoreEvent evt) {
  }
2729  public void setDataStore(DataStore dataStore) throws gate.persist.PersistenceException {
2730    super.setDataStore( dataStore);
2731    if (this.dataStore != null)
2732      this.dataStore.addDatastoreListener(this);
2733  }
2734
2735} // class DocumentImpl
2736