1
15
16 package gate.xml;
17
18 import java.lang.reflect.Constructor;
19 import java.util.*;
20
21 import org.xml.sax.*;
22 import org.xml.sax.helpers.DefaultHandler;
23
24 import gate.*;
25 import gate.corpora.DocumentContentImpl;
26 import gate.event.StatusListener;
27 import gate.util.*;
28
29
30
34 public class GateFormatXmlDocumentHandler extends DefaultHandler{
35
36 private static final boolean DEBUG = false;
37
38
39 private StringBuffer contentBuffer = new StringBuffer("");
40
41
42 private boolean readCharacterStatus = false;
43
44
46 public GateFormatXmlDocumentHandler(gate.Document aDocument){
47 tmpDocContent = new StringBuffer(aDocument.getContent().size().intValue());
49
50 colector = new LinkedList();
53
54 doc = aDocument;
56 currentAnnotationSet = doc.getAnnotations();
57 }
59
63 public void startDocument() throws org.xml.sax.SAXException {
64 }
66
74 public void endDocument() throws org.xml.sax.SAXException {
75
76 doc.setContent(new DocumentContentImpl(tmpDocContent.toString()));
78 long docSize = doc.getContent().size().longValue();
79
80 fireStatusChangedEvent("Total elements: " + elements);
82
83 }
85
89 public void startElement (String uri, String qName, String elemName,
90 Attributes atts) throws SAXException {
91
92 if(readCharacterStatus) {
94 readCharacterStatus = false;
95 charactersAction(new String(contentBuffer).toCharArray(),0,contentBuffer.length());
96 }
97
98 if ((++elements % ELEMENTS_RATE) == 0 )
101 fireStatusChangedEvent("Processed elements : " + elements);
102
103 currentElementStack.add(elemName);
105
106 if("AnnotationSet".equals(elemName))
107 processAnnotationSetElement(atts);
108
109 if("Annotation".equals(elemName))
110 processAnnotationElement(atts);
111
112 if("Feature".equals(elemName))
113 processFeatureElement(atts);
114
115 if("Name".equals(elemName))
116 processNameElement(atts);
117
118 if("Value".equals(elemName))
119 processValueElement(atts);
120
121 if("Node".equals(elemName))
122 processNodeElement(atts);
123 }
125
129 public void endElement (String uri, String qName, String elemName )
130 throws SAXException{
131
132 if(readCharacterStatus) {
134 readCharacterStatus = false;
135 charactersAction(new String(contentBuffer).toCharArray(),0,contentBuffer.length());
136 }
137
138 currentElementStack.pop();
139 if ("Annotation".equals(elemName)){
141 if (currentFeatureMap == null)
142 currentFeatureMap = Factory.newFeatureMap();
143 currentAnnot.setFM(currentFeatureMap);
144 colector.add(currentAnnot);
145 currentAnnot = null;
147 currentFeatureMap = null;
148 return;
149 } if ("Value".equals(elemName) && "Feature".equals(
152 (String)currentElementStack.peek())){
153 if (currentFeatureValue == null) currentFeatureValue = "";
155 } if ("Feature".equals(elemName)){
158 if(currentFeatureName == null){
159 throw new GateSaxException("A feature name was empty." +
162 "The annotation that cause it is " +
163 currentAnnot +
164 ".Please check the document with a text editor before trying again.");
165 }else {
166 if (currentFeatureMap == null){
167 throw new GateSaxException("Document not consistent. A start"+
169 " feature element is missing. " +
170 "The annotation that cause it is " +
171 currentAnnot +
172 "Please check the document with a text editor before trying again.");
173 } currentFeatureMap.put(createFeatKey(),createFeatValue());
178 currentFeatureKeyClassName = null;
181 currentFeatureKeyItemClassName = null;
182 currentFeatureName = null;
183 currentFeatureValueClassName = null;
185 currentFeatureValueItemClassName = null;
186 currentFeatureValue = null;
187 } currentFeatureName = null;
190 currentFeatureValue = null;
191 return;
192 } if ("GateDocumentFeatures".equals(elemName)){
195 if (currentFeatureMap == null)
196 currentFeatureMap = Factory.newFeatureMap();
197 doc.setFeatures(currentFeatureMap);
198 currentFeatureMap = null;
199 return;
200 }
202 if ("AnnotationSet".equals(elemName)){
204 Iterator iterator = colector.iterator();
206 while (iterator.hasNext()){
207 AnnotationObject annot = (AnnotationObject) iterator.next();
208 iterator.remove();
210 try{
212 currentAnnotationSet.add(annot.getStart(),
213 annot.getEnd(),
214 annot.getElemName(),
215 annot.getFM());
216 }catch (gate.util.InvalidOffsetException e){
217 throw new GateSaxException(e);
218 } } return;
222 }
224
225 }
227
232 public void characters(char [] text,int start,int length) throws SAXException {
233 if(!readCharacterStatus) {
234 contentBuffer = new StringBuffer(new String(text,start,length));
235 } else {
236 contentBuffer.append(new String(text,start,length));
237 }
238 readCharacterStatus = true;
239 }
240
241
244 public void charactersAction( char[] text,int start,int length) throws SAXException{
245 String content = new String(text, start, length);
247 if ("TextWithNodes".equals((String)currentElementStack.peek())){
248 processTextOfTextWithNodesElement(content);
249 return;
250 } if ("Name".equals((String)currentElementStack.peek())){
252 processTextOfNameElement(content);
253 return;
254 } if ("Value".equals((String)currentElementStack.peek())){
256 processTextOfValueElement(content);
261 return;
262 } }
265
268 public void ignorableWhitespace(char ch[],int start,int length) throws
269 SAXException{
270 }
272
275 public void error(SAXParseException ex) throws SAXException {
276 _seh.error(ex);
279 }
281
284 public void fatalError(SAXParseException ex) throws SAXException {
285 _seh.fatalError(ex);
288 }
290
293 public void warning(SAXParseException ex) throws SAXException {
294 _seh.warning(ex);
297 }
299
301
302
303 private void processAnnotationSetElement(Attributes atts){
304 if (atts != null){
305 for (int i = 0; i < atts.getLength(); i++) {
306 String attName = atts.getLocalName(i);
308 String attValue = atts.getValue(i);
309 if ("Name".equals(attName))
310 currentAnnotationSet = doc.getAnnotations(attValue);
311 } } }
315
316 private void processNameElement(Attributes atts){
317 if (atts == null) return;
318 currentFeatureKeyClassName = atts.getValue("className");
319 currentFeatureKeyItemClassName = atts.getValue("itemClassName");
320 }
322
323 private void processValueElement(Attributes atts){
324 if (atts == null) return;
325 currentFeatureValueClassName = atts.getValue("className");
326 currentFeatureValueItemClassName = atts.getValue("itemClassName");
327 }
329
330 private void processAnnotationElement(Attributes atts){
331 if (atts != null){
332 currentAnnot = new AnnotationObject();
333 for (int i = 0; i < atts.getLength(); i++) {
334 String attName = atts.getLocalName(i);
336 String attValue = atts.getValue(i);
337
338 if ("Type".equals(attName))
339 currentAnnot.setElemName(attValue);
340
341 try{
342 if ("StartNode".equals(attName)){
343 Integer id = new Integer(attValue);
344 Long offset = (Long)id2Offset.get(id);
345 if (offset == null){
346 throw new GateRuntimeException("Couldn't found Node with id = " +
347 id +
348 ".It was specified in annot " +
349 currentAnnot+
350 " as a start node!" +
351 "Check the document with a text editor or something"+
352 " before trying again.");
353
354 }else
355 currentAnnot.setStart(offset);
356 } if ("EndNode".equals(attName)){
358 Integer id = new Integer(attValue);
359 Long offset = (Long) id2Offset.get(id);
360 if (offset == null){
361 throw new GateRuntimeException("Couldn't found Node with id = " +
362 id+
363 ".It was specified in annot " +
364 currentAnnot+
365 " as a end node!" +
366 "Check the document with a text editor or something"+
367 " before trying again.");
368 }else
369 currentAnnot.setEnd(offset);
370 } } catch (NumberFormatException e){
372 throw new GateRuntimeException("Offsets problems.Couldn't create"+
373 " Integers from" + " id[" +
374 attValue + "]) in annot " +
375 currentAnnot+
376 "Check the document with a text editor or something,"+
377 " before trying again");
378 } } } }
383
384 private void processFeatureElement(Attributes atts){
385 if (currentFeatureMap == null)
387 currentFeatureMap = Factory.newFeatureMap();
388 }
390
391 private void processNodeElement(Attributes atts){
392 if (atts != null){
393 for (int i = 0; i < atts.getLength(); i++) {
394 String attName = atts.getLocalName(i);
396 String attValue = atts.getValue(i);
397 if ("id".equals(attName)){
399 try{
400 Integer id = new Integer(attValue);
401 id2Offset.put(id,new Long(tmpDocContent.length()));
402 }catch(NumberFormatException e){
403 throw new GateRuntimeException("Coudn't create a node from " +
404 attValue + " Expected an integer.");
405 } } } } }
411
412 private void processTextOfTextWithNodesElement(String text){
413 text = recoverNewLineSequence(text);
414 tmpDocContent.append(text);
415 }
417
418 private String recoverNewLineSequence(String text) {
419 String result = text;
420
421 if(text.indexOf('\n') != -1) {
423 String newLineType =
424 (String) doc.getFeatures().get(GateConstants.DOCUMENT_NEW_LINE_TYPE);
425
426 if("LF".equalsIgnoreCase(newLineType)) {
427 newLineType = null;
428 }
429
430 if(newLineType == null) return result;
432
433 String newLine = "\n";
434 if("CRLF".equalsIgnoreCase(newLineType)) {
435 newLine = "\r\n";
436 }
437 if("CR".equalsIgnoreCase(newLineType)) {
438 newLine = "\r";
439 }
440 if("LFCR".equalsIgnoreCase(newLineType)) {
441 newLine = "\n\r";
442 }
443
444 StringBuffer buff = new StringBuffer(text);
445 int index = text.lastIndexOf('\n');
446 while(index != -1) {
447 buff.replace(index, index+1, newLine);
448 index = text.lastIndexOf('\n', index-1);
449 } result = buff.toString();
451 }
453 return result;
454 }
456
457 private void processTextOfNameElement(String text) throws GateSaxException{
458 if (currentFeatureMap == null)
459 throw new GateSaxException("GATE xml format processing error:" +
460 " Found a Name element that is not enclosed into a Feature one while" +
461 " analyzing the annotation " +
462 currentAnnot +
463 "Please check the document with a text editor or something before" +
464 " trying again.");
465 else{
466 if (currentFeatureName == null)
469 currentFeatureName = text;
470 else
471 currentFeatureName = currentFeatureName + text;
472 } }
475
476 private void processTextOfValueElement(String text) throws GateSaxException{
477 if (currentFeatureMap == null)
478 throw new GateSaxException("GATE xml format processing error:" +
479 " Found a Value element that is not enclosed into a Feature one while" +
480 " analyzing the annotation " +
481 currentAnnot+
482 "Please check the document with a text editor or something before" +
483 " trying again.");
484 else{
485 if (currentFeatureValue == null)
488 currentFeatureValue = text;
489 else
490 currentFeatureValue = currentFeatureValue + text;
491 } }
494
498 private Object createFeatKey(){
499 return createFeatObject(currentFeatureKeyClassName,
500 currentFeatureKeyItemClassName,
501 currentFeatureName);
502 }
504
508 private Object createFeatValue(){
509 return createFeatObject(currentFeatureValueClassName,
510 currentFeatureValueItemClassName,
511 currentFeatureValue);
512 }
514
532 private Object createFeatObject( String aFeatClassName,
533 String aFeatItemClassName,
534 String aFeatStringRepresentation){
535 if (aFeatStringRepresentation == null) return null;
537 if (aFeatClassName == null) aFeatClassName = "java.lang.String";
538 if (aFeatItemClassName == null) aFeatItemClassName = "java.lang.String";
539 Class currentFeatClass = null;
540 try{
541 currentFeatClass = Gate.getClassLoader().loadClass(aFeatClassName);
542 }catch (ClassNotFoundException cnfex){
543 return aFeatStringRepresentation;
544 } if (java.util.Collection.class.isAssignableFrom(currentFeatClass)){
546 Class itemClass = null;
547 Collection featObject = null;
548 try{
549 featObject = (Collection) currentFeatClass.newInstance();
550 try{
551 itemClass = Gate.getClassLoader().loadClass(aFeatItemClassName);
552 }catch(ClassNotFoundException cnfex){
553 Out.prln("Warning: Item class "+ aFeatItemClassName + " not found."+
554 "Adding items as Strings to the feature called \"" + currentFeatureName
555 + "\" in the annotation " + currentAnnot);
556 itemClass = java.lang.String.class;
557 } Class[] paramsArray = new Class[1];
560 paramsArray[0] = java.lang.String.class;
561 Constructor itemConstructor = null;
562 boolean addItemAsString = false;
563 try{
564 itemConstructor = itemClass.getConstructor(paramsArray);
565 }catch (NoSuchMethodException nsme){
566 addItemAsString = true;
567 }catch (SecurityException se){
568 addItemAsString = true;
569 } StringTokenizer strTok = new StringTokenizer(
571 aFeatStringRepresentation,";");
572 Object[] params = new Object[1];
573 Object itemObj = null;
574 while (strTok.hasMoreTokens()){
575 String itemStrRep = strTok.nextToken();
576 if (addItemAsString) featObject.add(itemStrRep);
577 else{
578 params[0] = itemStrRep;
579 try{
580 itemObj = itemConstructor.newInstance(params);
581 }catch (Exception e){
582 throw new GateRuntimeException("An item("+
583 itemStrRep +
584 ") does not comply with its class" +
585 " definition("+aFeatItemClassName+").Happened while tried to"+
586 " add feature: " +
587 aFeatStringRepresentation + " to the annotation " + currentAnnot);
588 } featObject.add(itemObj);
590 } } }catch(InstantiationException instex ){
593 return aFeatStringRepresentation;
594 }catch (IllegalAccessException iae){
595 return aFeatStringRepresentation;
596 } return featObject;
598 } Class[] params = new Class[1];
602 params[0] = java.lang.String.class;
603 try{
604 Constructor featConstr = currentFeatClass.getConstructor(params);
605 Object[] featConstrParams = new Object[1];
606 featConstrParams[0] = aFeatStringRepresentation;
607 Object featObject = featConstr.newInstance(featConstrParams);
608 return featObject;
609 } catch(Exception e){
610 return aFeatStringRepresentation;
611 } }
614
619 public void comment(String text) throws SAXException {
620 }
622
628 public void startCDATA()throws SAXException {
629 }
631
637 public void endCDATA() throws SAXException {
638 }
640
645 public void startParsedEntity(String name) throws SAXException {
646 }
648
654 public void endParsedEntity(String name, boolean included)throws SAXException{
655 }
657
659
662 public void addStatusListener(StatusListener listener){
663 myStatusListeners.add(listener);
664 }
668 public void removeStatusListener(StatusListener listener){
669 myStatusListeners.remove(listener);
670 }
675 protected void fireStatusChangedEvent(String text){
676 Iterator listenersIter = myStatusListeners.iterator();
677 while(listenersIter.hasNext())
678 ((StatusListener)listenersIter.next()).statusChanged(text);
679 }
681
683
687 final static int ELEMENTS_RATE = 128;
688
689
690 private SimpleErrorHandler _seh = new SimpleErrorHandler();
691
692
693 private StringBuffer tmpDocContent = new StringBuffer("");
694
695
696 private gate.Document doc = null;
697
698
699 protected List myStatusListeners = new LinkedList();
700
701
702 private int elements = 0;
703
704
709 private List colector = null;
710
713 private Map id2Offset = new TreeMap();
714
715 private Stack currentElementStack = new Stack();
716
719 private AnnotationObject currentAnnot = null;
720
721 private FeatureMap currentFeatureMap = null;
722
723 private String currentFeatureName = null;
724
725 private String currentFeatureValue = null;
726
727 private String currentFeatureKeyClassName = null;
728
731 private String currentFeatureKeyItemClassName = null;
732
733 private String currentFeatureValueClassName = null;
734
737 private String currentFeatureValueItemClassName = null;
738
741 private AnnotationSet currentAnnotationSet = null;
742
743
744 class AnnotationObject {
745
746 public AnnotationObject(){}
748 public String getElemName(){
749 return elemName;
750 }
752 public FeatureMap getFM(){
753 return fm;
754 }
756 public Long getStart(){
757 return start;
758 }
760 public Long getEnd(){
761 return end;
762 }
764 public void setElemName(String anElemName){
765 elemName = anElemName;
766 }
768 public void setFM(FeatureMap aFm){
769 fm = aFm;
770 }
772 public void setStart(Long aStart){
773 start = aStart;
774 }
776 public void setEnd(Long anEnd){
777 end = anEnd;
778 }
780 public String toString(){
781 return " [type=" + elemName +
782 " startNode=" + start+
783 " endNode=" + end+
784 " features="+ fm +"] ";
785 }
786 private String elemName = null;
788 private FeatureMap fm = null;
789 private Long start = null;
790 private Long end = null;
791 } }
794