1
15
16 package gate.xml;
17
18 import java.io.File;
19 import java.net.URL;
20 import java.util.*;
21
22 import junit.framework.*;
23
24 import gate.*;
25 import gate.util.Files;
26
28
29
32 public class TestXml extends TestCase
33 {
34
35 private static final boolean DEBUG = false;
36
37
38 public TestXml(String name) { super(name); }
39
40
41 public void setUp() {
42 }
44 public void testGateDocumentToAndFromXmlWithDifferentKindOfFormats()
45 throws Exception{
46 List urlList = new LinkedList();
47 List urlDescription = new LinkedList();
48 URL url = null;
49
50 url = Gate.getUrl("tests/xml/xces.xml");
51 assertTrue("Coudn't create a URL object for tests/xml/xces.xml ", url != null);
52 urlList.add(url);
53 urlDescription.add(" an XML document ");
54
55 url = Gate.getUrl("tests/xml/Sentence.xml");
56 assertTrue("Coudn't create a URL object for tests/xml/Sentence.xml",
57 url != null);
58 urlList.add(url);
59 urlDescription.add(" an XML document ");
60
61 url = Gate.getUrl("tests/html/test1.htm");
62 assertTrue("Coudn't create a URL object for tests/html/test.htm",url != null);
63 urlList.add(url);
64 urlDescription.add(" an HTML document ");
65
66 url = Gate.getUrl("tests/rtf/Sample.rtf");
67 assertTrue("Coudn't create a URL object for defg ",url != null);
68 urlList.add(url);
69 urlDescription.add(" a RTF document ");
70
71
72 url = Gate.getUrl("tests/email/test2.eml");
73 assertTrue("Coudn't create a URL object for defg ",url != null);
74 urlList.add(url);
75 urlDescription.add(" an EMAIL document ");
76
77 Iterator iter = urlList.iterator();
78 Iterator descrIter = urlDescription.iterator();
79 while(iter.hasNext()){
80 runCompleteTestWithAFormat((URL) iter.next(),(String)descrIter.next());
81 }
83
84 }
86 private void runCompleteTestWithAFormat(URL url, String urlDescription)
87 throws Exception{
88 gate.Document keyDocument = null;
90
91 FeatureMap params = Factory.newFeatureMap();
92 params.put(Document.DOCUMENT_URL_PARAMETER_NAME, url);
93 params.put(Document.DOCUMENT_MARKUP_AWARE_PARAMETER_NAME, "false");
94 keyDocument = (Document)Factory.createResource("gate.corpora.DocumentImpl",
95 params);
96
97 assertTrue("Coudn't create a GATE document instance for " +
98 url.toString() +
99 " Can't continue." , keyDocument != null);
100
101 gate.DocumentFormat keyDocFormat = null;
102 keyDocFormat = gate.DocumentFormat.getDocumentFormat(
103 keyDocument, keyDocument.getSourceUrl()
104 );
105
106 assertTrue("Fail to recognize " +
107 url.toString() +
108 " as being " + urlDescription + " !", keyDocFormat != null);
109
110 keyDocFormat.unpackMarkup(keyDocument);
112 gate.corpora.TestDocument.verifyNodeIdConsistency(keyDocument);
114
115 long keyDocumentSize = keyDocument.getContent().size().longValue();
117 int keyDocumentAnnotationSetSize = keyDocument.getAnnotations().size();
118
119
120 File xmlFile = null;
123 xmlFile = Files.writeTempFile(keyDocument.toXml(),"UTF-8");
124 assertTrue("The temp GATE XML file is null. Can't continue.",xmlFile != null);
125
134 gate.Document gateDoc = null;
136 gateDoc = gate.Factory.newDocument(xmlFile.toURL());
137
138 assertTrue("Coudn't create a GATE document instance for " +
139 xmlFile.toURL().toString() +
140 " Can't continue." , gateDoc != null);
141
142 gate.DocumentFormat gateDocFormat = null;
143 gateDocFormat =
144 DocumentFormat.getDocumentFormat(gateDoc,gateDoc.getSourceUrl());
145
146 assertTrue("Fail to recognize " +
147 xmlFile.toURL().toString() +
148 " as being a GATE XML document !", gateDocFormat != null);
149
150 gateDocFormat.unpackMarkup(gateDoc);
151 gate.corpora.TestDocument.verifyNodeIdConsistency(gateDoc);
153
154 long gateDocSize = keyDocument.getContent().size().longValue();
156 int gateDocAnnotationSetSize = keyDocument.getAnnotations().size();
157
158 assertTrue("Exporting as GATE XML resulted in document content size lost." +
159 " Something went wrong.", keyDocumentSize == gateDocSize);
160
161 assertTrue("Exporting as GATE XML resulted in annotation lost." +
162 " No. of annotations missing = " +
163 Math.abs(keyDocumentAnnotationSetSize - gateDocAnnotationSetSize),
164 keyDocumentAnnotationSetSize == gateDocAnnotationSetSize);
165
166 xmlFile.delete();
168 }
170
171 public void testUnpackMarkup() throws Exception{
172 Map markupElementsMap = null;
174 gate.Document doc = null;
175
181 Map anElement2StringMap = null;
183 anElement2StringMap = new HashMap();
184 anElement2StringMap.put("S","\n");
186 anElement2StringMap.put("s","\n");
187
188 doc = gate.Factory.newDocument(Gate.getUrl("tests/xml/xces.xml"));
189
191 AnnotationSet annotSet = doc.getAnnotations(
192 GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
193 assertEquals("For "+doc.getSourceUrl()+" the number of annotations"+
194 " should be:758",758,annotSet.size());
195
196 gate.corpora.TestDocument.verifyNodeIdConsistency(doc);
197 }
199
200 public static Test suite() {
201 return new TestSuite(TestXml.class);
202 }
204 }