1   /*
2    *  TestDocument.java
3    *
4    *  Copyright (c) 1998-2004, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Hamish Cunningham, 21/Jan/00
12   *
13   *  $Id: TestDocument.java,v 1.29 2004/08/02 16:37:55 valyt Exp $
14   */
15  
16  package gate.corpora;
17  
18  import java.io.*;
19  import java.net.URL;
20  import java.net.UnknownHostException;
21  import java.util.*;
22  
23  import junit.framework.*;
24  
25  import gate.*;
26  import gate.util.Err;
27  import gate.util.GateException;
28  
29  /** Tests for the Document classes
30    */
31  public class TestDocument extends TestCase
32  {
33  
34    /** Debug flag */
35    private static final boolean DEBUG = false;
36  
37    /** Construction */
38    public TestDocument(String name) { super(name); setUp();}
39  
40    /** Base of the test server URL */
41    protected static String testServer = null;
42  
43    /** Name of test document 1 */
44    protected String testDocument1;
45  
46    /** Fixture set up */
47    public void setUp() {
48  
49      try{
50  //      Gate.init();
51        testServer = Gate.getUrl().toExternalForm();
52      } catch (GateException e){
53        e.printStackTrace(Err.getPrintWriter());
54      }
55  
56      testDocument1 = "tests/html/test2.htm";
57    } // setUp
58  
59    /** Get the name of the test server */
60    public static String getTestServerName() {
61      if(testServer != null) return testServer;
62      else{
63        try { testServer = Gate.getUrl().toExternalForm(); }
64        catch(Exception e) { }
65        return testServer;
66      }
67    }
68  
69    /** Test ordering */
70    public void testCompareTo() throws Exception{
71      Document doc1 = null;
72      Document doc2 = null;
73      Document doc3 = null;
74  
75  
76      doc1 = Factory.newDocument(new URL(testServer + "tests/def"));
77      doc2 = Factory.newDocument(new URL(testServer + "tests/defg"));
78      doc3 = Factory.newDocument(new URL(testServer + "tests/abc"));
79  
80      assertTrue(doc1.compareTo(doc2) < 0);
81      assertTrue(doc1.compareTo(doc1) == 0);
82      assertTrue(doc1.compareTo(doc3) > 0);
83  
84    } // testCompareTo()
85  
86    /** Test loading of the original document content */
87  
88    public void testOriginalContentPreserving() throws Exception {
89      Document doc = null;
90      FeatureMap params;
91      String encoding = "UTF-8";
92      String origContent;
93  
94      // test the default value of preserve content flag
95      params = Factory.newFeatureMap();
96      params.put(Document.DOCUMENT_URL_PARAMETER_NAME, new URL(testServer + testDocument1));
97      params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, encoding);
98      doc =
99        (Document) Factory.createResource("gate.corpora.DocumentImpl", params);
100 
101     origContent = (String) doc.getFeatures().get(
102       GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME);
103 
104     assertNull(
105       "The original content should not be preserved without demand.",
106       origContent);
107 
108     params = Factory.newFeatureMap();
109     params.put(Document.DOCUMENT_URL_PARAMETER_NAME,
110       new URL(testServer + testDocument1));
111     params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, encoding);
112     params.put(Document.DOCUMENT_PRESERVE_CONTENT_PARAMETER_NAME, new Boolean(true));
113     doc =
114       (Document) Factory.createResource("gate.corpora.DocumentImpl", params);
115 
116     origContent = (String) doc.getFeatures().get(
117       GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME);
118 
119     assertNotNull("The original content is not preserved on demand.",
120               origContent);
121 
122     assertTrue("The original content size is zerro.", origContent.length()>0);
123   } // testOriginalContentPreserving()
124 
125   /** A comprehensive test */
126   public void testLotsOfThings() {
127 
128     // check that the test URL is available
129     URL u = null;
130     try{
131       u = new URL(testServer + testDocument1);
132     } catch (Exception e){
133       e.printStackTrace(Err.getPrintWriter());
134     }
135 
136     // get some text out of the test URL
137     BufferedReader uReader = null;
138     try {
139       uReader = new BufferedReader(new InputStreamReader(u.openStream()));
140       assertEquals(uReader.readLine(), "<HTML>");
141     } catch(UnknownHostException e) { // no network connection
142       return;
143     } catch(IOException e) {
144       fail(e.toString());
145     }
146     /*
147     Document doc = new TextualDocument(testServer + testDocument1);
148     AnnotationGraph ag = new AnnotationGraphImpl();
149 
150     Tokeniser t = ...   doc.getContent()
151     tokenise doc using java stream tokeniser
152 
153     add several thousand token annotation
154     select a subset
155     */
156   } // testLotsOfThings
157 
158   /** The reason this is method begins with verify and not with test is that it
159    *  gets called by various other test methods. It is somehow a utility test
160    *  method. It should be called on all gate documents having annotation sets.
161    */
162   public static void verifyNodeIdConsistency(gate.Document doc)throws Exception{
163       if (doc == null) return;
164       Map offests2NodeId = new HashMap();
165       // Test the default annotation set
166       AnnotationSet annotSet = doc.getAnnotations();
167       verifyNodeIdConsistency(annotSet,offests2NodeId, doc);
168       // Test all named annotation sets
169       if (doc.getNamedAnnotationSets() != null){
170         Iterator namedAnnotSetsIter =
171                               doc.getNamedAnnotationSets().values().iterator();
172         while(namedAnnotSetsIter.hasNext()){
173          verifyNodeIdConsistency((gate.AnnotationSet) namedAnnotSetsIter.next(),
174                                                                  offests2NodeId,
175                                                                  doc);
176         }// End while
177       }// End if
178       // Test suceeded. The map is not needed anymore.
179       offests2NodeId = null;
180   }// verifyNodeIdConsistency();
181 
182   /** This metod runs the test over an annotation Set. It is called from her
183    *  older sister. Se above.
184    *  @param annotSet is the annotation set being tested.
185    *  @param offests2NodeId is the Map used to test the consistency.
186    *  @param doc is used in composing the assert error messsage.
187    */
188   public static void verifyNodeIdConsistency(gate.AnnotationSet annotSet,
189                                              Map  offests2NodeId,
190                                              gate.Document doc)
191                                                               throws Exception{
192 
193       if (annotSet == null || offests2NodeId == null) return;
194 
195       Iterator iter = annotSet.iterator();
196       while(iter.hasNext()){
197         Annotation annot = (Annotation) iter.next();
198         String annotSetName = (annotSet.getName() == null)? "Default":
199                                                           annotSet.getName();
200         // check the Start node
201         if (offests2NodeId.containsKey(annot.getStartNode().getOffset())){
202              assertEquals("Found two different node IDs for the same offset( "+
203              annot.getStartNode().getOffset()+ " ).\n" +
204              "START NODE is buggy for annotation(" + annot +
205              ") from annotation set " + annotSetName + " of GATE document :" +
206              doc.getSourceUrl(),
207              annot.getStartNode().getId(),
208              (Integer) offests2NodeId.get(annot.getStartNode().getOffset()));
209         }// End if
210         // Check the End node
211         if (offests2NodeId.containsKey(annot.getEndNode().getOffset())){
212              assertEquals("Found two different node IDs for the same offset("+
213              annot.getEndNode().getOffset()+ ").\n" +
214              "END NODE is buggy for annotation(" + annot+ ") from annotation"+
215              " set " + annotSetName +" of GATE document :" + doc.getSourceUrl(),
216              annot.getEndNode().getId(),
217              (Integer) offests2NodeId.get(annot.getEndNode().getOffset()));
218         }// End if
219         offests2NodeId.put(annot.getStartNode().getOffset(),
220                                                   annot.getStartNode().getId());
221         offests2NodeId.put(annot.getEndNode().getOffset(),
222                                                     annot.getEndNode().getId());
223     }// End while
224   }//verifyNodeIdConsistency();
225 
226   /** Test suite routine for the test runner */
227   public static Test suite() {
228     return new TestSuite(TestDocument.class);
229   } // suite
230 
231 } // class TestDocument
232