1   /*
2    *  TestDocument.java
3    *
4    *  Copyright (c) 1998-2001, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Hamish Cunningham, 21/Jan/00
12   *
13   *  $Id: TestDocument.java,v 1.26 2002/03/06 17:15:38 kalina Exp $
14   */
15  
16  package gate.corpora;
17  
18  import java.util.*;
19  import java.net.*;
20  import java.io.*;
21  import junit.framework.*;
22  
23  import gate.*;
24  import gate.util.*;
25  import gate.annotation.*;
26  
27  /** Tests for the Document classes
28    */
29  public class TestDocument extends TestCase
30  {
31  
32    /** Debug flag */
33    private static final boolean DEBUG = false;
34  
35    /** Construction */
36    public TestDocument(String name) { super(name); setUp();}
37  
38    /** Base of the test server URL */
39    protected static String testServer = null;
40  
41    /** Name of test document 1 */
42    protected String testDocument1;
43  
44    /** Fixture set up */
45    public void setUp() {
46  
47      try{
48        Gate.init();
49        testServer = Gate.getUrl().toExternalForm();
50      } catch (GateException e){
51        e.printStackTrace(Err.getPrintWriter());
52      }
53  
54      testDocument1 = "tests/html/test2.htm";
55    } // setUp
56  
57    /** Get the name of the test server */
58    public static String getTestServerName() {
59      if(testServer != null) return testServer;
60      else{
61        try { testServer = Gate.getUrl().toExternalForm(); }
62        catch(Exception e) { }
63        return testServer;
64      }
65    }
66  
67    /** Test ordering */
68    public void testCompareTo() throws Exception{
69      Document doc1 = null;
70      Document doc2 = null;
71      Document doc3 = null;
72  
73  
74      doc1 = Factory.newDocument(new URL(testServer + "tests/def"));
75      doc2 = Factory.newDocument(new URL(testServer + "tests/defg"));
76      doc3 = Factory.newDocument(new URL(testServer + "tests/abc"));
77  
78      assertTrue(doc1.compareTo(doc2) < 0);
79      assertTrue(doc1.compareTo(doc1) == 0);
80      assertTrue(doc1.compareTo(doc3) > 0);
81  
82    } // testCompareTo()
83  
84    /** Test loading of the original document content */
85  
86    public void testOriginalContentPreserving() throws Exception {
87      Document doc = null;
88      FeatureMap params;
89      String encoding = "UTF-8";
90      String origContent;
91  
92      // test the default value of preserve content flag
93      params = Factory.newFeatureMap();
94      params.put(Document.DOCUMENT_URL_PARAMETER_NAME, new URL(testServer + testDocument1));
95      params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, encoding);
96      doc =
97        (Document) Factory.createResource("gate.corpora.DocumentImpl", params);
98  
99      origContent = (String) doc.getFeatures().get(
100       GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME);
101 
102     assertNull(
103       "The original content should not be preserved without demand.",
104       origContent);
105 
106     params = Factory.newFeatureMap();
107     params.put(Document.DOCUMENT_URL_PARAMETER_NAME,
108       new URL(testServer + testDocument1));
109     params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, encoding);
110     params.put(Document.DOCUMENT_PRESERVE_CONTENT_PARAMETER_NAME, new Boolean(true));
111     doc =
112       (Document) Factory.createResource("gate.corpora.DocumentImpl", params);
113 
114     origContent = (String) doc.getFeatures().get(
115       GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME);
116 
117     assertNotNull("The original content is not preserved on demand.",
118               origContent);
119 
120     assertTrue("The original content size is zerro.", origContent.length()>0);
121   } // testOriginalContentPreserving()
122 
123   /** A comprehensive test */
124   public void testLotsOfThings() {
125 
126     // check that the test URL is available
127     URL u = null;
128     try{
129       u = new URL(testServer + testDocument1);
130     } catch (Exception e){
131       e.printStackTrace(Err.getPrintWriter());
132     }
133 
134     // get some text out of the test URL
135     BufferedReader uReader = null;
136     try {
137       uReader = new BufferedReader(new InputStreamReader(u.openStream()));
138       assertEquals(uReader.readLine(), "<HTML>");
139     } catch(UnknownHostException e) { // no network connection
140       return;
141     } catch(IOException e) {
142       fail(e.toString());
143     }
144     /*
145     Document doc = new TextualDocument(testServer + testDocument1);
146     AnnotationGraph ag = new AnnotationGraphImpl();
147 
148     Tokeniser t = ...   doc.getContent()
149     tokenise doc using java stream tokeniser
150 
151     add several thousand token annotation
152     select a subset
153     */
154   } // testLotsOfThings
155 
156   /** The reason this is method begins with verify and not with test is that it
157    *  gets called by various other test methods. It is somehow a utility test
158    *  method. It should be called on all gate documents having annotation sets.
159    */
160   public static void verifyNodeIdConsistency(gate.Document doc)throws Exception{
161       if (doc == null) return;
162       Map offests2NodeId = new HashMap();
163       // Test the default annotation set
164       AnnotationSet annotSet = doc.getAnnotations();
165       verifyNodeIdConsistency(annotSet,offests2NodeId, doc);
166       // Test all named annotation sets
167       if (doc.getNamedAnnotationSets() != null){
168         Iterator namedAnnotSetsIter =
169                               doc.getNamedAnnotationSets().values().iterator();
170         while(namedAnnotSetsIter.hasNext()){
171          verifyNodeIdConsistency((gate.AnnotationSet) namedAnnotSetsIter.next(),
172                                                                  offests2NodeId,
173                                                                  doc);
174         }// End while
175       }// End if
176       // Test suceeded. The map is not needed anymore.
177       offests2NodeId = null;
178   }// verifyNodeIdConsistency();
179 
180   /** This metod runs the test over an annotation Set. It is called from her
181    *  older sister. Se above.
182    *  @param annotSet is the annotation set being tested.
183    *  @param offests2NodeId is the Map used to test the consistency.
184    *  @param doc is used in composing the assert error messsage.
185    */
186   public static void verifyNodeIdConsistency(gate.AnnotationSet annotSet,
187                                              Map  offests2NodeId,
188                                              gate.Document doc)
189                                                               throws Exception{
190 
191       if (annotSet == null || offests2NodeId == null) return;
192 
193       Iterator iter = annotSet.iterator();
194       while(iter.hasNext()){
195         Annotation annot = (Annotation) iter.next();
196         String annotSetName = (annotSet.getName() == null)? "Default":
197                                                           annotSet.getName();
198         // check the Start node
199         if (offests2NodeId.containsKey(annot.getStartNode().getOffset())){
200              assertEquals("Found two different node IDs for the same offset( "+
201              annot.getStartNode().getOffset()+ " ).\n" +
202              "START NODE is buggy for annotation(" + annot +
203              ") from annotation set " + annotSetName + " of GATE document :" +
204              doc.getSourceUrl(),
205              annot.getStartNode().getId(),
206              (Integer) offests2NodeId.get(annot.getStartNode().getOffset()));
207         }// End if
208         // Check the End node
209         if (offests2NodeId.containsKey(annot.getEndNode().getOffset())){
210              assertEquals("Found two different node IDs for the same offset("+
211              annot.getEndNode().getOffset()+ ").\n" +
212              "END NODE is buggy for annotation(" + annot+ ") from annotation"+
213              " set " + annotSetName +" of GATE document :" + doc.getSourceUrl(),
214              annot.getEndNode().getId(),
215              (Integer) offests2NodeId.get(annot.getEndNode().getOffset()));
216         }// End if
217         offests2NodeId.put(annot.getStartNode().getOffset(),
218                                                   annot.getStartNode().getId());
219         offests2NodeId.put(annot.getEndNode().getOffset(),
220                                                     annot.getEndNode().getId());
221     }// End while
222   }//verifyNodeIdConsistency();
223 
224   /** Test suite routine for the test runner */
225   public static Test suite() {
226     return new TestSuite(TestDocument.class);
227   } // suite
228 
229 } // class TestDocument
230