1   /*
2    *  TestDocument.java
3    *
4    *  Copyright (c) 1998-2001, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Hamish Cunningham, 21/Jan/00
12   *
13   *  $Id: TestDocument.java,v 1.23 2001/10/30 12:45:34 valyt Exp $
14   */
15  
16  package gate.corpora;
17  
18  import java.util.*;
19  import java.net.*;
20  import java.io.*;
21  import junit.framework.*;
22  
23  import gate.*;
24  import gate.util.*;
25  import gate.annotation.*;
26  
27  /** Tests for the Document classes
28    */
29  public class TestDocument extends TestCase
30  {
31  
32    /** Debug flag */
33    private static final boolean DEBUG = false;
34  
35    /** Construction */
36    public TestDocument(String name) { super(name); setUp();}
37  
38    /** Base of the test server URL */
39    protected static String testServer = null;
40  
41    /** Name of test document 1 */
42    protected String testDocument1;
43  
44    /** Fixture set up */
45    public void setUp() {
46  
47      try{
48        Gate.init();
49        testServer = Gate.getUrl().toExternalForm();
50      } catch (GateException e){
51        e.printStackTrace(Err.getPrintWriter());
52      }
53  
54      testDocument1 = "tests/html/test2.htm";
55    } // setUp
56  
57    /** Get the name of the test server */
58    public static String getTestServerName() {
59      if(testServer != null) return testServer;
60      else{
61        try { testServer = Gate.getUrl().toExternalForm(); }
62        catch(Exception e) { }
63        return testServer;
64      }
65    }
66  
67    /** Test ordering */
68    public void testCompareTo() throws Exception{
69      Document doc1 = null;
70      Document doc2 = null;
71      Document doc3 = null;
72  
73  
74      doc1 = Factory.newDocument(new URL(testServer + "tests/def"));
75      doc2 = Factory.newDocument(new URL(testServer + "tests/defg"));
76      doc3 = Factory.newDocument(new URL(testServer + "tests/abc"));
77  
78      assertTrue(doc1.compareTo(doc2) < 0);
79      assertTrue(doc1.compareTo(doc1) == 0);
80      assertTrue(doc1.compareTo(doc3) > 0);
81  
82    } // testCompareTo()
83  
84    /** A comprehensive test */
85    public void testLotsOfThings() {
86  
87      // check that the test URL is available
88      URL u = null;
89      try{
90        u = new URL(testServer + testDocument1);
91      } catch (Exception e){
92        e.printStackTrace(Err.getPrintWriter());
93      }
94  
95      // get some text out of the test URL
96      BufferedReader uReader = null;
97      try {
98        uReader = new BufferedReader(new InputStreamReader(u.openStream()));
99        assertEquals(uReader.readLine(), "<HTML>");
100     } catch(UnknownHostException e) { // no network connection
101       return;
102     } catch(IOException e) {
103       fail(e.toString());
104     }
105     /*
106     Document doc = new TextualDocument(testServer + testDocument1);
107     AnnotationGraph ag = new AnnotationGraphImpl();
108 
109     Tokeniser t = ...   doc.getContent()
110     tokenise doc using java stream tokeniser
111 
112     add several thousand token annotation
113     select a subset
114     */
115   } // testLotsOfThings
116 
117   /** The reason this is method begins with verify and not with test is that it
118    *  gets called by various other test methods. It is somehow a utility test
119    *  method. It should be called on all gate documents having annotation sets.
120    */
121   public static void verifyNodeIdConsistency(gate.Document doc)throws Exception{
122       if (doc == null) return;
123       Map offests2NodeId = new HashMap();
124       // Test the default annotation set
125       AnnotationSet annotSet = doc.getAnnotations();
126       verifyNodeIdConsistency(annotSet,offests2NodeId, doc);
127       // Test all named annotation sets
128       if (doc.getNamedAnnotationSets() != null){
129         Iterator namedAnnotSetsIter =
130                               doc.getNamedAnnotationSets().values().iterator();
131         while(namedAnnotSetsIter.hasNext()){
132          verifyNodeIdConsistency((gate.AnnotationSet) namedAnnotSetsIter.next(),
133                                                                  offests2NodeId,
134                                                                  doc);
135         }// End while
136       }// End if
137       // Test suceeded. The map is not needed anymore.
138       offests2NodeId = null;
139   }// verifyNodeIdConsistency();
140 
141   /** This metod runs the test over an annotation Set. It is called from her
142    *  older sister. Se above.
143    *  @param annotSet is the annotation set being tested.
144    *  @param offests2NodeId is the Map used to test the consistency.
145    *  @param doc is used in composing the assert error messsage.
146    */
147   public static void verifyNodeIdConsistency(gate.AnnotationSet annotSet,
148                                              Map  offests2NodeId,
149                                              gate.Document doc)
150                                                               throws Exception{
151 
152       if (annotSet == null || offests2NodeId == null) return;
153 
154       Iterator iter = annotSet.iterator();
155       while(iter.hasNext()){
156         Annotation annot = (Annotation) iter.next();
157         String annotSetName = (annotSet.getName() == null)? "Default":
158                                                           annotSet.getName();
159         // check the Start node
160         if (offests2NodeId.containsKey(annot.getStartNode().getOffset())){
161              assertEquals("Found two different node IDs for the same offset( "+
162              annot.getStartNode().getOffset()+ " ).\n" +
163              "START NODE is buggy for annotation(" + annot +
164              ") from annotation set " + annotSetName + " of GATE document :" +
165              doc.getSourceUrl(),
166              annot.getStartNode().getId(),
167              (Integer) offests2NodeId.get(annot.getStartNode().getOffset()));
168         }// End if
169         // Check the End node
170         if (offests2NodeId.containsKey(annot.getEndNode().getOffset())){
171              assertEquals("Found two different node IDs for the same offset("+
172              annot.getEndNode().getOffset()+ ").\n" +
173              "END NODE is buggy for annotation(" + annot+ ") from annotation"+
174              " set " + annotSetName +" of GATE document :" + doc.getSourceUrl(),
175              annot.getEndNode().getId(),
176              (Integer) offests2NodeId.get(annot.getEndNode().getOffset()));
177         }// End if
178         offests2NodeId.put(annot.getStartNode().getOffset(),
179                                                   annot.getStartNode().getId());
180         offests2NodeId.put(annot.getEndNode().getOffset(),
181                                                     annot.getEndNode().getId());
182     }// End while
183   }//verifyNodeIdConsistency();
184 
185   /** Test suite routine for the test runner */
186   public static Test suite() {
187     return new TestSuite(TestDocument.class);
188   } // suite
189 
190 } // class TestDocument
191