1
15
16 package gate.corpora;
17
18 import java.io.*;
19 import java.net.URL;
20 import java.net.UnknownHostException;
21 import java.util.*;
22
23 import junit.framework.*;
24
25 import gate.*;
26 import gate.util.Err;
27 import gate.util.GateException;
28
29
31 public class TestDocument extends TestCase
32 {
33
34
35 private static final boolean DEBUG = false;
36
37
38 public TestDocument(String name) { super(name); setUp();}
39
40
41 protected static String testServer = null;
42
43
44 protected String testDocument1;
45
46
47 public void setUp() {
48
49 try{
50 testServer = Gate.getUrl().toExternalForm();
52 } catch (GateException e){
53 e.printStackTrace(Err.getPrintWriter());
54 }
55
56 testDocument1 = "tests/html/test2.htm";
57 }
59
60 public static String getTestServerName() {
61 if(testServer != null) return testServer;
62 else{
63 try { testServer = Gate.getUrl().toExternalForm(); }
64 catch(Exception e) { }
65 return testServer;
66 }
67 }
68
69
70 public void testCompareTo() throws Exception{
71 Document doc1 = null;
72 Document doc2 = null;
73 Document doc3 = null;
74
75
76 doc1 = Factory.newDocument(new URL(testServer + "tests/def"));
77 doc2 = Factory.newDocument(new URL(testServer + "tests/defg"));
78 doc3 = Factory.newDocument(new URL(testServer + "tests/abc"));
79
80 assertTrue(doc1.compareTo(doc2) < 0);
81 assertTrue(doc1.compareTo(doc1) == 0);
82 assertTrue(doc1.compareTo(doc3) > 0);
83
84 }
86
87
88 public void testOriginalContentPreserving() throws Exception {
89 Document doc = null;
90 FeatureMap params;
91 String encoding = "UTF-8";
92 String origContent;
93
94 params = Factory.newFeatureMap();
96 params.put(Document.DOCUMENT_URL_PARAMETER_NAME, new URL(testServer + testDocument1));
97 params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, encoding);
98 doc =
99 (Document) Factory.createResource("gate.corpora.DocumentImpl", params);
100
101 origContent = (String) doc.getFeatures().get(
102 GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME);
103
104 assertNull(
105 "The original content should not be preserved without demand.",
106 origContent);
107
108 params = Factory.newFeatureMap();
109 params.put(Document.DOCUMENT_URL_PARAMETER_NAME,
110 new URL(testServer + testDocument1));
111 params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, encoding);
112 params.put(Document.DOCUMENT_PRESERVE_CONTENT_PARAMETER_NAME, new Boolean(true));
113 doc =
114 (Document) Factory.createResource("gate.corpora.DocumentImpl", params);
115
116 origContent = (String) doc.getFeatures().get(
117 GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME);
118
119 assertNotNull("The original content is not preserved on demand.",
120 origContent);
121
122 assertTrue("The original content size is zerro.", origContent.length()>0);
123 }
125
126 public void testLotsOfThings() {
127
128 URL u = null;
130 try{
131 u = new URL(testServer + testDocument1);
132 } catch (Exception e){
133 e.printStackTrace(Err.getPrintWriter());
134 }
135
136 BufferedReader uReader = null;
138 try {
139 uReader = new BufferedReader(new InputStreamReader(u.openStream()));
140 assertEquals(uReader.readLine(), "<HTML>");
141 } catch(UnknownHostException e) { return;
143 } catch(IOException e) {
144 fail(e.toString());
145 }
146
156 }
158
162 public static void verifyNodeIdConsistency(gate.Document doc)throws Exception{
163 if (doc == null) return;
164 Map offests2NodeId = new HashMap();
165 AnnotationSet annotSet = doc.getAnnotations();
167 verifyNodeIdConsistency(annotSet,offests2NodeId, doc);
168 if (doc.getNamedAnnotationSets() != null){
170 Iterator namedAnnotSetsIter =
171 doc.getNamedAnnotationSets().values().iterator();
172 while(namedAnnotSetsIter.hasNext()){
173 verifyNodeIdConsistency((gate.AnnotationSet) namedAnnotSetsIter.next(),
174 offests2NodeId,
175 doc);
176 } } offests2NodeId = null;
180 }
182
188 public static void verifyNodeIdConsistency(gate.AnnotationSet annotSet,
189 Map offests2NodeId,
190 gate.Document doc)
191 throws Exception{
192
193 if (annotSet == null || offests2NodeId == null) return;
194
195 Iterator iter = annotSet.iterator();
196 while(iter.hasNext()){
197 Annotation annot = (Annotation) iter.next();
198 String annotSetName = (annotSet.getName() == null)? "Default":
199 annotSet.getName();
200 if (offests2NodeId.containsKey(annot.getStartNode().getOffset())){
202 assertEquals("Found two different node IDs for the same offset( "+
203 annot.getStartNode().getOffset()+ " ).\n" +
204 "START NODE is buggy for annotation(" + annot +
205 ") from annotation set " + annotSetName + " of GATE document :" +
206 doc.getSourceUrl(),
207 annot.getStartNode().getId(),
208 (Integer) offests2NodeId.get(annot.getStartNode().getOffset()));
209 } if (offests2NodeId.containsKey(annot.getEndNode().getOffset())){
212 assertEquals("Found two different node IDs for the same offset("+
213 annot.getEndNode().getOffset()+ ").\n" +
214 "END NODE is buggy for annotation(" + annot+ ") from annotation"+
215 " set " + annotSetName +" of GATE document :" + doc.getSourceUrl(),
216 annot.getEndNode().getId(),
217 (Integer) offests2NodeId.get(annot.getEndNode().getOffset()));
218 } offests2NodeId.put(annot.getStartNode().getOffset(),
220 annot.getStartNode().getId());
221 offests2NodeId.put(annot.getEndNode().getOffset(),
222 annot.getEndNode().getId());
223 } }
226
227 public static Test suite() {
228 return new TestSuite(TestDocument.class);
229 }
231 }