|
TestDocument |
|
/*
 *  TestDocument.java
 *
 *  Copyright (c) 1998-2001, The University of Sheffield.
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Hamish Cunningham, 21/Jan/00
 *
 *  $Id: TestDocument.java,v 1.26 2002/03/06 17:15:38 kalina Exp $
 */

package gate.corpora;

import java.util.*;
import java.net.*;
import java.io.*;
import junit.framework.*;

import gate.*;
import gate.util.*;
import gate.annotation.*;

/** Tests for the Document classes.
  * <p>
  * Besides the JUnit test methods, this class exposes two static
  * <code>verifyNodeIdConsistency</code> utilities that other tests can call
  * on any GATE document or annotation set.
  */
public class TestDocument extends TestCase
{

  /** Debug flag */
  private static final boolean DEBUG = false;

  /** Construction.
    * <p>
    * The fixture is set up here in addition to the set-up performed by the
    * JUnit runner before each test; this call is kept so that
    * {@link #testServer} is populated as soon as an instance exists.
    */
  public TestDocument(String name) { super(name); setUp();}

  /** Base of the test server URL */
  protected static String testServer = null;

  /** Name of test document 1 */
  protected String testDocument1;

  /** Fixture set up: initialises GATE, records the base URL of the test
    * server and the relative path of the first test document.
    * A failure to initialise GATE is reported on the GATE error writer and
    * leaves {@link #testServer} unchanged.
    */
  public void setUp() {

    try{
      Gate.init();
      testServer = Gate.getUrl().toExternalForm();
    } catch (GateException e){
      e.printStackTrace(Err.getPrintWriter());
    }

    testDocument1 = "tests/html/test2.htm";
  } // setUp

  /** Get the name of the test server.
    * <p>
    * If the name has not been recorded yet, it is looked up from
    * <code>Gate.getUrl()</code> on a best-effort basis.
    *
    * @return the base URL of the test server, or <code>null</code> if it
    * could not be determined.
    */
  public static String getTestServerName() {
    if(testServer != null) return testServer;

    try {
      testServer = Gate.getUrl().toExternalForm();
    } catch(Exception e) {
      // Best effort only: report the problem the same way setUp() does and
      // let the caller deal with a null result.
      e.printStackTrace(Err.getPrintWriter());
    }
    return testServer;
  } // getTestServerName

  /** Test ordering */
  public void testCompareTo() throws Exception{
    Document doc1 = Factory.newDocument(new URL(testServer + "tests/def"));
    Document doc2 = Factory.newDocument(new URL(testServer + "tests/defg"));
    Document doc3 = Factory.newDocument(new URL(testServer + "tests/abc"));

    assertTrue(doc1.compareTo(doc2) < 0);
    assertTrue(doc1.compareTo(doc1) == 0);
    assertTrue(doc1.compareTo(doc3) > 0);

  } // testCompareTo()

  /** Test loading of the original document content.
    * <p>
    * The document is created twice from the same URL: first without the
    * preserve-content parameter (the original content feature must be
    * absent), then with it set to true (the feature must be present and
    * non-empty).
    */
  public void testOriginalContentPreserving() throws Exception {
    Document doc = null;
    FeatureMap params;
    String encoding = "UTF-8";
    String origContent;

    // test the default value of preserve content flag
    params = Factory.newFeatureMap();
    params.put(Document.DOCUMENT_URL_PARAMETER_NAME,
               new URL(testServer + testDocument1));
    params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, encoding);
    doc =
      (Document) Factory.createResource("gate.corpora.DocumentImpl", params);

    origContent = (String) doc.getFeatures().get(
      GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME);

    assertNull(
      "The original content should not be preserved without demand.",
      origContent);

    // now explicitly ask for the original content to be preserved
    params = Factory.newFeatureMap();
    params.put(Document.DOCUMENT_URL_PARAMETER_NAME,
               new URL(testServer + testDocument1));
    params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, encoding);
    // Boolean.TRUE avoids allocating a needless Boolean instance
    params.put(Document.DOCUMENT_PRESERVE_CONTENT_PARAMETER_NAME,
               Boolean.TRUE);
    doc =
      (Document) Factory.createResource("gate.corpora.DocumentImpl", params);

    origContent = (String) doc.getFeatures().get(
      GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME);

    assertNotNull("The original content is not preserved on demand.",
      origContent);

    assertTrue("The original content size is zerro.", origContent.length()>0);
  } // testOriginalContentPreserving()

  /** A comprehensive test.
    * <p>
    * Currently it only checks that the first line of the test document is
    * <code>&lt;HTML&gt;</code>. The test is silently skipped when the host
    * cannot be resolved (no network connection).
    */
  public void testLotsOfThings() {

    // check that the test URL is well formed
    URL u = null;
    try{
      u = new URL(testServer + testDocument1);
    } catch (MalformedURLException e){
      // Without a URL nothing below can work; fail clearly instead of
      // running into a NullPointerException on u.openStream().
      fail("Malformed test document URL: " + e.toString());
    }

    // get some text out of the test URL
    BufferedReader uReader = null;
    try {
      uReader = new BufferedReader(new InputStreamReader(u.openStream()));
      assertEquals("<HTML>", uReader.readLine());
    } catch(UnknownHostException e) { // no network connection
      return;
    } catch(IOException e) {
      fail(e.toString());
    } finally {
      // always release the underlying stream
      if(uReader != null) {
        try {
          uReader.close();
        } catch(IOException closeException) {
          // nothing useful can be done if closing fails
        }
      }
    }
    /*
    Document doc = new TextualDocument(testServer + testDocument1);
    AnnotationGraph ag = new AnnotationGraphImpl();

    Tokeniser t = ... doc.getContent()
    tokenise doc using java stream tokeniser

    add several thousand token annotation
    select a subset
    */
  } // testLotsOfThings

  /** The reason this method begins with verify and not with test is that it
    * gets called by various other test methods. It is a utility test
    * method. It should be called on all GATE documents having annotation
    * sets.
    * <p>
    * The default annotation set and every named annotation set are checked
    * against a single shared offset-to-node-ID map, so an inconsistency
    * between two different sets is detected as well.
    *
    * @param doc the document to verify; nothing is done when it is null.
    */
  public static void verifyNodeIdConsistency(gate.Document doc)throws Exception{
    if (doc == null) return;
    Map offests2NodeId = new HashMap();
    // Test the default annotation set
    AnnotationSet annotSet = doc.getAnnotations();
    verifyNodeIdConsistency(annotSet,offests2NodeId, doc);
    // Test all named annotation sets
    if (doc.getNamedAnnotationSets() != null){
      Iterator namedAnnotSetsIter =
                        doc.getNamedAnnotationSets().values().iterator();
      while(namedAnnotSetsIter.hasNext()){
        verifyNodeIdConsistency((gate.AnnotationSet) namedAnnotSetsIter.next(),
                                                                 offests2NodeId,
                                                                 doc);
      }// End while
    }// End if
  }// verifyNodeIdConsistency();

  /** This method runs the test over a single annotation set. It is called
    * from the document-level overload.
    * <p>
    * For each annotation, the start and end nodes are first compared with
    * the IDs already recorded for their offsets (if any) and only then
    * recorded themselves.
    *
    * @param annotSet is the annotation set being tested.
    * @param offests2NodeId is the Map used to test the consistency; it maps
    * a node offset to the node ID first seen at that offset and is updated
    * by this call.
    * @param doc is used in composing the assert error message.
    */
  public static void verifyNodeIdConsistency(gate.AnnotationSet annotSet,
                                             Map offests2NodeId,
                                             gate.Document doc)
                                                              throws Exception{

    if (annotSet == null || offests2NodeId == null) return;

    // The set name does not change while iterating, so compute it once.
    String annotSetName = (annotSet.getName() == null)? "Default":
                                                        annotSet.getName();

    Iterator iter = annotSet.iterator();
    while(iter.hasNext()){
      Annotation annot = (Annotation) iter.next();
      Object startOffset = annot.getStartNode().getOffset();
      Object startId = annot.getStartNode().getId();
      Object endOffset = annot.getEndNode().getOffset();
      Object endId = annot.getEndNode().getId();

      // check the Start node: the ID recorded earlier is the expected value
      if (offests2NodeId.containsKey(startOffset)){
        assertEquals("Found two different node IDs for the same offset( "+
        startOffset+ " ).\n" +
        "START NODE is buggy for annotation(" + annot +
        ") from annotation set " + annotSetName + " of GATE document :" +
        doc.getSourceUrl(),
        offests2NodeId.get(startOffset),
        startId);
      }// End if
      // Check the End node
      if (offests2NodeId.containsKey(endOffset)){
        assertEquals("Found two different node IDs for the same offset("+
        endOffset+ ").\n" +
        "END NODE is buggy for annotation(" + annot+ ") from annotation"+
        " set " + annotSetName +" of GATE document :" + doc.getSourceUrl(),
        offests2NodeId.get(endOffset),
        endId);
      }// End if
      // Record both nodes only after both checks, as the original did.
      offests2NodeId.put(startOffset, startId);
      offests2NodeId.put(endOffset, endId);
    }// End while
  }//verifyNodeIdConsistency();

  /** Test suite routine for the test runner */
  public static Test suite() {
    return new TestSuite(TestDocument.class);
  } // suite

} // class TestDocument
|
TestDocument |
|