|
TestPR |
|
/*
 *  TestPR.java
 *
 *  Copyright (c) 1998-2001, The University of Sheffield.
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Oana Hamza,
 *
 *  $Id: TestPR.java,v 1.22 2001/11/24 18:23:35 hamish Exp $
 */

package gate.creole;

import java.util.*;
import java.io.*;
import java.net.*;
import junit.framework.*;
import gnu.regexp.*;

import gate.*;
import gate.util.*;
import gate.corpora.TestDocument;
import gate.creole.tokeniser.*;
import gate.creole.gazetteer.*;
import gate.creole.splitter.*;
import gate.creole.orthomatcher.*;
import gate.persist.*;
import gate.annotation.*;

/**
 * Test the PRs on three documents.
 *
 * The three documents are static fixtures shared by every test method, so
 * annotations added by one test are still present when the next one runs.
 * Several assertions (for instance the total annotation count checked in
 * {@link #testTokenizer()}) therefore only hold when the tests run in the
 * order used by {@link #main(String[])} and {@link #suite()}.
 */
public class TestPR extends TestCase
{
  /** Debug flag */
  private static final boolean DEBUG = false;

  protected static Document doc1;
  protected static Document doc2;
  protected static Document doc3;

  /** Annotation types compared against the stored documents in testAllPR. */
  protected static List annotationTypes = new ArrayList(10);

  /** Construction */
  public TestPR(String name) { super(name); }

  /**
   * Fixture set up: loads the three test documents (once) and fills the list
   * of annotation types to compare (once).
   */
  public void setUp() throws Exception {
    //get 3 documents
    if (doc1 == null)
      doc1 = loadTestDocument("tests/ft-bt-03-aug-2001.html");

    if (doc2 == null)
      doc2 = loadTestDocument("tests/gu-Am-Brit-4-aug-2001.html");

    if (doc3 == null)
      doc3 = loadTestDocument("tests/in-outlook-09-aug-2001.html");

    // setUp runs before every test method but the list is static: only fill
    // it the first time, otherwise every type is added again on each call and
    // compareAnnots repeats the same comparison once per duplicate.
    if (annotationTypes.isEmpty()) {
      annotationTypes.add("Sentence");
      annotationTypes.add("Organization");
      annotationTypes.add("Location");
      annotationTypes.add("Person");
      annotationTypes.add("Date");
      annotationTypes.add("Money");
      annotationTypes.add("Lookup");
      annotationTypes.add("Token");
    }
  } // setUp

  /** Put things back as they should be after running tests.
    */
  public void tearDown() throws Exception {
  } // tearDown

  /** Runs the default tokeniser on each document and checks the total
    * number of annotations present afterwards. */
  public void testTokenizer() throws Exception {
    FeatureMap params = Factory.newFeatureMap();
    DefaultTokeniser tokeniser = (DefaultTokeniser) Factory.createResource(
                    "gate.creole.tokeniser.DefaultTokeniser", params);

    //run the tokeniser for doc1
    tokeniser.setDocument(doc1);
    tokeniser.execute();
    assertCount(doc1, "Token annotations", 1286, doc1.getAnnotations().size());

    //run the tokeniser for doc2
    tokeniser.setDocument(doc2);
    tokeniser.execute();
    assertCount(doc2, "Token annotations", 2144, doc2.getAnnotations().size());

    //run the tokeniser for doc3
    tokeniser.setDocument(doc3);
    tokeniser.execute();
    assertCount(doc3, "Token annotations", 2812, doc3.getAnnotations().size());

    Factory.deleteResource(tokeniser);
  }// testTokenizer

  /** Runs the default gazetteer on each document and checks the number of
    * Lookup annotations. */
  public void testGazetteer() throws Exception {
    FeatureMap params = Factory.newFeatureMap();
    DefaultGazetteer gaz = (DefaultGazetteer) Factory.createResource(
                          "gate.creole.gazetteer.DefaultGazetteer", params);

    //run gazetteer for doc1
    gaz.setDocument(doc1);
    gaz.execute();
    assertCount(doc1, "Lookup annotations", 47, countOfType(doc1, "Lookup"));

    //run gazetteer for doc2
    gaz.setDocument(doc2);
    gaz.execute();
    assertCount(doc2, "Lookup annotations", 99, countOfType(doc2, "Lookup"));

    //run gazetteer for doc3
    gaz.setDocument(doc3);
    gaz.execute();
    assertCount(doc3, "Lookup annotations", 112, countOfType(doc3, "Lookup"));

    Factory.deleteResource(gaz);
  }//testGazetteer

  /** Runs the sentence splitter on each document and checks the number of
    * Sentence and Split annotations. */
  public void testSplitter() throws Exception {
    FeatureMap params = Factory.newFeatureMap();
    SentenceSplitter splitter = (SentenceSplitter) Factory.createResource(
                          "gate.creole.splitter.SentenceSplitter", params);

    //run splitter for doc1
    splitter.setDocument(doc1);
    splitter.execute();
    assertCount(doc1, "Sentence annotations", 22,
                countOfType(doc1, "Sentence"));
    assertCount(doc1, "Split annotations", 36, countOfType(doc1, "Split"));

    //run splitter for doc2
    splitter.setDocument(doc2);
    splitter.execute();
    assertCount(doc2, "Sentence annotations", 53,
                countOfType(doc2, "Sentence"));
    assertCount(doc2, "Split annotations", 71, countOfType(doc2, "Split"));

    //run splitter for doc3
    splitter.setDocument(doc3);
    splitter.execute();
    assertCount(doc3, "Sentence annotations", 65,
                countOfType(doc3, "Sentence"));
    if (DEBUG)
      Out.prln(doc3.getAnnotations().get("Sentence"));
    assertCount(doc3, "Split annotations", 85, countOfType(doc3, "Split"));

    Factory.deleteResource(splitter);
  }//testSplitter

  /** Runs the POS tagger on each document and checks how many Token
    * annotations carry a "category" feature. */
  public void testTagger() throws Exception {
    FeatureMap params = Factory.newFeatureMap();
    POSTagger tagger = (POSTagger) Factory.createResource(
                          "gate.creole.POSTagger", params);

    HashSet fType = new HashSet();
    fType.add("category");

    //run the tagger for doc1
    tagger.setDocument(doc1);
    tagger.execute();
    AnnotationSet annots = doc1.getAnnotations().get("Token", fType);
    assertCount(doc1, "Token annotations with category feature", 675,
                annots.size());

    //run the tagger for doc2
    tagger.setDocument(doc2);
    tagger.execute();
    annots = doc2.getAnnotations().get("Token", fType);
    assertCount(doc2, "Token annotations with category feature", 1131,
                annots.size());

    //run the tagger for doc3
    tagger.setDocument(doc3);
    tagger.execute();
    annots = doc3.getAnnotations().get("Token", fType);
    assertCount(doc3, "Token annotations with category feature", 1426,
                annots.size());

    Factory.deleteResource(tagger);
  }//testTagger()

  /** Runs the ANNIE transducer on each document and checks the number of
    * Organization, Location, Person, Date and Money annotations. */
  public void testTransducer() throws Exception {
    FeatureMap params = Factory.newFeatureMap();
    ANNIETransducer transducer = (ANNIETransducer) Factory.createResource(
                          "gate.creole.ANNIETransducer", params);

    //run the transducer for doc1
    transducer.setDocument(doc1);
    transducer.execute();
    assertEntityCounts(doc1, 16, 3, 3, 6, 1);

    //run the transducer for doc2
    transducer.setDocument(doc2);
    transducer.execute();
    assertEntityCounts(doc2, 18, 9, 1, 6, 3);

    //run the transducer for doc3
    transducer.setDocument(doc3);
    transducer.execute();
    assertEntityCounts(doc3, 9, 12, 8, 7, 4);

    Factory.deleteResource(transducer);
  }//testTransducer

  /** Runs the orthomatcher on each document and checks how many annotations
    * (of any type) carry a "matches" feature. */
  public void testOrthomatcher() throws Exception {
    FeatureMap params = Factory.newFeatureMap();

    OrthoMatcher orthomatcher = (OrthoMatcher) Factory.createResource(
                          "gate.creole.orthomatcher.OrthoMatcher", params);

    HashSet fType = new HashSet();
    fType.add("matches");

    // run the orthomatcher for doc1
    orthomatcher.setDocument(doc1);
    orthomatcher.execute();
    AnnotationSet annots = doc1.getAnnotations().get(null,fType);
    assertCount(doc1, "annotations with matches feature", 29, annots.size());

    //run the orthomatcher for doc2
    orthomatcher.setDocument(doc2);
    orthomatcher.execute();
    annots = doc2.getAnnotations().get(null,fType);
    // The hand-written message used to claim "expected 35" while the check
    // itself compared against 33; the asserted value is kept and the message
    // is now generated from it.
    assertCount(doc2, "annotations with matches feature", 33, annots.size());

    //run the orthomatcher for doc3
    orthomatcher.setDocument(doc3);
    orthomatcher.execute();
    annots = doc3.getAnnotations().get(null,fType);
    assertCount(doc3, "annotations with matches feature", 20, annots.size());

    Factory.deleteResource(orthomatcher);
  }//testOrthomatcher

  /** A test for comparing the annotation sets: each processed document is
    * compared with the copy kept in a serial data store under tests/. */
  public void testAllPR() throws Exception {

    // verify if the saved data store is the same with the just processed file
    String urlBaseName = Gate.locateGateFiles();

    // When GATE was located inside build/gate.jar, point at the sibling
    // classes/ directory instead and turn the jar: URL into a file: URL.
    // NOTE(review): assumes such a name starts with "jar:file:" -- confirm.
    if (urlBaseName.endsWith("/gate/build/gate.jar!/")) {
      StringBuffer buff = new StringBuffer(
                            urlBaseName.substring(
                              0,
                              urlBaseName.lastIndexOf("build/gate.jar!/"))
                            );
      buff.append("classes/");
      buff.delete(0, "jar:file:".length());
      buff.insert(0, "file://");
      urlBaseName = buff.toString();
    }

    URL urlBase = new URL(urlBaseName + "gate/resources/gate.ac.uk/");

    // NOTE(review): the data stores opened here are never closed -- confirm
    // whether that is intentional.

    // first document
    compareAnnots(loadStoredDocument(urlBase, "tests/ft"), doc1);

    // second document
    compareAnnots(loadStoredDocument(urlBase, "tests/gu"), doc2);

    // third document
    compareAnnots(loadStoredDocument(urlBase, "tests/in"), doc3);
  } // testAllPR()

  /**
   * Compares two documents with an AnnotationDiff, once for every type in
   * {@link #annotationTypes}, and asserts that precision, recall and
   * f-measure averages are all exactly 1.0.
   *
   * @param keyDocument the reference document
   * @param responseDocument the document that has just been processed
   */
  public void compareAnnots(Document keyDocument, Document responseDocument){

    // one schema instance is reused; only its annotation name changes
    AnnotationSchema annotationSchema = new AnnotationSchema();

    Iterator iteratorTypes = annotationTypes.iterator();
    while (iteratorTypes.hasNext()){
      // get the type of annotation
      String annotType = (String)iteratorTypes.next();
      annotationSchema.setAnnotationName(annotType);

      // parameters for the annotation diff
      FeatureMap parameters = Factory.newFeatureMap();
      parameters.put("keyDocument",keyDocument);
      parameters.put("responseDocument",responseDocument);
      parameters.put("annotationSchema",annotationSchema);
      parameters.put("keyAnnotationSetName",null);
      parameters.put("responseAnnotationSetName",null);

      String fileName = responseDocument.getSourceUrl().getFile();

      try {
        // Create Annotation Diff visual resource
        AnnotationDiff annotDiff = (AnnotationDiff)
          Factory.createResource("gate.annotation.AnnotationDiff",parameters);

        if (DEBUG && annotDiff.getFMeasureAverage() != 1.0) {
          // Report all three sets in a single failure; separate always-false
          // assertions would stop at the first one.
          fail("missing annotations " +
               annotDiff.getAnnotationsOfType(AnnotationDiff.MISSING_TYPE) +
               "; spurious annotations " +
               annotDiff.getAnnotationsOfType(AnnotationDiff.SPURIOUS_TYPE) +
               "; partially-correct annotations " +
               annotDiff.getAnnotationsOfType(
                                    AnnotationDiff.PARTIALLY_CORRECT_TYPE));
        }//if

        assertTrue(annotType+ " precision average in "+ fileName +
          " is "+ annotDiff.getPrecisionAverage()+ " instead of 1.0 ",
          annotDiff.getPrecisionAverage()== 1.0);
        assertTrue(annotType+" recall average in " + fileName +
          " is " + annotDiff.getRecallAverage()+ " instead of 1.0 ",
          annotDiff.getRecallAverage()== 1.0);
        assertTrue(annotType+" f-measure average in " + fileName +
          " is "+ annotDiff.getFMeasureAverage()+ " instead of 1.0 ",
          annotDiff.getFMeasureAverage()== 1.0);
      } catch (ResourceInstantiationException rie) {
        rie.printStackTrace(Err.getPrintWriter());
        // Without a diff nothing was compared, so the test must not pass.
        fail("Could not create the AnnotationDiff for " + annotType +
             " in " + fileName + ": " + rie);
      }

    }//while
  }// public void compareAnnots

  /**
   * Test suite routine for the test runner.
   *
   * The tests are added one by one, in the same order as in main, rather
   * than discovered by reflection: the documents are shared between tests,
   * so the asserted counts depend on which PRs have already run.
   */
  public static Test suite() {
    TestSuite suite = new TestSuite(TestPR.class.getName());
    suite.addTest(new TestPR("testTokenizer"));
    suite.addTest(new TestPR("testGazetteer"));
    suite.addTest(new TestPR("testSplitter"));
    suite.addTest(new TestPR("testTagger"));
    suite.addTest(new TestPR("testTransducer"));
    suite.addTest(new TestPR("testOrthomatcher"));
    suite.addTest(new TestPR("testAllPR"));
    return suite;
  } // suite

  /** Runs all the tests in pipeline order without a JUnit runner; any
    * exception is printed rather than rethrown. */
  public static void main(String[] args) {
    try{

      Gate.init();
      TestPR testPR = new TestPR("");
      testPR.setUp();
      testPR.testTokenizer();
      testPR.testGazetteer();
      testPR.testSplitter();
      testPR.testTagger();
      testPR.testTransducer();
      testPR.testOrthomatcher();
      testPR.testAllPR();
      testPR.tearDown();
    } catch(Exception e) {
      e.printStackTrace();
    }
  } // main

  /** Loads a document from the test server, given its path relative to the
    * server name returned by TestDocument.getTestServerName(). */
  private static Document loadTestDocument(String relativePath)
                                                        throws Exception {
    return Factory.newDocument(
      new URL(TestDocument.getTestServerName() + relativePath));
  } // loadTestDocument

  /** Opens the serial data store at storePath (relative to urlBase) and
    * loads the first DocumentImpl it lists. */
  private static Document loadStoredDocument(URL urlBase, String storePath)
                                                        throws Exception {
    URL storageDir = new URL(urlBase, storePath);

    //open the data store
    DataStore ds = Factory.openDataStore("gate.persist.SerialDataStore",
                                         storageDir.toExternalForm());
    //get LR id
    String lrId = (String)ds.getLrIds("gate.corpora.DocumentImpl").get(0);

    // get the document from data store
    FeatureMap features = Factory.newFeatureMap();
    features.put(DataStore.DATASTORE_FEATURE_NAME, ds);
    features.put(DataStore.LR_ID_FEATURE_NAME, lrId);
    return (Document) Factory.createResource("gate.corpora.DocumentImpl",
                                             features);
  } // loadStoredDocument

  /** Number of annotations of the given type in the document's default
    * annotation set. */
  private static int countOfType(Document doc, String type) {
    return doc.getAnnotations().get(type).size();
  } // countOfType

  /** Asserts the five entity-type counts checked after the transducer. */
  private static void assertEntityCounts(Document doc, int organizations,
                                         int locations, int persons,
                                         int dates, int money) {
    assertCount(doc, "Organization annotations", organizations,
                countOfType(doc, "Organization"));
    assertCount(doc, "Location annotations", locations,
                countOfType(doc, "Location"));
    assertCount(doc, "Person annotations", persons,
                countOfType(doc, "Person"));
    assertCount(doc, "Date annotations", dates, countOfType(doc, "Date"));
    assertCount(doc, "Money annotations", money, countOfType(doc, "Money"));
  } // assertEntityCounts

  /** Asserts that actual equals expected. The failure message is built from
    * the expected value itself so the two cannot disagree. */
  private static void assertCount(Document doc, String what,
                                  int expected, int actual) {
    assertTrue("Found in " + doc.getSourceUrl().getFile() + " " + actual +
               " " + what + ", instead of the expected " + expected + ".",
               actual == expected);
  } // assertCount
} // class TestPR
|
TestPR |
|