|
TestPR |
|
1 /* 2 * TestPR.java 3 * 4 * Copyright (c) 1998-2001, The University of Sheffield. 5 * 6 * This file is part of GATE (see http://gate.ac.uk/), and is free 7 * software, licenced under the GNU Library General Public License, 8 * Version 2, June 1991 (in the distribution as file licence.html, 9 * and also available at http://gate.ac.uk/gate/licence.html). 10 * 11 * Oana Hamza, 12 * 13 * $Id: TestPR.java,v 1.42 2003/01/30 18:35:21 valyt Exp $ 14 */ 15 16 package gate.creole; 17 18 import java.util.*; 19 import java.io.*; 20 import java.net.*; 21 import junit.framework.*; 22 import gnu.regexp.*; 23 24 import gate.*; 25 import gate.util.*; 26 import gate.corpora.TestDocument; 27 import gate.creole.tokeniser.*; 28 import gate.creole.gazetteer.*; 29 import gate.creole.splitter.*; 30 import gate.creole.orthomatcher.*; 31 import gate.persist.*; 32 import gate.creole.ANNIEConstants; 33 34 /** Test the PRs on three documents */ 35 public class TestPR extends TestCase 36 { 37 /** Debug flag */ 38 private static final boolean DEBUG = false; 39 40 protected static Document doc1; 41 protected static Document doc2; 42 protected static Document doc3; 43 44 protected static List annotationTypes = new ArrayList(10); 45 46 static{ 47 annotationTypes.add(ANNIEConstants.SENTENCE_ANNOTATION_TYPE); 48 annotationTypes.add(ANNIEConstants.ORGANIZATION_ANNOTATION_TYPE); 49 annotationTypes.add(ANNIEConstants.LOCATION_ANNOTATION_TYPE); 50 annotationTypes.add(ANNIEConstants.PERSON_ANNOTATION_TYPE); 51 annotationTypes.add(ANNIEConstants.DATE_ANNOTATION_TYPE); 52 annotationTypes.add(ANNIEConstants.MONEY_ANNOTATION_TYPE); 53 annotationTypes.add(ANNIEConstants.LOOKUP_ANNOTATION_TYPE); 54 annotationTypes.add(ANNIEConstants.TOKEN_ANNOTATION_TYPE); 55 try{ 56 //get 3 documents 57 if (doc1 == null) 58 doc1 = Factory.newDocument( 59 new URL(TestDocument.getTestServerName() + 60 "tests/ft-bt-03-aug-2001.html"), 61 "windows-1252" 62 ); 63 64 if (doc2 == null) 65 doc2 = Factory.newDocument( 66 new URL(TestDocument.getTestServerName() + 67 "tests/gu-Am-Brit-4-aug-2001.html"), 68 "windows-1252" 69 ); 70 71 if (doc3 == null) 72 doc3 = Factory.newDocument( 73 new URL(TestDocument.getTestServerName() + 74 "tests/in-outlook-09-aug-2001.html"), 75 "windows-1252" 76 ); 77 }catch(Exception e){ 78 e.printStackTrace(); 79 } 80 } 81 82 /** Construction */ 83 public TestPR(String name) { super(name); } 84 85 /** Fixture set up */ 86 public void setUp() throws Exception { 87 } // setUp 88 89 /** Put things back as they should be after running tests. 90 */ 91 public void tearDown() throws Exception { 92 } // tearDown 93 94 public void testTokenizer() throws Exception { 95 FeatureMap params = Factory.newFeatureMap(); 96 DefaultTokeniser tokeniser = (DefaultTokeniser) Factory.createResource( 97 "gate.creole.tokeniser.DefaultTokeniser", params); 98 99 100 //run the tokeniser for doc1 101 tokeniser.setDocument(doc1); 102 tokeniser.execute(); 103 assertTrue("Found in "+doc1.getSourceUrl().getFile()+ " "+ 104 doc1.getAnnotations().size() + 105 " Token annotations, instead of the expected 1284.", 106 doc1.getAnnotations().size()== 1284); 107 108 109 //run the tokeniser for doc2 110 tokeniser.setDocument(doc2); 111 tokeniser.execute(); 112 assertTrue("Found in "+ doc2.getSourceUrl().getFile()+ " "+ 113 doc2.getAnnotations().size() + 114 " Token annotations, instead of the expected 2138.", 115 doc2.getAnnotations().size()== 2138); 116 117 118 //run the tokeniser for doc3 119 tokeniser.setDocument(doc3); 120 tokeniser.execute(); 121 assertTrue("Found in "+ doc3.getSourceUrl().getFile()+ " "+ 122 doc3.getAnnotations().size() + 123 " Token annotations, instead of the expected 2806.", 124 doc3.getAnnotations().size()== 2806); 125 126 Factory.deleteResource(tokeniser); 127 }// testTokenizer 128 129 public void testGazetteer() throws Exception { 130 FeatureMap params = Factory.newFeatureMap(); 131 DefaultGazetteer gaz = (DefaultGazetteer) Factory.createResource( 132 "gate.creole.gazetteer.DefaultGazetteer", params); 133 134 //run gazetteer for doc1 135 gaz.setDocument(doc1); 136 gaz.execute(); 137 assertTrue("Found in "+ doc1.getSourceUrl().getFile()+ " "+ 138 doc1.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size() + 139 " Lookup annotations, instead of the expected 63.", 140 doc1.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size()== 63); 141 142 //run gazetteer for doc2 143 gaz.setDocument(doc2); 144 gaz.execute(); 145 assertTrue("Found in "+ doc2.getSourceUrl().getFile()+ " "+ 146 doc2.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size() + 147 " Lookup annotations, instead of the expected 109.", 148 doc2.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size()== 109); 149 150 //run gazetteer for doc3 151 gaz.setDocument(doc3); 152 gaz.execute(); 153 assertTrue("Found in "+ doc3.getSourceUrl().getFile()+ " "+ 154 doc3.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size() + 155 " Lookup annotations, instead of the expected 136.", 156 doc3.getAnnotations().get(ANNIEConstants.LOOKUP_ANNOTATION_TYPE).size()== 136); 157 Factory.deleteResource(gaz); 158 }//testGazetteer 159 160 public void testSplitter() throws Exception { 161 FeatureMap params = Factory.newFeatureMap(); 162 SentenceSplitter splitter = (SentenceSplitter) Factory.createResource( 163 "gate.creole.splitter.SentenceSplitter", params); 164 165 //run splitter for doc1 166 splitter.setDocument(doc1); 167 splitter.execute(); 168 assertTrue("Found in "+ doc1.getSourceUrl().getFile()+ " "+ 169 doc1.getAnnotations().get(ANNIEConstants.SENTENCE_ANNOTATION_TYPE).size() + 170 " Sentence annotations, instead of the expected 22.", 171 doc1.getAnnotations().get(ANNIEConstants.SENTENCE_ANNOTATION_TYPE).size()== 22); 172 173 assertTrue("Found in "+ doc1.getSourceUrl().getFile()+ " "+ 174 doc1.getAnnotations().get("Split").size() + 175 " Split annotations, instead of the expected 36.", 176 doc1.getAnnotations().get("Split").size()== 36); 177 178 179 //run splitter for doc2 180 splitter.setDocument(doc2); 181 splitter.execute(); 182 assertTrue("Found in "+ doc2.getSourceUrl().getFile()+ " "+ 183 doc2.getAnnotations().get(ANNIEConstants.SENTENCE_ANNOTATION_TYPE).size() + 184 " Sentence annotations, instead of the expected 52.", 185 doc2.getAnnotations().get(ANNIEConstants.SENTENCE_ANNOTATION_TYPE).size()== 52); 186 187 assertTrue("Found in "+ doc2.getSourceUrl().getFile()+ " "+ 188 doc2.getAnnotations().get("Split").size() + 189 " Split annotations, instead of the expected 71.", 190 doc2.getAnnotations().get("Split").size()== 71); 191 192 //run splitter for doc3 193 splitter.setDocument(doc3); 194 splitter.execute(); 195 196 assertTrue("Found in "+ doc3.getSourceUrl().getFile()+ " "+ 197 doc3.getAnnotations().get(ANNIEConstants.SENTENCE_ANNOTATION_TYPE).size() + 198 " Sentence annotations, instead of the expected 66.", 199 doc3.getAnnotations().get(ANNIEConstants.SENTENCE_ANNOTATION_TYPE).size()== 66); 200 201 assertTrue("Found in "+ doc3.getSourceUrl().getFile()+ " "+ 202 doc3.getAnnotations().get("Split").size() + 203 " Split annotations, instead of the expected 84.", 204 doc3.getAnnotations().get("Split").size()== 84); 205 Factory.deleteResource(splitter); 206 }//testSplitter 207 208 public void testTagger() throws Exception { 209 FeatureMap params = Factory.newFeatureMap(); 210 POSTagger tagger = (POSTagger) Factory.createResource( 211 "gate.creole.POSTagger", params); 212 213 214 //run the tagger for doc1 215 tagger.setDocument(doc1); 216 tagger.execute(); 217 218 HashSet fType = new HashSet(); 219 fType.add(ANNIEConstants.TOKEN_CATEGORY_FEATURE_NAME); 220 AnnotationSet annots = 221 doc1.getAnnotations().get(ANNIEConstants.TOKEN_ANNOTATION_TYPE, fType); 222 223 assertTrue("Found in "+ doc1.getSourceUrl().getFile()+ " "+ annots.size() + 224 " Token annotations with category feature, instead of the expected 675.", 225 annots.size() == 675); 226 227 //run the tagger for doc2 228 tagger.setDocument(doc2); 229 tagger.execute(); 230 annots = doc2.getAnnotations().get(ANNIEConstants.TOKEN_ANNOTATION_TYPE, fType); 231 assertTrue("Found in "+ doc2.getSourceUrl().getFile()+ " "+annots.size() + 232 " Token annotations with category feature, instead of the expected 1131.", 233 annots.size() == 1131); 234 235 //run the tagger for doc3 236 tagger.setDocument(doc3); 237 tagger.execute(); 238 annots = doc3.getAnnotations().get(ANNIEConstants.TOKEN_ANNOTATION_TYPE, fType); 239 assertTrue("Found in "+ doc3.getSourceUrl().getFile()+ " "+ annots.size() + 240 " Token annotations with category feature, instead of the expected 1443.", 241 annots.size() == 1443); 242 Factory.deleteResource(tagger); 243 }//testTagger() 244 245 public void testTransducer() throws Exception { 246 FeatureMap params = Factory.newFeatureMap(); 247 ANNIETransducer transducer = (ANNIETransducer) Factory.createResource( 248 "gate.creole.ANNIETransducer", params); 249 250 //run the transducer for doc1 251 transducer.setDocument(doc1); 252 transducer.execute(); 253 assertTrue("Found in "+ doc1.getSourceUrl().getFile()+ " "+ 254 doc1.getAnnotations().get(ANNIEConstants.ORGANIZATION_ANNOTATION_TYPE).size() + 255 " Organization annotations, instead of the expected 17", 256 doc1.getAnnotations().get(ANNIEConstants.ORGANIZATION_ANNOTATION_TYPE).size()== 17); 257 assertTrue("Found in "+doc1.getSourceUrl().getFile()+ " "+ 258 doc1.getAnnotations().get(ANNIEConstants.LOCATION_ANNOTATION_TYPE).size() + 259 " Location annotations, instead of the expected 3", 260 doc1.getAnnotations().get(ANNIEConstants.LOCATION_ANNOTATION_TYPE).size()== 3); 261 assertTrue("Found in "+doc1.getSourceUrl().getFile()+ " "+ 262 doc1.getAnnotations().get(ANNIEConstants.PERSON_ANNOTATION_TYPE).size() + 263 " Person annotations, instead of the expected 3", 264 doc1.getAnnotations().get(ANNIEConstants.PERSON_ANNOTATION_TYPE).size()== 3); 265 assertTrue("Found in "+doc1.getSourceUrl().getFile()+ " "+ 266 doc1.getAnnotations().get(ANNIEConstants.DATE_ANNOTATION_TYPE).size() + 267 " Date annotations, instead of the expected 6", 268 doc1.getAnnotations().get(ANNIEConstants.DATE_ANNOTATION_TYPE).size()== 6); 269 assertTrue("Found in "+doc1.getSourceUrl().getFile()+ " "+ 270 doc1.getAnnotations().get(ANNIEConstants.MONEY_ANNOTATION_TYPE).size() + 271 " Money annotations, instead of the expected 1", 272 doc1.getAnnotations().get(ANNIEConstants.MONEY_ANNOTATION_TYPE).size()== 1); 273 274 //run the transducer for doc2 275 transducer.setDocument(doc2); 276 transducer.execute(); 277 assertTrue("Found in "+doc2.getSourceUrl().getFile()+ " "+ 278 doc2.getAnnotations().get(ANNIEConstants.ORGANIZATION_ANNOTATION_TYPE).size() + 279 " Organization annotations, instead of the expected 18", 280 doc2.getAnnotations().get(ANNIEConstants.ORGANIZATION_ANNOTATION_TYPE).size()== 18); 281 assertTrue("Found in "+doc2.getSourceUrl().getFile()+ " "+ 282 doc2.getAnnotations().get(ANNIEConstants.LOCATION_ANNOTATION_TYPE).size() + 283 " Location annotations, instead of the expected 9", 284 doc2.getAnnotations().get(ANNIEConstants.LOCATION_ANNOTATION_TYPE).size()== 9); 285 assertTrue("Found in "+doc2.getSourceUrl().getFile()+ " "+ 286 doc2.getAnnotations().get(ANNIEConstants.PERSON_ANNOTATION_TYPE).size() + 287 " Person annotations, instead of the expected 1", 288 doc2.getAnnotations().get(ANNIEConstants.PERSON_ANNOTATION_TYPE).size()== 1); 289 assertTrue("Found in "+doc2.getSourceUrl().getFile()+ " "+ 290 doc2.getAnnotations().get(ANNIEConstants.DATE_ANNOTATION_TYPE).size() + 291 " Date annotations, instead of the expected 6", 292 doc2.getAnnotations().get(ANNIEConstants.DATE_ANNOTATION_TYPE).size()== 6); 293 assertTrue("Found in "+doc2.getSourceUrl().getFile()+ " "+ 294 doc2.getAnnotations().get(ANNIEConstants.MONEY_ANNOTATION_TYPE).size() + 295 " Money annotations, instead of the expected 3", 296 doc2.getAnnotations().get(ANNIEConstants.MONEY_ANNOTATION_TYPE).size()== 3); 297 298 //run the transducer for doc3 299 transducer.setDocument(doc3); 300 transducer.execute(); 301 assertTrue("Found in "+doc3.getSourceUrl().getFile()+ " "+ 302 doc3.getAnnotations().get(ANNIEConstants.ORGANIZATION_ANNOTATION_TYPE).size() + 303 " Organization annotations, instead of the expected 9", 304 doc3.getAnnotations().get(ANNIEConstants.ORGANIZATION_ANNOTATION_TYPE).size()== 9); 305 assertTrue("Found in "+doc3.getSourceUrl().getFile()+ " "+ 306 doc3.getAnnotations().get(ANNIEConstants.LOCATION_ANNOTATION_TYPE).size() + 307 " Location annotations, instead of the expected 12", 308 doc3.getAnnotations().get(ANNIEConstants.LOCATION_ANNOTATION_TYPE).size()== 12); 309 assertTrue("Found in "+doc3.getSourceUrl().getFile()+ " "+ 310 doc3.getAnnotations().get(ANNIEConstants.PERSON_ANNOTATION_TYPE).size() + 311 " Person annotations, instead of the expected 8", 312 doc3.getAnnotations().get(ANNIEConstants.PERSON_ANNOTATION_TYPE).size()== 8); 313 assertTrue("Found in "+doc3.getSourceUrl().getFile()+ " "+ 314 doc3.getAnnotations().get(ANNIEConstants.DATE_ANNOTATION_TYPE).size() + 315 " Date annotations, instead of the expected 7", 316 doc3.getAnnotations().get(ANNIEConstants.DATE_ANNOTATION_TYPE).size()== 7); 317 assertTrue("Found in "+doc3.getSourceUrl().getFile()+ " "+ 318 doc3.getAnnotations().get(ANNIEConstants.MONEY_ANNOTATION_TYPE).size() + 319 " Money annotations, instead of the expected 4", 320 doc3.getAnnotations().get(ANNIEConstants.MONEY_ANNOTATION_TYPE).size()== 4); 321 322 Factory.deleteResource(transducer); 323 }//testTransducer 324 325 public void testOrthomatcher() throws Exception { 326 FeatureMap params = Factory.newFeatureMap(); 327 328 OrthoMatcher orthomatcher = (OrthoMatcher) Factory.createResource( 329 "gate.creole.orthomatcher.OrthoMatcher", params); 330 331 332 // run the orthomatcher for doc1 333 orthomatcher.setDocument(doc1); 334 orthomatcher.execute(); 335 336 HashSet fType = new HashSet(); 337 fType.add(ANNIEConstants.ANNOTATION_COREF_FEATURE_NAME); 338 AnnotationSet annots = 339 doc1.getAnnotations().get(null,fType); 340 341 assertTrue("Found in "+doc1.getSourceUrl().getFile()+ " "+ annots.size() + 342 " annotations with matches feature, instead of the expected 30.", 343 annots.size() == 30); 344 345 //run the orthomatcher for doc2 346 orthomatcher.setDocument(doc2); 347 orthomatcher.execute(); 348 annots = doc2.getAnnotations().get(null,fType); 349 assertTrue("Found in "+doc2.getSourceUrl().getFile()+ " "+ annots.size() + 350 " annotations with matches feature, instead of the expected 35.", 351 annots.size() == 33); 352 353 //run the orthomatcher for doc3 354 orthomatcher.setDocument(doc3); 355 orthomatcher.execute(); 356 357 annots = doc3.getAnnotations().get(null,fType); 358 assertTrue("Found in "+doc3.getSourceUrl().getFile()+ " "+ annots.size() + 359 " annotations with matches feature, instead of the expected 24.", 360 annots.size() == 24); 361 Factory.deleteResource(orthomatcher); 362 }//testOrthomatcher 363 364 /** A test for comparing the annotation sets*/ 365 public void testAllPR() throws Exception { 366 367 // verify if the saved data store is the same with the just processed file 368 // first document 369 String urlBaseName = Gate.locateGateFiles(); 370 // RE re1 = new RE("build/gate.jar!"); 371 // RE re2 = new RE("jar:"); 372 // urlBaseName = re1.substituteAll( urlBaseName,"classes"); 373 // urlBaseName = re2.substituteAll( urlBaseName,""); 374 375 if (urlBaseName.endsWith("/gate/build/gate.jar!/")) { 376 StringBuffer buff = new StringBuffer( 377 urlBaseName.substring( 378 0, 379 urlBaseName.lastIndexOf("build/gate.jar!/")) 380 ); 381 buff.append("classes/"); 382 buff.delete(0, "jar:file:".length()); 383 buff.insert(0, "file://"); 384 urlBaseName = buff.toString(); 385 } 386 387 URL urlBase = new URL(urlBaseName + "gate/resources/gate.ac.uk/"); 388 389 URL storageDir = null; 390 storageDir = new URL(urlBase, "tests/ft"); 391 392 //open the data store 393 DataStore ds = Factory.openDataStore 394 ("gate.persist.SerialDataStore", 395 storageDir.toExternalForm()); 396 397 //get LR id 398 String lrId = (String)ds.getLrIds 399 ("gate.corpora.DocumentImpl").get(0); 400 401 402 // get the document from data store 403 FeatureMap features = Factory.newFeatureMap(); 404 features.put(DataStore.DATASTORE_FEATURE_NAME, ds); 405 features.put(DataStore.LR_ID_FEATURE_NAME, lrId); 406 Document document = (Document) Factory.createResource( 407 "gate.corpora.DocumentImpl", 408 features); 409 compareAnnots(document, doc1); 410 411 // second document 412 storageDir = null; 413 storageDir = new URL(urlBase, "tests/gu"); 414 415 //open the data store 416 ds = Factory.openDataStore("gate.persist.SerialDataStore", 417 storageDir.toExternalForm()); 418 //get LR id 419 lrId = (String)ds.getLrIds("gate.corpora.DocumentImpl").get(0); 420 // get the document from data store 421 features = Factory.newFeatureMap(); 422 features.put(DataStore.DATASTORE_FEATURE_NAME, ds); 423 features.put(DataStore.LR_ID_FEATURE_NAME, lrId); 424 document = (Document) Factory.createResource( 425 "gate.corpora.DocumentImpl", 426 features); 427 compareAnnots(document,doc2); 428 429 // third document 430 storageDir = null; 431 storageDir = new URL(urlBase, "tests/in"); 432 433 //open the data store 434 ds = Factory.openDataStore("gate.persist.SerialDataStore", 435 storageDir.toExternalForm()); 436 //get LR id 437 lrId = (String)ds.getLrIds("gate.corpora.DocumentImpl").get(0); 438 // get the document from data store 439 features = Factory.newFeatureMap(); 440 features.put(DataStore.DATASTORE_FEATURE_NAME, ds); 441 features.put(DataStore.LR_ID_FEATURE_NAME, lrId); 442 document = (Document) Factory.createResource( 443 "gate.corpora.DocumentImpl", 444 features); 445 compareAnnots(document,doc3); 446 } // testAllPR() 447 448 // public void compareAnnots1(Document keyDocument, Document responseDocument) 449 // throws Exception{ 450 // // organization type 451 // Iterator iteratorTypes = annotationTypes.iterator(); 452 // while (iteratorTypes.hasNext()){ 453 // // get the type of annotation 454 // String annotType = (String)iteratorTypes.next(); 455 // // create annotation schema 456 // AnnotationSchema annotationSchema = new AnnotationSchema(); 457 // 458 // annotationSchema.setAnnotationName(annotType); 459 // 460 // // create an annotation diff 461 // AnnotationDiff annotDiff = new AnnotationDiff(); 462 // annotDiff.setKeyDocument(keyDocument); 463 // annotDiff.setResponseDocument(responseDocument); 464 // annotDiff.setAnnotationSchema(annotationSchema); 465 // annotDiff.setKeyAnnotationSetName(null); 466 // annotDiff.setResponseAnnotationSetName(null); 467 // 468 // Set significantFeatures = new HashSet(Arrays.asList( 469 // new String[]{"NMRule", "kind", "orgType", "rule", 470 // "rule1", "rule2", "locType", "gender", 471 // "majorType", "minorType", "category", 472 // "length", "orth", "string", "subkind", 473 // "symbolkind"})); 474 // annotDiff.setKeyFeatureNamesSet(significantFeatures); 475 // annotDiff.setTextMode(new Boolean(true)); 476 // 477 // annotDiff.init(); 478 // 479 // if (DEBUG){ 480 // if (annotDiff.getFMeasureAverage() != 1.0) { 481 // assertTrue("missing annotations " + 482 // annotDiff.getAnnotationsOfType(AnnotationDiff.MISSING_TYPE) 483 // + " spurious annotations " + 484 // annotDiff.getAnnotationsOfType(AnnotationDiff.SPURIOUS_TYPE) 485 // + " partially-correct annotations " + 486 // annotDiff.getAnnotationsOfType( 487 // AnnotationDiff.PARTIALLY_CORRECT_TYPE),false); 488 // } 489 // }//if 490 // 491 // assertTrue(annotType+ " precision average in "+ 492 // responseDocument.getSourceUrl().getFile()+ 493 // " is "+ annotDiff.getPrecisionAverage()+ " instead of 1.0 ", 494 // annotDiff.getPrecisionAverage()== 1.0); 495 // assertTrue(annotType+" recall average in " 496 // +responseDocument.getSourceUrl().getFile()+ 497 // " is " + annotDiff.getRecallAverage()+ " instead of 1.0 ", 498 // annotDiff.getRecallAverage()== 1.0); 499 // assertTrue(annotType+" f-measure average in " 500 // +responseDocument.getSourceUrl().getFile()+ 501 // " is "+ annotDiff.getFMeasureAverage()+ " instead of 1.0 ", 502 // annotDiff.getFMeasureAverage()== 1.0); 503 // }//while 504 // }// public void compareAnnots 505 // 506 public void compareAnnots(Document keyDocument, Document responseDocument) 507 throws Exception{ 508 // organization type 509 Iterator iteratorTypes = annotationTypes.iterator(); 510 while (iteratorTypes.hasNext()){ 511 // get the type of annotation 512 String annotType = (String)iteratorTypes.next(); 513 514 // create an annotation diff 515 AnnotationDiffer annotDiffer = new AnnotationDiffer(); 516 Set significantFeatures = new HashSet(Arrays.asList( 517 new String[]{"NMRule", "kind", "orgType", "rule", 518 "rule1", "rule2", "locType", "gender", 519 "majorType", "minorType", "category", 520 "length", "orth", "string", "subkind", 521 "symbolkind"})); 522 annotDiffer.setSignificantFeaturesSet(significantFeatures); 523 annotDiffer.calculateDiff(keyDocument.getAnnotations().get(annotType), 524 responseDocument.getAnnotations().get(annotType)); 525 if(DEBUG) annotDiffer.printMissmatches(); 526 527 assertTrue(annotType+ " precision strict in "+ 528 responseDocument.getSourceUrl().getFile()+ 529 " is "+ annotDiffer.getPrecisionStrict()+ " instead of 1.0 ", 530 annotDiffer.getPrecisionStrict()== 1.0); 531 532 assertTrue(annotType+" recall strict in " 533 +responseDocument.getSourceUrl().getFile()+ 534 " is " + annotDiffer.getRecallStrict()+ " instead of 1.0 ", 535 annotDiffer.getRecallStrict()== 1.0); 536 537 assertTrue(annotType+" f-measure strict in " 538 +responseDocument.getSourceUrl().getFile()+ 539 " is "+ annotDiffer.getFMeasureStrict(0.5)+ " instead of 1.0 ", 540 annotDiffer.getFMeasureStrict(0.5)== 1.0); 541 }//while 542 }// public void compareAnnots 543 544 545 /** Test suite routine for the test runner */ 546 public static Test suite() { 547 return new TestSuite(TestPR.class); 548 } // suite 549 550 public static void main(String[] args) { 551 try{ 552 553 Gate.init(); 554 TestPR testPR = new TestPR(""); 555 testPR.setUp(); 556 testPR.testTokenizer(); 557 testPR.testGazetteer(); 558 testPR.testSplitter(); 559 testPR.testTagger(); 560 testPR.testTransducer(); 561 testPR.testOrthomatcher(); 562 testPR.testAllPR(); 563 testPR.tearDown(); 564 } catch(Exception e) { 565 e.printStackTrace(); 566 } 567 } // main 568 } // class TestPR 569
|
TestPR |
|