1
14 package gate.util;
15
16 import java.util.*;
17
18 import gate.Annotation;
19
20 public class AnnotationDiffer {
21
22
23 public static interface Pairing{
24 public Annotation getKey();
25 public Annotation getResponse();
26 public int getType();
27 }
28
29
36 public List calculateDiff(Collection key, Collection response){
37 keyList = new ArrayList(key);
39 responseList = new ArrayList(response);
40
41 keyChoices = new ArrayList(keyList.size());
42 keyChoices.addAll(Collections.nCopies(keyList.size(), null));
43 responseChoices = new ArrayList(responseList.size());
44 responseChoices.addAll(Collections.nCopies(responseList.size(), null));
45
46 possibleChoices = new ArrayList();
47
48 for(int i = 0; i < keyList.size(); i++){
50 for(int j =0; j < responseList.size(); j++){
51 Annotation keyAnn = (Annotation)keyList.get(i);
52 Annotation resAnn = (Annotation)responseList.get(j);
53 PairingImpl choice = null;
54 if(keyAnn.coextensive(resAnn)){
55 if(keyAnn.isCompatible(resAnn, significantFeaturesSet)){
57 choice = new PairingImpl(i, j, CORRECT);
59 }else{
60 choice = new PairingImpl(i, j, WRONG);
63 }
64 }else if(keyAnn.overlaps(resAnn)){
65 if(keyAnn.isPartiallyCompatible(resAnn, significantFeaturesSet)){
67 choice = new PairingImpl(i, j, PARTIALLY_CORRECT);
68 }else{
69 choice = new PairingImpl(i, j, WRONG);
70 }
71 }
72
73
91 if (choice != null) {
93 addPairing(choice, i, keyChoices);
94 addPairing(choice, j, responseChoices);
95 possibleChoices.add(choice);
96 }
97 } }
100 Collections.sort(possibleChoices, new PairingScoreComparator());
103 Collections.reverse(possibleChoices);
104 finalChoices = new ArrayList();
105 correctMatches = 0;
106 partiallyCorrectMatches = 0;
107 missing = 0;
108 spurious = 0;
109
110 while(!possibleChoices.isEmpty()){
111 PairingImpl bestChoice = (PairingImpl)possibleChoices.remove(0);
112 bestChoice.consume();
113 finalChoices.add(bestChoice);
114 switch(bestChoice.type){
115 case CORRECT:{
116 if(correctAnnotations == null) correctAnnotations = new HashSet();
117 correctAnnotations.add(bestChoice.getResponse());
118 correctMatches++;
119 break;
120 }
121 case PARTIALLY_CORRECT:{
122 if(partiallyCorrectAnnotations == null) partiallyCorrectAnnotations = new HashSet();
123 partiallyCorrectAnnotations.add(bestChoice.getResponse());
124 partiallyCorrectMatches++;
125 break;
126 }
127 case WRONG:{
128 if(bestChoice.getKey() != null){
129 if(missingAnnotations == null) missingAnnotations = new HashSet();
131 missingAnnotations.add(bestChoice.getKey());
132 missing ++;
133 }
134 if(bestChoice.getResponse() != null){
135 if(spuriousAnnotations == null) spuriousAnnotations = new HashSet();
137 spuriousAnnotations.add(bestChoice.getResponse());
138 spurious ++;
139 }
140 break;
141 }
142 default:{
143 throw new GateRuntimeException("Invalid pairing type: " +
144 bestChoice.type);
145 }
146 }
147 }
148 for(int i = 0; i < keyChoices.size(); i++){
151 List aList = (List)keyChoices.get(i);
152 if(aList == null || aList.isEmpty()){
153 if(missingAnnotations == null) missingAnnotations = new HashSet();
154 missingAnnotations.add((Annotation)(keyList.get(i)));
155 finalChoices.add(new PairingImpl(i, -1, WRONG));
156 missing ++;
157 }
158 }
159
160 for(int i = 0; i < responseChoices.size(); i++){
162 List aList = (List)responseChoices.get(i);
163 if(aList == null || aList.isEmpty()){
164 if(spuriousAnnotations == null) spuriousAnnotations = new HashSet();
165 spuriousAnnotations.add((Annotation)(responseList.get(i)));
166 finalChoices.add(new PairingImpl(-1, i, WRONG));
167 spurious ++;
168 }
169 }
170
171 return finalChoices;
172 }
173
174 public double getPrecisionStrict(){
175 return (double)((double)correctMatches / (double)responseList.size());
176 }
177
178 public double getRecallStrict(){
179 return (double)((double)correctMatches / (double)keyList.size());
180 }
181
182 public double getPrecisionLenient(){
183 return (double)((double)(correctMatches + partiallyCorrectMatches) / (double)responseList.size());
184 }
185
186 public double getPrecisionAverage() {
187 return (double)((double)(getPrecisionLenient() + getPrecisionStrict()) / (double)(2.0));
188 }
189
190 public double getRecallLenient(){
191 return (double)((double)(correctMatches + partiallyCorrectMatches) / (double)keyList.size());
192 }
193
194 public double getRecallAverage() {
195 return (double)((double)(getRecallLenient() + getRecallStrict()) / (double)(2.0));
196 }
197
198 public double getFMeasureStrict(double beta){
199 double precision = getPrecisionStrict();
200 double recall = getRecallStrict();
201 double betaSq = beta * beta;
202 return (double)(((betaSq + 1) * precision * recall ) /
203 (double)(betaSq * precision + recall));
204 }
205
206 public double getFMeasureLenient(double beta){
207 double precision = getPrecisionLenient();
208 double recall = getRecallLenient();
209 double betaSq = beta * beta;
210 return (double)(((betaSq + 1) * precision * recall ) /
211 (double)(betaSq * precision + recall));
212 }
213
214 public double getFMeasureAverage(double beta) {
215 return (double)(((double)(getFMeasureLenient(beta) + getFMeasureStrict(beta)) / (double)(2.0)));
216 }
217
218 public int getCorrectMatches(){
219 return correctMatches;
220 }
221
222 public int getPartiallyCorrectMatches(){
223 return partiallyCorrectMatches;
224 }
225
226 public int getMissing(){
227 return missing;
228 }
229
230 public int getSpurious(){
231 return spurious;
232 }
233
234 public int getFalsePositivesStrict(){
235 return responseList.size() - correctMatches;
236 }
237
238 public int getFalsePositivesLenient(){
239 return responseList.size() - correctMatches - partiallyCorrectMatches;
240 }
241
242 public void printMissmatches(){
243 Iterator iter = finalChoices.iterator();
245 while(iter.hasNext()){
246 PairingImpl aChoice = (PairingImpl)iter.next();
247 switch(aChoice.type){
248 case PARTIALLY_CORRECT:{
249 System.out.println("Missmatch (partially correct):");
250 System.out.println("Key: " + keyList.get(aChoice.keyIndex).toString());
251 System.out.println("Response: " + responseList.get(aChoice.responseIndex).toString());
252 break;
253 }
254 }
255 }
256
257 for(int i = 0; i < keyChoices.size(); i++){
259 List aList = (List)keyChoices.get(i);
260 if(aList == null || aList.isEmpty()){
261 System.out.println("Missed Key: " + keyList.get(i).toString());
262 }
263 }
264
265 for(int i = 0; i < responseChoices.size(); i++){
267 List aList = (List)responseChoices.get(i);
268 if(aList == null || aList.isEmpty()){
269 System.out.println("Spurious Response: " + responseList.get(i).toString());
270 }
271 }
272 }
273
274
275
276
281 void sanityCheck()throws Exception{
282 Iterator iter =keyChoices.iterator();
284 while(iter.hasNext()){
285 List choices = (List)iter.next();
286 if(choices != null){
287 if(choices.size() > 1){
288 throw new Exception("Multiple choices found!");
289 }else if(!choices.isEmpty()){
290 PairingImpl aChoice = (PairingImpl)choices.get(0);
292 List otherChoices = (List)responseChoices.get(aChoice.responseIndex);
294 if(otherChoices == null ||
295 otherChoices.size() != 1 ||
296 otherChoices.get(0) != aChoice){
297 throw new Exception("Reciprocity error!");
298 }
299 }
300 }
301 }
302
303 iter =responseChoices.iterator();
304 while(iter.hasNext()){
305 List choices = (List)iter.next();
306 if(choices != null){
307 if(choices.size() > 1){
308 throw new Exception("Multiple choices found!");
309 }else if(!choices.isEmpty()){
310 PairingImpl aChoice = (PairingImpl)choices.get(0);
312 List otherChoices = (List)keyChoices.get(aChoice.keyIndex);
314 if(otherChoices == null){
315 throw new Exception("Reciprocity error : null!");
316 }else if(otherChoices.size() != 1){
317 throw new Exception("Reciprocity error: not 1!");
318 }else if(otherChoices.get(0) != aChoice){
319 throw new Exception("Reciprocity error: different!");
320 }
321 }
322 }
323 }
324 }
325
332 protected void addPairing(PairingImpl pairing, int index, List listOfPairings){
333 List existingChoices = (List)listOfPairings.get(index);
334 if(existingChoices == null){
335 existingChoices = new ArrayList();
336 listOfPairings.set(index, existingChoices);
337 }
338 existingChoices.add(pairing);
339 }
340
341 public java.util.Set getSignificantFeaturesSet() {
342 return significantFeaturesSet;
343 }
344
345 public void setSignificantFeaturesSet(java.util.Set significantFeaturesSet) {
346 this.significantFeaturesSet = significantFeaturesSet;
347 }
348
349
353 public class PairingImpl implements Pairing{
354 PairingImpl(int keyIndex, int responseIndex, int type) {
355 this.keyIndex = keyIndex;
356 this.responseIndex = responseIndex;
357 this.type = type;
358 scoreCalculated = false;
359 }
360
361 public int getScore(){
362 if(scoreCalculated) return score;
363 else{
364 calculateScore();
365 return score;
366 }
367 }
368
369 public Annotation getKey(){
370 return keyIndex == -1 ? null : (Annotation)keyList.get(keyIndex);
371 }
372
373 public Annotation getResponse(){
374 return responseIndex == -1 ? null :
375 (Annotation)responseList.get(responseIndex);
376 }
377
378 public int getType(){
379 return type;
380 }
381
382
387 public void consume(){
388 possibleChoices.remove(this);
389 List sameKeyChoices = (List)keyChoices.get(keyIndex);
390 sameKeyChoices.remove(this);
391 possibleChoices.removeAll(sameKeyChoices);
392
393 List sameResponseChoices = (List)responseChoices.get(responseIndex);
394 sameResponseChoices.remove(this);
395 possibleChoices.removeAll(sameResponseChoices);
396
397 Iterator iter = new ArrayList(sameKeyChoices).iterator();
398 while(iter.hasNext()){
399 ((PairingImpl)iter.next()).remove();
400 }
401 iter = new ArrayList(sameResponseChoices).iterator();
402 while(iter.hasNext()){
403 ((PairingImpl)iter.next()).remove();
404 }
405 sameKeyChoices.add(this);
406 sameResponseChoices.add(this);
407 }
408
409
412 protected void remove(){
413 List fromKey = (List)keyChoices.get(keyIndex);
414 fromKey.remove(this);
415 List fromResponse = (List)responseChoices.get(responseIndex);
416 fromResponse.remove(this);
417 }
418
419
423 void calculateScore(){
424 Set conflictSet = new HashSet();
426 conflictSet.addAll((List)responseChoices.get(responseIndex));
428 conflictSet.addAll((List)keyChoices.get(keyIndex));
430 conflictSet.remove(this);
432 score = type;
433 Iterator conflictIter = conflictSet.iterator();
434 while(conflictIter.hasNext()) score -= ((PairingImpl)conflictIter.next()).type;
435 scoreCalculated = true;
436 }
437
438 int keyIndex;
439 int responseIndex;
440 int type;
441 int score;
442 boolean scoreCalculated;
443 }
444
445 protected static class PairingScoreComparator implements Comparator{
446
454
455 public int compare(Object o1, Object o2){
456 PairingImpl first = (PairingImpl)o1;
457 PairingImpl second = (PairingImpl)o2;
458 int res = first.getScore() - second.getScore();
460 if(res == 0) res = first.getType() - second.getType();
462 if(res == 0){
465 res = (first.getKey() == null ? 0 : 1) +
466 (first.getResponse() == null ? 0 : 1) +
467 (second.getKey() == null ? 0 : -1) +
468 (second.getResponse() == null ? 0 : -1);
469 }
470 return res;
471 }
472 }
473
474
475 public static class PairingOffsetComparator implements Comparator{
476
480 public int compare(Object o1, Object o2){
481 Pairing first = (Pairing)o1;
482 Pairing second = (Pairing)o2;
483 Annotation key1 = first.getKey();
484 Annotation key2 = second.getKey();
485 Annotation res1 = first.getResponse();
486 Annotation res2 = second.getResponse();
487 Long start1 = key1 == null ? null : key1.getStartNode().getOffset();
488 if(start1 == null) start1 = res1.getStartNode().getOffset();
489 Long start2 = key2 == null ? null : key2.getStartNode().getOffset();
490 if(start2 == null) start2 = res2.getStartNode().getOffset();
491 int res = start1.compareTo(start2);
492 if(res == 0){
493 res = second.getType() - first.getType();
495 }
496
497 return res;
533 }
534
535 }
536
537
542 public Set getAnnotationsOfType(int type) {
543 switch(type) {
544 case CORRECT_TYPE:
545 return (correctAnnotations == null)? new HashSet() : correctAnnotations;
546 case PARTIALLY_CORRECT_TYPE:
547 return (partiallyCorrectAnnotations == null) ? new HashSet() : partiallyCorrectAnnotations;
548 case SPURIOUS_TYPE:
549 return (spuriousAnnotations == null) ? new HashSet() : spuriousAnnotations;
550 case MISSING_TYPE:
551 return (missingAnnotations == null) ? new HashSet() : missingAnnotations;
552 default:
553 return new HashSet();
554 }
555 }
556
557
558 public HashSet correctAnnotations, partiallyCorrectAnnotations, missingAnnotations, spuriousAnnotations;
559
560
561
562 public static final int CORRECT_TYPE = 1;
563
565 public static final int PARTIALLY_CORRECT_TYPE = 2;
566
568 public static final int SPURIOUS_TYPE = 3;
569
571 public static final int MISSING_TYPE = 4;
572
573
574 public static final int CORRECT = 2;
575 public static final int PARTIALLY_CORRECT = 1;
576 public static final int WRONG = 0;
577
578 private java.util.Set significantFeaturesSet;
579
580 protected int correctMatches;
581 protected int partiallyCorrectMatches;
582 protected int missing;
583 protected int spurious;
584
585
588 protected List keyList;
589
590
593 protected List responseList;
594
595
598 protected List keyChoices;
599
600
603 protected List responseChoices;
604
605
608 protected List possibleChoices;
609
610
613 protected List finalChoices;
614
615 }