1   /*
2    *  Copyright (c) 1998-2001, The University of Sheffield.
3    *
4    *  This file is part of GATE (see http://gate.ac.uk/), and is free
5    *  software, licenced under the GNU Library General Public License,
6    *  Version 2, June 1991 (in the distribution as file licence.html,
7    *  and also available at http://gate.ac.uk/gate/licence.html).
8    *
9    *  Valentin Tablan 28/01/2003
10   *
11   *  $Id: AnnotationDiffer.java,v 1.4 2003/01/30 15:58:50 valyt Exp $
12   *
13   */
14  package gate.util;
15  
16  import java.util.*;
17  import gate.*;
18  public class AnnotationDiffer {
19    /**
20     * Computes a diff between two collections of annotations.
21     * @param key
22     * @param response
23     */
24    public void calculateDiff(Collection key, Collection response){
25      //initialise data structures
26      keyList = new ArrayList(key);
27      responseList = new ArrayList(response);
28  
29      keyChoices = new ArrayList(keyList.size());
30      keyChoices.addAll(Collections.nCopies(keyList.size(), null));
31      responseChoices = new ArrayList(responseList.size());
32      responseChoices.addAll(Collections.nCopies(responseList.size(), null));
33  
34      possibleChoices = new ArrayList();
35  
36      //1) try all possible pairings
37      for(int i = 0; i < keyList.size(); i++){
38        for(int j =0; j < responseList.size(); j++){
39          Annotation keyAnn = (Annotation)keyList.get(i);
40          Annotation resAnn = (Annotation)responseList.get(j);
41          Choice choice = null;
42          if(significantFeaturesSet == null){
43            //full comaptibility required
44            if(keyAnn.isCompatible(resAnn)){
45              choice = new Choice(i, j, CORRECT);
46            }else if(keyAnn.isPartiallyCompatible(resAnn)){
47              choice = new Choice(i, j, PARTIALLY_CORRECT);
48            }
49          }else{
50            //compatibility tests restricted to a set of features
51            if(keyAnn.isCompatible(resAnn, significantFeaturesSet)){
52              choice = new Choice(i, j, CORRECT);
53            }else if(keyAnn.isPartiallyCompatible(resAnn, significantFeaturesSet)){
54              choice = new Choice(i, j, PARTIALLY_CORRECT);
55            }
56          }
57          //add the new choice if any
58          if (choice != null) {
59            addChoice(choice, i, keyChoices);
60            addChoice(choice, j, responseChoices);
61            possibleChoices.add(choice);
62          }
63        }//for j
64      }//for i
65  
66      //2) from all possible pairings, find the maximal set that also
67      //maximises the total score
68      Collections.sort(possibleChoices);
69      Collections.reverse(possibleChoices);
70      finalChoices = new ArrayList();
71      correctMatches = 0;
72      partiallyCorrectMatches = 0;
73  
74      while(!possibleChoices.isEmpty()){
75        Choice bestChoice = (Choice)possibleChoices.remove(0);
76        bestChoice.consume();
77        finalChoices.add(bestChoice);
78        switch(bestChoice.type){
79          case CORRECT:{
80            correctMatches++;
81            break;
82          }
83          case PARTIALLY_CORRECT:{
84            partiallyCorrectMatches++;
85            break;
86          }
87        }
88      }
89    }
90  
91    public double getPrecisionStrict(){
92      return (double)correctMatches / responseList.size();
93    }
94  
95    public double getRecallStrict(){
96      return (double)correctMatches / keyList.size();
97    }
98  
99    public double getPrecisionLenient(){
100     return (double)(correctMatches + partiallyCorrectMatches) / responseList.size();
101   }
102 
103   public double getRecallLenient(){
104     return (double)(correctMatches + partiallyCorrectMatches) / keyList.size();
105   }
106 
107   public double getFMeasureStrict(double beta){
108     double precision = getPrecisionStrict();
109     double recall = getRecallStrict();
110     double betaSq = beta * beta;
111     return ((betaSq + 1) * precision * recall ) /
112            (betaSq * precision + recall);
113   }
114 
115   public double getFMeasureLenient(double beta){
116     double precision = getPrecisionLenient();
117     double recall = getRecallLenient();
118     double betaSq = beta * beta;
119     return ((betaSq + 1) * precision * recall ) /
120            (betaSq * precision + recall);
121   }
122 
123   public int getFalsePositivesStrict(){
124     return responseList.size() - correctMatches;
125   }
126 
127   public int getFalsePositivesLenient(){
128     return responseList.size() - correctMatches - partiallyCorrectMatches;
129   }
130 
131   public void printMissmatches(){
132     //get the partial correct matches
133     Iterator iter = finalChoices.iterator();
134     while(iter.hasNext()){
135       Choice aChoice = (Choice)iter.next();
136       switch(aChoice.type){
137         case PARTIALLY_CORRECT:{
138           System.out.println("Missmatch (partially correct):");
139           System.out.println("Key: " + keyList.get(aChoice.keyIndex).toString());
140           System.out.println("Response: " + responseList.get(aChoice.responseIndex).toString());
141           break;
142         }
143       }
144     }
145 
146     //get the unmatched keys
147     for(int i = 0; i < keyChoices.size(); i++){
148       List aList = (List)keyChoices.get(i);
149       if(aList == null || aList.isEmpty()){
150         System.out.println("Unmatched Key: " + keyList.get(i).toString());
151       }
152     }
153 
154     //get the unmatched responses
155     for(int i = 0; i < responseChoices.size(); i++){
156       List aList = (List)responseChoices.get(i);
157       if(aList == null || aList.isEmpty()){
158         System.out.println("Unmatched Key: " + responseList.get(i).toString());
159       }
160     }
161 
162   }
163   /**
164    * Performs some basic checks over the internal data structures from the last
165    * run.
166    * @throws Exception
167    */
168   void sanityCheck()throws Exception{
169     //all keys and responses should have at most one choice left
170     Iterator iter =keyChoices.iterator();
171     while(iter.hasNext()){
172       List choices = (List)iter.next();
173       if(choices != null){
174         if(choices.size() > 1){
175           throw new Exception("Multiple choices found!");
176         }else if(!choices.isEmpty()){
177           //size must be 1
178           Choice aChoice = (Choice)choices.get(0);
179           //the SAME choice should be found for the associated response
180           List otherChoices = (List)responseChoices.get(aChoice.responseIndex);
181           if(otherChoices == null ||
182              otherChoices.size() != 1 ||
183              otherChoices.get(0) != aChoice){
184             throw new Exception("Reciprocity error!");
185           }
186         }
187       }
188     }
189 
190     iter =responseChoices.iterator();
191     while(iter.hasNext()){
192       List choices = (List)iter.next();
193       if(choices != null){
194         if(choices.size() > 1){
195           throw new Exception("Multiple choices found!");
196         }else if(!choices.isEmpty()){
197           //size must be 1
198           Choice aChoice = (Choice)choices.get(0);
199           //the SAME choice should be found for the associated response
200           List otherChoices = (List)keyChoices.get(aChoice.keyIndex);
201           if(otherChoices == null){
202             throw new Exception("Reciprocity error : null!");
203           }else if(otherChoices.size() != 1){
204             throw new Exception("Reciprocity error: not 1!");
205           }else if(otherChoices.get(0) != aChoice){
206             throw new Exception("Reciprocity error: different!");
207           }
208         }
209       }
210     }
211   }
212   /**
213    *
214    * @param choice the choice to be added
215    * @param index the index in the list of choices
216    * @param list the list of choices where the choice should be added
217    */
218   protected void addChoice(Choice choice, int index, List listOfChoices){
219     List existingChoices = (List)listOfChoices.get(index);
220     if(existingChoices == null){
221       existingChoices = new ArrayList();
222       listOfChoices.set(index, existingChoices);
223     }
224     existingChoices.add(choice);
225   }
226 
227   public java.util.Set getSignificantFeaturesSet() {
228     return significantFeaturesSet;
229   }
230 
231   public void setSignificantFeaturesSet(java.util.Set significantFeaturesSet) {
232     this.significantFeaturesSet = significantFeaturesSet;
233   }
234 
235   /**
236    * Represents a pairing of a key annotation with a response annotation and
237    * the associated score for that pairing.
238    */
239   class Choice implements Comparable{
240     Choice(int keyIndex, int responseIndex, int type) {
241       this.keyIndex = keyIndex;
242       this.responseIndex = responseIndex;
243       this.type = type;
244       scoreCalculated = false;
245     }
246 
247     int getScore(){
248       if(scoreCalculated) return score;
249       else{
250         calculateScore();
251         return score;
252       }
253     }
254 
255     /**
256      * Removes all mutually exclusive OTHER choices possible from
257      * the data structures.
258      * <tt>this</tt> gets removed from {@link #possibleChoices} as well.
259      */
260     public void consume(){
261       possibleChoices.remove(this);
262       List sameKeyChoices = (List)keyChoices.get(keyIndex);
263       sameKeyChoices.remove(this);
264       possibleChoices.removeAll(sameKeyChoices);
265 
266       List sameResponseChoices = (List)responseChoices.get(responseIndex);
267       sameResponseChoices.remove(this);
268       possibleChoices.removeAll(sameResponseChoices);
269 
270       Iterator iter = new ArrayList(sameKeyChoices).iterator();
271       while(iter.hasNext()){
272         ((Choice)iter.next()).remove();
273       }
274       iter = new ArrayList(sameResponseChoices).iterator();
275       while(iter.hasNext()){
276         ((Choice)iter.next()).remove();
277       }
278       sameKeyChoices.add(this);
279       sameResponseChoices.add(this);
280     }
281 
282     /**
283      * Removes this choice from the two lists it belongs to
284      */
285     protected void remove(){
286       List fromKey = (List)keyChoices.get(keyIndex);
287       fromKey.remove(this);
288       List fromResponse = (List)responseChoices.get(responseIndex);
289       fromResponse.remove(this);
290     }
291     /**
292      * Compares two choices:
293      * the better score is preferred;
294      * for the same score the better type is preferred (exact matches are
295      * preffered to partial ones).
296      * @param other
297      * @return
298      */
299     public int compareTo(Object other){
300       int res = getScore() - ((Choice)other).getScore();
301       if(res == 0) res = type - ((Choice)other).type;
302       return res;
303     }
304 
305     /**
306      * Calculates the score for this choice as:
307      * type - sum of all the types of all OTHER mutually exclusive choices
308      */
309     void calculateScore(){
310       //this needs to be a set so we don't count conflicts twice
311       Set conflictSet = new HashSet();
312       //add all the choices from the same response annotation
313       conflictSet.addAll((List)responseChoices.get(responseIndex));
314       //add all the choices from the same key annotation
315       conflictSet.addAll((List)keyChoices.get(keyIndex));
316       //remove this choice from the conflict set
317       conflictSet.remove(this);
318       score = type;
319       Iterator conflictIter = conflictSet.iterator();
320       while(conflictIter.hasNext()) score -= ((Choice)conflictIter.next()).type;
321       scoreCalculated = true;
322     }
323 
324     int keyIndex;
325     int responseIndex;
326     int type;
327     int score;
328     boolean scoreCalculated;
329   }
330 
331   public static final int CORRECT = 2;
332   public static final int PARTIALLY_CORRECT = 1;
333   public static final int DIFFERENT = 0;
334 
335   private java.util.Set significantFeaturesSet;
336 
337   protected int correctMatches;
338   protected int partiallyCorrectMatches;
339 
340   /**
341    * A list with all the key annotations
342    */
343   protected List keyList;
344 
345   /**
346    * A list with all the response annotations
347    */
348   protected List responseList;
349 
350   /**
351    * A list of lists representing all possible choices for each key
352    */
353   protected List keyChoices;
354 
355   /**
356    * A list of lists representing all possible choices for each response
357    */
358   protected List responseChoices;
359 
360   /**
361    * All the posible choices are added to this list for easy iteration.
362    */
363   protected List possibleChoices;
364 
365   /**
366    * A list with the choices selected for the best result.
367    */
368   protected List finalChoices;
369 
370 }