1   /*
2    *  Copyright (c) 1998-2004, The University of Sheffield.
3    *
4    *  This file is part of GATE (see http://gate.ac.uk/), and is free
5    *  software, licenced under the GNU Library General Public License,
6    *  Version 2, June 1991 (in the distribution as file licence.html,
7    *  and also available at http://gate.ac.uk/gate/licence.html).
8    *
9    *  Valentin Tablan 28/01/2003
10   *
11   *  $Id: AnnotationDiffer.java,v 1.13 2004/07/21 17:10:09 akshay Exp $
12   *
13   */
14  package gate.util;
15  
16  import java.util.*;
17  
18  import gate.Annotation;
19  
20  public class AnnotationDiffer {
21  
22  
23    public static interface Pairing{
24      public Annotation getKey();
25      public Annotation getResponse();
26      public int getType();
27    }
28  
29    /**
30     * Computes a diff between two collections of annotations.
31     * @param key
32     * @param response
33     * @return a list of {@link Pairing} objects representing the pairing set
34     * that results in the best score.
35     */
36    public List calculateDiff(Collection key, Collection response){
37      //initialise data structures
38      keyList = new ArrayList(key);
39      responseList = new ArrayList(response);
40  
41      keyChoices = new ArrayList(keyList.size());
42      keyChoices.addAll(Collections.nCopies(keyList.size(), null));
43      responseChoices = new ArrayList(responseList.size());
44      responseChoices.addAll(Collections.nCopies(responseList.size(), null));
45  
46      possibleChoices = new ArrayList();
47  
48      //1) try all possible pairings
49      for(int i = 0; i < keyList.size(); i++){
50        for(int j =0; j < responseList.size(); j++){
51          Annotation keyAnn = (Annotation)keyList.get(i);
52          Annotation resAnn = (Annotation)responseList.get(j);
53          PairingImpl choice = null;
54          if(keyAnn.coextensive(resAnn)){
55            //we have full overlap -> CORRECT or WRONG
56            if(keyAnn.isCompatible(resAnn, significantFeaturesSet)){
57              //we have a full match
58              choice = new PairingImpl(i, j, CORRECT);
59            }else{
60              //the two annotations are coextensive but don't match
61              //we have a missmatch
62              choice = new PairingImpl(i, j, WRONG);
63            }
64          }else if(keyAnn.overlaps(resAnn)){
65            //we have partial overlap -> PARTIALLY_CORRECT or WRONG
66            if(keyAnn.isPartiallyCompatible(resAnn, significantFeaturesSet)){
67              choice = new PairingImpl(i, j, PARTIALLY_CORRECT);
68            }else{
69              choice = new PairingImpl(i, j, WRONG);
70            }
71          }
72          
73  //        // >>>>>>
74  //        if(significantFeaturesSet == null){
75  //          //full comaptibility required
76  //          if(keyAnn.isCompatible(resAnn)){
77  //            choice = new PairingImpl(i, j, CORRECT);
78  //          }else if(keyAnn.isPartiallyCompatible(resAnn)){
79  //            choice = new PairingImpl(i, j, PARTIALLY_CORRECT);
80  //          }
81  //        }else{
82  //          //compatibility tests restricted to a set of features
83  //          if(keyAnn.isCompatible(resAnn, significantFeaturesSet)){
84  //            choice = new PairingImpl(i, j, CORRECT);
85  //          }else if(keyAnn.isPartiallyCompatible(resAnn, significantFeaturesSet)){
86  //            choice = new PairingImpl(i, j, PARTIALLY_CORRECT);
87  //          }
88  //        }
89  //        // <<<<<<
90          
91          //add the new choice if any
92          if (choice != null) {
93            addPairing(choice, i, keyChoices);
94            addPairing(choice, j, responseChoices);
95            possibleChoices.add(choice);
96          }
97        }//for j
98      }//for i
99  
100     //2) from all possible pairings, find the maximal set that also
101     //maximises the total score
102     Collections.sort(possibleChoices, new PairingScoreComparator());
103     Collections.reverse(possibleChoices);
104     finalChoices = new ArrayList();
105     correctMatches = 0;
106     partiallyCorrectMatches = 0;
107     missing = 0;
108     spurious = 0;
109 
110     while(!possibleChoices.isEmpty()){
111       PairingImpl bestChoice = (PairingImpl)possibleChoices.remove(0);
112       bestChoice.consume();
113       finalChoices.add(bestChoice);
114       switch(bestChoice.type){
115         case CORRECT:{
116           if(correctAnnotations == null) correctAnnotations = new HashSet();
117           correctAnnotations.add(bestChoice.getResponse());
118           correctMatches++;
119           break;
120         }
121         case PARTIALLY_CORRECT:{
122           if(partiallyCorrectAnnotations == null) partiallyCorrectAnnotations = new HashSet();
123           partiallyCorrectAnnotations.add(bestChoice.getResponse());
124           partiallyCorrectMatches++;
125           break;
126         }
127         case WRONG:{
128           if(bestChoice.getKey() != null){
129             //we have a missed key
130             if(missingAnnotations == null) missingAnnotations = new HashSet();
131             missingAnnotations.add(bestChoice.getKey());
132             missing ++;
133           }
134           if(bestChoice.getResponse() != null){
135             //we have a spurious response
136             if(spuriousAnnotations == null) spuriousAnnotations = new HashSet();
137             spuriousAnnotations.add(bestChoice.getResponse());
138             spurious ++;
139           }
140           break;
141         }
142         default:{
143           throw new GateRuntimeException("Invalid pairing type: " + 
144                   bestChoice.type);
145         }
146       }
147     } 
148     //add choices for the incorrect matches (MISSED, SPURIOUS)
149     //get the unmatched keys
150     for(int i = 0; i < keyChoices.size(); i++){
151       List aList = (List)keyChoices.get(i);
152       if(aList == null || aList.isEmpty()){
153         if(missingAnnotations == null) missingAnnotations = new HashSet();
154         missingAnnotations.add((Annotation)(keyList.get(i)));
155         finalChoices.add(new PairingImpl(i, -1, WRONG));
156         missing ++;
157       }
158     }
159 
160     //get the unmatched responses
161     for(int i = 0; i < responseChoices.size(); i++){
162       List aList = (List)responseChoices.get(i);
163       if(aList == null || aList.isEmpty()){
164         if(spuriousAnnotations == null) spuriousAnnotations = new HashSet();
165         spuriousAnnotations.add((Annotation)(responseList.get(i)));
166         finalChoices.add(new PairingImpl(-1, i, WRONG));
167         spurious ++;
168       }
169     }
170 
171     return finalChoices;
172   }
173 
174   public double getPrecisionStrict(){
175     return (double)((double)correctMatches / (double)responseList.size());
176   }
177 
178   public double getRecallStrict(){
179     return (double)((double)correctMatches / (double)keyList.size());
180   }
181 
182   public double getPrecisionLenient(){
183     return (double)((double)(correctMatches + partiallyCorrectMatches) / (double)responseList.size());
184   }
185 
186   public double getPrecisionAverage() {
187     return (double)((double)(getPrecisionLenient() + getPrecisionStrict()) / (double)(2.0));
188   }
189 
190   public double getRecallLenient(){
191     return (double)((double)(correctMatches + partiallyCorrectMatches) / (double)keyList.size());
192   }
193 
194   public double getRecallAverage() {
195     return (double)((double)(getRecallLenient() + getRecallStrict()) / (double)(2.0));
196   }
197 
198   public double getFMeasureStrict(double beta){
199     double precision = getPrecisionStrict();
200     double recall = getRecallStrict();
201     double betaSq = beta * beta;
202     return (double)(((betaSq + 1) * precision * recall ) /
203            (double)(betaSq * precision + recall));
204   }
205 
206   public double getFMeasureLenient(double beta){
207     double precision = getPrecisionLenient();
208     double recall = getRecallLenient();
209     double betaSq = beta * beta;
210     return (double)(((betaSq + 1) * precision * recall ) /
211            (double)(betaSq * precision + recall));
212   }
213 
214   public double getFMeasureAverage(double beta) {
215     return (double)(((double)(getFMeasureLenient(beta) + getFMeasureStrict(beta)) / (double)(2.0)));
216   }
217 
218   public int getCorrectMatches(){
219     return correctMatches;
220   }
221 
222   public int getPartiallyCorrectMatches(){
223     return partiallyCorrectMatches;
224   }
225 
226   public int getMissing(){
227     return missing;
228   }
229 
230   public int getSpurious(){
231     return spurious;
232   }
233 
234   public int getFalsePositivesStrict(){
235     return responseList.size() - correctMatches;
236   }
237 
238   public int getFalsePositivesLenient(){
239     return responseList.size() - correctMatches - partiallyCorrectMatches;
240   }
241 
242   public void printMissmatches(){
243     //get the partial correct matches
244     Iterator iter = finalChoices.iterator();
245     while(iter.hasNext()){
246       PairingImpl aChoice = (PairingImpl)iter.next();
247       switch(aChoice.type){
248         case PARTIALLY_CORRECT:{
249           System.out.println("Missmatch (partially correct):");
250           System.out.println("Key: " + keyList.get(aChoice.keyIndex).toString());
251           System.out.println("Response: " + responseList.get(aChoice.responseIndex).toString());
252           break;
253         }
254       }
255     }
256 
257     //get the unmatched keys
258     for(int i = 0; i < keyChoices.size(); i++){
259       List aList = (List)keyChoices.get(i);
260       if(aList == null || aList.isEmpty()){
261         System.out.println("Missed Key: " + keyList.get(i).toString());
262       }
263     }
264 
265     //get the unmatched responses
266     for(int i = 0; i < responseChoices.size(); i++){
267       List aList = (List)responseChoices.get(i);
268       if(aList == null || aList.isEmpty()){
269         System.out.println("Spurious Response: " + responseList.get(i).toString());
270       }
271     }
272   }
273 
274 
275 
276   /**
277    * Performs some basic checks over the internal data structures from the last
278    * run.
279    * @throws Exception
280    */
281   void sanityCheck()throws Exception{
282     //all keys and responses should have at most one choice left
283     Iterator iter =keyChoices.iterator();
284     while(iter.hasNext()){
285       List choices = (List)iter.next();
286       if(choices != null){
287         if(choices.size() > 1){
288           throw new Exception("Multiple choices found!");
289         }else if(!choices.isEmpty()){
290           //size must be 1
291           PairingImpl aChoice = (PairingImpl)choices.get(0);
292           //the SAME choice should be found for the associated response
293           List otherChoices = (List)responseChoices.get(aChoice.responseIndex);
294           if(otherChoices == null ||
295              otherChoices.size() != 1 ||
296              otherChoices.get(0) != aChoice){
297             throw new Exception("Reciprocity error!");
298           }
299         }
300       }
301     }
302 
303     iter =responseChoices.iterator();
304     while(iter.hasNext()){
305       List choices = (List)iter.next();
306       if(choices != null){
307         if(choices.size() > 1){
308           throw new Exception("Multiple choices found!");
309         }else if(!choices.isEmpty()){
310           //size must be 1
311           PairingImpl aChoice = (PairingImpl)choices.get(0);
312           //the SAME choice should be found for the associated response
313           List otherChoices = (List)keyChoices.get(aChoice.keyIndex);
314           if(otherChoices == null){
315             throw new Exception("Reciprocity error : null!");
316           }else if(otherChoices.size() != 1){
317             throw new Exception("Reciprocity error: not 1!");
318           }else if(otherChoices.get(0) != aChoice){
319             throw new Exception("Reciprocity error: different!");
320           }
321         }
322       }
323     }
324   }
325   /**
326    *
327    * @param pairing the pairing to be added
328    * @param index the index in the list of pairings
329    * @param listOfPairings the list of {@link Pairing}s where the
330    * pairing should be added
331    */
332   protected void addPairing(PairingImpl pairing, int index, List listOfPairings){
333     List existingChoices = (List)listOfPairings.get(index);
334     if(existingChoices == null){
335       existingChoices = new ArrayList();
336       listOfPairings.set(index, existingChoices);
337     }
338     existingChoices.add(pairing);
339   }
340 
341   public java.util.Set getSignificantFeaturesSet() {
342     return significantFeaturesSet;
343   }
344 
345   public void setSignificantFeaturesSet(java.util.Set significantFeaturesSet) {
346     this.significantFeaturesSet = significantFeaturesSet;
347   }
348 
349   /**
350    * Represents a pairing of a key annotation with a response annotation and
351    * the associated score for that pairing.
352    */
353    public class PairingImpl implements Pairing{
354     PairingImpl(int keyIndex, int responseIndex, int type) {
355       this.keyIndex = keyIndex;
356       this.responseIndex = responseIndex;
357       this.type = type;
358       scoreCalculated = false;
359       }
360 
361     public int getScore(){
362       if(scoreCalculated) return score;
363       else{
364         calculateScore();
365         return score;
366       }
367     }
368 
369     public Annotation getKey(){
370       return keyIndex == -1 ? null : (Annotation)keyList.get(keyIndex);
371     }
372 
373     public Annotation getResponse(){
374       return responseIndex == -1 ? null :
375         (Annotation)responseList.get(responseIndex);
376     }
377 
378     public int getType(){
379       return type;
380     }
381 
382     /**
383      * Removes all mutually exclusive OTHER choices possible from
384      * the data structures.
385      * <tt>this</tt> gets removed from {@link #possibleChoices} as well.
386      */
387     public void consume(){
388       possibleChoices.remove(this);
389       List sameKeyChoices = (List)keyChoices.get(keyIndex);
390       sameKeyChoices.remove(this);
391       possibleChoices.removeAll(sameKeyChoices);
392 
393       List sameResponseChoices = (List)responseChoices.get(responseIndex);
394       sameResponseChoices.remove(this);
395       possibleChoices.removeAll(sameResponseChoices);
396 
397       Iterator iter = new ArrayList(sameKeyChoices).iterator();
398       while(iter.hasNext()){
399         ((PairingImpl)iter.next()).remove();
400       }
401       iter = new ArrayList(sameResponseChoices).iterator();
402       while(iter.hasNext()){
403         ((PairingImpl)iter.next()).remove();
404       }
405       sameKeyChoices.add(this);
406       sameResponseChoices.add(this);
407     }
408 
409     /**
410      * Removes this choice from the two lists it belongs to
411      */
412     protected void remove(){
413       List fromKey = (List)keyChoices.get(keyIndex);
414       fromKey.remove(this);
415       List fromResponse = (List)responseChoices.get(responseIndex);
416       fromResponse.remove(this);
417     }
418 
419     /**
420      * Calculates the score for this choice as:
421      * type - sum of all the types of all OTHER mutually exclusive choices
422      */
423     void calculateScore(){
424       //this needs to be a set so we don't count conflicts twice
425       Set conflictSet = new HashSet();
426       //add all the choices from the same response annotation
427       conflictSet.addAll((List)responseChoices.get(responseIndex));
428       //add all the choices from the same key annotation
429       conflictSet.addAll((List)keyChoices.get(keyIndex));
430       //remove this choice from the conflict set
431       conflictSet.remove(this);
432       score = type;
433       Iterator conflictIter = conflictSet.iterator();
434       while(conflictIter.hasNext()) score -= ((PairingImpl)conflictIter.next()).type;
435       scoreCalculated = true;
436     }
437 
438     int keyIndex;
439     int responseIndex;
440     int type;
441     int score;
442     boolean scoreCalculated;
443   }
444 
445   protected static class PairingScoreComparator implements Comparator{
446     /**
447      * Compares two choices:
448      * the better score is preferred;
449      * for the same score the better type is preferred (exact matches are
450      * preffered to partial ones).
451      * @return a positive value if the first pairing is better than the second,
452      * zero if they score the same or negative otherwise.
453      */
454 
455     public int compare(Object o1, Object o2){
456       PairingImpl first = (PairingImpl)o1;
457       PairingImpl second = (PairingImpl)o2;
458       //compare by score
459       int res = first.getScore() - second.getScore();
460       //compare by type
461       if(res == 0) res = first.getType() - second.getType();
462       //compare by completeness (a wrong match with both key and response 
463       //is "better" than one with only key or response
464       if(res == 0){
465         res = (first.getKey() == null ? 0 : 1) +
466               (first.getResponse() == null ? 0 : 1) +
467               (second.getKey() == null ? 0 : -1) +
468               (second.getResponse() == null ? 0 : -1);
469       }
470       return res;
471     }
472   }
473 
474 
475   public static class PairingOffsetComparator implements Comparator{
476     /**
477      * Compares two choices based on start offset of key (or response
478      * if key not present) and type if offsets are equal.
479      */
480     public int compare(Object o1, Object o2){
481       Pairing first = (Pairing)o1;
482       Pairing second = (Pairing)o2;
483       Annotation key1 = first.getKey();
484       Annotation key2 = second.getKey();
485       Annotation res1 = first.getResponse();
486       Annotation res2 = second.getResponse();
487       Long start1 = key1 == null ? null : key1.getStartNode().getOffset();
488       if(start1 == null) start1 = res1.getStartNode().getOffset();
489       Long start2 = key2 == null ? null : key2.getStartNode().getOffset();
490       if(start2 == null) start2 = res2.getStartNode().getOffset();
491       int res = start1.compareTo(start2);
492       if(res == 0){
493         //compare by type
494         res = second.getType() - first.getType();
495       }
496 
497 //
498 //
499 //
500 //      //choices with keys are smaller than ones without
501 //      if(key1 == null && key2 != null) return 1;
502 //      if(key1 != null && key2 == null) return -1;
503 //      if(key1 == null){
504 //        //both keys are null
505 //        res = res1.getStartNode().getOffset().
506 //            compareTo(res2.getStartNode().getOffset());
507 //        if(res == 0) res = res1.getEndNode().getOffset().
508 //              compareTo(res2.getEndNode().getOffset());
509 //        if(res == 0) res = second.getType() - first.getType();
510 //      }else{
511 //        //both keys are present
512 //        res = key1.getStartNode().getOffset().compareTo(
513 //            key2.getStartNode().getOffset());
514 //
515 //        if(res == 0){
516 //          //choices with responses are smaller than ones without
517 //          if(res1 == null && res2 != null) return 1;
518 //          if(res1 != null && res2 == null) return -1;
519 //          if(res1 != null){
520 //            res = res1.getStartNode().getOffset().
521 //                compareTo(res2.getStartNode().getOffset());
522 //          }
523 //          if(res == 0)res = key1.getEndNode().getOffset().compareTo(
524 //                  key2.getEndNode().getOffset());
525 //          if(res == 0 && res1 != null){
526 //              res = res1.getEndNode().getOffset().
527 //                  compareTo(res2.getEndNode().getOffset());
528 //          }
529 //          if(res == 0) res = second.getType() - first.getType();
530 //        }
531 //      }
532       return res;
533     }
534 
535   }
536 
537   /**
538    * A method that returns specific type of annotations
539    * @param type
540    * @return a {@link Set} of {@link Pairing}s.
541    */
542   public Set getAnnotationsOfType(int type) {
543     switch(type) {
544       case CORRECT_TYPE:
545         return (correctAnnotations == null)? new HashSet() : correctAnnotations;
546       case PARTIALLY_CORRECT_TYPE:
547         return (partiallyCorrectAnnotations == null) ? new HashSet() : partiallyCorrectAnnotations;
548       case SPURIOUS_TYPE:
549         return (spuriousAnnotations == null) ? new HashSet() : spuriousAnnotations;
550       case MISSING_TYPE:
551         return (missingAnnotations == null) ? new HashSet() : missingAnnotations;
552       default:
553         return new HashSet();
554     }
555   }
556 
557 
558   public HashSet correctAnnotations, partiallyCorrectAnnotations, missingAnnotations, spuriousAnnotations;
559 
560 
561   /** A correct type when all annotation are correct represented by Green color*/
562   public static final int CORRECT_TYPE = 1;
563   /** A partially correct type when all annotation are corect represented
564    *  by Blue color*/
565   public static final int PARTIALLY_CORRECT_TYPE = 2;
566   /** A spurious type when annotations in response were not present in key.
567    *  Represented by Red color*/
568   public static final int SPURIOUS_TYPE = 3;
569   /** A missing type when annotations in key were not present in response
570    *  Represented by Yellow color*/
571   public static final int MISSING_TYPE = 4;
572 
573 
574   public static final int CORRECT = 2;
575   public static final int PARTIALLY_CORRECT = 1;
576   public static final int WRONG = 0;
577 
578   private java.util.Set significantFeaturesSet;
579 
580   protected int correctMatches;
581   protected int partiallyCorrectMatches;
582   protected int missing;
583   protected int spurious;
584 
585   /**
586    * A list with all the key annotations
587    */
588   protected List keyList;
589 
590   /**
591    * A list with all the response annotations
592    */
593   protected List responseList;
594 
595   /**
596    * A list of lists representing all possible choices for each key
597    */
598   protected List keyChoices;
599 
600   /**
601    * A list of lists representing all possible choices for each response
602    */
603   protected List responseChoices;
604 
605   /**
606    * All the posible choices are added to this list for easy iteration.
607    */
608   protected List possibleChoices;
609 
610   /**
611    * A list with the choices selected for the best result.
612    */
613   protected List finalChoices;
614 
615 }