1   /*
2    *  Copyright (c) 1998-2004, The University of Sheffield.
3    *
4    *  This file is part of GATE (see http://gate.ac.uk/), and is free
5    *  software, licenced under the GNU Library General Public License,
6    *  Version 2, June 1991 (in the distribution as file licence.html,
7    *  and also available at http://gate.ac.uk/gate/licence.html).
8    *
9    *  Valentin Tablan 28/01/2003
10   *
11   *  $Id: AnnotationDiffer.java,v 1.14 2004/11/02 13:19:02 niraj Exp $
12   *
13   */
14  package gate.util;
15  
16  import java.util.*;
17  
18  import gate.Annotation;
19  
20  public class AnnotationDiffer {
21  
22  
23    public static interface Pairing{
24      public Annotation getKey();
25      public Annotation getResponse();
26      public int getType();
27    }
28  
29    /**
30     * Computes a diff between two collections of annotations.
31     * @param key
32     * @param response
33     * @return a list of {@link Pairing} objects representing the pairing set
34     * that results in the best score.
35     */
36    public List calculateDiff(Collection key, Collection response){
37      //initialise data structures
38      if(key == null || key.size() == 0)
39        keyList = new ArrayList();
40      else
41        keyList = new ArrayList(key);
42  
43      if(response == null || response.size() == 0)
44        responseList = new ArrayList();
45      else
46        responseList = new ArrayList(response);
47  
48      keyChoices = new ArrayList(keyList.size());
49      keyChoices.addAll(Collections.nCopies(keyList.size(), null));
50      responseChoices = new ArrayList(responseList.size());
51      responseChoices.addAll(Collections.nCopies(responseList.size(), null));
52  
53      possibleChoices = new ArrayList();
54  
55      //1) try all possible pairings
56      for(int i = 0; i < keyList.size(); i++){
57        for(int j =0; j < responseList.size(); j++){
58          Annotation keyAnn = (Annotation)keyList.get(i);
59          Annotation resAnn = (Annotation)responseList.get(j);
60          PairingImpl choice = null;
61          if(keyAnn.coextensive(resAnn)){
62            //we have full overlap -> CORRECT or WRONG
63            if(keyAnn.isCompatible(resAnn, significantFeaturesSet)){
64              //we have a full match
65              choice = new PairingImpl(i, j, CORRECT);
66            }else{
67              //the two annotations are coextensive but don't match
68              //we have a missmatch
69              choice = new PairingImpl(i, j, WRONG);
70            }
71          }else if(keyAnn.overlaps(resAnn)){
72            //we have partial overlap -> PARTIALLY_CORRECT or WRONG
73            if(keyAnn.isPartiallyCompatible(resAnn, significantFeaturesSet)){
74              choice = new PairingImpl(i, j, PARTIALLY_CORRECT);
75            }else{
76              choice = new PairingImpl(i, j, WRONG);
77            }
78          }
79  
80  //        // >>>>>>
81  //        if(significantFeaturesSet == null){
82  //          //full comaptibility required
83  //          if(keyAnn.isCompatible(resAnn)){
84  //            choice = new PairingImpl(i, j, CORRECT);
85  //          }else if(keyAnn.isPartiallyCompatible(resAnn)){
86  //            choice = new PairingImpl(i, j, PARTIALLY_CORRECT);
87  //          }
88  //        }else{
89  //          //compatibility tests restricted to a set of features
90  //          if(keyAnn.isCompatible(resAnn, significantFeaturesSet)){
91  //            choice = new PairingImpl(i, j, CORRECT);
92  //          }else if(keyAnn.isPartiallyCompatible(resAnn, significantFeaturesSet)){
93  //            choice = new PairingImpl(i, j, PARTIALLY_CORRECT);
94  //          }
95  //        }
96  //        // <<<<<<
97  
98          //add the new choice if any
99          if (choice != null) {
100           addPairing(choice, i, keyChoices);
101           addPairing(choice, j, responseChoices);
102           possibleChoices.add(choice);
103         }
104       }//for j
105     }//for i
106 
107     //2) from all possible pairings, find the maximal set that also
108     //maximises the total score
109     Collections.sort(possibleChoices, new PairingScoreComparator());
110     Collections.reverse(possibleChoices);
111     finalChoices = new ArrayList();
112     correctMatches = 0;
113     partiallyCorrectMatches = 0;
114     missing = 0;
115     spurious = 0;
116 
117     while(!possibleChoices.isEmpty()){
118       PairingImpl bestChoice = (PairingImpl)possibleChoices.remove(0);
119       bestChoice.consume();
120       finalChoices.add(bestChoice);
121       switch(bestChoice.type){
122         case CORRECT:{
123           if(correctAnnotations == null) correctAnnotations = new HashSet();
124           correctAnnotations.add(bestChoice.getResponse());
125           correctMatches++;
126           break;
127         }
128         case PARTIALLY_CORRECT:{
129           if(partiallyCorrectAnnotations == null) partiallyCorrectAnnotations = new HashSet();
130           partiallyCorrectAnnotations.add(bestChoice.getResponse());
131           partiallyCorrectMatches++;
132           break;
133         }
134         case WRONG:{
135           if(bestChoice.getKey() != null){
136             //we have a missed key
137             if(missingAnnotations == null) missingAnnotations = new HashSet();
138             missingAnnotations.add(bestChoice.getKey());
139             missing ++;
140           }
141           if(bestChoice.getResponse() != null){
142             //we have a spurious response
143             if(spuriousAnnotations == null) spuriousAnnotations = new HashSet();
144             spuriousAnnotations.add(bestChoice.getResponse());
145             spurious ++;
146           }
147           break;
148         }
149         default:{
150           throw new GateRuntimeException("Invalid pairing type: " +
151                   bestChoice.type);
152         }
153       }
154     }
155     //add choices for the incorrect matches (MISSED, SPURIOUS)
156     //get the unmatched keys
157     for(int i = 0; i < keyChoices.size(); i++){
158       List aList = (List)keyChoices.get(i);
159       if(aList == null || aList.isEmpty()){
160         if(missingAnnotations == null) missingAnnotations = new HashSet();
161         missingAnnotations.add((Annotation)(keyList.get(i)));
162         finalChoices.add(new PairingImpl(i, -1, WRONG));
163         missing ++;
164       }
165     }
166 
167     //get the unmatched responses
168     for(int i = 0; i < responseChoices.size(); i++){
169       List aList = (List)responseChoices.get(i);
170       if(aList == null || aList.isEmpty()){
171         if(spuriousAnnotations == null) spuriousAnnotations = new HashSet();
172         spuriousAnnotations.add((Annotation)(responseList.get(i)));
173         finalChoices.add(new PairingImpl(-1, i, WRONG));
174         spurious ++;
175       }
176     }
177 
178     return finalChoices;
179   }
180 
181   public double getPrecisionStrict(){
182     if(responseList.size() == 0) {
183       return 1.0;
184     }
185     return (double)((double)correctMatches / (double)responseList.size());
186   }
187 
188   public double getRecallStrict(){
189     if(keyList.size() == 0) {
190       return 1.0;
191     }
192     return (double)((double)correctMatches / (double)keyList.size());
193   }
194 
195   public double getPrecisionLenient(){
196     if(responseList.size() == 0) {
197       return 1.0;
198     }
199     return (double)((double)(correctMatches + partiallyCorrectMatches) / (double)responseList.size());
200   }
201 
202   public double getPrecisionAverage() {
203     return (double)((double)((double)getPrecisionLenient() + getPrecisionStrict()) / (double)(2.0));
204   }
205 
206   public double getRecallLenient(){
207     if(keyList.size() == 0) {
208       return 1.0;
209     }
210     return (double)((double)(correctMatches + partiallyCorrectMatches) / (double)keyList.size());
211   }
212 
213   public double getRecallAverage() {
214     return (double)((double)((double) getRecallLenient() + getRecallStrict()) / (double)(2.0));
215   }
216 
217   public double getFMeasureStrict(double beta){
218     double precision = getPrecisionStrict();
219     double recall = getRecallStrict();
220     double betaSq = beta * beta;
221     double answer = (double)(((betaSq + 1) * precision * recall ) /
222            (double)(betaSq * precision + recall));
223       return answer;
224   }
225 
226   public double getFMeasureLenient(double beta){
227     double precision = getPrecisionLenient();
228     double recall = getRecallLenient();
229     double betaSq = beta * beta;
230     double answer = (double)(((betaSq + 1) * precision * recall ) /
231            (double)(betaSq * precision + recall));
232       return answer;
233   }
234 
235   public double getFMeasureAverage(double beta) {
236     double answer = (double)(((double)((double) getFMeasureLenient(beta) + getFMeasureStrict(beta)) / (double)(2.0)));
237       return answer;
238   }
239 
240   public int getCorrectMatches(){
241     return correctMatches;
242   }
243 
244   public int getPartiallyCorrectMatches(){
245     return partiallyCorrectMatches;
246   }
247 
248   public int getMissing(){
249     return missing;
250   }
251 
252   public int getSpurious(){
253     return spurious;
254   }
255 
256   public int getFalsePositivesStrict(){
257     return responseList.size() - correctMatches;
258   }
259 
260   public int getFalsePositivesLenient(){
261     return responseList.size() - correctMatches - partiallyCorrectMatches;
262   }
263 
264 
265   public int getKeysCount() {
266     return keyList.size();
267   }
268 
269   public int getResponsesCount() {
270     return responseList.size();
271   }
272 
273   public void printMissmatches(){
274     //get the partial correct matches
275     Iterator iter = finalChoices.iterator();
276     while(iter.hasNext()){
277       PairingImpl aChoice = (PairingImpl)iter.next();
278       switch(aChoice.type){
279         case PARTIALLY_CORRECT:{
280           System.out.println("Missmatch (partially correct):");
281           System.out.println("Key: " + keyList.get(aChoice.keyIndex).toString());
282           System.out.println("Response: " + responseList.get(aChoice.responseIndex).toString());
283           break;
284         }
285       }
286     }
287 
288     //get the unmatched keys
289     for(int i = 0; i < keyChoices.size(); i++){
290       List aList = (List)keyChoices.get(i);
291       if(aList == null || aList.isEmpty()){
292         System.out.println("Missed Key: " + keyList.get(i).toString());
293       }
294     }
295 
296     //get the unmatched responses
297     for(int i = 0; i < responseChoices.size(); i++){
298       List aList = (List)responseChoices.get(i);
299       if(aList == null || aList.isEmpty()){
300         System.out.println("Spurious Response: " + responseList.get(i).toString());
301       }
302     }
303   }
304 
305 
306 
307   /**
308    * Performs some basic checks over the internal data structures from the last
309    * run.
310    * @throws Exception
311    */
312   void sanityCheck()throws Exception{
313     //all keys and responses should have at most one choice left
314     Iterator iter =keyChoices.iterator();
315     while(iter.hasNext()){
316       List choices = (List)iter.next();
317       if(choices != null){
318         if(choices.size() > 1){
319           throw new Exception("Multiple choices found!");
320         }else if(!choices.isEmpty()){
321           //size must be 1
322           PairingImpl aChoice = (PairingImpl)choices.get(0);
323           //the SAME choice should be found for the associated response
324           List otherChoices = (List)responseChoices.get(aChoice.responseIndex);
325           if(otherChoices == null ||
326              otherChoices.size() != 1 ||
327              otherChoices.get(0) != aChoice){
328             throw new Exception("Reciprocity error!");
329           }
330         }
331       }
332     }
333 
334     iter =responseChoices.iterator();
335     while(iter.hasNext()){
336       List choices = (List)iter.next();
337       if(choices != null){
338         if(choices.size() > 1){
339           throw new Exception("Multiple choices found!");
340         }else if(!choices.isEmpty()){
341           //size must be 1
342           PairingImpl aChoice = (PairingImpl)choices.get(0);
343           //the SAME choice should be found for the associated response
344           List otherChoices = (List)keyChoices.get(aChoice.keyIndex);
345           if(otherChoices == null){
346             throw new Exception("Reciprocity error : null!");
347           }else if(otherChoices.size() != 1){
348             throw new Exception("Reciprocity error: not 1!");
349           }else if(otherChoices.get(0) != aChoice){
350             throw new Exception("Reciprocity error: different!");
351           }
352         }
353       }
354     }
355   }
356   /**
357    *
358    * @param pairing the pairing to be added
359    * @param index the index in the list of pairings
360    * @param listOfPairings the list of {@link Pairing}s where the
361    * pairing should be added
362    */
363   protected void addPairing(PairingImpl pairing, int index, List listOfPairings){
364     List existingChoices = (List)listOfPairings.get(index);
365     if(existingChoices == null){
366       existingChoices = new ArrayList();
367       listOfPairings.set(index, existingChoices);
368     }
369     existingChoices.add(pairing);
370   }
371 
372   public java.util.Set getSignificantFeaturesSet() {
373     return significantFeaturesSet;
374   }
375 
376   public void setSignificantFeaturesSet(java.util.Set significantFeaturesSet) {
377     this.significantFeaturesSet = significantFeaturesSet;
378   }
379 
380   /**
381    * Represents a pairing of a key annotation with a response annotation and
382    * the associated score for that pairing.
383    */
384    public class PairingImpl implements Pairing{
385     PairingImpl(int keyIndex, int responseIndex, int type) {
386       this.keyIndex = keyIndex;
387       this.responseIndex = responseIndex;
388       this.type = type;
389       scoreCalculated = false;
390       }
391 
392     public int getScore(){
393       if(scoreCalculated) return score;
394       else{
395         calculateScore();
396         return score;
397       }
398     }
399 
400     public Annotation getKey(){
401       return keyIndex == -1 ? null : (Annotation)keyList.get(keyIndex);
402     }
403 
404     public Annotation getResponse(){
405       return responseIndex == -1 ? null :
406         (Annotation)responseList.get(responseIndex);
407     }
408 
409     public int getType(){
410       return type;
411     }
412 
413     /**
414      * Removes all mutually exclusive OTHER choices possible from
415      * the data structures.
416      * <tt>this</tt> gets removed from {@link #possibleChoices} as well.
417      */
418     public void consume(){
419       possibleChoices.remove(this);
420       List sameKeyChoices = (List)keyChoices.get(keyIndex);
421       sameKeyChoices.remove(this);
422       possibleChoices.removeAll(sameKeyChoices);
423 
424       List sameResponseChoices = (List)responseChoices.get(responseIndex);
425       sameResponseChoices.remove(this);
426       possibleChoices.removeAll(sameResponseChoices);
427 
428       Iterator iter = new ArrayList(sameKeyChoices).iterator();
429       while(iter.hasNext()){
430         ((PairingImpl)iter.next()).remove();
431       }
432       iter = new ArrayList(sameResponseChoices).iterator();
433       while(iter.hasNext()){
434         ((PairingImpl)iter.next()).remove();
435       }
436       sameKeyChoices.add(this);
437       sameResponseChoices.add(this);
438     }
439 
440     /**
441      * Removes this choice from the two lists it belongs to
442      */
443     protected void remove(){
444       List fromKey = (List)keyChoices.get(keyIndex);
445       fromKey.remove(this);
446       List fromResponse = (List)responseChoices.get(responseIndex);
447       fromResponse.remove(this);
448     }
449 
450     /**
451      * Calculates the score for this choice as:
452      * type - sum of all the types of all OTHER mutually exclusive choices
453      */
454     void calculateScore(){
455       //this needs to be a set so we don't count conflicts twice
456       Set conflictSet = new HashSet();
457       //add all the choices from the same response annotation
458       conflictSet.addAll((List)responseChoices.get(responseIndex));
459       //add all the choices from the same key annotation
460       conflictSet.addAll((List)keyChoices.get(keyIndex));
461       //remove this choice from the conflict set
462       conflictSet.remove(this);
463       score = type;
464       Iterator conflictIter = conflictSet.iterator();
465       while(conflictIter.hasNext()) score -= ((PairingImpl)conflictIter.next()).type;
466       scoreCalculated = true;
467     }
468 
469     int keyIndex;
470     int responseIndex;
471     int type;
472     int score;
473     boolean scoreCalculated;
474   }
475 
476   protected static class PairingScoreComparator implements Comparator{
477     /**
478      * Compares two choices:
479      * the better score is preferred;
480      * for the same score the better type is preferred (exact matches are
481      * preffered to partial ones).
482      * @return a positive value if the first pairing is better than the second,
483      * zero if they score the same or negative otherwise.
484      */
485 
486     public int compare(Object o1, Object o2){
487       PairingImpl first = (PairingImpl)o1;
488       PairingImpl second = (PairingImpl)o2;
489       //compare by score
490       int res = first.getScore() - second.getScore();
491       //compare by type
492       if(res == 0) res = first.getType() - second.getType();
493       //compare by completeness (a wrong match with both key and response
494       //is "better" than one with only key or response
495       if(res == 0){
496         res = (first.getKey() == null ? 0 : 1) +
497               (first.getResponse() == null ? 0 : 1) +
498               (second.getKey() == null ? 0 : -1) +
499               (second.getResponse() == null ? 0 : -1);
500       }
501       return res;
502     }
503   }
504 
505 
506   public static class PairingOffsetComparator implements Comparator{
507     /**
508      * Compares two choices based on start offset of key (or response
509      * if key not present) and type if offsets are equal.
510      */
511     public int compare(Object o1, Object o2){
512       Pairing first = (Pairing)o1;
513       Pairing second = (Pairing)o2;
514       Annotation key1 = first.getKey();
515       Annotation key2 = second.getKey();
516       Annotation res1 = first.getResponse();
517       Annotation res2 = second.getResponse();
518       Long start1 = key1 == null ? null : key1.getStartNode().getOffset();
519       if(start1 == null) start1 = res1.getStartNode().getOffset();
520       Long start2 = key2 == null ? null : key2.getStartNode().getOffset();
521       if(start2 == null) start2 = res2.getStartNode().getOffset();
522       int res = start1.compareTo(start2);
523       if(res == 0){
524         //compare by type
525         res = second.getType() - first.getType();
526       }
527 
528 //
529 //
530 //
531 //      //choices with keys are smaller than ones without
532 //      if(key1 == null && key2 != null) return 1;
533 //      if(key1 != null && key2 == null) return -1;
534 //      if(key1 == null){
535 //        //both keys are null
536 //        res = res1.getStartNode().getOffset().
537 //            compareTo(res2.getStartNode().getOffset());
538 //        if(res == 0) res = res1.getEndNode().getOffset().
539 //              compareTo(res2.getEndNode().getOffset());
540 //        if(res == 0) res = second.getType() - first.getType();
541 //      }else{
542 //        //both keys are present
543 //        res = key1.getStartNode().getOffset().compareTo(
544 //            key2.getStartNode().getOffset());
545 //
546 //        if(res == 0){
547 //          //choices with responses are smaller than ones without
548 //          if(res1 == null && res2 != null) return 1;
549 //          if(res1 != null && res2 == null) return -1;
550 //          if(res1 != null){
551 //            res = res1.getStartNode().getOffset().
552 //                compareTo(res2.getStartNode().getOffset());
553 //          }
554 //          if(res == 0)res = key1.getEndNode().getOffset().compareTo(
555 //                  key2.getEndNode().getOffset());
556 //          if(res == 0 && res1 != null){
557 //              res = res1.getEndNode().getOffset().
558 //                  compareTo(res2.getEndNode().getOffset());
559 //          }
560 //          if(res == 0) res = second.getType() - first.getType();
561 //        }
562 //      }
563       return res;
564     }
565 
566   }
567 
568   /**
569    * A method that returns specific type of annotations
570    * @param type
571    * @return a {@link Set} of {@link Pairing}s.
572    */
573   public Set getAnnotationsOfType(int type) {
574     switch(type) {
575       case CORRECT_TYPE:
576         return (correctAnnotations == null)? new HashSet() : correctAnnotations;
577       case PARTIALLY_CORRECT_TYPE:
578         return (partiallyCorrectAnnotations == null) ? new HashSet() : partiallyCorrectAnnotations;
579       case SPURIOUS_TYPE:
580         return (spuriousAnnotations == null) ? new HashSet() : spuriousAnnotations;
581       case MISSING_TYPE:
582         return (missingAnnotations == null) ? new HashSet() : missingAnnotations;
583       default:
584         return new HashSet();
585     }
586   }
587 
588 
589   public HashSet correctAnnotations, partiallyCorrectAnnotations, missingAnnotations, spuriousAnnotations;
590 
591 
592   /** A correct type when all annotation are correct represented by Green color*/
593   public static final int CORRECT_TYPE = 1;
594   /** A partially correct type when all annotation are corect represented
595    *  by Blue color*/
596   public static final int PARTIALLY_CORRECT_TYPE = 2;
597   /** A spurious type when annotations in response were not present in key.
598    *  Represented by Red color*/
599   public static final int SPURIOUS_TYPE = 3;
600   /** A missing type when annotations in key were not present in response
601    *  Represented by Yellow color*/
602   public static final int MISSING_TYPE = 4;
603 
604 
605   public static final int CORRECT = 2;
606   public static final int PARTIALLY_CORRECT = 1;
607   public static final int WRONG = 0;
608 
609   private java.util.Set significantFeaturesSet;
610 
611   protected int correctMatches;
612   protected int partiallyCorrectMatches;
613   protected int missing;
614   protected int spurious;
615 
616   /**
617    * A list with all the key annotations
618    */
619   protected List keyList;
620 
621   /**
622    * A list with all the response annotations
623    */
624   protected List responseList;
625 
626   /**
627    * A list of lists representing all possible choices for each key
628    */
629   protected List keyChoices;
630 
631   /**
632    * A list of lists representing all possible choices for each response
633    */
634   protected List responseChoices;
635 
636   /**
637    * All the posible choices are added to this list for easy iteration.
638    */
639   protected List possibleChoices;
640 
641   /**
642    * A list with the choices selected for the best result.
643    */
644   protected List finalChoices;
645 
646 }