|
AnnotationDiffer |
|
/*
 * Copyright (c) 1998-2001, The University of Sheffield.
 *
 * This file is part of GATE (see http://gate.ac.uk/), and is free
 * software, licenced under the GNU Library General Public License,
 * Version 2, June 1991 (in the distribution as file licence.html,
 * and also available at http://gate.ac.uk/gate/licence.html).
 *
 * Valentin Tablan 28/01/2003
 *
 * $Id: AnnotationDiffer.java,v 1.4 2003/01/30 15:58:50 valyt Exp $
 *
 */
package gate.util;

import java.util.*;
import gate.*;

/**
 * Compares a collection of "key" annotations against a collection of
 * "response" annotations, pairs them up and derives precision, recall and
 * F-measure figures from the resulting pairing.
 *
 * Typical use: optionally call {@link #setSignificantFeaturesSet(java.util.Set)},
 * then {@link #calculateDiff(Collection, Collection)}, then query the
 * getters. The getters read state produced by the last call to
 * <tt>calculateDiff</tt> and must not be called before it.
 */
public class AnnotationDiffer {

  /**
   * Computes a diff between two collections of annotations.
   *
   * Every key/response pair is classified as {@link #CORRECT},
   * {@link #PARTIALLY_CORRECT} or not a candidate at all. The candidate
   * pairings are then ranked (see {@link Choice#compareTo(Object)}) and
   * selected greedily, best first; selecting a pairing discards every other
   * candidate that uses the same key or the same response.
   *
   * @param key the reference annotations; elements are cast to
   * {@link Annotation} when they are compared
   * @param response the annotations being evaluated; elements are cast to
   * {@link Annotation} when they are compared
   */
  public void calculateDiff(Collection key, Collection response){
    //initialise data structures
    keyList = new ArrayList(key);
    responseList = new ArrayList(response);

    //one (initially null) slot per annotation; addChoice() fills them lazily
    keyChoices = new ArrayList(keyList.size());
    keyChoices.addAll(Collections.nCopies(keyList.size(), null));
    responseChoices = new ArrayList(responseList.size());
    responseChoices.addAll(Collections.nCopies(responseList.size(), null));

    possibleChoices = new ArrayList();

    //1) try all possible pairings
    for(int i = 0; i < keyList.size(); i++){
      for(int j = 0; j < responseList.size(); j++){
        Choice choice = createChoice(i, j);
        //add the new choice if any
        if(choice != null){
          addChoice(choice, i, keyChoices);
          addChoice(choice, j, responseChoices);
          possibleChoices.add(choice);
        }
      }//for j
    }//for i

    //2) rank the pairings, best first.
    //NOTE: sort + reverse is kept on purpose (rather than sorting with a
    //reversed comparator): it defines the order of tied choices and hence
    //which of them gets selected.
    Collections.sort(possibleChoices);
    Collections.reverse(possibleChoices);
    finalChoices = new ArrayList();
    correctMatches = 0;
    partiallyCorrectMatches = 0;

    //3) greedily take the best remaining choice; consume() drops all the
    //choices that conflict with it from possibleChoices
    while(!possibleChoices.isEmpty()){
      Choice bestChoice = (Choice)possibleChoices.remove(0);
      bestChoice.consume();
      finalChoices.add(bestChoice);
      switch(bestChoice.type){
        case CORRECT:{
          correctMatches++;
          break;
        }
        case PARTIALLY_CORRECT:{
          partiallyCorrectMatches++;
          break;
        }
      }
    }
  }

  /**
   * Classifies the pairing of one key with one response.
   *
   * @param keyIndex index of the key annotation in {@link #keyList}
   * @param responseIndex index of the response annotation in
   * {@link #responseList}
   * @return a new {@link Choice} of type {@link #CORRECT} or
   * {@link #PARTIALLY_CORRECT}, or <tt>null</tt> if the two annotations are
   * neither compatible nor partially compatible
   */
  private Choice createChoice(int keyIndex, int responseIndex){
    Annotation keyAnn = (Annotation)keyList.get(keyIndex);
    Annotation resAnn = (Annotation)responseList.get(responseIndex);
    if(significantFeaturesSet == null){
      //no feature restriction: use the unrestricted compatibility tests
      if(keyAnn.isCompatible(resAnn)){
        return new Choice(keyIndex, responseIndex, CORRECT);
      }
      if(keyAnn.isPartiallyCompatible(resAnn)){
        return new Choice(keyIndex, responseIndex, PARTIALLY_CORRECT);
      }
    }else{
      //compatibility tests restricted to a set of features
      if(keyAnn.isCompatible(resAnn, significantFeaturesSet)){
        return new Choice(keyIndex, responseIndex, CORRECT);
      }
      if(keyAnn.isPartiallyCompatible(resAnn, significantFeaturesSet)){
        return new Choice(keyIndex, responseIndex, PARTIALLY_CORRECT);
      }
    }
    return null;
  }

  /**
   * @return the fraction of responses that were matched exactly;
   * 1.0 if there are no responses at all
   */
  public double getPrecisionStrict(){
    return ratio(correctMatches, responseList.size());
  }

  /**
   * @return the fraction of keys that were matched exactly;
   * 1.0 if there are no keys at all
   */
  public double getRecallStrict(){
    return ratio(correctMatches, keyList.size());
  }

  /**
   * @return the fraction of responses that were matched exactly or
   * partially; 1.0 if there are no responses at all
   */
  public double getPrecisionLenient(){
    return ratio(correctMatches + partiallyCorrectMatches,
                 responseList.size());
  }

  /**
   * @return the fraction of keys that were matched exactly or partially;
   * 1.0 if there are no keys at all
   */
  public double getRecallLenient(){
    return ratio(correctMatches + partiallyCorrectMatches, keyList.size());
  }

  /**
   * @param beta the weighting between precision and recall
   * @return the F-measure computed from the strict precision and recall;
   * 0.0 when the formula's denominator is zero
   */
  public double getFMeasureStrict(double beta){
    return fMeasure(getPrecisionStrict(), getRecallStrict(), beta);
  }

  /**
   * @param beta the weighting between precision and recall
   * @return the F-measure computed from the lenient precision and recall;
   * 0.0 when the formula's denominator is zero
   */
  public double getFMeasureLenient(double beta){
    return fMeasure(getPrecisionLenient(), getRecallLenient(), beta);
  }

  /**
   * Divides a match count by a total, avoiding the NaN that 0/0 would give.
   *
   * @return <tt>matches / total</tt>, or 1.0 when <tt>total</tt> is zero
   * (nothing was expected, so nothing was missed)
   */
  private static double ratio(int matches, int total){
    if(total == 0){
      return 1.0;
    }
    return (double)matches / total;
  }

  /**
   * Computes
   * <tt>((beta^2 + 1) * precision * recall) / (beta^2 * precision + recall)</tt>.
   *
   * @return the value of the formula, or 0.0 when its denominator is zero
   * (which would otherwise produce NaN)
   */
  private static double fMeasure(double precision, double recall,
                                 double beta){
    double betaSq = beta * beta;
    double denominator = betaSq * precision + recall;
    if(denominator == 0){
      return 0.0;
    }
    return ((betaSq + 1) * precision * recall) / denominator;
  }

  /**
   * @return the number of responses that were not matched exactly
   */
  public int getFalsePositivesStrict(){
    return responseList.size() - correctMatches;
  }

  /**
   * @return the number of responses that were matched neither exactly nor
   * partially
   */
  public int getFalsePositivesLenient(){
    return responseList.size() - correctMatches - partiallyCorrectMatches;
  }

  /**
   * Prints to <tt>System.out</tt> the partially correct pairings, the keys
   * left without a response and the responses left without a key, as found
   * by the last call to {@link #calculateDiff(Collection, Collection)}.
   */
  public void printMissmatches(){
    //get the partial correct matches
    Iterator iter = finalChoices.iterator();
    while(iter.hasNext()){
      Choice aChoice = (Choice)iter.next();
      if(aChoice.type == PARTIALLY_CORRECT){
        System.out.println("Missmatch (partially correct):");
        System.out.println("Key: " +
                           keyList.get(aChoice.keyIndex).toString());
        System.out.println("Response: " +
                           responseList.get(aChoice.responseIndex).toString());
      }
    }

    //get the unmatched keys
    for(int i = 0; i < keyChoices.size(); i++){
      List aList = (List)keyChoices.get(i);
      if(aList == null || aList.isEmpty()){
        System.out.println("Unmatched Key: " + keyList.get(i).toString());
      }
    }

    //get the unmatched responses
    for(int i = 0; i < responseChoices.size(); i++){
      List aList = (List)responseChoices.get(i);
      if(aList == null || aList.isEmpty()){
        //this used to be (wrongly) labelled "Unmatched Key"
        System.out.println("Unmatched Response: " +
                           responseList.get(i).toString());
      }
    }
  }

  /**
   * Performs some basic checks over the internal data structures from the
   * last run: every key and every response must have at most one choice
   * left, and that choice must be the very same object on both sides.
   *
   * @throws Exception if one of the checks fails
   */
  void sanityCheck()throws Exception{
    //all keys and responses should have at most one choice left
    Iterator iter = keyChoices.iterator();
    while(iter.hasNext()){
      List choices = (List)iter.next();
      if(choices != null){
        if(choices.size() > 1){
          throw new Exception("Multiple choices found!");
        }else if(!choices.isEmpty()){
          //size must be 1
          Choice aChoice = (Choice)choices.get(0);
          //the SAME choice should be found for the associated response
          List otherChoices =
            (List)responseChoices.get(aChoice.responseIndex);
          if(otherChoices == null ||
             otherChoices.size() != 1 ||
             otherChoices.get(0) != aChoice){
            throw new Exception("Reciprocity error!");
          }
        }
      }
    }

    iter = responseChoices.iterator();
    while(iter.hasNext()){
      List choices = (List)iter.next();
      if(choices != null){
        if(choices.size() > 1){
          throw new Exception("Multiple choices found!");
        }else if(!choices.isEmpty()){
          //size must be 1
          Choice aChoice = (Choice)choices.get(0);
          //the SAME choice should be found for the associated key
          List otherChoices = (List)keyChoices.get(aChoice.keyIndex);
          if(otherChoices == null){
            throw new Exception("Reciprocity error : null!");
          }else if(otherChoices.size() != 1){
            throw new Exception("Reciprocity error: not 1!");
          }else if(otherChoices.get(0) != aChoice){
            throw new Exception("Reciprocity error: different!");
          }
        }
      }
    }
  }

  /**
   * Adds a choice to the list of choices stored at a given position,
   * creating that list first if the position is still empty.
   *
   * @param choice the choice to be added
   * @param index the position, in <tt>listOfChoices</tt>, of the list the
   * choice should be added to
   * @param listOfChoices the list of lists of choices (one list per
   * annotation)
   */
  protected void addChoice(Choice choice, int index, List listOfChoices){
    List existingChoices = (List)listOfChoices.get(index);
    if(existingChoices == null){
      existingChoices = new ArrayList();
      listOfChoices.set(index, existingChoices);
    }
    existingChoices.add(choice);
  }

  /**
   * @return the set of features the compatibility tests are restricted to,
   * or <tt>null</tt> if no restriction is in place
   */
  public java.util.Set getSignificantFeaturesSet() {
    return significantFeaturesSet;
  }

  /**
   * @param significantFeaturesSet the set of features the compatibility
   * tests should be restricted to; <tt>null</tt> selects the unrestricted
   * tests
   */
  public void setSignificantFeaturesSet(java.util.Set significantFeaturesSet) {
    this.significantFeaturesSet = significantFeaturesSet;
  }

  /**
   * Three-way comparison of two ints that, unlike subtraction, cannot
   * overflow.
   *
   * @return a negative value, zero or a positive value as <tt>a</tt> is
   * less than, equal to or greater than <tt>b</tt>
   */
  private static int compareInts(int a, int b){
    if(a < b){
      return -1;
    }
    return (a == b) ? 0 : 1;
  }

  /**
   * Represents a pairing of a key annotation with a response annotation and
   * the associated score for that pairing.
   */
  class Choice implements Comparable{
    Choice(int keyIndex, int responseIndex, int type) {
      this.keyIndex = keyIndex;
      this.responseIndex = responseIndex;
      this.type = type;
      scoreCalculated = false;
    }

    /**
     * @return the score of this choice; it is calculated on the first call
     * and the cached value is returned afterwards
     */
    int getScore(){
      if(!scoreCalculated){
        calculateScore();
      }
      return score;
    }

    /**
     * Removes all mutually exclusive OTHER choices possible from
     * the data structures.
     * <tt>this</tt> gets removed from
     * {@link AnnotationDiffer#possibleChoices} as well.
     */
    public void consume(){
      possibleChoices.remove(this);
      List sameKeyChoices = (List)keyChoices.get(keyIndex);
      sameKeyChoices.remove(this);
      possibleChoices.removeAll(sameKeyChoices);

      List sameResponseChoices = (List)responseChoices.get(responseIndex);
      sameResponseChoices.remove(this);
      possibleChoices.removeAll(sameResponseChoices);

      //iterate over copies: remove() modifies the lists being walked
      Iterator iter = new ArrayList(sameKeyChoices).iterator();
      while(iter.hasNext()){
        ((Choice)iter.next()).remove();
      }
      iter = new ArrayList(sameResponseChoices).iterator();
      while(iter.hasNext()){
        ((Choice)iter.next()).remove();
      }
      //this choice is the only one left for its key and its response
      sameKeyChoices.add(this);
      sameResponseChoices.add(this);
    }

    /**
     * Removes this choice from the two lists it belongs to.
     */
    protected void remove(){
      List fromKey = (List)keyChoices.get(keyIndex);
      fromKey.remove(this);
      List fromResponse = (List)responseChoices.get(responseIndex);
      fromResponse.remove(this);
    }

    /**
     * Compares two choices:
     * the better score is preferred;
     * for the same score the better type is preferred (exact matches are
     * preferred to partial ones).
     *
     * @param other the {@link Choice} to compare with
     * @return a negative value, zero or a positive value as this choice is
     * worse than, equivalent to or better than the other one
     */
    public int compareTo(Object other){
      Choice otherChoice = (Choice)other;
      int res = compareInts(getScore(), otherChoice.getScore());
      if(res == 0){
        res = compareInts(type, otherChoice.type);
      }
      return res;
    }

    /**
     * Calculates the score for this choice as:
     * type - sum of all the types of all OTHER mutually exclusive choices
     */
    void calculateScore(){
      //this needs to be a set so we don't count conflicts twice
      Set conflictSet = new HashSet();
      //add all the choices from the same response annotation
      conflictSet.addAll((List)responseChoices.get(responseIndex));
      //add all the choices from the same key annotation
      conflictSet.addAll((List)keyChoices.get(keyIndex));
      //remove this choice from the conflict set
      conflictSet.remove(this);
      score = type;
      Iterator conflictIter = conflictSet.iterator();
      while(conflictIter.hasNext()){
        score -= ((Choice)conflictIter.next()).type;
      }
      scoreCalculated = true;
    }

    /** Index of the key annotation in the enclosing differ's key list. */
    int keyIndex;
    /** Index of the response annotation in the enclosing differ's response list. */
    int responseIndex;
    /** Either {@link #CORRECT} or {@link #PARTIALLY_CORRECT}. */
    int type;
    /** Cached score; only meaningful once <tt>scoreCalculated</tt> is true. */
    int score;
    boolean scoreCalculated;
  }

  /** Type of a pairing whose annotations are compatible. */
  public static final int CORRECT = 2;
  /** Type of a pairing whose annotations are only partially compatible. */
  public static final int PARTIALLY_CORRECT = 1;
  public static final int DIFFERENT = 0;

  /**
   * The features the compatibility tests are restricted to; <tt>null</tt>
   * means no restriction.
   */
  private java.util.Set significantFeaturesSet;

  /** The number of selected pairings of type {@link #CORRECT}. */
  protected int correctMatches;
  /** The number of selected pairings of type {@link #PARTIALLY_CORRECT}. */
  protected int partiallyCorrectMatches;

  /**
   * A list with all the key annotations
   */
  protected List keyList;

  /**
   * A list with all the response annotations
   */
  protected List responseList;

  /**
   * A list of lists representing all possible choices for each key
   */
  protected List keyChoices;

  /**
   * A list of lists representing all possible choices for each response
   */
  protected List responseChoices;

  /**
   * All the possible choices are added to this list for easy iteration.
   */
  protected List possibleChoices;

  /**
   * A list with the choices selected for the best result.
   */
  protected List finalChoices;

}
|
AnnotationDiffer |
|