1
14 package gate.util;
15
16 import java.util.*;
17
18 import gate.Annotation;
19
20 public class AnnotationDiffer {
21
22
23 public static interface Pairing{
24 public Annotation getKey();
25 public Annotation getResponse();
26 public int getType();
27 }
28
29
36 public List calculateDiff(Collection key, Collection response){
37 if(key == null || key.size() == 0)
39 keyList = new ArrayList();
40 else
41 keyList = new ArrayList(key);
42
43 if(response == null || response.size() == 0)
44 responseList = new ArrayList();
45 else
46 responseList = new ArrayList(response);
47
48 keyChoices = new ArrayList(keyList.size());
49 keyChoices.addAll(Collections.nCopies(keyList.size(), null));
50 responseChoices = new ArrayList(responseList.size());
51 responseChoices.addAll(Collections.nCopies(responseList.size(), null));
52
53 possibleChoices = new ArrayList();
54
55 for(int i = 0; i < keyList.size(); i++){
57 for(int j =0; j < responseList.size(); j++){
58 Annotation keyAnn = (Annotation)keyList.get(i);
59 Annotation resAnn = (Annotation)responseList.get(j);
60 PairingImpl choice = null;
61 if(keyAnn.coextensive(resAnn)){
62 if(keyAnn.isCompatible(resAnn, significantFeaturesSet)){
64 choice = new PairingImpl(i, j, CORRECT);
66 }else{
67 choice = new PairingImpl(i, j, WRONG);
70 }
71 }else if(keyAnn.overlaps(resAnn)){
72 if(keyAnn.isPartiallyCompatible(resAnn, significantFeaturesSet)){
74 choice = new PairingImpl(i, j, PARTIALLY_CORRECT);
75 }else{
76 choice = new PairingImpl(i, j, WRONG);
77 }
78 }
79
80
98 if (choice != null) {
100 addPairing(choice, i, keyChoices);
101 addPairing(choice, j, responseChoices);
102 possibleChoices.add(choice);
103 }
104 } }
107 Collections.sort(possibleChoices, new PairingScoreComparator());
110 Collections.reverse(possibleChoices);
111 finalChoices = new ArrayList();
112 correctMatches = 0;
113 partiallyCorrectMatches = 0;
114 missing = 0;
115 spurious = 0;
116
117 while(!possibleChoices.isEmpty()){
118 PairingImpl bestChoice = (PairingImpl)possibleChoices.remove(0);
119 bestChoice.consume();
120 finalChoices.add(bestChoice);
121 switch(bestChoice.type){
122 case CORRECT:{
123 if(correctAnnotations == null) correctAnnotations = new HashSet();
124 correctAnnotations.add(bestChoice.getResponse());
125 correctMatches++;
126 break;
127 }
128 case PARTIALLY_CORRECT:{
129 if(partiallyCorrectAnnotations == null) partiallyCorrectAnnotations = new HashSet();
130 partiallyCorrectAnnotations.add(bestChoice.getResponse());
131 partiallyCorrectMatches++;
132 break;
133 }
134 case WRONG:{
135 if(bestChoice.getKey() != null){
136 if(missingAnnotations == null) missingAnnotations = new HashSet();
138 missingAnnotations.add(bestChoice.getKey());
139 missing ++;
140 }
141 if(bestChoice.getResponse() != null){
142 if(spuriousAnnotations == null) spuriousAnnotations = new HashSet();
144 spuriousAnnotations.add(bestChoice.getResponse());
145 spurious ++;
146 }
147 break;
148 }
149 default:{
150 throw new GateRuntimeException("Invalid pairing type: " +
151 bestChoice.type);
152 }
153 }
154 }
155 for(int i = 0; i < keyChoices.size(); i++){
158 List aList = (List)keyChoices.get(i);
159 if(aList == null || aList.isEmpty()){
160 if(missingAnnotations == null) missingAnnotations = new HashSet();
161 missingAnnotations.add((Annotation)(keyList.get(i)));
162 finalChoices.add(new PairingImpl(i, -1, WRONG));
163 missing ++;
164 }
165 }
166
167 for(int i = 0; i < responseChoices.size(); i++){
169 List aList = (List)responseChoices.get(i);
170 if(aList == null || aList.isEmpty()){
171 if(spuriousAnnotations == null) spuriousAnnotations = new HashSet();
172 spuriousAnnotations.add((Annotation)(responseList.get(i)));
173 finalChoices.add(new PairingImpl(-1, i, WRONG));
174 spurious ++;
175 }
176 }
177
178 return finalChoices;
179 }
180
181 public double getPrecisionStrict(){
182 if(responseList.size() == 0) {
183 return 1.0;
184 }
185 return (double)((double)correctMatches / (double)responseList.size());
186 }
187
188 public double getRecallStrict(){
189 if(keyList.size() == 0) {
190 return 1.0;
191 }
192 return (double)((double)correctMatches / (double)keyList.size());
193 }
194
195 public double getPrecisionLenient(){
196 if(responseList.size() == 0) {
197 return 1.0;
198 }
199 return (double)((double)(correctMatches + partiallyCorrectMatches) / (double)responseList.size());
200 }
201
202 public double getPrecisionAverage() {
203 return (double)((double)((double)getPrecisionLenient() + getPrecisionStrict()) / (double)(2.0));
204 }
205
206 public double getRecallLenient(){
207 if(keyList.size() == 0) {
208 return 1.0;
209 }
210 return (double)((double)(correctMatches + partiallyCorrectMatches) / (double)keyList.size());
211 }
212
213 public double getRecallAverage() {
214 return (double)((double)((double) getRecallLenient() + getRecallStrict()) / (double)(2.0));
215 }
216
217 public double getFMeasureStrict(double beta){
218 double precision = getPrecisionStrict();
219 double recall = getRecallStrict();
220 double betaSq = beta * beta;
221 double answer = (double)(((betaSq + 1) * precision * recall ) /
222 (double)(betaSq * precision + recall));
223 return answer;
224 }
225
226 public double getFMeasureLenient(double beta){
227 double precision = getPrecisionLenient();
228 double recall = getRecallLenient();
229 double betaSq = beta * beta;
230 double answer = (double)(((betaSq + 1) * precision * recall ) /
231 (double)(betaSq * precision + recall));
232 return answer;
233 }
234
235 public double getFMeasureAverage(double beta) {
236 double answer = (double)(((double)((double) getFMeasureLenient(beta) + getFMeasureStrict(beta)) / (double)(2.0)));
237 return answer;
238 }
239
240 public int getCorrectMatches(){
241 return correctMatches;
242 }
243
244 public int getPartiallyCorrectMatches(){
245 return partiallyCorrectMatches;
246 }
247
248 public int getMissing(){
249 return missing;
250 }
251
252 public int getSpurious(){
253 return spurious;
254 }
255
256 public int getFalsePositivesStrict(){
257 return responseList.size() - correctMatches;
258 }
259
260 public int getFalsePositivesLenient(){
261 return responseList.size() - correctMatches - partiallyCorrectMatches;
262 }
263
264
265 public int getKeysCount() {
266 return keyList.size();
267 }
268
269 public int getResponsesCount() {
270 return responseList.size();
271 }
272
273 public void printMissmatches(){
274 Iterator iter = finalChoices.iterator();
276 while(iter.hasNext()){
277 PairingImpl aChoice = (PairingImpl)iter.next();
278 switch(aChoice.type){
279 case PARTIALLY_CORRECT:{
280 System.out.println("Missmatch (partially correct):");
281 System.out.println("Key: " + keyList.get(aChoice.keyIndex).toString());
282 System.out.println("Response: " + responseList.get(aChoice.responseIndex).toString());
283 break;
284 }
285 }
286 }
287
288 for(int i = 0; i < keyChoices.size(); i++){
290 List aList = (List)keyChoices.get(i);
291 if(aList == null || aList.isEmpty()){
292 System.out.println("Missed Key: " + keyList.get(i).toString());
293 }
294 }
295
296 for(int i = 0; i < responseChoices.size(); i++){
298 List aList = (List)responseChoices.get(i);
299 if(aList == null || aList.isEmpty()){
300 System.out.println("Spurious Response: " + responseList.get(i).toString());
301 }
302 }
303 }
304
305
306
307
312 void sanityCheck()throws Exception{
313 Iterator iter =keyChoices.iterator();
315 while(iter.hasNext()){
316 List choices = (List)iter.next();
317 if(choices != null){
318 if(choices.size() > 1){
319 throw new Exception("Multiple choices found!");
320 }else if(!choices.isEmpty()){
321 PairingImpl aChoice = (PairingImpl)choices.get(0);
323 List otherChoices = (List)responseChoices.get(aChoice.responseIndex);
325 if(otherChoices == null ||
326 otherChoices.size() != 1 ||
327 otherChoices.get(0) != aChoice){
328 throw new Exception("Reciprocity error!");
329 }
330 }
331 }
332 }
333
334 iter =responseChoices.iterator();
335 while(iter.hasNext()){
336 List choices = (List)iter.next();
337 if(choices != null){
338 if(choices.size() > 1){
339 throw new Exception("Multiple choices found!");
340 }else if(!choices.isEmpty()){
341 PairingImpl aChoice = (PairingImpl)choices.get(0);
343 List otherChoices = (List)keyChoices.get(aChoice.keyIndex);
345 if(otherChoices == null){
346 throw new Exception("Reciprocity error : null!");
347 }else if(otherChoices.size() != 1){
348 throw new Exception("Reciprocity error: not 1!");
349 }else if(otherChoices.get(0) != aChoice){
350 throw new Exception("Reciprocity error: different!");
351 }
352 }
353 }
354 }
355 }
356
363 protected void addPairing(PairingImpl pairing, int index, List listOfPairings){
364 List existingChoices = (List)listOfPairings.get(index);
365 if(existingChoices == null){
366 existingChoices = new ArrayList();
367 listOfPairings.set(index, existingChoices);
368 }
369 existingChoices.add(pairing);
370 }
371
372 public java.util.Set getSignificantFeaturesSet() {
373 return significantFeaturesSet;
374 }
375
376 public void setSignificantFeaturesSet(java.util.Set significantFeaturesSet) {
377 this.significantFeaturesSet = significantFeaturesSet;
378 }
379
380
384 public class PairingImpl implements Pairing{
385 PairingImpl(int keyIndex, int responseIndex, int type) {
386 this.keyIndex = keyIndex;
387 this.responseIndex = responseIndex;
388 this.type = type;
389 scoreCalculated = false;
390 }
391
392 public int getScore(){
393 if(scoreCalculated) return score;
394 else{
395 calculateScore();
396 return score;
397 }
398 }
399
400 public Annotation getKey(){
401 return keyIndex == -1 ? null : (Annotation)keyList.get(keyIndex);
402 }
403
404 public Annotation getResponse(){
405 return responseIndex == -1 ? null :
406 (Annotation)responseList.get(responseIndex);
407 }
408
409 public int getType(){
410 return type;
411 }
412
413
418 public void consume(){
419 possibleChoices.remove(this);
420 List sameKeyChoices = (List)keyChoices.get(keyIndex);
421 sameKeyChoices.remove(this);
422 possibleChoices.removeAll(sameKeyChoices);
423
424 List sameResponseChoices = (List)responseChoices.get(responseIndex);
425 sameResponseChoices.remove(this);
426 possibleChoices.removeAll(sameResponseChoices);
427
428 Iterator iter = new ArrayList(sameKeyChoices).iterator();
429 while(iter.hasNext()){
430 ((PairingImpl)iter.next()).remove();
431 }
432 iter = new ArrayList(sameResponseChoices).iterator();
433 while(iter.hasNext()){
434 ((PairingImpl)iter.next()).remove();
435 }
436 sameKeyChoices.add(this);
437 sameResponseChoices.add(this);
438 }
439
440
443 protected void remove(){
444 List fromKey = (List)keyChoices.get(keyIndex);
445 fromKey.remove(this);
446 List fromResponse = (List)responseChoices.get(responseIndex);
447 fromResponse.remove(this);
448 }
449
450
454 void calculateScore(){
455 Set conflictSet = new HashSet();
457 conflictSet.addAll((List)responseChoices.get(responseIndex));
459 conflictSet.addAll((List)keyChoices.get(keyIndex));
461 conflictSet.remove(this);
463 score = type;
464 Iterator conflictIter = conflictSet.iterator();
465 while(conflictIter.hasNext()) score -= ((PairingImpl)conflictIter.next()).type;
466 scoreCalculated = true;
467 }
468
469 int keyIndex;
470 int responseIndex;
471 int type;
472 int score;
473 boolean scoreCalculated;
474 }
475
476 protected static class PairingScoreComparator implements Comparator{
477
485
486 public int compare(Object o1, Object o2){
487 PairingImpl first = (PairingImpl)o1;
488 PairingImpl second = (PairingImpl)o2;
489 int res = first.getScore() - second.getScore();
491 if(res == 0) res = first.getType() - second.getType();
493 if(res == 0){
496 res = (first.getKey() == null ? 0 : 1) +
497 (first.getResponse() == null ? 0 : 1) +
498 (second.getKey() == null ? 0 : -1) +
499 (second.getResponse() == null ? 0 : -1);
500 }
501 return res;
502 }
503 }
504
505
506 public static class PairingOffsetComparator implements Comparator{
507
511 public int compare(Object o1, Object o2){
512 Pairing first = (Pairing)o1;
513 Pairing second = (Pairing)o2;
514 Annotation key1 = first.getKey();
515 Annotation key2 = second.getKey();
516 Annotation res1 = first.getResponse();
517 Annotation res2 = second.getResponse();
518 Long start1 = key1 == null ? null : key1.getStartNode().getOffset();
519 if(start1 == null) start1 = res1.getStartNode().getOffset();
520 Long start2 = key2 == null ? null : key2.getStartNode().getOffset();
521 if(start2 == null) start2 = res2.getStartNode().getOffset();
522 int res = start1.compareTo(start2);
523 if(res == 0){
524 res = second.getType() - first.getType();
526 }
527
528 return res;
564 }
565
566 }
567
568
573 public Set getAnnotationsOfType(int type) {
574 switch(type) {
575 case CORRECT_TYPE:
576 return (correctAnnotations == null)? new HashSet() : correctAnnotations;
577 case PARTIALLY_CORRECT_TYPE:
578 return (partiallyCorrectAnnotations == null) ? new HashSet() : partiallyCorrectAnnotations;
579 case SPURIOUS_TYPE:
580 return (spuriousAnnotations == null) ? new HashSet() : spuriousAnnotations;
581 case MISSING_TYPE:
582 return (missingAnnotations == null) ? new HashSet() : missingAnnotations;
583 default:
584 return new HashSet();
585 }
586 }
587
588
589 public HashSet correctAnnotations, partiallyCorrectAnnotations, missingAnnotations, spuriousAnnotations;
590
591
592
593 public static final int CORRECT_TYPE = 1;
594
596 public static final int PARTIALLY_CORRECT_TYPE = 2;
597
599 public static final int SPURIOUS_TYPE = 3;
600
602 public static final int MISSING_TYPE = 4;
603
604
605 public static final int CORRECT = 2;
606 public static final int PARTIALLY_CORRECT = 1;
607 public static final int WRONG = 0;
608
609 private java.util.Set significantFeaturesSet;
610
611 protected int correctMatches;
612 protected int partiallyCorrectMatches;
613 protected int missing;
614 protected int spurious;
615
616
619 protected List keyList;
620
621
624 protected List responseList;
625
626
629 protected List keyChoices;
630
631
634 protected List responseChoices;
635
636
639 protected List possibleChoices;
640
641
644 protected List finalChoices;
645
646 }