MatchRule5.java
01 package gate.creole.orthomatcher;
02 
03 import static gate.creole.ANNIEConstants.TOKEN_KIND_FEATURE_NAME;
04 import static gate.creole.ANNIEConstants.TOKEN_STRING_FEATURE_NAME;
05 import static gate.creole.orthomatcher.OrthoMatcher.PUNCTUATION_VALUE;
06 import static gate.creole.orthomatcher.OrthoMatcher.log;
07 import gate.Annotation;
08 
09 import java.util.Iterator;
10 /**
11  * RULE #4Name: Does all the non-punctuation tokens from the long string match the corresponding tokens 
12  * in the short string?  
13  * This basically identifies cases where the two strings match token for token, excluding punctuation
14  * Applied to: person annotations
15  *
16  * Modified by Andrew Borthwick, Spock Networks:  Allowed for nickname match
17  */
18 public class MatchRule5 implements OrthoMatcherRule {
19 
20   OrthoMatcher orthomatcher;
21   
22   public MatchRule5(OrthoMatcher orthmatcher){
23     this.orthomatcher=orthmatcher;
24   }
25   
26   @Override
27   public boolean value(String s1, String s2) {
28     
29     boolean allTokensMatch = true;
30 //      if (s1.equals("wilson")) {
31 //        log.debug("MR4 Name: Matching" + tokensLongAnnot + " with " + tokensShortAnnot);
32 //        log.debug("MR4 Name: Matching " + s1 + " with " + s2);
33 //      }  
34       if (orthomatcher.tokensLongAnnot.size() == || orthomatcher.tokensShortAnnot.size() == 0) {
35         log.debug("Rule 5 rejecting " + s1 + " and " + s2 + " because one doesn't have any tokens");
36         return false;
37       }
38       Iterator<Annotation> tokensLongAnnotIter = orthomatcher.tokensLongAnnot.iterator();
39       Iterator<Annotation> tokensShortAnnotIter = orthomatcher.tokensShortAnnot.iterator();
40       while (tokensLongAnnotIter.hasNext() && tokensShortAnnotIter.hasNext()) {
41         Annotation token = tokensLongAnnotIter.next();
42         if (((String)token.getFeatures().get(TOKEN_KIND_FEATURE_NAME)).equals(PUNCTUATION_VALUE))
43           continue;
44         if (! orthomatcher.getOrthography().fuzzyMatch((String)(tokensShortAnnotIter.next().
45                 getFeatures().get(TOKEN_STRING_FEATURE_NAME)),
46                 (Stringtoken.getFeatures().get(TOKEN_STRING_FEATURE_NAME))) {
47           allTokensMatch = false;
48           break;
49         }
50       }
51       if (allTokensMatch && log.isDebugEnabled()) {
52         log.debug("rule 5 matched " + s1 + "(id: " + orthomatcher.longAnnot.getId() ", offset: " + orthomatcher.longAnnot.getStartNode().getOffset() ") to " 
53                                       s2+  "(id: " + orthomatcher.shortAnnot.getId() ", offset: " + orthomatcher.shortAnnot.getStartNode().getOffset() ")");
54       }   
55       
56       if (allTokensMatchOrthoMatcherHelper.usedRule(5);
57       
58       return allTokensMatch;
59   }
60   
61   @Override
62   public String getId(){
63     return "MatchRule5";
64   }
65 }