1   /*
2    *  AbstractCoreferencer.java
3    *
4    *  Copyright (c) 1998-2002, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  $Id: AbstractCoreferencer.java,v 1.1 2002/07/31 14:53:26 hamish Exp $
12   */
13  
14  package gate.creole.coref;
15  
16  import java.util.*;
17  
18  import junit.framework.*;
19  
20  import gate.*;
21  import gate.creole.*;
22  import gate.util.*;
23  
24  public abstract class AbstractCoreferencer extends AbstractLanguageAnalyser
25      implements ProcessingResource{
26  
27    public static final String COREF_DOCUMENT_PARAMETER_NAME = "document";
28  
29    public static final String COREF_ANN_SET_PARAMETER_NAME = "annotationSetName";
30  
31    public static final String COREF_TYPE_FEATURE_NAME = "ENTITY_MENTION_TYPE";
32    public static final String COREF_ANTECEDENT_FEATURE_NAME = "antecedent_offset";
33  
34    /** --- */
35    private static final boolean DEBUG = false;
36  
37    public String coreferenceType;
38  
39    /** --- */
40    public AbstractCoreferencer(String type) {
41      this.coreferenceType = type;
42    }
43  
44  
45    /** Initialise this resource, and return it. */
46    public Resource init() throws ResourceInstantiationException {
47  
48      Resource result = super.init();
49  
50      return result;
51    } // init()
52  
53  
54    /**
55     * Reinitialises the processing resource. After calling this method the
56     * resource should be in the state it is after calling init.
57     * If the resource depends on external resources (such as rules files) then
58     * the resource will re-read those resources. If the data used to create
59     * the resource has changed since the resource has been created then the
60     * resource will change too after calling reInit().
61    */
62    public void reInit() throws ResourceInstantiationException {
63      init();
64    } // reInit()
65  
66    /** Set the document to run on. */
67    public void setDocument(Document newDocument) {
68      super.setDocument(newDocument);
69    }
70  
71    /** --- */
72    public abstract void setAnnotationSetName(String annotationSetName);
73  
74    /** --- */
75    public abstract String getAnnotationSetName();
76  
77    /** --- */
78    protected void generateCorefChains(HashMap ana2ant)
79        throws GateRuntimeException{
80  
81      String asName = getAnnotationSetName();
82      AnnotationSet outputSet = null;
83  
84      if (null == asName || asName.equals("")) {
85        outputSet = getDocument().getAnnotations();
86      }
87      else {
88        outputSet = getDocument().getAnnotations(asName);
89      }
90  
91      //3. generate new annotations
92      Iterator it = ana2ant.entrySet().iterator();
93      while (it.hasNext()) {
94        Map.Entry currLink = (Map.Entry)it.next();
95        Annotation anaphor = (Annotation)currLink.getKey();
96        Annotation antecedent = (Annotation)currLink.getValue();
97  
98        if (DEBUG) {
99          AnnotationSet corefSet = getDocument().getAnnotations("COREF");
100         Long antOffset = new Long(0);
101 
102         if (null != antecedent) {
103           antOffset = antecedent.getStartNode().getOffset();
104         }
105 
106         FeatureMap features = new SimpleFeatureMapImpl();
107         features.put("antecedent",antOffset);
108         corefSet.add(anaphor.getStartNode(),anaphor.getEndNode(),"COREF",features);
109       }
110 
111       //do we have antecedent?
112       if (null == antecedent) {
113         continue;
114       }
115 
116       //get the ortho-matches of the antecedent
117       List matches = (List)antecedent.getFeatures().
118         get(ANNOTATION_COREF_FEATURE_NAME);
119       if (null == matches) {
120         matches = new ArrayList();
121         matches.add(antecedent.getId());
122         antecedent.getFeatures().
123           put(ANNOTATION_COREF_FEATURE_NAME,matches);
124         //check if the document has a list of matches
125         //if yes, simply add the new list to it
126         //if not, create it and add the list of matches to it
127         if (document.getFeatures().containsKey(
128             DOCUMENT_COREF_FEATURE_NAME)) {
129           Map matchesMap = (Map) document.getFeatures().get(
130                                 DOCUMENT_COREF_FEATURE_NAME);
131           List matchesList = (List) matchesMap.get(getAnnotationSetName());
132           if (matchesList == null) {
133             matchesList = new ArrayList();
134             matchesMap.put(getAnnotationSetName(), matchesList);
135           }
136           matchesList.add(matches);
137         } else {
138           Map matchesMap = new HashMap();
139             List matchesList = new ArrayList();
140             matchesMap.put(getAnnotationSetName(), matchesList);
141             matchesList.add(matches);
142         }//if else
143       }//if matches == null
144 
145       FeatureMap features = new SimpleFeatureMapImpl();
146       features.put(COREF_TYPE_FEATURE_NAME, coreferenceType);
147       features.put(ANNOTATION_COREF_FEATURE_NAME, matches);
148       features.put(COREF_ANTECEDENT_FEATURE_NAME,
149                    antecedent.getStartNode().getOffset());
150 
151       Integer annID = outputSet.add(anaphor.getStartNode(),
152                                     anaphor.getEndNode(),
153                                     antecedent.getType(),
154                                     features);
155       matches.add(annID);
156     }
157   }
158 
159 }
160