1   /*
2    *  AnnotationDeletePR.java
3    *
4    *  Copyright (c) 1998-2004, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Kalina Bontcheva, 19/10/2001
12   *
13   *  $Id: AnnotationDeletePR.java,v 1.15 2004/10/07 14:08:39 niraj Exp $
14   */
15  
16  package gate.creole.annotdelete;
17  
18  import java.util.*;
19  
20  import gate.*;
21  import gate.creole.*;
22  import gate.util.GateRuntimeException;
23  
24  /**
25   * This class is the implementation of a processing resource which
26   * deletes all annotations and sets other than 'original markups'.
27   * If put at the start of an application, it'll ensure that the
28   * document is restored to its clean state before being processed.
29   */
30  public class AnnotationDeletePR extends AbstractLanguageAnalyser
31    implements ProcessingResource {
32  
33    public static final String
34      TRANSD_DOCUMENT_PARAMETER_NAME = "document";
35  
36    public static final String
37      TRANSD_ANNOT_TYPES_PARAMETER_NAME = "annotationTypes";
38  
39    public static final String
40      TRANSD_SETS_KEEP_PARAMETER_NAME = "setsToKeep";
41  
42    protected String markupSetName = GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME;
43    protected List annotationTypes;
44    protected List setsToKeep;
45  
46    /** Initialise this resource, and return it. */
47    public Resource init() throws ResourceInstantiationException
48    {
49      return super.init();
50    } // init()
51  
52    /**
53    * Reinitialises the processing resource. After calling this method the
54    * resource should be in the state it is after calling init.
55    * If the resource depends on external resources (such as rules files) then
56    * the resource will re-read those resources. If the data used to create
57    * the resource has changed since the resource has been created then the
58    * resource will change too after calling reInit().
59    */
60    public void reInit() throws ResourceInstantiationException
61    {
62      init();
63    } // reInit()
64  
65    /** Run the resource. */
66    public void execute() throws ExecutionException {
67  
68      if(document == null)
69        throw new GateRuntimeException("No document to process!");
70  
71      /* Niraj */
72      Map matchesMap = null;
73      Object matchesMapObject = document.getFeatures().get(ANNIEConstants.DOCUMENT_COREF_FEATURE_NAME);
74      if(matchesMapObject instanceof Map) {
75        // no need to do anything
76        // and return
77        matchesMap = (Map) matchesMapObject;
78      }
79      /* End */
80  
81      //first clear the default set, which cannot be removed
82      if (annotationTypes == null || annotationTypes.isEmpty()) {
83        document.getAnnotations().clear();
84        /* Niraj */
85        removeFromDocumentCorefData( (String)null, matchesMap);
86        /* End */
87      } else {
88        removeSubSet(document.getAnnotations(), /* Niraj */ matchesMap /* End */);
89      }
90  
91      //get the names of all sets
92      Map namedSets = document.getNamedAnnotationSets();
93      //nothing left to do if there are no named sets
94      if (namedSets == null || namedSets.isEmpty())
95        return;
96  
97      //loop through the sets and delete them all unless they're original markups
98      List setNames = new ArrayList(namedSets.keySet());
99      Iterator iter = setNames.iterator();
100     String setName;
101 
102     while (iter.hasNext()) {
103       setName = (String) iter.next();
104       //check first whether this is the original markups or one of the sets
105       //that we want to keep
106       if (setName != null && !setName.equals(markupSetName) ) {
107         // skip named sets from setsToKeep
108         if(setsToKeep != null && setsToKeep.contains(setName)) continue;
109 
110         if (annotationTypes == null || annotationTypes.isEmpty()) {
111           document.removeAnnotationSet(setName);
112           /* Niraj */
113           removeFromDocumentCorefData( (String) setName, matchesMap);
114           /* End */
115         } else {
116           removeSubSet(document.getAnnotations(setName), /* Niraj */ matchesMap /* End */);
117         }
118       }//if
119     }
120 
121     /* Niraj */
122     // and finally we add it to the document
123     if(matchesMap != null) {
124       document.getFeatures().put(ANNIEConstants.DOCUMENT_COREF_FEATURE_NAME,
125                                  matchesMap);
126     }
127     /* End */
128 
129   } // execute()
130 
131   /* Niraj */
132   // method to undate the Document-Coref-data
133   private void removeFromDocumentCorefData(String currentSet, Map matchesMap) {
134     if(matchesMap == null)
135       return;
136 
137     // if this is defaultAnnotationSet, we cannot remove this
138     if(currentSet == null) {
139       java.util.List matches = (java.util.List) matchesMap.get(currentSet);
140       if (matches == null || matches.size() == 0) {
141         // do nothing
142         return;
143       }
144       else {
145         matchesMap.put(currentSet, new java.util.ArrayList());
146       }
147     } else {
148       // we remove this set from the Coref Data
149       matchesMap.remove(currentSet);
150     }
151   }
152 
153   // method to update the Document-Coref-data
154   private void removeAnnotationsFromCorefData(AnnotationSet annotations, String setName, Map matchesMap) {
155     java.util.List matches = (java.util.List) matchesMap.get(setName);
156     if(matches == null)
157       return;
158 
159     // each element in the matches is a group of annotation IDs
160     // so for each annotation we will have to traverse through all the lists and
161     // find out the annotation and remove it
162     ArrayList annots = new ArrayList(annotations);
163     for(int i=0;i<annots.size();i++) {
164       Annotation toRemove = (Annotation) annots.get(i);
165       Iterator idIters = matches.iterator();
166       ArrayList ids = new ArrayList();
167       while(idIters.hasNext()) {
168         ids = (ArrayList) idIters.next();
169         if(ids.remove(toRemove.getId())) {
170           // yes removed
171           break;
172         }
173       }
174       if(ids.size()==0) {
175         matches.remove(ids);
176       }
177     }
178     // and finally see if there is any group available
179     if(matches.size()==0) {
180       matchesMap.remove(setName);
181     }
182   }
183 
184   /* End */
185 
186   private void removeSubSet(AnnotationSet theSet, Map matchMap) {
187     AnnotationSet toRemove = theSet.get(new HashSet(annotationTypes));
188     if (toRemove == null || toRemove.isEmpty())
189       return;
190     theSet.removeAll(toRemove);
191     /* Niraj */
192     removeAnnotationsFromCorefData(toRemove, theSet.getName(), matchMap);
193     /* End */
194   }//removeSubSet
195 
196   public void setMarkupASName(String newMarkupASName) {
197     markupSetName = newMarkupASName;
198   }
199 
200   public String  getMarkupASName() {
201     return markupSetName;
202   }
203 
204   public List getAnnotationTypes() {
205     return this.annotationTypes;
206   }
207 
208   public void setAnnotationTypes(List newTypes) {
209     annotationTypes = newTypes;
210   }
211 
212   public List getSetsToKeep() {
213     return this.setsToKeep;
214   }
215 
216   public void setSetsToKeep(List newSetNames) {
217     setsToKeep = newSetNames;
218   }
219 
220 
221 } // class AnnotationSetTransfer
222