1
15
16 package gate.creole.dumpingPR;
17
18 import java.io.*;
19 import java.net.URL;
20 import java.util.*;
21
22 import gate.*;
23 import gate.corpora.DocumentImpl;
24 import gate.creole.*;
25 import gate.util.*;
26
27
35 public class DumpingPR extends AbstractLanguageAnalyser
36 implements ProcessingResource {
37
38 public static final String
39 DPR_DOCUMENT_PARAMETER_NAME = "document";
40
41 public static final String
42 DPR_ANN_SET_PARAMETER_NAME = "annotationSetName";
43
44 public static final String
45 DPR_ANN_TYPES_PARAMETER_NAME = "annotationTypes";
46
47 public static final String
48 DPR_DUMP_TYPES_PARAMETER_NAME = "dumpTypes";
49
50 public static final String
51 DPR_OUTPUR_URL_PARAMETER_NAME = "outputFileUrl";
52
53 public static final String
54 DPR_INCLUDE_FEAT_PARAMETER_NAME = "includeFeatures";
55
56 public static final String
57 DPR_USE_SUFFIX_PARAMETER_NAME = "useSuffixForDumpFiles";
58
59 public static final String
60 DPR_FILE_SUFFIX_PARAMETER_NAME = "suffixForDumpFiles";
61
62 private static final boolean DEBUG = true;
63
64
67 protected List annotationTypes;
68
69
76 protected List dumpTypes;
77
78
81 protected String annotationSetName;
82
83
86 protected boolean includeFeatures = false;
87
88
91 protected boolean useStandOffXML = false;
92
93
97 protected String suffixForDumpFiles = ".gate";
98
99
103 protected boolean useSuffixForDumpFiles = true;
104
105 protected java.net.URL outputFileUrl;
106
107 private static final String DUMPING_PR_SET = "DumpingPRTempSet";
108
109
110 public Resource init() throws ResourceInstantiationException
111 {
112 return super.init();
113 }
115
123 public void reInit() throws ResourceInstantiationException
124 {
125 init();
126 }
128
129 public void execute() throws ExecutionException {
130
131 if(document == null)
132 throw new GateRuntimeException("No document to process!");
133
134 if (this.useStandOffXML) {
137 write2File();
138 return;
139 }
140
141 AnnotationSet allAnnots;
142 if ((annotationSetName == null)|| (annotationSetName.equals("")))
144 allAnnots = document.getAnnotations();
145 else
146 allAnnots = document.getAnnotations(annotationSetName);
147
148 if ((allAnnots == null) || allAnnots.isEmpty()) {
150 Out.prln("DumpingPR Warning: No annotations found for export. "
151 + "Including only those from the Original markups set.");
152 write2File(null);
153 return;
154 }
155
156 if (this.useStandOffXML) {
159 write2File();
160 return;
161 }
162
163 Set types2Export = new HashSet(annotationTypes);
166
167 AnnotationSet annots2Export = allAnnots.get(types2Export);
169
170 if (dumpTypes != null && !dumpTypes.isEmpty()) {
173 HashMap renameMap = new HashMap();
174 for(int i=0; i<dumpTypes.size() && i<annotationTypes.size(); i++) {
175 renameMap.put(annotationTypes.get(i), dumpTypes.get(i));
178 } if(!renameMap.isEmpty() && annots2Export != null)
181 annots2Export = renameAnnotations(annots2Export, renameMap);
182 }
184 write2File(annots2Export);
185 document.removeAnnotationSet(DumpingPR.DUMPING_PR_SET);
186
187 }
189 protected void write2File(AnnotationSet exportSet) {
190 File outputFile;
191
192 URL sourceURL = document.getSourceUrl();
195 StringBuffer tempBuff = new StringBuffer(sourceURL.getFile());
196 if (useSuffixForDumpFiles)
198 tempBuff.append(this.suffixForDumpFiles);
199 String outputPath = tempBuff.toString();
200 if (DEBUG)
201 Out.prln(outputPath);
202 outputFile = new File(outputPath);
203
204 try {
205 OutputStreamWriter writer;
207 if (document instanceof DocumentImpl) {
208 String encoding = ((DocumentImpl) document).getEncoding();
209 if (encoding == null || "".equals(encoding))
210 writer = new OutputStreamWriter(new FileOutputStream(outputFile));
211 else
212 writer = new OutputStreamWriter(
213 new FileOutputStream(outputFile), encoding);
214 } else
215 writer = new OutputStreamWriter(
216 new FileOutputStream(outputFile));
217
218 writer.write(document.toXml(exportSet, includeFeatures));
222 writer.flush();
223 writer.close();
224 } catch (IOException ex) {
225 throw new GateRuntimeException("Dumping PR: Error writing document "
226 + document.getName() + ": "
227 + ex.getMessage());
228 }
229
230
231 }
233 protected void write2File() {
234 File outputFile;
235
236 URL sourceURL = document.getSourceUrl();
237 StringBuffer tempBuff = new StringBuffer(sourceURL.getFile());
238 if (useSuffixForDumpFiles)
240 tempBuff.append(this.suffixForDumpFiles);
241 String outputPath = tempBuff.toString();
242 if (DEBUG)
243 Out.prln(outputPath);
244 outputFile = new File(outputPath);
245
246 try {
247 OutputStreamWriter writer;
249 if (document instanceof DocumentImpl) {
250 String encoding = ((DocumentImpl) document).getEncoding();
251 if (encoding == null || "".equals(encoding))
252 writer = new OutputStreamWriter(new FileOutputStream(outputFile));
253 else
254 writer = new OutputStreamWriter(
255 new FileOutputStream(outputFile), encoding);
256 } else
257 writer = new OutputStreamWriter(
258 new FileOutputStream(outputFile));
259
260 writer.write(document.toXml());
264 writer.flush();
265 writer.close();
266 } catch (IOException ex) {
267 throw new GateRuntimeException("Dumping PR: Error writing document "
268 + document.getName() + ": "
269 + ex.getMessage());
270 }
271
272
273 }
275
276 protected AnnotationSet renameAnnotations(AnnotationSet annots2Export,
277 HashMap renameMap){
278 Iterator iter = annots2Export.iterator();
279 AnnotationSet as = document.getAnnotations(DUMPING_PR_SET);
280 if (!as.isEmpty())
281 as.clear();
282 while(iter.hasNext()) {
283 Annotation annot = (Annotation) iter.next();
284 if (!renameMap.containsKey(annot.getType()))
287 renameMap.put(annot.getType(), annot.getType());
288 try{
289 as.add(annot.getId(),
290 annot.getStartNode().getOffset(),
291 annot.getEndNode().getOffset(),
292 (String) renameMap.get(annot.getType()),
293 annot.getFeatures());
294 } catch (InvalidOffsetException ex) {
295 throw new GateRuntimeException("DumpingPR: " + ex.getMessage());
296 }
297 } return as;
299 }
301
302
303 public String getAnnotationSetName() {
304 return annotationSetName;
305 }
307
308 public void setAnnotationSetName(String newAnnotationSetName) {
309 annotationSetName = newAnnotationSetName;
310 }
312 public List getAnnotationTypes() {
313 return this.annotationTypes;
314 }
315
316 public void setAnnotationTypes(List newTypes) {
317 annotationTypes = newTypes;
318 }
319
320 public List getDumpTypes() {
321 return this.dumpTypes;
322 }
323
324 public void setDumpTypes(List newTypes) {
325 dumpTypes = newTypes;
326 }
327
328 public URL getOutputFileUrl() {
329 return this.outputFileUrl;
330 }
331
332 public void setOutputFileUrl(URL file) {
333 outputFileUrl = file;
334 }
335
336 public void setIncludeFeatures(Boolean inclFeatures) {
337 if (inclFeatures != null)
338 includeFeatures = inclFeatures.booleanValue();
339 }
340
341 public Boolean getIncludeFeatures() {
342 return new Boolean(includeFeatures);
343 }
344
345 public void setUseStandOffXML(Boolean newValue) {
346 if (newValue != null)
347 useStandOffXML = newValue.booleanValue();
348 }
349
350 public Boolean getUseStandOffXML() {
351 return new Boolean(useStandOffXML);
352 }
353
354 public String getSuffixForDumpFiles() {
355 return suffixForDumpFiles;
356 }
357
358 public void setSuffixForDumpFiles(String newSuffix) {
359 this.suffixForDumpFiles = newSuffix;
360 }
361
362 public Boolean getUseSuffixForDumpFiles() {
363 return new Boolean(this.useSuffixForDumpFiles);
364 }
365
366 public void setUseSuffixForDumpFiles(Boolean useOrNot) {
367 if (useOrNot != null)
368 this.useSuffixForDumpFiles = useOrNot.booleanValue();
369 }
370
371 }