1   
2   /*
3    *  RDFFormatExporter.java
4    *
5    *  Copyright (c) 1998-2002, The University of Sheffield.
6    *
7    *  This file is part of GATE (see http://gate.ac.uk/), and is free
8    *  software, licenced under the GNU Library General Public License,
9    *  Version 2, June 1991 (in the distribution as file licence.html,
10   *  and also available at http://gate.ac.uk/gate/licence.html).
11   *
12   *  Marin Dimitrov, 07/May/2002
13   *
14   *  $Id: RDFFormatExporter.java,v 1.10 2002/05/22 11:27:43 marin Exp $
15   */
16  
17  package gate.creole;
18  
19  import java.util.*;
20  import java.net.*;
21  import java.io.*;
22  
23  import junit.framework.*;
24  import com.hp.hpl.jena.daml.*;
25  import com.hp.hpl.jena.daml.common.*;
26  import com.hp.hpl.mesa.rdf.jena.model.*;
27  import com.hp.hpl.mesa.rdf.jena.common.*;
28  import com.hp.hpl.jena.vocabulary.*;
29  import com.hp.hpl.mesa.rdf.jena.common.prettywriter.*;
30  import com.hp.hpl.mesa.rdf.jena.vocabulary.*;
31  import com.hp.hpl.jena.rdf.arp.*;
32  
33  import gate.*;
34  
35  
36  public class RDFFormatExporter extends AbstractLanguageAnalyser {
37  
38    private static final int DAML_EXPORT = 0;
39    private static final int RDF_EXPORT = 1;
40  
41    private static final String[] EXPORT_FORMATS = {"DAML+OIL","RDF"};
42    private static final String[] EXPORT_EXTS = {"daml","rdf"};
43  
44    private static final String ONTOGAZ_CLASS_FEATURE = "class";
45    private static final String ONTOGAZ_ONTOLOGY_FEATURE = "ontology";
46  
47    /** Debug flag */
48    private static final boolean DEBUG = false;
49  
50    private int exportFormat;
51  
52    /** This list of strings represents the entities type that will be exported*/
53    private List exportedTypes = null;
54  
55    private URL exportFilePath = null;
56  
57    private URL ontologyLocation = null;
58  
59    public RDFFormatExporter() {
60    }
61  
62    /** Java bean style mutator for exportedTypes */
63    public void setExportedTypes(List anExportedTypesList){
64      exportedTypes = anExportedTypesList;
65    }// setExportedTypes();
66  
67  
68    /** Java bean style accesor for exportedTypes */
69    public List getExportedTypes(){
70      return exportedTypes;
71    }// getExportedTypes()
72  
73    /** Java bean style mutator for exportedTypes */
74    public void setExportFormat(String format){
75  
76      Assert.assertTrue(format.equalsIgnoreCase(EXPORT_FORMATS[DAML_EXPORT]) ||
77                        format.equalsIgnoreCase(EXPORT_FORMATS[RDF_EXPORT]));
78  
79      if (format.equalsIgnoreCase(EXPORT_FORMATS[DAML_EXPORT])) {
80        this.exportFormat = DAML_EXPORT;
81      }
82      else if (format.equalsIgnoreCase(EXPORT_FORMATS[RDF_EXPORT])) {
83        this.exportFormat = RDF_EXPORT;
84      }
85      else {
86        Assert.fail();
87      }
88  
89    }// setExportedTypes();
90  
91    /** Java bean style mutator for exportedTypes */
92    public String getExportFormat() {
93      return EXPORT_FORMATS[this.exportFormat];
94    }// setExportedTypes();
95  
96    /** Java bean style mutator for exportFilePath */
97    public void setExportFilePath(URL anExportFilePath){
98      exportFilePath = anExportFilePath;
99    }// setExportFilePath();
100 
101   /** Java bean style accesor for exportFilePath */
102   public URL getExportFilePath(){
103     return exportFilePath;
104   }// getDtdFileName()
105 
106   /** Java bean style mutator for exportFilePath */
107   public void setOntology(URL _ontologyLocation){
108     ontologyLocation = _ontologyLocation;
109   }// setExportFilePath();
110 
111   /** Java bean style accesor for exportFilePath */
112   public URL getOntology(){
113     return ontologyLocation;
114   }// getDtdFileName()
115 
116   /** Initialise this resource, and returns it. */
117   public gate.Resource init() throws ResourceInstantiationException {
118     return this;
119   } // init()
120 
121   /** Run the resource and does the entire export process*/
122   public void execute() throws ExecutionException{
123 
124     // Check if the thing can be run
125     if(document == null) {
126       throw new ExecutionException("No document found to export in APF format!");
127     }
128 
129     if (exportedTypes == null) {
130       throw new ExecutionException("No export types found.");
131     }
132 
133     if (exportedTypes == null) {
134       throw new ExecutionException("No export types found.");
135     }
136 
137 //    StringBuffer rdfDoc = new StringBuffer(10*(document.getContent().size().intValue()));
138 
139     String exportFilePathStr = null;
140 
141     if (exportFilePath == null) {
142       exportFilePathStr = new String(document.getSourceUrl().getFile() + "." +
143                                     EXPORT_EXTS[this.exportFormat]);
144     }
145     else {
146       exportFilePathStr = new String(exportFilePath.getPath()+
147                                     "/" +
148                                     document.getName() + "." +
149                                     EXPORT_EXTS[this.exportFormat]);
150     }
151 //System.out.println("export path:" +exportFilePathStr);
152     // Prepare to write into the xmlFile
153     FileWriter  writer = null;
154     try{
155       writer = new FileWriter(exportFilePathStr,false);
156       annotations2ontology(writer);
157       writer.flush();
158       writer.close();
159     }catch (Exception e){
160       throw new ExecutionException(e);
161     }// End try
162 
163   } // execute()
164 
165   private void annotations2ontology(Writer output) throws Exception {
166 
167     DAMLModel ontologyModel, instanceModel;
168     HashMap ontologies = new HashMap();
169     HashMap instanceMatches = new HashMap();
170     HashSet instanceNames = new HashSet();
171 
172       ontologyModel = new DAMLModelImpl();
173       instanceModel = new DAMLModelImpl();
174 
175       Assert.assertNotNull(ontologyModel);
176       Assert.assertNotNull(instanceModel);
177 
178       //final settings of the model
179       DAMLOntology onto = instanceModel.createDAMLOntology("");
180       onto.prop_comment().addValue("autogenerated from GATE RDFFormatExporter");
181       onto.prop_versionInfo().addValue("1.0");
182 
183       Assert.assertNotNull(this.ontologyLocation);
184       ontologyModel.read(this.ontologyLocation.toString());
185 
186       //get a mapping: class name to DAML class
187       HashMap ontologyMap = ontology2hashmap(ontologyModel);
188       Assert.assertNotNull(ontologyMap);
189 
190       //add the mapping to the ontologies hashmap
191       //key is ontology URL as generated by the OntoGaz
192       ontologies.put(this.ontologyLocation.toString(),ontologyMap);
193 
194       if (null == ontologyModel) {
195         throw new ExecutionException("cannot read ontology");
196       }
197 
198       HashMap defaultClasses = new HashMap((int)ontologyModel.size()/5);
199       Iterator itClasses = ontologyModel.listDAMLClasses();
200       while (itClasses.hasNext()) {
201         DAMLClass cls = (DAMLClass)itClasses.next();
202         String className = cls.getLocalName();
203         defaultClasses.put(className.toLowerCase(),cls);
204       }
205 
206       Iterator itTypes = this.exportedTypes.iterator();
207       while (itTypes.hasNext()) {
208 
209         String type = (String)itTypes.next();
210         AnnotationSet as = this.document.getAnnotations().get(type);
211 
212         if (null == as || true == as.isEmpty()) {
213           continue;
214         }
215 
216         Iterator itAnnotations = as.iterator();
217         while (itAnnotations.hasNext()) {
218 
219           Annotation ann = (Annotation)itAnnotations.next();
220           Assert.assertTrue(ann.getType().equals(type));
221 
222           FeatureMap features = ann.getFeatures();
223           String annClass = (String)features.get(ONTOGAZ_CLASS_FEATURE);
224           String annOntology = (String)features.get(ONTOGAZ_ONTOLOGY_FEATURE);
225           DAMLClass damlClass = null;
226 
227           if (null == annClass) {
228             //no ontological info
229             //try to get proper class from the default ontology
230             if (defaultClasses.containsKey(ann.getType().toLowerCase())) {
231               //bingo
232               //we have a class with the name of the annotation's type
233               damlClass = (DAMLClass)defaultClasses.get(ann.getType().toLowerCase());
234               Assert.assertNotNull(damlClass);
235             }
236             else {
237               continue;
238             }
239           }
240           else {
241             //ontological info available
242             //is this a new ontology?
243             if (false == ontologies.containsKey(annOntology)) {
244               //oops, new ontology:
245               //1. create model for it
246               //2. create class name 2 daml class mapping
247               //3. add it to hashmap
248 
249               //1.
250               DAMLModel model = new DAMLModelImpl();
251               model.read(annOntology);
252 
253               //2.
254               //create mapping between class names and DAML classes
255               HashMap name2class = ontology2hashmap(model);
256               Assert.assertNotNull(name2class);
257 
258               //3.
259               ontologies.put(annOntology,model);
260             }
261 
262             //get the class of the annotation
263             damlClass = (DAMLClass)((HashMap)ontologies.get(annOntology)).get(annClass);
264             Assert.assertNotNull(damlClass);
265           }
266 
267           String instanceName = this.document.getContent().getContent(
268                                                                   ann.getStartNode().getOffset(),
269                                                                   ann.getEndNode().getOffset())
270                                 .toString();
271           Assert.assertNotNull(instanceName);
272 
273           //create instance of proper type only if new
274           if (instanceNames.contains(instanceName)) {
275             continue;
276           }
277 
278           DAMLInstance annInstance = instanceModel.createDAMLInstance(damlClass,instanceName);
279           instanceNames.add(instanceName);
280 
281           //check orhtographic matches
282           List matches = (List)ann.getFeatures().get("matches");
283           if (null != matches) {
284             //try to get equiv instance
285             if (instanceMatches.containsKey(matches)) {
286               DAMLInstance equivInstance = (DAMLInstance)instanceMatches.get(matches);
287 
288               //make sure we don't have duplicated name
289               annInstance.prop_sameIndividualAs().add(equivInstance);
290             }
291             else {
292               //first entry of the coref chain
293               instanceMatches.put(matches,annInstance);
294             }
295           }
296 
297 
298         }//while
299       }//while
300 
301       //print the model into file
302       RDFWriter rdfWriter = new RDFWriterFImpl().getWriter("RDF/XML-ABBREV");
303       rdfWriter.setNsPrefix("gate",this.ontologyLocation.toString()+"#");
304       rdfWriter.write(instanceModel,output,null);
305   }
306 
307   private HashMap ontology2hashmap(DAMLModel ontology) throws Exception {
308 
309     HashMap result = null;
310 
311     //0.
312     Assert.assertNotNull(ontology);
313 
314 
315     result = new HashMap((int)ontology.size()/5);
316 
317     //1.Iterate classes
318     Iterator itClasses = ontology.listDAMLClasses();
319     while (itClasses.hasNext()) {
320       DAMLClass clazz = (DAMLClass)itClasses.next();
321       Assert.assertNotNull(clazz.getLocalName());
322       result.put(clazz.getLocalName(),clazz);
323     }
324 
325     return result;
326   }
327 }