|
RDFFormatExporter |
|
1 2 /* 3 * RDFFormatExporter.java 4 * 5 * Copyright (c) 1998-2002, The University of Sheffield. 6 * 7 * This file is part of GATE (see http://gate.ac.uk/), and is free 8 * software, licenced under the GNU Library General Public License, 9 * Version 2, June 1991 (in the distribution as file licence.html, 10 * and also available at http://gate.ac.uk/gate/licence.html). 11 * 12 * Marin Dimitrov, 07/May/2002 13 * 14 * $Id: RDFFormatExporter.java,v 1.10 2002/05/22 11:27:43 marin Exp $ 15 */ 16 17 package gate.creole; 18 19 import java.util.*; 20 import java.net.*; 21 import java.io.*; 22 23 import junit.framework.*; 24 import com.hp.hpl.jena.daml.*; 25 import com.hp.hpl.jena.daml.common.*; 26 import com.hp.hpl.mesa.rdf.jena.model.*; 27 import com.hp.hpl.mesa.rdf.jena.common.*; 28 import com.hp.hpl.jena.vocabulary.*; 29 import com.hp.hpl.mesa.rdf.jena.common.prettywriter.*; 30 import com.hp.hpl.mesa.rdf.jena.vocabulary.*; 31 import com.hp.hpl.jena.rdf.arp.*; 32 33 import gate.*; 34 35 36 public class RDFFormatExporter extends AbstractLanguageAnalyser { 37 38 private static final int DAML_EXPORT = 0; 39 private static final int RDF_EXPORT = 1; 40 41 private static final String[] EXPORT_FORMATS = {"DAML+OIL","RDF"}; 42 private static final String[] EXPORT_EXTS = {"daml","rdf"}; 43 44 private static final String ONTOGAZ_CLASS_FEATURE = "class"; 45 private static final String ONTOGAZ_ONTOLOGY_FEATURE = "ontology"; 46 47 /** Debug flag */ 48 private static final boolean DEBUG = false; 49 50 private int exportFormat; 51 52 /** This list of strings represents the entities type that will be exported*/ 53 private List exportedTypes = null; 54 55 private URL exportFilePath = null; 56 57 private URL ontologyLocation = null; 58 59 public RDFFormatExporter() { 60 } 61 62 /** Java bean style mutator for exportedTypes */ 63 public void setExportedTypes(List anExportedTypesList){ 64 exportedTypes = anExportedTypesList; 65 }// setExportedTypes(); 66 67 68 /** Java bean style accesor for exportedTypes */ 69 public List getExportedTypes(){ 70 return exportedTypes; 71 }// getExportedTypes() 72 73 /** Java bean style mutator for exportedTypes */ 74 public void setExportFormat(String format){ 75 76 Assert.assertTrue(format.equalsIgnoreCase(EXPORT_FORMATS[DAML_EXPORT]) || 77 format.equalsIgnoreCase(EXPORT_FORMATS[RDF_EXPORT])); 78 79 if (format.equalsIgnoreCase(EXPORT_FORMATS[DAML_EXPORT])) { 80 this.exportFormat = DAML_EXPORT; 81 } 82 else if (format.equalsIgnoreCase(EXPORT_FORMATS[RDF_EXPORT])) { 83 this.exportFormat = RDF_EXPORT; 84 } 85 else { 86 Assert.fail(); 87 } 88 89 }// setExportedTypes(); 90 91 /** Java bean style mutator for exportedTypes */ 92 public String getExportFormat() { 93 return EXPORT_FORMATS[this.exportFormat]; 94 }// setExportedTypes(); 95 96 /** Java bean style mutator for exportFilePath */ 97 public void setExportFilePath(URL anExportFilePath){ 98 exportFilePath = anExportFilePath; 99 }// setExportFilePath(); 100 101 /** Java bean style accesor for exportFilePath */ 102 public URL getExportFilePath(){ 103 return exportFilePath; 104 }// getDtdFileName() 105 106 /** Java bean style mutator for exportFilePath */ 107 public void setOntology(URL _ontologyLocation){ 108 ontologyLocation = _ontologyLocation; 109 }// setExportFilePath(); 110 111 /** Java bean style accesor for exportFilePath */ 112 public URL getOntology(){ 113 return ontologyLocation; 114 }// getDtdFileName() 115 116 /** Initialise this resource, and returns it. */ 117 public gate.Resource init() throws ResourceInstantiationException { 118 return this; 119 } // init() 120 121 /** Run the resource and does the entire export process*/ 122 public void execute() throws ExecutionException{ 123 124 // Check if the thing can be run 125 if(document == null) { 126 throw new ExecutionException("No document found to export in APF format!"); 127 } 128 129 if (exportedTypes == null) { 130 throw new ExecutionException("No export types found."); 131 } 132 133 if (exportedTypes == null) { 134 throw new ExecutionException("No export types found."); 135 } 136 137 // StringBuffer rdfDoc = new StringBuffer(10*(document.getContent().size().intValue())); 138 139 String exportFilePathStr = null; 140 141 if (exportFilePath == null) { 142 exportFilePathStr = new String(document.getSourceUrl().getFile() + "." + 143 EXPORT_EXTS[this.exportFormat]); 144 } 145 else { 146 exportFilePathStr = new String(exportFilePath.getPath()+ 147 "/" + 148 document.getName() + "." + 149 EXPORT_EXTS[this.exportFormat]); 150 } 151 //System.out.println("export path:" +exportFilePathStr); 152 // Prepare to write into the xmlFile 153 FileWriter writer = null; 154 try{ 155 writer = new FileWriter(exportFilePathStr,false); 156 annotations2ontology(writer); 157 writer.flush(); 158 writer.close(); 159 }catch (Exception e){ 160 throw new ExecutionException(e); 161 }// End try 162 163 } // execute() 164 165 private void annotations2ontology(Writer output) throws Exception { 166 167 DAMLModel ontologyModel, instanceModel; 168 HashMap ontologies = new HashMap(); 169 HashMap instanceMatches = new HashMap(); 170 HashSet instanceNames = new HashSet(); 171 172 ontologyModel = new DAMLModelImpl(); 173 instanceModel = new DAMLModelImpl(); 174 175 Assert.assertNotNull(ontologyModel); 176 Assert.assertNotNull(instanceModel); 177 178 //final settings of the model 179 DAMLOntology onto = instanceModel.createDAMLOntology(""); 180 onto.prop_comment().addValue("autogenerated from GATE RDFFormatExporter"); 181 onto.prop_versionInfo().addValue("1.0"); 182 183 Assert.assertNotNull(this.ontologyLocation); 184 ontologyModel.read(this.ontologyLocation.toString()); 185 186 //get a mapping: class name to DAML class 187 HashMap ontologyMap = ontology2hashmap(ontologyModel); 188 Assert.assertNotNull(ontologyMap); 189 190 //add the mapping to the ontologies hashmap 191 //key is ontology URL as generated by the OntoGaz 192 ontologies.put(this.ontologyLocation.toString(),ontologyMap); 193 194 if (null == ontologyModel) { 195 throw new ExecutionException("cannot read ontology"); 196 } 197 198 HashMap defaultClasses = new HashMap((int)ontologyModel.size()/5); 199 Iterator itClasses = ontologyModel.listDAMLClasses(); 200 while (itClasses.hasNext()) { 201 DAMLClass cls = (DAMLClass)itClasses.next(); 202 String className = cls.getLocalName(); 203 defaultClasses.put(className.toLowerCase(),cls); 204 } 205 206 Iterator itTypes = this.exportedTypes.iterator(); 207 while (itTypes.hasNext()) { 208 209 String type = (String)itTypes.next(); 210 AnnotationSet as = this.document.getAnnotations().get(type); 211 212 if (null == as || true == as.isEmpty()) { 213 continue; 214 } 215 216 Iterator itAnnotations = as.iterator(); 217 while (itAnnotations.hasNext()) { 218 219 Annotation ann = (Annotation)itAnnotations.next(); 220 Assert.assertTrue(ann.getType().equals(type)); 221 222 FeatureMap features = ann.getFeatures(); 223 String annClass = (String)features.get(ONTOGAZ_CLASS_FEATURE); 224 String annOntology = (String)features.get(ONTOGAZ_ONTOLOGY_FEATURE); 225 DAMLClass damlClass = null; 226 227 if (null == annClass) { 228 //no ontological info 229 //try to get proper class from the default ontology 230 if (defaultClasses.containsKey(ann.getType().toLowerCase())) { 231 //bingo 232 //we have a class with the name of the annotation's type 233 damlClass = (DAMLClass)defaultClasses.get(ann.getType().toLowerCase()); 234 Assert.assertNotNull(damlClass); 235 } 236 else { 237 continue; 238 } 239 } 240 else { 241 //ontological info available 242 //is this a new ontology? 243 if (false == ontologies.containsKey(annOntology)) { 244 //oops, new ontology: 245 //1. create model for it 246 //2. create class name 2 daml class mapping 247 //3. add it to hashmap 248 249 //1. 250 DAMLModel model = new DAMLModelImpl(); 251 model.read(annOntology); 252 253 //2. 254 //create mapping between class names and DAML classes 255 HashMap name2class = ontology2hashmap(model); 256 Assert.assertNotNull(name2class); 257 258 //3. 259 ontologies.put(annOntology,model); 260 } 261 262 //get the class of the annotation 263 damlClass = (DAMLClass)((HashMap)ontologies.get(annOntology)).get(annClass); 264 Assert.assertNotNull(damlClass); 265 } 266 267 String instanceName = this.document.getContent().getContent( 268 ann.getStartNode().getOffset(), 269 ann.getEndNode().getOffset()) 270 .toString(); 271 Assert.assertNotNull(instanceName); 272 273 //create instance of proper type only if new 274 if (instanceNames.contains(instanceName)) { 275 continue; 276 } 277 278 DAMLInstance annInstance = instanceModel.createDAMLInstance(damlClass,instanceName); 279 instanceNames.add(instanceName); 280 281 //check orhtographic matches 282 List matches = (List)ann.getFeatures().get("matches"); 283 if (null != matches) { 284 //try to get equiv instance 285 if (instanceMatches.containsKey(matches)) { 286 DAMLInstance equivInstance = (DAMLInstance)instanceMatches.get(matches); 287 288 //make sure we don't have duplicated name 289 annInstance.prop_sameIndividualAs().add(equivInstance); 290 } 291 else { 292 //first entry of the coref chain 293 instanceMatches.put(matches,annInstance); 294 } 295 } 296 297 298 }//while 299 }//while 300 301 //print the model into file 302 RDFWriter rdfWriter = new RDFWriterFImpl().getWriter("RDF/XML-ABBREV"); 303 rdfWriter.setNsPrefix("gate",this.ontologyLocation.toString()+"#"); 304 rdfWriter.write(instanceModel,output,null); 305 } 306 307 private HashMap ontology2hashmap(DAMLModel ontology) throws Exception { 308 309 HashMap result = null; 310 311 //0. 312 Assert.assertNotNull(ontology); 313 314 315 result = new HashMap((int)ontology.size()/5); 316 317 //1.Iterate classes 318 Iterator itClasses = ontology.listDAMLClasses(); 319 while (itClasses.hasNext()) { 320 DAMLClass clazz = (DAMLClass)itClasses.next(); 321 Assert.assertNotNull(clazz.getLocalName()); 322 result.put(clazz.getLocalName(),clazz); 323 } 324 325 return result; 326 } 327 }
|
RDFFormatExporter |
|