Log in Help
Print
Home 〉 releases 〉 gate-8.4-build5748-ALL 〉 plugins 〉 Format_HTML5Microdata 〉 src 〉 gate 〉 creole 〉 microdata 〉 MicrodataExporter.java
 
/*
 * MicrodataExporter.java
 * 
 * Copyright (c) 2011-2014, The University of Sheffield.
 * 
 * This file is part of GATE (see http://gate.ac.uk/), and is free software,
 * licenced under the GNU Library General Public License, Version 3, June 2007
 * (in the distribution as file licence.html, and also available at
 * http://gate.ac.uk/gate/licence.html).
 * 
 * Mark A. Greenwood, 11/06/2011
 */

package gate.creole.microdata;

import gate.Annotation;
import gate.AnnotationSet;
import gate.Document;
import gate.DocumentExporter;
import gate.FeatureMap;
import gate.GateConstants;
import gate.creole.metadata.AutoInstance;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.creole.metadata.RunTime;

import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.net.URL;
import java.util.HashSet;
import java.util.Set;

/**
 * A {@link DocumentExporter} that writes a document out as HTML with the
 * annotations described by a {@link Microdata} configuration embedded as
 * HTML5 Microdata.
 * 
 * The export temporarily adds annotations to the document's original markups
 * set, serialises the document via {@link Document#toXml}, and then removes
 * the annotations it added again.
 */
@SuppressWarnings("serial")
@CreoleResource(name = "HTML5 Microdata Exporter", comment = "Exports Annotations as HTML5 Microdata", tool = true, autoinstances = @AutoInstance, icon="HTML5")
public class MicrodataExporter extends DocumentExporter {

  /** Value of the <code>configURL</code> runtime parameter. */
  private URL configURL;

  /** Value of the optional <code>annotationSetName</code> runtime parameter. */
  private String annotationSetName;

  /**
   * @return the currently configured value of the <code>configURL</code>
   *         parameter
   */
  public URL getConfigURL() {
    return configURL;
  }
  
  /**
   * Sets the <code>configURL</code> runtime parameter.
   * 
   * NOTE(review): {@link #export} reads the configuration location from the
   * options map (key <code>"configURL"</code>) rather than from this field;
   * presumably the framework populates the options from the declared
   * parameters - confirm.
   * 
   * @param configURL the location of the microdata configuration
   */
  @RunTime
  @CreoleParameter(defaultValue = "resources/schema.org/ANNIE.xml")
  public void setConfigURL(URL configURL) {
    this.configURL = configURL;
  }
  
  /**
   * @return the currently configured value of the
   *         <code>annotationSetName</code> parameter
   */
  public String getAnnotationSetName() {
    return annotationSetName;
  }
  
  /**
   * Sets the optional <code>annotationSetName</code> runtime parameter.
   * 
   * NOTE(review): {@link #export} reads the set name from the options map
   * (key <code>"annotationSetName"</code>) rather than from this field.
   * 
   * @param annotationSetName the name of the annotation set to export from
   */
  @RunTime
  @Optional
  @CreoleParameter
  public void setAnnotationSetName(String annotationSetName) {
    this.annotationSetName = annotationSetName;
  }
  
  /**
   * Creates the exporter, registering it under the name "HTML5 Microdata"
   * with the "html" file extension and the "text/html" MIME type.
   */
  public MicrodataExporter() {
    super("HTML5 Microdata","html","text/html");
  }

  /**
   * Writes the document to the given stream as HTML annotated with HTML5
   * Microdata.
   * 
   * Nothing is written, and no error is raised, unless the document's
   * original markups set contains exactly one <code>html</code> annotation
   * and exactly one <code>body</code> annotation. Only annotations within
   * the span of the <code>body</code> annotation are considered.
   * 
   * @param document the document to export
   * @param out the stream to write to; it is flushed but not closed
   * @param options must supply <code>"configURL"</code> (a {@link URL}) and
   *          may supply <code>"annotationSetName"</code> (a {@link String})
   * @throws IOException if generating the microdata or serialising the
   *           document fails, or if writing to <code>out</code> fails
   */
  @Override
  public void export(Document document, OutputStream out, FeatureMap options)
    throws IOException {

    Microdata microdata = Microdata.load((URL)options.get("configURL"));
    AnnotationSet inputAS = document.getAnnotations((String)options.get("annotationSetName"));
    
    boolean itempropAsMeta = true;

    // TODO list
    // 1. deal with co-reference (each itemscope can have multiple entries but
    // may need to use ids and itemref from the microdata format)

    AnnotationSet originalMarkups =
      document.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);

    // only documents with exactly one html and one body element are exported
    AnnotationSet body = originalMarkups.get("html");
    if(body.size() != 1) return;
    body = originalMarkups.get("body");
    if(body.size() != 1) return;

    // restrict the input to annotations that fall inside the body element
    inputAS =
      inputAS.get(body.firstNode().getOffset(), body.lastNode().getOffset());

    // ids of input annotations that have already been turned into microdata
    Set<Integer> used = new HashSet<Integer>();

    // ids of the temporary annotations added to the original markups set
    Set<Integer> created = new HashSet<Integer>();

    // NOTE(review): this PrintStream encodes using the platform default
    // charset - confirm that matches the encoding expected for the HTML
    PrintStream pout = new PrintStream(out);

    try {
      for(ItemScope scope : microdata.getItemscopes()) {
        for(Annotation a : scope.getMatchingAnnotations(inputAS)) {
          // each annotation is exported once, by the first scope it matches
          if(!used.contains(a.getId())) {
            created.addAll(scope.addMicrodata(a, itempropAsMeta,
              originalMarkups));
            used.add(a.getId());
          }
        }
      }
      
      pout.println(document.toXml(null, false));
      
    } catch(Exception e) {
      throw new IOException(e);
    }
    finally {
      // always undo the temporary changes made to the document
      for(Integer id : created) {
        Annotation temporary = originalMarkups.get(id);

        // skip ids that can no longer be resolved so that a failure here
        // cannot mask an exception thrown from the try block
        if(temporary != null) originalMarkups.remove(temporary);
      }
    }
    
    // PrintStream never throws on a failed write, it only records the
    // failure; checkError() flushes the stream and reports that state so
    // that write errors are not silently lost
    if(pout.checkError()) {
      throw new IOException("Error writing HTML5 Microdata to output stream");
    }
  }
}