Log in Help
Print
Homereleasesgate-8.4-build5748-ALLpluginsAlchemyAPIsrcgatealchemyAPI 〉 EntityExtraction.java
 
/*
 * Copyright (c) 2009-2013, The University of Sheffield.
 * 
 * This file is part of GATE (see http://gate.ac.uk/), and is free software,
 * Licensed under the GNU Library General Public License, Version 3, June 2007
 * (in the distribution as file licence.html, and also available at
 * http://gate.ac.uk/gate/licence.html).
 */

package gate.alchemyAPI;

import gate.creole.ExecutionException;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.RunTime;

import java.util.ArrayList;
import java.util.List;

import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import com.alchemyapi.api.AlchemyAPI_NamedEntityParams;

/**
 * @author Niraj Aswani
 * @author Mark A. Greenwood
 */
@CreoleResource(name = "AlchemyAPI: Entity Extraction", comment = "Runs the AlchemyAPI Entity Extraction service on a GATE document", icon = "Alchemy")
public class EntityExtraction extends KeywordExtraction {

  private static final long serialVersionUID = -8443994795704361590L;

  /** debug */
  private boolean DEBUG = false;

  @RunTime
  @CreoleParameter(defaultValue = "Mention")
  public void setAnnotationType(String annotationType) {
    this.annotationType = annotationType;
  }

  /**
   * Using URLConnection to connect to alchemy API
   * 
   * @param text
   * @return
   * @throws ExecutionException
   */
  @Override
  protected List<TextSpan> process(String text) throws ExecutionException {
    
    if(DEBUG) System.out.println("About to process: " + text);

    // the list of results we will evenutally return
    List<TextSpan> toReturn = new ArrayList<TextSpan>();

    // the params to configure the service
    AlchemyAPI_NamedEntityParams params = new AlchemyAPI_NamedEntityParams();

    // get the XML result doc back from the service
    Document doc = null;
    try {
      doc = alchemy.TextGetRankedNamedEntities(text, params);
    } catch(Exception e) {
      e.printStackTrace();
      throw new ExecutionException(e);
    }

    // convert the XML into TextSpan instances
    NodeList entitiesList = doc.getElementsByTagName("entity");
    for(int i = 0; i < entitiesList.getLength(); i++) {
      Node n = entitiesList.item(i);
      NodeList children = n.getChildNodes();
      TextSpan r = new TextSpan();

      for(int j = 0; j < children.getLength(); j++) {
        Node cn = children.item(j);
        if(cn.getNodeName().equals("type")) {
          r.featureMap.put("type", cn.getTextContent());
        } else if(cn.getNodeName().equals("relevance")) {
          r.featureMap.put("relevance", cn.getTextContent());
        } else if(cn.getNodeName().equals("text")) {
          r.text = cn.getTextContent();
        } else if(cn.getNodeName().equals("disambiguated")) {
          NodeList disambChildren = cn.getChildNodes();
          for(int k = 0; k < disambChildren.getLength(); k++) {
            Node dcn = disambChildren.item(k);
            if(!dcn.getNodeName().equals("#text"))
              r.featureMap.put(dcn.getNodeName(), dcn.getTextContent());
          }
        }
      }
      
      if(DEBUG) System.out.println(r);
      
      toReturn.add(r);
    }
    return toReturn;
  }
}