// GATE.ac.uk - releases/gate-8.4.1-build5753-ALL/plugins/UIMA/test/src/gate/uima/test/TestGATEInUIMA.java

/*
 *  Copyright (c) 2005, The University of Sheffield.
 *
 *  This file is part of the GATE/UIMA integration layer, and is free
 *  software, released under the terms of the GNU Lesser General Public
 *  Licence, version 2.1 (or any later version).  A copy of this licence
 *  is provided in the file LICENCE in the distribution.
 *
 *  UIMA is a product of IBM, details are available from
 *  http://alphaworks.ibm.com/tech/uima
 */
package gate.uima.test;

import junit.framework.*;

import java.io.File;

import org.apache.uima.UIMAFramework;
import org.apache.uima.util.XMLParser;
import org.apache.uima.util.XMLInputSource;
import org.apache.uima.util.InvalidXMLException;
import org.apache.uima.resource.ResourceSpecifier;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FSIndex;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.CAS;

/**
 * Test case for GATE in UIMA (i.e. GATEApplicationAnnotator).
 *
 * <p>The test loads a UIMA analysis engine descriptor from the plugin's
 * <code>test/conf</code> directory, runs it over a short piece of text and
 * checks the part-of-speech tag attached to every token.</p>
 *
 * <p>Two system properties must be set by the test runner:
 * <code>gate.uima.plugin.location</code> and
 * <code>gate.home.location</code>.</p>
 */
public class TestGATEInUIMA extends TestCase {
  /**
   * Name of the system property holding the UIMA plugin directory.
   */
  private static final String UIMA_PLUGIN_PROPERTY =
    "gate.uima.plugin.location";

  /**
   * Name of the system property holding the GATE home directory.
   */
  private static final String GATE_HOME_PROPERTY = "gate.home.location";

  /**
   * Location of gate - passed in as a system property by the test runner.
   * It is resolved in {@link #setUp()} but not otherwise read by this class.
   */
  private File gateHome;

  /**
   * Location of uima plugin directory - passed in as a system property by the
   * test runner.
   */
  private File uimaPlugin;

  /**
   * test/conf directory under uima plugin.
   */
  private File testConfDir;

  /**
   * The UIMA XML parser, used to parse TAE descriptors.
   */
  private XMLParser uimaXMLParser;

  /**
   * Set up the fixture: resolve the directories named by the system
   * properties and obtain the UIMA XML parser.
   *
   * @throws Exception if the superclass set-up fails.
   */
  protected void setUp() throws Exception {
    super.setUp();

    // Fail with a readable message rather than a bare NullPointerException
    // from the File constructor when the runner forgot to set a property.
    String uimaPluginLocation = System.getProperty(UIMA_PLUGIN_PROPERTY);
    assertNotNull("System property " + UIMA_PLUGIN_PROPERTY + " is not set",
                  uimaPluginLocation);
    String gateHomeLocation = System.getProperty(GATE_HOME_PROPERTY);
    assertNotNull("System property " + GATE_HOME_PROPERTY + " is not set",
                  gateHomeLocation);

    // get paths
    uimaPlugin = new File(uimaPluginLocation);
    gateHome = new File(gateHomeLocation);

    testConfDir = new File(new File(uimaPlugin, "test"), "conf");

    uimaXMLParser = UIMAFramework.getXMLParser();
  }

  /**
   * Clean up after ourselves.
   *
   * @throws Exception if the superclass tear-down fails.
   */
  protected void tearDown() throws Exception {
    super.tearDown();

  }

  /**
   * Build a suite containing every test method of this class.
   *
   * @return the test suite for this class.
   */
  public static Test suite() {
    return new TestSuite(TestGATEInUIMA.class);
  }


  /**
   * Run the analysis engine described by <code>TokenizerAndPOSTagger.xml</code>
   * over two short sentences and check that each token carries the expected
   * value in its <code>POS</code> feature, and that the number of tokens
   * matches the number of expected tags.
   *
   * @throws Exception if the descriptor cannot be read or parsed, or if the
   *         analysis engine cannot be created or fails while processing.
   */
  public void testGatePOSTagger() throws Exception {
    // load the TAE containing UIMA tokeniser and GATE POS tagger
    File tokAndPOSTaggerDescriptorFile =
      new File(testConfDir, "TokenizerAndPOSTagger.xml");

    XMLInputSource inputSource =
      new XMLInputSource(tokAndPOSTaggerDescriptorFile);

    ResourceSpecifier tokAndPOSTaggerDescriptor =
      uimaXMLParser.parseResourceSpecifier(inputSource);

    AnalysisEngine tokAndPOSTagger =
      UIMAFramework.produceAnalysisEngine(tokAndPOSTaggerDescriptor);

    try {
      // create CAS and populate it with initial text.
      CAS cas = tokAndPOSTagger.newCAS();

      cas.setDocumentText(
          "This is a test document. This is the second sentence.");
      // what POS tags do we expect to get back?
      String[] expectedPOSTags = new String[] {
        "DT",    // This
        "VBZ",   // is
        "DT",    // a
        "NN",    // test
        "NN",    // document
        ".",     // .
        "DT",    // This
        "VBZ",   // is
        "DT",    // the
        "JJ",    // second
        "NN",    // sentence
        "."      // .
      };

      // run the beast
      tokAndPOSTagger.process(cas);

      // check the results have the right POS tags
      Type tokenType = cas.getTypeSystem().getType(
          "org.apache.uima.examples.tokenizer.Token");
      assertNotNull("Token type not found in type system", tokenType);

      Feature posFeature = tokenType.getFeatureByBaseName("POS");
      assertNotNull("Token POS feature not found", posFeature);

      FSIndex tokensIndex = cas.getAnnotationIndex(tokenType);
      FSIterator tokensIt = tokensIndex.iterator();
      int tokenNo = 0;
      while(tokensIt.isValid()) {
        // make sure we don't have too many tokens; this also guards the
        // array access below against running off the end
        assertTrue("Found more tokens than expected",
                   tokenNo < expectedPOSTags.length);
        FeatureStructure token = tokensIt.get();
        String actualPOS = token.getStringValue(posFeature);
        assertEquals("Token has wrong part of speech",
                     expectedPOSTags[tokenNo], actualPOS);
        tokensIt.moveToNext();
        tokenNo++;
      }

      // expected value first, so a failure reports the counts the right
      // way round
      assertEquals("Found fewer tokens than expected",
                   expectedPOSTags.length, tokenNo);
    }
    finally {
      // release the engine's resources whether the assertions passed or not
      tokAndPOSTagger.destroy();
    }
  }
}