Log in Help
Print
Home 〉 gate 〉 plugins 〉 Tagger_PennBio 〉 src 〉 edu 〉 upenn 〉 cis 〉 taggers 〉 variation 〉 VariationSegmentationEvaluator.java
 
/* Copyright (C) 2004 Univ. of Pennsylvania
    This software is provided under the terms of the Common Public License,
    version 1.0, as published by http://www.opensource.org.  For further
    information, see the file `LICENSE' included with this distribution. */

package edu.upenn.cis.taggers.variation;

import java.io.IOException;

import edu.umass.cs.mallet.base.fst.MultiSegmentationEvaluator;
import edu.umass.cs.mallet.base.fst.Transducer;
import edu.umass.cs.mallet.base.types.Instance;
import edu.umass.cs.mallet.base.types.InstanceList;
import edu.umass.cs.mallet.base.types.Sequence;
import edu.upenn.cis.taggers.Tag;
import edu.upenn.cis.taggers.TagList;

/**
 * Turns the Viterbi labelling produced by a transducer into a {@link TagList} of tagged
 * token segments.
 * This class should never be called directly; it exists as an auxiliary to VariationTagger.
 * @author Kevin Lerman <a href="mailto:klerman@seas.upenn.edu">klerman@seas.upenn.edu</a>
 * @author Ryan McDonald <a href="mailto:ryantm@cis.upenn.edu">ryantm@cis.upenn.edu</a>
 */
public class VariationSegmentationEvaluator extends MultiSegmentationEvaluator
{
  // Local copies of the arrays handed to the superclass; output() reads them directly.
  // Entry i of segmentContinueTags is the continuation label paired with start label i.
  String[] segmentStartTags;
  String[] segmentContinueTags;

  /**
   * @param segmentStartTags labels that open a segment
   * @param segmentContinueTags labels that extend a segment; index-aligned with
   *        {@code segmentStartTags}
   */
  public VariationSegmentationEvaluator (String[] segmentStartTags, String[] segmentContinueTags)
  {
    super(segmentStartTags, segmentContinueTags, false);
    this.segmentStartTags = segmentStartTags;
    this.segmentContinueTags = segmentContinueTags;
  }

  /**
   * Same as {@link #output(Transducer, InstanceList, java.util.Vector)} with no span
   * information, so no offsets are recorded on the returned tags.
   */
  public TagList output (Transducer crf, InstanceList eval) throws IOException {
    return output(crf, eval, null);
  }

  /**
   * Labels the LAST instance of {@code eval} with {@code crf} and collects every predicted
   * segment into a {@link TagList}. Earlier instances in the list are ignored.
   *
   * <p>A segment begins at a token whose predicted label equals one of the start tags and
   * extends over the immediately following tokens whose label equals the matching continue
   * tag. The tag is named after the start label with its first two characters removed
   * (presumably a "B-"-style prefix; confirm against the label set in use).
   *
   * @param crf   transducer used to compute the Viterbi path for the instance data
   * @param eval  instance list; the instance name is expected to be the {@code String[]} of
   *              tokens and the instance data a {@link Sequence}
   * @param spans optional per-token {@code int[]} entries, indexed like the tokens; elements
   *              0 and 1 of each entry are passed to {@code Tag.updateOffset}. May be
   *              {@code null}, in which case no offsets are recorded.
   * @return the segments found, in token order; empty when {@code eval} holds no instances
   *         or no start tag was predicted
   * @throws IOException declared for callers; kept for interface compatibility
   */
  public TagList output (Transducer crf, InstanceList eval, java.util.Vector spans) throws IOException
  {
    TagList tagList = new TagList();
    if (eval.size() == 0) {
      // Nothing to decode; avoids asking the list for instance -1.
      return tagList;
    }

    Instance instance = eval.getInstance(eval.size() - 1);
    Sequence input = (Sequence) instance.getData();
    Sequence predOutput = crf.viterbiPath(input).output();
    String[] tokens = (String[]) instance.getName();

    for (int j = 0; j < tokens.length; j++) {
      String label = (String) predOutput.get(j);
      for (int s = 0; s < segmentStartTags.length; s++) {
        if (!label.equals(segmentStartTags[s])) {
          continue;
        }
        Tag tag = new Tag(segmentStartTags[s].substring(2));
        recordToken(tag, tokens, j, spans);
        // Absorb the run of continuation tokens; j ends on the last token of the segment so
        // the outer loop resumes right after it.
        while (j + 1 < tokens.length
            && ((String) predOutput.get(j + 1)).equals(segmentContinueTags[s])) {
          j++;
          recordToken(tag, tokens, j, spans);
        }
        tagList.addTag(tag);
        break;
      }
    }
    return tagList;
  }

  /** Adds token {@code index} to {@code tag} and, when spans are supplied, its offsets. */
  private static void recordToken (Tag tag, String[] tokens, int index, java.util.Vector spans)
  {
    tag.addSegment(tokens[index], index);
    if (spans != null) {
      int[] span = (int[]) spans.get(index);
      tag.updateOffset(span[0], span[1]);
    }
  }

}