// Log in | Help | Print
// Home 〉 gate 〉 plugins 〉 Tagger_PennBio 〉 src 〉 edu 〉 upenn 〉 cis 〉 taggers 〉 malignancy 〉 MalignancySegmentationEvaluator.java
 
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
 This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
 http://www.cs.umass.edu/~mccallum/mallet
 This software is provided under the terms of the Common Public License,
 version 1.0, as published by http://www.opensource.org.  For further
 information, see the file `LICENSE' included with this distribution. */


/**
 * 
 @author Ryan McDonald
 */

//package edu.umass.cs.mallet.users.ryantm.medline;
package edu.upenn.cis.taggers.malignancy;

import java.io.IOException;

import edu.umass.cs.mallet.base.fst.MultiSegmentationEvaluator;
import edu.umass.cs.mallet.base.fst.Transducer;
import edu.umass.cs.mallet.base.types.Instance;
import edu.umass.cs.mallet.base.types.InstanceList;
import edu.umass.cs.mallet.base.types.Sequence;
import edu.upenn.cis.taggers.Tag;
import edu.upenn.cis.taggers.TagList;

/**
 * Turns the Viterbi labelling that a transducer assigns to a token sequence
 * into a {@link TagList} of labelled segments.
 *
 * <p>A segment begins at a token whose predicted label equals one of the
 * configured start tags and extends over every immediately following token
 * whose predicted label equals the continue tag stored at the same array
 * index.  The two tag arrays are therefore read as parallel arrays.
 */
public class MalignancySegmentationEvaluator extends MultiSegmentationEvaluator
{
  /** Labels that open a segment; entry <code>s</code> pairs with <code>segmentContinueTags[s]</code>. */
  String[] segmentStartTags;
  /** Labels that extend a segment opened by the start tag at the same index. */
  String[] segmentContinueTags;

  /**
   * Creates an evaluator for the given start/continue label pairs.
   *
   * @param segmentStartTags    labels that open a segment
   * @param segmentContinueTags labels that continue a segment; looked up with
   *                            the same index as the matching start tag
   */
  public MalignancySegmentationEvaluator (String[] segmentStartTags, String[] segmentContinueTags)
  {
    // The trailing 'false' is handed unchanged to the superclass constructor;
    // NOTE(review): its meaning is defined by MultiSegmentationEvaluator and is
    // not visible from this file.
    super(segmentStartTags, segmentContinueTags, false);
    this.segmentStartTags = segmentStartTags;
    this.segmentContinueTags = segmentContinueTags;
  }

  /**
   * Same as {@link #output(Transducer, InstanceList, java.util.Vector)} with
   * no span information, so no offsets are recorded on the returned tags.
   *
   * @param crf  transducer used to label the input
   * @param eval instances to read from; only the last one is labelled
   * @return the segments found in the last instance
   * @throws IOException declared for callers; kept as part of the signature
   */
  public TagList output (Transducer crf, InstanceList eval) throws IOException {
    return output(crf, eval, null);
  }

  /**
   * Labels the <em>last</em> instance of <code>eval</code> with the Viterbi
   * path of <code>crf</code> and collects the resulting segments.
   *
   * <p>The instance's name is expected to be the <code>String[]</code> of
   * tokens and its data the input {@link Sequence}.  Each tag is named after
   * its start label with the first two characters removed (presumably a
   * "B-"-style prefix; verify against the label set in use).
   *
   * @param crf   transducer used to label the input
   * @param eval  instances to read from; earlier instances are ignored
   * @param spans optional per-token <code>int[]</code> entries, indexed like
   *              the tokens; when non-null, elements 0 and 1 of the entry for
   *              each segment token are passed to <code>Tag.updateOffset</code>
   *              (presumably character offsets; confirm with the caller).
   *              May be <code>null</code>.
   * @return the segments in token order; empty when <code>eval</code> holds
   *         no instances
   * @throws IOException declared for callers; kept as part of the signature
   */
  public TagList output (Transducer crf, InstanceList eval, java.util.Vector spans) throws IOException
  {
    TagList tagList = new TagList();

    // Nothing to label: previously this fell through to getInstance(-1).
    if (eval.size() == 0) {
      return tagList;
    }

    Instance instance = eval.getInstance(eval.size() - 1);
    Sequence input = (Sequence) instance.getData();
    Sequence predOutput = crf.viterbiPath(input).output();
    String[] tokens = (String[]) instance.getName();

    int position = 0;
    while (position < tokens.length) {
      int s = matchingStartTagIndex((String) predOutput.get(position));
      if (s < 0) {
        // Token is outside any segment.
        position++;
        continue;
      }

      Tag tag = new Tag(segmentStartTags[s].substring(2));
      addTokenToTag(tag, tokens, position, spans);

      // Absorb the run of continue labels that belongs to this start tag.
      int next = position + 1;
      while (next < tokens.length
             && ((String) predOutput.get(next)).equals(segmentContinueTags[s])) {
        addTokenToTag(tag, tokens, next, spans);
        next++;
      }

      tagList.addTag(tag);
      // Resume scanning at the first token after the segment.
      position = next;
    }
    return tagList;
  }

  /** Returns the index of the first start tag equal to <code>label</code>, or -1 if none matches. */
  private int matchingStartTagIndex (String label)
  {
    for (int s = 0; s < segmentStartTags.length; s++) {
      if (label.equals(segmentStartTags[s])) {
        return s;
      }
    }
    return -1;
  }

  /** Appends token <code>index</code> to <code>tag</code> and, when spans are supplied, widens the tag's offsets with that token's span. */
  private void addTokenToTag (Tag tag, String[] tokens, int index, java.util.Vector spans)
  {
    tag.addSegment(tokens[index], index);
    if (spans != null) {
      int[] span = (int[]) spans.get(index);
      tag.updateOffset(span[0], span[1]);
    }
  }

}