// GATE.ac.uk - releases/gate-8.4-build5748-ALL/plugins/Schema_Tools/src/gate/creole/schema/SchemaEnforcer.java

/*
 * SchemaEnforcer.java
 *
 * Copyright (c) 2010, The University of Sheffield.
 *
 * This file is part of GATE (see http://gate.ac.uk/), and is free software,
 * licenced under the GNU Library General Public License, Version 2, June 1991
 * (in the distribution as file licence.html, and also available at
 * http://gate.ac.uk/gate/licence.html).
 *
 * Mark A. Greenwood, 13/08/2010
 */

package gate.creole.schema;

import gate.Annotation;
import gate.AnnotationSet;
import gate.Factory;
import gate.FeatureMap;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.AnnotationSchema;
import gate.creole.ExecutionException;
import gate.creole.FeatureSchema;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.creole.metadata.RunTime;
import gate.util.InvalidOffsetException;

import java.util.ArrayList;
import java.util.List;

/**
 * Processing resource that copies annotations from an input annotation set
 * into an (initially empty) output annotation set, keeping only annotations
 * whose type matches one of the configured schemas and only those features
 * that the matching schema declares and accepts.
 *
 * <p>An annotation is dropped altogether when a feature the schema marks as
 * required is missing or invalid and no default value may be substituted.
 */
@SuppressWarnings("serial")
@CreoleResource(name = "Schema Enforcer", interfaceName = "gate.ProcessingResource", icon = "enforce.png", comment = "Produces an annotation set whose content is restricted by the specified set of schemas", helpURL="http://gate.ac.uk/userguide/sec:misc-creole:schemaenforcer")
public class SchemaEnforcer extends AbstractLanguageAnalyser {

  /** Name of the annotation set the annotations are read from. */
  private String inputASName = null;

  /** Name of the annotation set the cleaned annotations are written to. */
  private String outputASName = null;

  /**
   * Whether a schema's default value may stand in for a missing or invalid
   * required feature.
   */
  private boolean useDefaults = false;

  /** The schemas that annotations are checked against. */
  private List<AnnotationSchema> schemas = new ArrayList<AnnotationSchema>();

  /**
   * Runs the enforcement over the current document.
   *
   * <p>For every configured schema, each input annotation of the schema's
   * type is re-created in the output set with the same id, offsets and type,
   * carrying only the features that pass the schema's checks.
   *
   * @throws ExecutionException if no schemas are configured, if the output
   *           annotation set is not empty, or if an annotation cannot be
   *           added to the output set because of invalid offsets
   */
  @Override
  public void execute() throws ExecutionException {

    // without schemas there is nothing to enforce; the null check covers the
    // case where the parameter was explicitly set to null
    if(schemas == null || schemas.isEmpty()) {
      throw new ExecutionException(
              "At least one schema must be provided for enforcement!");
    }

    // the set the clean annotations will be stored in
    AnnotationSet outputAS = getDocument().getAnnotations(outputASName);

    // to ensure a clean set of annotations the output set must be empty
    // before we start adding annotations to it
    if(!outputAS.isEmpty()) {
      throw new ExecutionException("Output AnnotationSet must be empty");
    }

    // the set the (possibly dirty) annotations are read from
    AnnotationSet inputAS = getDocument().getAnnotations(inputASName);

    for(AnnotationSchema schema : schemas) {

      // all the annotations of the same type as the current schema
      AnnotationSet annots = inputAS.get(schema.getAnnotationName());

      // no annotations of this type, so move on to the next schema
      if(annots == null) {
        continue;
      }

      for(Annotation a : annots) {

        // null means the annotation failed a required-feature check
        FeatureMap params = buildCleanFeatures(schema, a);
        if(params == null) {
          continue;
        }

        try {
          // re-create the annotation in the output set, keeping its id
          outputAS.add(a.getId(), a.getStartNode().getOffset(), a
                  .getEndNode().getOffset(), schema.getAnnotationName(),
                  params);
        } catch(InvalidOffsetException e) {
          // the offsets come from an existing annotation, so this is not
          // expected to happen
          throw new ExecutionException(e);
        }
      }
    }
  }

  /**
   * Builds the feature map for the clean copy of an annotation.
   *
   * @param schema the schema the annotation is being checked against
   * @param a the annotation whose features are filtered
   * @return a new feature map holding only the features accepted by the
   *         schema (empty if the schema declares no features), or
   *         {@code null} if a required feature could not be supplied
   */
  private FeatureMap buildCleanFeatures(AnnotationSchema schema, Annotation a) {

    FeatureMap params = Factory.newFeatureMap();

    // a schema without feature definitions accepts the annotation as is,
    // but none of its features are carried over
    if(schema.getFeatureSchemaSet() == null) {
      return params;
    }

    FeatureMap current = a.getFeatures();

    for(FeatureSchema fs : schema.getFeatureSchemaSet()) {

      String fn = fs.getFeatureName();

      // an annotation without a feature map simply has no value to offer
      Object fv = (current == null) ? null : current.get(fn);

      // keep the existing value only if it has the declared type and, for
      // enumerated features, is one of the permitted values
      if(fv != null
              && fs.getFeatureValueClass().isAssignableFrom(fv.getClass())
              && (!fs.isEnumeration() || fs.getPermittedValues().contains(fv))) {
        params.put(fn, fv);
      }

      if(fs.isRequired() && !params.containsKey(fn)) {
        String defaultValue = fs.getRawFeatureValue();
        if(useDefaults && defaultValue != null && !defaultValue.equals("")) {
          // the user allows defaults and the schema specifies one, so use it
          // instead of rejecting the annotation
          // NOTE(review): the raw String default is stored whatever the
          // feature's declared value class is - confirm this is intended
          params.put(fn, defaultValue);
        } else {
          // a required feature is missing or invalid and cannot be
          // defaulted, so the annotation as a whole is invalid
          return null;
        }
      }
    }

    return params;
  }

  /**
   * Sets whether schema default values may replace missing or invalid
   * required features.
   *
   * @param useDefaults the new setting; {@code null} is treated as
   *          {@code false}
   */
  @RunTime
  @CreoleParameter(comment = "should we use the default value to add missing required features", defaultValue = "false")
  public void setUseDefaults(Boolean useDefaults) {
    // guard against unboxing a null Boolean into the primitive field
    this.useDefaults = useDefaults != null && useDefaults;
  }

  /**
   * @return whether schema default values may replace missing or invalid
   *         required features
   */
  public Boolean getUseDefaults() {
    return useDefaults;
  }

  /**
   * Sets the name of the annotation set used as input.
   *
   * @param name the input annotation set name
   */
  @RunTime
  @Optional
  @CreoleParameter(comment = "the annotation set used as input to this PR")
  public void setInputASName(String name) {
    inputASName = name;
  }

  /**
   * @return the name of the annotation set used as input
   */
  public String getInputASName() {
    return inputASName;
  }

  /**
   * Sets the name of the annotation set the output is stored in.
   *
   * @param name the output annotation set name
   */
  @RunTime
  @CreoleParameter(comment = "the annotation set used to store output from this PR")
  public void setOutputASName(String name) {
    outputASName = name;
  }

  /**
   * @return the name of the annotation set the output is stored in
   */
  public String getOutputASName() {
    return outputASName;
  }

  /**
   * Sets the schemas that define which annotations and features are kept.
   *
   * @param schemas the schemas to enforce; {@link #execute()} fails if this
   *          is {@code null} or empty
   */
  @RunTime
  @CreoleParameter(comment = "the list of schemas that define the annotations to move from the input to the output annotation set")
  public void setSchemas(List<AnnotationSchema> schemas) {
    this.schemas = schemas;
  }

  /**
   * @return the schemas that define which annotations and features are kept
   */
  public List<AnnotationSchema> getSchemas() {
    return schemas;
  }
}