JAPE and Coordinated Annotations
Contents
1. Using a series of coordinated annotations
Sample input:
Number of employees: 7,753 (30/06/2007) 7,569 (30/06/2006)
JAPE rule:
// Matches "Employees:" / "employees:" followed by one or more
// "<Amount> ( <Date> )" groups, and creates one "Mention" annotation per
// Amount whose "date" feature holds the text of the Date paired with it.
Rule: EmployeesYear
(
  (
    {Token.string == "Employees"} |
    {Token.string == "employees"}
  )
  {Token.string == ":"}
  (
    ({Amount}):mention
    {Token.string == "("}
    ({Date}):date
    ({Token.string == ")"})
    ({Split})?
  )+
)
-->
{
  // Both labels sit inside the repeated group, so each binding collects the
  // annotations matched under that label across all repetitions.
  List<Annotation> amounts =
      new ArrayList<Annotation>((AnnotationSet) bindings.get("mention"));
  List<Annotation> dates =
      new ArrayList<Annotation>((AnnotationSet) bindings.get("date"));

  if (amounts.size() != dates.size()) {
    // Not expected to happen: every Amount in the pattern is followed by a Date.
    System.err.println("Unequal number of Date and Mention found in EmployeesYear");
  }
  else {
    // Sort both lists by offset so that position k in one list corresponds
    // to position k in the other.
    OffsetComparator byOffset = new OffsetComparator();
    Collections.sort(dates, byOffset);
    Collections.sort(amounts, byOffset);

    for (int k = 0; k < dates.size(); k++) {
      Annotation when = dates.get(k);
      Annotation amount = amounts.get(k);

      // Read the document text covered by the Date annotation.
      String dateText;
      try {
        Long from = when.getStartNode().getOffset();
        Long to = when.getEndNode().getOffset();
        dateText = doc.getContent().getContent(from, to).toString();
      }
      catch (InvalidOffsetException ioe) {
        // Offsets come from an existing annotation, so this is not expected.
        throw new GateRuntimeException(ioe);
      }

      // Build the Mention over the Amount span, carrying the paired date text.
      FeatureMap mentionFeatures = Factory.newFeatureMap();
      mentionFeatures.put("class", "Number");
      mentionFeatures.put("date", dateText);
      mentionFeatures.put("note", "Number_of_Employees");
      mentionFeatures.put("xbrl_id", "nt.employees");
      mentionFeatures.put("rule", "EmployeesYear");
      outputAS.add(amount.getStartNode(), amount.getEndNode(),
                   "Mention", mentionFeatures);
    }
  }
} // end EmployeesYear RHS
2. Iterating through a list of things and annotating each one with the same information
// LIST: one NP followed by zero or more "(AND) NP" continuations.
// The AND macro is referenced here but not defined in this snippet.
Macro: LIST
( {NP} ( (AND) {NP} )* )
// After an existing FishClass followed by AND, marks every NP in the
// following LIST as a FishClass as well, tagging it with rule="FishList1".
Rule: FishList1
// herrings, sardines, pilchards, sprats, and anchovies
(
  {FishClass}
  (AND)
)
(
  (LIST)
):mention
-->
{
  // Copy the annotations bound to "mention" into a list and order them by offset.
  List<Annotation> bound =
      new ArrayList<Annotation>((AnnotationSet) bindings.get("mention"));
  Collections.sort(bound, new OffsetComparator());

  for (Annotation candidate : bound) {
    // Only NP annotations are re-labelled; anything else bound under the
    // label (e.g. whatever the AND macro matched) is skipped. An annotation
    // whose type is "NP" cannot also have type "FishClass", so no separate
    // FishClass test is needed here.
    if (!"NP".equals(candidate.getType())) {
      continue;
    }

    FeatureMap features = Factory.newFeatureMap();
    features.put("rule", "FishList1");
    // Write to outputAS, as the other rules on this page do, rather than the
    // legacy "annotations" variable.
    outputAS.add(candidate.getStartNode(), candidate.getEndNode(),
                 "FishClass", features);
  }
}
3. Putting the antecedents' strings on pronouns identified by the pronoun co-referencer
// For an Organization annotation whose ENTITY_MENTION_TYPE is "PRONOUN",
// looks up the annotation listed in its "matches" feature that starts at
// "antecedent_offset" and stores that annotation's text on the pronoun as
// "antecedent_string". If no such annotation is found, "antecedent_error"
// is set instead.
Rule: Entity
(
  ({Organization.ENTITY_MENTION_TYPE == "PRONOUN"})
):entity
-->
:entity {
  Annotation entity = entityAnnots.iterator().next();
  FeatureMap entityFeatures = entity.getFeatures();

  // The offset may be stored as a Long or as something whose string form
  // is a number; normalise it to a Long, or leave it null if unusable.
  Object offsetObj = entityFeatures.get("antecedent_offset");
  Long antecedentOffset = null;
  if (offsetObj == null) {
    System.err.println("Missing antecedent_offset on " + entity.getId());
  }
  else if (offsetObj instanceof Long) {
    antecedentOffset = (Long) offsetObj;
  }
  else {
    try {
      antecedentOffset = Long.valueOf(offsetObj.toString());
    }
    catch (NumberFormatException nfe) {
      // Report and skip rather than aborting the whole grammar run.
      System.err.println("Unparseable antecedent_offset '" + offsetObj
          + "' on " + entity.getId());
    }
  }

  if (antecedentOffset != null) {
    // "matches" is presumably a list of annotation ids (confirm against the
    // co-referencer's output); tolerate it being absent or of another type.
    Object matchesObj = entityFeatures.get("matches");
    Annotation antecedent = null;
    if (matchesObj instanceof List) {
      for (Object id : (List<?>) matchesObj) {
        if (!(id instanceof Integer)) {
          continue;
        }
        // An id that is not in the input set yields null; skip it.
        Annotation candidate = inputAS.get((Integer) id);
        if (candidate != null
            && candidate.getStartNode().getOffset().equals(antecedentOffset)) {
          antecedent = candidate;
          break;
        }
      }
    }

    if (antecedent != null) {
      Long end = antecedent.getEndNode().getOffset();
      try {
        String antecedentString =
            doc.getContent().getContent(antecedentOffset, end).toString();
        entityFeatures.put("antecedent_string", antecedentString);
      }
      catch (InvalidOffsetException e) {
        System.out.println("doc = " + doc.getName() + " @" + antecedentOffset + ".." + end);
        e.printStackTrace();
      }
    }
    else {
      // No listed match starts at the recorded antecedent offset.
      entityFeatures.put("antecedent_error", "error");
    }
  }
}




