package gatecourse.module8;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import gate.AnnotationSet;
import gate.Document;
import gate.Factory;
import gate.GateConstants;
import gate.Resource;
import gate.corpora.MimeType;
import gate.corpora.TextualDocumentFormat;
import gate.creole.ResourceInstantiationException;
import gate.creole.metadata.AutoInstance;
import gate.creole.metadata.CreoleResource;
import gate.util.DocumentFormatException;
import gate.util.InvalidOffsetException;

/**
 * Document format that annotates a small subset of YAM wiki syntax.
 *
 * <p>The document text itself is left untouched; the format only adds
 * annotations to the "Original markups" annotation set:
 * <ul>
 *   <li>{@code *text*} produces a {@code strong} annotation over {@code text}</li>
 *   <li>{@code _text_} produces an {@code em} annotation over {@code text}</li>
 *   <li>{@code ^text^} produces a {@code tt} annotation over {@code text}</li>
 *   <li>{@code %N title} at the start of a line produces an {@code hN}
 *       annotation over {@code title}</li>
 * </ul>
 */
@CreoleResource(name = "YAM Document Format",
    comment = "Simple document format annotating some elements of YAM syntax",
    autoinstances = {@AutoInstance})
public class YamDocumentFormat extends TextualDocumentFormat {

  /** Bold span: {@code *...*} whose opening star is not preceded by a backslash. */
  protected static final Pattern BOLD_PATTERN =
      Pattern.compile("(?<!\\\\)\\*(.+?)\\*", Pattern.DOTALL);

  /** Italic span: {@code _..._} whose opening underscore is not preceded by a backslash. */
  protected static final Pattern ITALIC_PATTERN =
      Pattern.compile("(?<!\\\\)_(.+?)_", Pattern.DOTALL);

  /** Teletype span: {@code ^...^} whose opening caret is not preceded by a backslash. */
  protected static final Pattern TT_PATTERN =
      Pattern.compile("(?<!\\\\)\\^(.+?)\\^", Pattern.DOTALL);

  /**
   * Heading line: {@code %<digits> <title>}. Group 1 is the level digits,
   * group 2 is the title text up to the end of the line.
   */
  protected static final Pattern HEADING_PATTERN =
      Pattern.compile("^%(\\d+)\\s+(.*)$", Pattern.MULTILINE);

  /**
   * To keep things simple, this format does not support repositioning.
   */
  @Override
  public Boolean supportsRepositioning() {
    return false;
  }

  /**
   * Registers this instance as the handler for YAM documents.
   *
   * <p>The registration follows the usual GATE document format recipe: the
   * MIME type {@code text/x-yam} is mapped to this handler, the {@code yam}
   * file suffix is mapped to that MIME type, and the MIME type is recorded
   * on this resource. Without this step the format is instantiated but never
   * selected for any document.
   *
   * @return this resource
   * @throws ResourceInstantiationException declared by the overridden method;
   *         not thrown by this implementation
   */
  @Override
  public Resource init() throws ResourceInstantiationException {
    MimeType mime = new MimeType("text", "x-yam");
    String mimeString = mime.getType() + "/" + mime.getSubtype();

    // Registry maps inherited from gate.DocumentFormat.
    mimeString2ClassHandlerMap.put(mimeString, this);
    mimeString2mimeTypeMap.put(mimeString, mime);
    suffixes2mimeTypeMap.put("yam", mime);

    setMimeType(mime);
    return this;
  }

  /**
   * Scans the document text for YAM markup and adds the corresponding
   * annotations to the original markups annotation set.
   *
   * @param doc the document to annotate
   * @throws DocumentFormatException if an annotation cannot be added because
   *         its offsets are rejected by the annotation set
   */
  @Override
  public void unpackMarkup(Document doc) throws DocumentFormatException {
    try {
      AnnotationSet om =
          doc.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
      String docContent = doc.getContent().toString();

      // Order is kept as bold, italic, tt, headings so that annotations are
      // created in the same sequence as before.
      annotateFirstGroup(om, BOLD_PATTERN, docContent, "strong");
      annotateFirstGroup(om, ITALIC_PATTERN, docContent, "em");
      annotateFirstGroup(om, TT_PATTERN, docContent, "tt");

      // Headings differ: the annotation type comes from group 1 (the level)
      // and the annotated span is group 2 (the title).
      Matcher m = HEADING_PATTERN.matcher(docContent);
      while (m.find()) {
        om.add((long) m.start(2), (long) m.end(2), "h" + m.group(1),
            Factory.newFeatureMap());
      }
    } catch (InvalidOffsetException e) {
      throw new DocumentFormatException(e);
    }
  }

  /** Adds one annotation of the given type over group 1 of every match of the pattern. */
  private static void annotateFirstGroup(AnnotationSet target, Pattern pattern,
      String content, String type) throws InvalidOffsetException {
    Matcher m = pattern.matcher(content);
    while (m.find()) {
      target.add((long) m.start(1), (long) m.end(1), type,
          Factory.newFeatureMap());
    }
  }
}