1
15
16 package gate.creole.nerc;
17
18 import java.io.Serializable;
19
20 import gate.Annotation;
21 import gate.Document;
22 import gate.util.InvalidOffsetException;
23
24
25 public class EntityDescriptor implements Serializable{
26
27
28 public EntityDescriptor(String string, String category, int start, int end) {
29 this.string = normaliseString(string);
30 this.category = category;
31 offsets = new int[2];
32 offsets[0] = start;
33 offsets[1] = end;
34 }
35
36
37 public EntityDescriptor(Document document, Annotation annotation) {
38 offsets = new int[2];
39 offsets[0] = annotation.getStartNode().getOffset().intValue();
40 offsets[1] = annotation.getEndNode().getOffset().intValue();
41 try{
42 string = normaliseString(document.getContent().getContent(
43 annotation.getStartNode().getOffset(),
44 annotation.getEndNode().getOffset()).
45 toString());
46 } catch(InvalidOffsetException ioe){
47 ioe.printStackTrace();
48 }
49 category = annotation.getType();
50 }
51
52
56 public String getString(){
57 return string;
58 }
59
60
61 public String getCategory(){
62 return category;
63 }
64
65
68 public int[] getOffsets(){
69 return offsets;
70 }
71
72
75 public String toString(){
76 return category + " " + offsets[0] + " " + offsets[1] + " " + string;
77 }
78
79 String string;
80 String category;
81 int[] offsets;
82
83
87 protected String normaliseString(String text){
88 StringBuffer res = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
90 if(text == null) return null;
91 int charIdx = 0;
92 boolean lastWasSpace = false;
93 while(charIdx < text.length() &&
95 Character.isWhitespace(text.charAt(charIdx))) charIdx++;
96 while(charIdx < text.length()){
98 if(Character.isWhitespace(text.charAt(charIdx))){
99 lastWasSpace = true;
101 }else{
102 if(lastWasSpace) res.append(" ");
105 res.append(text.charAt(charIdx));
107 lastWasSpace = false;
108 }
109 charIdx++;
110 } return res.toString();
112 }
113
114 }
115