|
DFSMState |
|
1 /* 2 * DFSMState.java 3 * 4 * Copyright (c) 1998-2001, The University of Sheffield. 5 * 6 * This file is part of GATE (see http://gate.ac.uk/), and is free 7 * software, licenced under the GNU Library General Public License, 8 * Version 2, June 1991 (in the distribution as file licence.html, 9 * and also available at http://gate.ac.uk/gate/licence.html). 10 * 11 * Valentin Tablan, 27/06/2000 12 * 13 * $Id: DFSMState.java,v 1.17 2002/07/02 13:15:46 nasso Exp $ 14 */ 15 16 /* 17 modified by OntoText, Aug 29 18 19 */ 20 21 package gate.creole.tokeniser; 22 23 import java.util.*; 24 25 import gate.util.*; 26 27 /** Implements a state of the deterministic finite state machine of the 28 * tokeniser. 29 * It differs from {@link FSMState FSMState} by the definition of the 30 * transition function which in this case maps character types to other states 31 * as oposed to the transition function from FSMState which maps character 32 * types to sets of states, hence the nondeterministic character. 33 * {@see FSMState FSMState} 34 */ 35 class DFSMState implements java.io.Serializable { //extends FSMState{ 36 37 /** Debug flag */ 38 private static final boolean DEBUG = false; 39 40 /** Constructs a new DFSMState object and adds it to the list of deterministic 41 * states of the {@link DefaultTokeniser DefaultTokeniser} provided as owner. 42 * @param owner a {@link DefaultTokeniser DefaultTokeniser} object 43 */ 44 public DFSMState(SimpleTokeniser owner){ 45 myIndex = index++; 46 owner.dfsmStates.add(this); 47 } 48 49 /** Adds a new mapping in the transition function of this state 50 * @param type the UnicodeType for this mapping 51 * @state the next state of the FSM Machine when a character of type type 52 * is read from the input. 53 */ 54 void put(UnicodeType type, DFSMState state){ 55 put(type.type, state); 56 } // put(UnicodeType type, DFSMState state) 57 58 /** Adds a new mapping using the actual index in the internal array. 59 * This method is for internal use only. Use 60 * {@link #put(gate.creole.tokeniser.UnicodeType, 61 * gate.creole.tokeniser.DFSMState)} instead. 62 */ 63 void put(int index, DFSMState state){ 64 transitionFunction[index] = state; 65 } // put(int index, DFSMState state) 66 67 /** This method is used to access the transition function of this state. 68 * @param type the Unicode type identifier as the corresponding static value 69 * on {@link java.lang.Character} 70 */ 71 DFSMState next(int type){//UnicodeType type){ 72 return transitionFunction[type]; 73 } // next 74 75 /** Returns a GML (Graph Modelling Language) representation of the edges 76 * emerging from this state 77 */ 78 String getEdgesGML(){ 79 ///String res = ""; 80 //OT 81 StringBuffer res = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE); 82 Set nextSet; 83 Iterator nextSetIter; 84 DFSMState nextState; 85 86 for(int i = 0; i< transitionFunction.length; i++){ 87 nextState = transitionFunction[i]; 88 if(null != nextState){ 89 /* 90 res += "edge [ source " + myIndex + 91 " target " + nextState.getIndex() + 92 " label \""; 93 res += SimpleTokeniser.typeMnemonics[i]; 94 res += "\" ]\n"; 95 */ 96 //OT 97 res.append("edge [ source "); 98 res.append(myIndex); 99 res.append(" target "); 100 res.append(nextState.getIndex()); 101 res.append(" label \""); 102 res.append(SimpleTokeniser.typeMnemonics[i]); 103 res.append("\" ]\n"); 104 } 105 }; 106 return res.toString(); 107 } // getEdgesGML 108 109 /** Builds the token description for the token that will be generated when 110 * this <b>final</b> state will be reached and the action associated with it 111 * will be fired. 112 * See also {@link #setRhs(String)}. 113 */ 114 void buildTokenDesc() throws TokeniserException{ 115 String ignorables = " \t\f"; 116 String token = null, 117 type = null, 118 attribute = null, 119 value = null 120 ///prefix = null, 121 ///read ="" 122 ; 123 //OT 124 StringBuffer prefix = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE); 125 StringBuffer read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE); 126 127 LinkedList attributes = new LinkedList(), 128 values = new LinkedList(); 129 StringTokenizer mainSt = 130 new StringTokenizer(rhs, ignorables + "\\\";=", true); 131 132 int descIndex = 0; 133 //phase means: 134 //0 == looking for type; 135 //1 == looking for attribute; 136 //2 == looking for value; 137 //3 == write the attr/value pair 138 int phase = 0; 139 140 while(mainSt.hasMoreTokens()) { 141 token = SimpleTokeniser.skipIgnoreTokens(mainSt); 142 143 if(token.equals("\\")){ 144 if(null == prefix) 145 ///prefix = mainSt.nextToken(); 146 //OT 147 prefix = new StringBuffer(mainSt.nextToken()); 148 else ///prefix += mainSt.nextToken(); 149 //OT 150 prefix.append(mainSt.nextToken()); 151 continue; 152 } else if(null != prefix) { 153 ///read += prefix; 154 //OT 155 read.append(prefix.toString()); 156 prefix = null; 157 } 158 159 if(token.equals("\"")){ 160 ///read = mainSt.nextToken("\""); 161 //OT 162 read = new StringBuffer(mainSt.nextToken("\"")); 163 if(read.equals("\"")) ///read = ""; 164 read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE); 165 else { 166 //delete the remaining enclosing quote and restore the delimiters 167 mainSt.nextToken(ignorables + "\\\";="); 168 } 169 170 } else if(token.equals("=")) { 171 172 if(phase == 1){ 173 ///attribute = read; 174 //OT 175 attribute = read.toString(); 176 ///read = ""; 177 //OT 178 read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE); 179 phase = 2; 180 }else throw new TokeniserException("Invalid attribute format: " + 181 read); 182 } else if(token.equals(";")) { 183 if(phase == 0){ 184 ///type = read; 185 type = read.toString(); 186 ///read = ""; 187 read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE); 188 //Out.print("Type: " + type); 189 attributes.addLast(type); 190 values.addLast(""); 191 phase = 1; 192 } else if(phase == 2) { 193 ///value = read; 194 value = read.toString(); 195 ///read = ""; 196 read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE); 197 phase = 3; 198 } else throw new TokeniserException("Invalid value format: " + 199 read); 200 } else ///read += token; 201 read.append(token); 202 203 if(phase == 3) { 204 // Out.print("; " + attribute + "=" + value); 205 attributes.addLast(attribute); 206 values.addLast(value); 207 phase = 1; 208 } 209 } 210 //Out.println(); 211 if(attributes.size() < 1) 212 throw new InvalidRuleException("Invalid right hand side " + rhs); 213 tokenDesc = new String[attributes.size()][2]; 214 215 for(int i = 0; i < attributes.size(); i++) { 216 tokenDesc[i][0] = (String)attributes.get(i); 217 tokenDesc[i][1] = (String)values.get(i); 218 } 219 220 // for(int i = 0; i < attributes.size(); i++){ 221 // Out.println(tokenDesc[i][0] + "=" + 222 // tokenDesc[i][1]); 223 // } 224 } // buildTokenDesc 225 226 /** Sets the right hand side associated with this state. The RHS is 227 * represented as a string value that will be parsed by the 228 * {@link #buildTokenDesc()} method being converted in a table of strings 229 * with 2 columns and as many lines as necessary. 230 * @param rhs the RHS string 231 */ 232 void setRhs(String rhs) { this.rhs = rhs; } 233 234 /** Returns the RHS string*/ 235 String getRhs(){return rhs;} 236 237 /** Checks whether this state is a final one*/ 238 boolean isFinal() { return (null != rhs); } 239 240 /** Returns the unique ID of this state.*/ 241 int getIndex() { return myIndex; } 242 243 /** Returns the token description associated with this state. This description 244 * is built by {@link #buildTokenDesc()} method and consists of a table of 245 * strings having two columns. 246 * The first line of the table contains the annotation type on the first 247 * position and nothing on the second. 248 * Each line after the first one contains a attribute on the first position 249 * and its associated value on the second. 250 */ 251 String[][] getTokenDesc() { 252 return tokenDesc; 253 } 254 255 /** A table of strings describing an annotation. 256 * The first line of the table contains the annotation type on the first 257 * position and nothing on the second. 258 * Each line after the first one contains a attribute on the first position 259 * and its associated value on the second. 260 */ 261 String[][] tokenDesc; 262 263 /** The transition function of this state. 264 */ 265 DFSMState[] transitionFunction = new DFSMState[SimpleTokeniser.maxTypeId]; 266 267 /** The string of the RHS of the rule from which the token 268 * description is built 269 */ 270 String rhs; 271 272 /** The unique index of this state*/ 273 int myIndex; 274 275 /** Used to generate unique indices for all the objects of this class*/ 276 static int index; 277 278 static { 279 index = 0; 280 } 281 282 } // class DFSMState 283
|
DFSMState |
|