1   /*
2    *  ConstraintGroup.java - transducer class
3    *
4    *  Copyright (c) 1998-2001, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Hamish Cunningham, 24/07/98
12   *
13   *  $Id: ConstraintGroup.java,v 1.8 2001/09/13 12:09:50 kalina Exp $
14   */
15  
16  
17  package gate.jape;
18  
19  import java.util.*;
20  import gate.annotation.*;
21  import gate.util.*;
22  import gate.*;
23  
24  
25  /**
26    * A sequence of conjunctions of PatternElement that form a
27    * disjunction.
28    */
29  public class ConstraintGroup
30  extends PatternElement implements JapeConstants, java.io.Serializable
31  {
32    /** Debug flag */
33    private static final boolean DEBUG = false;
34  
35    /** Anonymous constructor. */
36    public ConstraintGroup() {
37      patternElementDisjunction1 = new ArrayList();
38      currentConjunction = new ArrayList();
39      patternElementDisjunction1.add(currentConjunction);
40    } // Anonymous constructor
41  
42    /** Need cloning for processing of macro references. See comments on
43      * <CODE>PatternElement.clone()</CODE>
44      */
45    public Object clone() {
46      ConstraintGroup newPE = (ConstraintGroup) super.clone();
47  
48      // created by createDisjunction
49      newPE.currentConjunction = null;
50  
51      newPE.patternElementDisjunction1 = new ArrayList();
52      // for each (conjunction) member of the pattern element discjunction
53      for(
54        Iterator disjunction = patternElementDisjunction1.iterator();
55        disjunction.hasNext();
56  
57      ) {
58  
59        newPE.createDisjunction();
60        // for each pattern element making up this conjunction
61        for(
62          Iterator conjunction = ((ArrayList) disjunction.next()).iterator();
63          conjunction.hasNext();
64  
65        ) {
66          PatternElement pat = (PatternElement) conjunction.next();
67  
68          newPE.addPatternElement((PatternElement) pat.clone());
69        } // for each element of the conjunction
70      } // for each conjunction (element of the disjunction)
71  
72      return newPE;
73    } // clone
74  
75    /** An array of arrays that represent PatternElement conjunctions
76      * during parsing of the .jape. Each conjunction is
77      * considered as being disjunct with the next. (I.e. they are
78      * or'd, in the same way as expressions around "||" in C and
79      * Java.) Set during parsing; replaced by finish().
80      */
81    private ArrayList patternElementDisjunction1;
82  
83    /** The pattern element disjunction for transduction - Java arrays. */
84    private PatternElement[][] patternElementDisjunction2;
85  
86    /** An array of PatternElements making up a conjunction. It is a member of
87      * patternElementDisjunction. This is the one we're adding to
88      * at present. Used during parsing, not matching.
89      */
90    private ArrayList currentConjunction;
91  
92    /** Make a new disjunction at this point. */
93    public void createDisjunction() {
94      currentConjunction = new ArrayList();
95      patternElementDisjunction1.add(currentConjunction);
96    } // createDisjunction
97  
98    /** Add an element to the current conjunction. */
99    public void addPatternElement(PatternElement pe) {
100     currentConjunction.add(pe);
101   } // addPatternElement
102 
103   /** Get an list of CPEs that we contain. */
104   protected Iterator getCPEs() {
105     ArrayList cpes = new ArrayList();
106 
107     // for each (conjunction) member of the pattern element discjunction
108     for(
109       Iterator disjunction = patternElementDisjunction1.iterator();
110       disjunction.hasNext();
111     ) {
112       // for each pattern element making up this conjunction
113       for(
114         Iterator conjunction = ((ArrayList) disjunction.next()).iterator();
115         conjunction.hasNext();
116       ) {
117         PatternElement pat = (PatternElement) conjunction.next();
118 
119         Iterator i = null;
120         if(pat instanceof ComplexPatternElement) {
121           cpes.add(pat);
122           i = ((ComplexPatternElement) pat).getCPEs();
123         }
124         else if(pat instanceof ConstraintGroup)
125           i = ((ConstraintGroup) pat).getCPEs();
126 
127         if(i != null)
128           for( ; i.hasNext(); )
129             cpes.add(i.next());
130       } // for each element of the conjunction
131     } // for each conjunction (element of the disjunction)
132 
133     return cpes.iterator();
134   } // getCPEs
135 
136   /** Finish: replace dynamic data structures with Java arrays; called
137     * after parsing.
138     */
139   public void finish() {
140 
141     // index into patternElementDisjunction2
142     int i = 0;
143 
144     // index into the conjunctions (second dimension of pED2)
145     int j = 0;
146 
147     patternElementDisjunction2 =
148       new PatternElement[patternElementDisjunction1.size()][];
149 
150     // for each (conjunction) member of the pattern element discjunction
151     for(
152       Iterator disjuncIter = patternElementDisjunction1.iterator();
153       disjuncIter.hasNext();
154       i++
155     ) {
156       ArrayList conjunction = (ArrayList) disjuncIter.next();
157       patternElementDisjunction2[i] = new PatternElement[conjunction.size()];
158       j = 0;
159 
160       // for each pattern element making up this conjunction
161       for(
162         Iterator conjIter = conjunction.iterator();
163         conjIter.hasNext();
164         j++
165       ) {
166         patternElementDisjunction2[i][j] = (PatternElement) conjIter.next();
167         patternElementDisjunction2[i][j].finish();
168       } // loop on conjunction
169 
170     } // loop on patternElementDisjunction1
171 
172     patternElementDisjunction1 = null;
173   } // finish
174 
175   /** Access to the annotations that have been matched by this group. */
176   public AnnotationSet getMatchedAnnots() {
177     AnnotationSet matchedAnnots = new AnnotationSetImpl((Document) null);
178     int pEDLen = patternElementDisjunction2.length;
179 
180     // for each (conjunction) member of the pattern element disjunction
181     for(int i = 0; i < pEDLen; i++) {
182       int conjLen = patternElementDisjunction2[i].length;
183 
184       // for each pattern element making up this conjunction
185       for(int j = 0; j < conjLen; j++) {
186         PatternElement pat = patternElementDisjunction2[i][j];
187         AnnotationSet patMatchedAnnots = pat.getMatchedAnnots();
188         if(patMatchedAnnots != null)
189           matchedAnnots.addAll(pat.getMatchedAnnots());
190       } // for each element of the conjunction
191 
192     } // for each conjunction (element of the disjunction)
193 
194     return matchedAnnots;
195   } // getMatchedAnnots
196 
197 
198   /** Clear all the annotations that have been matched by this group. */
199   public void reset() {
200     // Debug.pr(this, "CG reset, matchHistory.size() = " + matchHistory.size());
201     int pEDLen = patternElementDisjunction2.length;
202 
203     // for each (conjunction) member of the pattern element disjunction
204     for(int i = 0; i < pEDLen; i++) {
205       int conjLen = patternElementDisjunction2[i].length;
206 
207       // for each pattern element making up this conjunction
208       for(int j = 0; j < conjLen; j++)
209         patternElementDisjunction2[i][j].reset();
210     }
211 
212     super.reset(); // should be redundant: there for if PE.reset changes
213   } // reset
214 
215   /** Multilevel rollback of annot caches etc. */
216   public void rollback(int arity) {
217     // Debug.pr(this, "CG rollback(" + arity + "), matchHistory.size() = " +
218     //                   matchHistory.size());
219     for(int i=0; i<arity; i++) {
220       PatternElement[] conjunction = (PatternElement[]) matchHistory.pop();
221       int conjLen = conjunction.length;
222       for(int j = 0; j < conjLen; j++)
223         conjunction[j].rollback(1);
224     }
225   } // rollback
226 
227 
228   /** Does this element match the document at this position? */
229   public boolean matches(
230     Document doc, int position, MutableInteger newPosition
231   ) {
232     // if a whole conjunction matches, we set newPosition to the max of
233     // rightmost advance of all the composite elements that matched, and
234     // position.
235     int rightmostAdvance = position;
236 
237     // when we fail the whole disjunction, we set newPosition to the max of
238     // leftmost failure point, and position
239     int leftmostFailurePoint = Integer.MAX_VALUE;
240 
241     // outerLoop:
242     // for each conjunction
243     //   for each element in the conjunction
244     //     if it fails continue outerLoop;
245     //   return true;
246     // return false;
247 
248     // for each member of the disjunctions array
249     int savedPosition = position;
250     int pEDLen = patternElementDisjunction2.length;
251     outerLoop:
252     for(int i = 0; i < pEDLen; i++) {
253       int conjLen = patternElementDisjunction2[i].length;
254       position = savedPosition;
255       rightmostAdvance = position;
256 
257       // for each pattern element making up this conjunction
258       for(int j = 0; j < conjLen; j++) {
259         PatternElement pat = patternElementDisjunction2[i][j];
260 
261         if(! pat.matches(doc, position, newPosition)) {
262           // reset the last failure point to the furthest we got so far
263           leftmostFailurePoint =
264             Math.min(leftmostFailurePoint, newPosition.value);
265 
266           // rollback matches done in the previous elements of this conjunction
267           for(int k = j - 1; k >= 0; k--)
268             patternElementDisjunction2[i][k].rollback(1);
269 
270           // try the next conjunction
271           continue outerLoop;
272         }
273 
274         // reset our advance point to the furthest so far
275         position = rightmostAdvance =
276           Math.max(rightmostAdvance, newPosition.value);
277 
278       } // for each element of the conjunction
279 
280       // a whole conjunction matched: record advance and which conj succeeded
281       newPosition.value = rightmostAdvance;
282       matchHistory.push(patternElementDisjunction2[i]);
283       //Debug.pr(this, "CG matches: pushing");
284       return true;
285 
286     } // for each conjunction (element of the disjunction)
287 
288     // we reached the end of the disjunction without matching a
289     // whole conjunction
290     if(leftmostFailurePoint == Integer.MAX_VALUE)
291       leftmostFailurePoint = position + 1;
292     newPosition.value = Math.max(position + 1, leftmostFailurePoint);
293     return false; // annot caches have been rolled back already in inner loop
294   } // matches
295 
296 
297   /** Create a string representation of the object. */
298   public String toString() { return toString(""); }
299 
300   /** Create a string representation of the object. */
301   public String toString(String pad) {
302     String newline = Strings.getNl();
303 
304     StringBuffer buf =
305       new StringBuffer(pad + "CG: disjunction(" + newline);
306     String newPad = Strings.addPadding(pad, INDENT_PADDING);
307 
308     boolean firstTime = true;
309 
310     if(patternElementDisjunction1 != null) { // before finish()
311       // for each (conjunction) member of the pattern element discjunction
312       for(
313         Iterator disjunction = patternElementDisjunction1.iterator();
314         disjunction.hasNext();
315       ) {
316         if(firstTime) firstTime = false;
317         else buf.append(newline + pad + "|" + newline);
318 
319         // for each pattern element making up this conjunction
320         for(
321           Iterator conjunction = ((ArrayList) disjunction.next()).iterator();
322           conjunction.hasNext();
323         ) {
324           buf.append(
325             ((PatternElement) conjunction.next()).toString(newPad) + newline
326           );
327         } // for each element of the conjunction
328       } // for each conjunction (element of the disjunction)
329 
330     } else { // after finish
331       int pEDLen = patternElementDisjunction2.length;
332       if(firstTime) firstTime = false;
333       else buf.append(newline + pad + "|" + newline);
334 
335       for(int i = 0; i < pEDLen; i++) {
336         int conjLen = patternElementDisjunction2[i].length;
337         // for each pattern element making up this conjunction
338         for(int j = 0; j < conjLen; j++)
339           buf.append(
340             patternElementDisjunction2[i][j].toString(newPad) + newline
341           );
342       }
343     }
344 
345     buf.append(pad + ") CG." + newline);
346 
347     return buf.toString();
348   } // toString
349 
350 
351   //needed by FSM
352   public PatternElement[][] getPatternElementDisjunction(){
353     return patternElementDisjunction2;
354   }
355 
356 } // class ConstraintGroup
357 
358 
359 // $Log: ConstraintGroup.java,v $
360 // Revision 1.8  2001/09/13 12:09:50  kalina
361 // Removed completely the use of jgl.objectspace.Array and such.
362 // Instead all sources now use the new Collections, typically ArrayList.
363 // I ran the tests and I ran some documents and compared with keys.
364 // JAPE seems to work well (that's where it all was). If there are problems
365 // maybe look at those new structures first.
366 //
367 // Revision 1.7  2001/09/12 11:59:33  kalina
368 // Changed the old JAPE stuff to use the new Collections API,
369 // instead of com.objectspace stuff. Will eliminate that library
370 // completely very soon! Just one class left to re-implement,
371 //
372 // ParseCPSL.jj changed accordingly. All tested and no smoke.
373 //
374 // Revision 1.6  2000/11/08 16:35:02  hamish
375 // formatting
376 //
377 // Revision 1.5  2000/10/26 10:45:30  oana
378 // Modified in the code style
379 //
380 // Revision 1.4  2000/10/16 16:44:33  oana
381 // Changed the comment of DEBUG variable
382 //
383 // Revision 1.3  2000/10/10 15:36:35  oana
384 // Changed System.out in Out and System.err in Err;
385 // Added the DEBUG variable seted on false;
386 // Added in the header the licence;
387 //
388 // Revision 1.2  2000/04/14 18:02:46  valyt
389 // Added some gate.fsm classes
390 // added some accessor function in old jape classes
391 //
392 // Revision 1.1  2000/02/23 13:46:06  hamish
393 // added
394 //
395 // Revision 1.1.1.1  1999/02/03 16:23:01  hamish
396 // added gate2
397 //
398 // Revision 1.17  1998/11/24 16:18:29  hamish
399 // fixed toString for calls after finish
400 //
401 // Revision 1.16  1998/11/01 21:21:36  hamish
402 // use Java arrays in transduction where possible
403 //
404 // Revision 1.15  1998/11/01 14:55:54  hamish
405 // fixed lFP setting in matches
406 //
407 // Revision 1.14  1998/10/30 14:06:45  hamish
408 // added getTransducer
409 //
410 // Revision 1.13  1998/10/29 12:07:49  hamish
411 // toString change
412 //
413 // Revision 1.12  1998/10/06 16:16:10  hamish
414 // negation percolation during constrain add; position advance when none at end
415 //
416 // Revision 1.11  1998/10/01 16:06:30  hamish
417 // new appelt transduction style, replacing buggy version
418 //
419 // Revision 1.10  1998/09/26 09:19:16  hamish
420 // added cloning of PE macros
421 //
422 // Revision 1.9  1998/09/17 16:48:31  hamish
423 // added macro defs and macro refs on LHS
424 //
425 // Revision 1.8  1998/08/12 19:05:43  hamish
426 // fixed multi-part CG bug; set reset to real reset and fixed multi-doc bug
427 //
428 // Revision 1.7  1998/08/12 15:39:35  hamish
429 // added padding toString methods
430 //
431 // Revision 1.6  1998/08/05 21:58:06  hamish
432 // backend works on simple test
433 //
434 // Revision 1.5  1998/08/03 19:51:20  hamish
435 // rollback added
436 //
437 // Revision 1.4  1998/07/31 13:12:16  hamish
438 // done RHS stuff, not tested
439 //
440 // Revision 1.3  1998/07/30 11:05:16  hamish
441 // more jape
442 //
443 // Revision 1.2  1998/07/29 11:06:56  hamish
444 // first compiling version
445 //
446 // Revision 1.1.1.1  1998/07/28 16:37:46  hamish
447 // gate2 lives
448