1   /*
2    *  SinglePhaseTransducer.java - transducer class
3    *
4    *  Copyright (c) 1998-2001, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Hamish Cunningham, 24/07/98
12   *
13   *  $Id: SinglePhaseTransducer.java,v 1.57 2001/11/20 09:16:58 nasso Exp $
14   */
15  
16  
17  package gate.jape;
18  
19  import java.io.*;
20  
21  import gate.annotation.*;
22  import gate.util.*;
23  import gate.*;
24  import gate.fsm.*;
25  import gate.gui.*;
26  import gate.creole.*;
27  import gate.event.*;
28  import java.util.*;
29  
30  /**
31    * Represents a complete CPSL grammar, with a phase name, options and
32    * rule set (accessible by name and by sequence).
33    * Implements a transduce method taking a Document as input.
34    * Constructs from String or File.
35    */
36  public class SinglePhaseTransducer
37  extends Transducer implements JapeConstants, java.io.Serializable
38  {
39    /** Debug flag */
40    private static final boolean DEBUG = false;
41  
42    /** Creates a transducer with the given name, a fresh rule list and the finished flag cleared. */
43    public SinglePhaseTransducer(String name) {
44      this.finishedAlready = false;
45      this.rules = new PrioritisedRuleList();
46      this.name = name;
47    } // SinglePhaseTransducer(String)
48  
49    /** Rule application style (a JapeConstants value); starts out as BRILL_STYLE. */
50    private int ruleApplicationStyle = BRILL_STYLE;
51  
52    /** Stores the rule application style to use (values defined in JapeConstants). */
53    public void setRuleApplicationStyle(int style) {
54      this.ruleApplicationStyle = style;
55    } // setRuleApplicationStyle(int)
56  
57    /** The rules of this transducer, ordered by priority and then by
58      * addition sequence (which will be file position if they come from
59      * a file). finish() clears this list and sets the field to null.
60      */
61    private PrioritisedRuleList rules;
62  
63    FSM fsm; // the transition graph; finish() is what assigns it
64    /** Returns the transition graph held in the fsm field. */
65    public FSM getFSM(){
66      return this.fsm;
67    } // getFSM()
68  
69    /** Adds a rule to the prioritised rule list; the list is null once finish() has run. */
70    public void addRule(Rule rule) {
71      this.rules.add(rule);
72    } // addRule(Rule)
73  
74    /** Option settings given so far: option name mapped to its value. */
75    private java.util.HashMap optionSettings = new java.util.HashMap();
76  
77    /** Records the value of an option; a value stored earlier under the
78      * same name is replaced.
79      */
80    public void setOption(String name, String setting) {
81      this.optionSettings.put(name, setting);
82    } // setOption(String, String)
83  
84    /** Looks up an option by name; null when no value is stored for it. */
85    public String getOption(String name) {
86      return (String) this.optionSettings.get(name);
87    } // getOption(String)
88  
89    /** Whether the finish method has been called or not. */
90    private boolean finishedAlready;
91  
92    /** Finish: replace dynamic data structures with Java arrays; called
93      * after parsing.
94      */
95    public void finish() {
96      // both MPT and SPT have finish called on them by the parser...
97      if(finishedAlready)
98        return;
99      else
100       finishedAlready = true;
101 
102     for(Iterator i = rules.iterator(); i.hasNext(); )
103       ((Rule) i.next()).finish();
104     //build the finite state machine transition graph
105     fsm = new FSM(this);
106     //clear the old style data structures
107     rules.clear();
108     rules = null;
109   } // finish
110 
111   /**
112     * Files each non-empty annotation of the given collection under its
113     * start offset in the supplied sorted set.
114     * <p>
115     * An annotation whose start node and end node report the same offset
116     * is treated as empty and left out.
117     * <p>
118     * History (dam): an earlier version took a Map plus a SortedSet and
119     * kept a LinkedList of annotations per offset by hand; that work is
120     * now left to SimpleSortedSet.
121     *
122     * @param keys the structure the annotations are added to, keyed by
123     *        start offset
124     * @param annotations the annotations to file; each element is cast to
125     *        Annotation
126     */
127   private void addAnnotationsByOffset(SimpleSortedSet keys, Set annotations){
128     for(Iterator it = annotations.iterator(); it.hasNext(); ){
129       Annotation current = (Annotation)it.next();
130       long startOffset = current.getStartNode().getOffset().longValue();
131       long endOffset = current.getEndNode().getOffset().longValue();
132       //only annotations that cover some content are kept
133       if(startOffset != endOffset){
134         keys.add(startOffset, current);
135       }
136     }
137   }//private void addAnnotationsByOffset()
138 
139 
140   /**
141     * Transduce a document using the annotation set provided and the current
142     * rule application style.
        * <p>
        * The non-empty annotations of inputAS (all of them when the input type
        * list is empty, otherwise only those of the listed types) are indexed
        * by start offset. If nothing was indexed, fireProcessFinished() is
        * called and the method returns. Otherwise matching starts at
        * offsets.first(): an FSM instance is created in the initial state of
        * fsm and instances are advanced over the indexed annotations until none
        * is left active. The accepting instances are then fired:
        * <ul>
        * <li>BRILL_STYLE: the right-hand side of every accepting instance is
        *     run;</li>
        * <li>APPELT_STYLE and FIRST_STYLE: the accepting instances are sorted
        *     with Collections.reverseOrder() and only the first one is run
        *     (FIRST_STYLE also stops advancing as soon as one instance reaches
        *     a final state).</li>
        * </ul>
        * Matching resumes from the furthest position reached by a fired
        * instance, or from the current start offset plus one when nothing was
        * accepted. Progress events are fired at the start and whenever the
        * start offset has moved more than 256 past the last reported one.
        * <p>
        * Uses the fsm field, so presumably finish() must have been called
        * first - TODO confirm with callers.
        *
        * @param doc the document being processed; handed to the FSM instances
        *        and right-hand sides, and its content size is used to compute
        *        progress
        * @param inputAS the annotation set the input annotations are read from
        * @param outputAS the annotation set passed to RightHandSide.transduce
        * @throws JapeException not thrown directly in this body; presumably
        *         propagated from RightHandSide.transduce - verify
        * @throws ExecutionException an ExecutionInterruptedException is thrown
        *         when the interrupted flag (or isInterrupted()) is seen set
143     */
144   public void transduce(Document doc, AnnotationSet inputAS,
145                         AnnotationSet outputAS) throws JapeException,
146                                                        ExecutionException {
147     interrupted = false;
148     fireProgressChanged(0);
149 
150     //the input annotations are indexed here by start offset:
151     //each offset maps to the list of annotations starting there
152 
153 //dam was
154 /*
155     Map annotationsByOffset = new HashMap();
156 
157     SortedSet offsets = new TreeSet();
158 */
159 //dam: now one SimpleSortedSet plays both roles (the two names below are aliases)
160     SimpleSortedSet offsets = new SimpleSortedSet();
161     SimpleSortedSet annotationsByOffset = offsets;
162 //dam: end
163 
164     //select only the annotations of types specified in the input list
165 //    Out.println("Input:" + input);
166     if(input.isEmpty())
167     {
168 //dam: was
169 //        addAnnotationsByOffset(annotationsByOffset, offsets, inputAS);
170 //dam: now
171         addAnnotationsByOffset(offsets, inputAS);
172 //dam: end
173     } else {
174       Iterator typesIter = input.iterator();
175       AnnotationSet ofOneType = null;
176       while(typesIter.hasNext()){
177         ofOneType = inputAS.get((String)typesIter.next());
178         if(ofOneType != null){
179 //dam: was
180 //        addAnnotationsByOffset(annotationsByOffset, offsets, ofOneType);
181 //dam: now
182           addAnnotationsByOffset(offsets, ofOneType);
183 //dam: end
184         }
185       }
186     }
187 
188     if(annotationsByOffset.isEmpty()){
189       fireProcessFinished();
190       return;
191     }
192 
193     annotationsByOffset.sort();
194     //define data structures
195     //FSM instances that haven't blocked yet
196 //    java.util.LinkedList activeFSMInstances = new java.util.LinkedList();
197     java.util.ArrayList activeFSMInstances = new java.util.ArrayList();
198 
199     // FSM instances that have reached a final state.
200     // They are collected in a plain list in the order they are found; the list
201     // is only sorted (in reverse order) when an Appelt/First style rule fires
202 //dam: was LinkedList; ArrayList has faster add and remove methods than LinkedList
203 //    java.util.LinkedList acceptingFSMInstances = new LinkedList();
204 //dam: now
205     java.util.ArrayList acceptingFSMInstances = new ArrayList();
206 //dam: end
207     FSMInstance currentFSM;
208 
209 
210     //start node of the first annotation filed under offsets.first()
211     Node startNode = ((Annotation)
212                       ((ArrayList)annotationsByOffset.
213                              get(offsets.first())).get(0)).
214                       getStartNode();
215 
216     //used to calculate the percentage of processing done
217     long lastNodeOff = doc.getContent().size().longValue();
218 
219     //the offset of the node where the matching currently starts
220     //the value -1 marks no more annotations to parse
221     long startNodeOff = startNode.getOffset().longValue();
222 
223     //used to decide when to fire progress events
224     long oldStartNodeOff = 0;
225 
226     //the big while for the actual parsing
227     while(startNodeOff != -1){
228 //Out.prln();
229 //Out.pr("Start: " + startNodeOff);
230       //while there are more annotations to parse
231       //create initial active FSM instance starting parsing from new startNode
232       //currentFSM = FSMInstance.getNewInstance(
233       currentFSM = new FSMInstance(
234                   fsm,
235                   fsm.getInitialState(),//fresh start
236                   startNode,//the matching starts from the current startNode
237                   startNode,//current position in AG is the start position
238                   new java.util.HashMap(),//no bindings yet!
239                   doc
240                   );
241 
242       // at this point ActiveFSMInstances should always be empty!
243       activeFSMInstances.clear();
244       acceptingFSMInstances.clear();
245 //dam: was used LinkedList
246 //      activeFSMInstances.addLast(currentFSM);
247 //dam: now used ArrayList
248       activeFSMInstances.add(currentFSM);
249 //dam: end
250 
251       //for each active FSM Instance, try to advance
252       whileloop2:
253       while(!activeFSMInstances.isEmpty()){
254         if(interrupted) throw new ExecutionInterruptedException(
255           "The execution of the \"" + getName() +
256           "\" Jape transducer has been abruptly interrupted!");
257 
258 //Out.pr(" <" + acceptingFSMInstances.size() + "/" +
259 //              activeFSMInstances.size() +">");
260         // take the first active FSM instance
261         currentFSM = (FSMInstance)activeFSMInstances.remove(0);
262 
263         // process the current FSM instance
264         if(currentFSM.getFSMPosition().isFinal()){
265           //the current FSM is in a final state
266 //dam: was LinkedList
267 //          acceptingFSMInstances.addLast(currentFSM.clone());
268 //dam: now
269           acceptingFSMInstances.add(currentFSM.clone());
270 //dam: end
271 //          //if we are in APPELT mode clear all the accepting instances
272 //          //apart from the longest one
273 //          if(ruleApplicationStyle == APPELT_STYLE &&
274 //             acceptingFSMInstances.size() > 1){
275 //            Object longestAcceptor = acceptingFSMInstances.last();
276 //            acceptingFSMInstances.clear();
277 //            acceptingFSMInstances.add(longestAcceptor);
278 //          }
279           //if we're only looking for the shortest stop here
280           if(ruleApplicationStyle == FIRST_STYLE) break whileloop2;
281         }
282 
283         //get all the annotations that start where the current FSM finishes
284 //<<< DAM: was using SortedSet
285 //        SortedSet offsetsTailSet = offsets.tailSet(
286 //=== DAM: now
287         SimpleSortedSet offsetsTailSet = offsets.tailSet(
288 //>>> DAM: end
289                                     currentFSM.getAGPosition().getOffset().longValue());
290         ArrayList paths; //was linkedList
291 
292 //<<< DAM: SortedSet speedup
293 /*
294         if(offsetsTailSet.isEmpty()){
295           paths = new ArrayList();
296         }else{
297           paths = (List)annotationsByOffset.get(offsetsTailSet.first());
298         }
299 */
300 //=== DAM: now; a negative first() is taken to mean there is no input left from here
301         long theFirst = offsetsTailSet.first();
302         if(theFirst <0)
303           continue;
304 
305           paths = (ArrayList)annotationsByOffset.get(theFirst);
306 //        }
307 //System.out.println("Paths: " + paths + "\n^localInputIndex: " + localInputIndex);
308 //>>> DAM: end
309 
310 //        if(!paths.isEmpty()){
311         if(paths.isEmpty()) continue;
312           Iterator pathsIter = paths.iterator();
313           Annotation onePath;
314           State currentState = currentFSM.getFSMPosition();
315           Iterator transitionsIter;
316 //DAM: do it without an intermediate FeatureMap
317 //        FeatureMap features = null;//Factory.newFeatureMap();
318           //foreach possible annotation
319           while(pathsIter.hasNext()){
320             onePath = (Annotation)pathsIter.next();
321             transitionsIter = currentState.getTransitions().iterator();
322             Transition currentTransition;
323             Constraint[] currentConstraints;
324             transitionsWhile:
325             while(transitionsIter.hasNext()){
326               currentTransition = (Transition)transitionsIter.next();
327               //check if the current transition can use the current annotation (path)
328               currentConstraints =
329                            currentTransition.getConstraints().getConstraints();
330               String annType;
331 //DAM: introduce index of the constraint to process
332               int currentConstraintsindex = -1;
333               //we assume that all annotations in a constraint are of the same type
334               for(int i = 0; i<currentConstraints.length; i++){
335                 annType = currentConstraints[i].getAnnotType();
336                 //if wrong type try next transition
337                 if(!annType.equals(onePath.getType()))continue transitionsWhile;
338 //DAM: do it without an intermediate FeatureMap
339 //                features.clear();
340 //                features.putAll(currentConstraints[i].getAttributeSeq());
341                 currentConstraintsindex = i;
342                 break;
343               }
344 // >>> was
345 //              if(onePath.getFeatures().entrySet().containsAll(features.entrySet())){
346 // >>> NASO, FeatArray optimization
                  //NOTE(review): if currentConstraints is empty the index is still -1
                  //here and the array access below would throw - confirm that every
                  //transition carries at least one constraint
347               if(onePath.getFeatures().subsumes(
348 //dam: was
349 //                features
350 //dam: now
351                 currentConstraints[currentConstraintsindex].getAttributeSeq()
352 //dam: end
353                 )){
354 // >>> end NASO
355                 //we have a match
356   //System.out.println("Match!");
357                 //create a new FSMInstance, advance it over the current annotation
358                 //take care of the bindings  and add it to ActiveFSM
359                 FSMInstance newFSMI = (FSMInstance)currentFSM.clone();
360                 newFSMI.setAGPosition(onePath.getEndNode());
361                 newFSMI.setFSMPosition(currentTransition.getTarget());
362                 //bindings
363                 java.util.Map binds = newFSMI.getBindings();
364                 java.util.Iterator labelsIter =
365                                    currentTransition.getBindings().iterator();
366                 String oneLabel;
367                 AnnotationSet boundAnnots, newSet;
368                 while(labelsIter.hasNext()){
369                   oneLabel = (String)labelsIter.next();
370                   boundAnnots = (AnnotationSet)binds.get(oneLabel);
371                   if(boundAnnots != null)
372                     newSet = new AnnotationSetImpl((AnnotationSet)boundAnnots);
373                   else
374                     newSet = new AnnotationSetImpl(doc);
375                   newSet.add(onePath);
376                   binds.put(oneLabel, newSet);
377 
378                 }//while(labelsIter.hasNext())
379                 activeFSMInstances.add(newFSMI);
380 //Out.pr("^(" + newFSMI.getStartAGPosition().getOffset() +
381 //                               "->" + newFSMI.getAGPosition().getOffset() + ")");
382               }//if match
383             }//while(transitionsIter.hasNext())
384           }//while(pathsIter.hasNext())
385 // dam: reverse the paths.isEmpty check
386 //        }//if(paths != null)
387 // dam
388       }//while(!activeFSMInstances.isEmpty())
389 
390 
391       //FIRE THE RULE
392 //dam: use long
393 //      Long lastAGPosition = null;
394 //dam: now
395       long lastAGPosition = -1;
396 //dam: end
397       if(acceptingFSMInstances.isEmpty()){
398         //no rule to fire, advance to the next input offset
399         lastAGPosition = startNodeOff + 1;
400       } else if(ruleApplicationStyle == BRILL_STYLE) {
401       //System.out.println("Brill acceptor");
402         // fire the rules corresponding to all accepting FSM instances
403         java.util.Iterator accFSMs = acceptingFSMInstances.iterator();
404         FSMInstance currentAcceptor;
405         RightHandSide currentRHS;
406         lastAGPosition = startNode.getOffset().longValue();
407 
408         while(accFSMs.hasNext()){
409           currentAcceptor = (FSMInstance) accFSMs.next();
410 
411           currentRHS = currentAcceptor.getFSMPosition().getAction();
412           currentRHS.transduce(doc, outputAS, currentAcceptor.getBindings());
413 //dam: use long
414 //          Long currentAGPosition = currentAcceptor.getAGPosition().getOffset();
415 //dam: now
416           long currentAGPosition = currentAcceptor.getAGPosition().getOffset().longValue();
417 //dam: end
418           if(currentAGPosition > lastAGPosition)
419             lastAGPosition = currentAGPosition;
420         }
421 
422       } else if(ruleApplicationStyle == APPELT_STYLE ||
423                 ruleApplicationStyle == FIRST_STYLE) {
424 
425 //System.out.println("Appelt acceptor");
426         // sort the accepting instances in reverse order and execute only the
427         // one that ends up first (presumably the longest match - verify)
428 
429         Collections.sort(acceptingFSMInstances, Collections.reverseOrder());
430 
431         FSMInstance currentAcceptor =(FSMInstance)acceptingFSMInstances.get(0);//get(0)
432         if(isDebugMode()){
433           //see if we have any conflicts: leading acceptors that equal the chosen one
434           Iterator accIter = acceptingFSMInstances.iterator();
435           FSMInstance anAcceptor;
436           List conflicts = new ArrayList();
437           while(accIter.hasNext()){
438             anAcceptor = (FSMInstance)accIter.next();
439             if(anAcceptor.equals(currentAcceptor)){
440               conflicts.add(anAcceptor);
441             }else{
442               break;
443             }
444           }
445           if(conflicts.size() > 1){
446             Out.prln("\nConflicts found during matching:" +
447                      "\n================================");
448             accIter = conflicts.iterator();
449             int i = 0;
450             while(accIter.hasNext()){
451               Out.prln(i++ + ") " + accIter.next().toString());
452             }
453           }
454         }
455 
456         RightHandSide currentRHS = currentAcceptor.getFSMPosition().getAction();
457         currentRHS.transduce(doc, outputAS, currentAcceptor.getBindings());
458         //advance in AG
459         lastAGPosition = currentAcceptor.getAGPosition().getOffset().longValue();
460       }
461 //      else if(ruleApplicationStyle == FIRST_STYLE) {
462 //        // AcceptingFSMInstances is an ordered structure:
463 //        // just execute the shortest (first) rule
464 //
465 //        FSMInstance currentAcceptor =(FSMInstance)acceptingFSMInstances.first();
466 //        RightHandSide currentRHS = currentAcceptor.getFSMPosition().getAction();
467 //        currentRHS.transduce(doc, outputAS, currentAcceptor.getBindings());
468 //        //advance in AG
469 //        long lastAGPosition = currentAcceptor.getAGPosition().
470 //                              getOffset().longValue();
471 //        //advance the index on input
472 //        while(inputIndex < annotations.size() &&
473 //              ((Annotation)annotations.get(inputIndex)).
474 //              getStartNode().getOffset().longValue() < lastAGPosition){
475 //          inputIndex++;
476 //        }
477 //      }
478       else throw new RuntimeException("Unknown rule application style!");
479 
480 
481       //advance on input
482 //      SortedSet OffsetsTailSet = offsets.tailSet(lastAGPosition);
483       SimpleSortedSet OffsetsTailSet = offsets.tailSet(lastAGPosition);
484 //<<< DAM: isEmpty speedup
485 /*
486       if(OffsetsTailSet.isEmpty()){
487 */
488 //=== DAM: now
489         long theFirst = OffsetsTailSet.first();
490       if( theFirst < 0){
491 //>>> DAM: end
492         //no more input, phew! :)
            //NOTE(review): fireProcessFinished() is called again after the big while
            //loop, so it runs twice on this path - confirm that this is intended
493         startNodeOff = -1;
494         fireProcessFinished();
495       }else{
496 //<<< DAM: use long
497 /*
498         Long nextKey = (Long)OffsetsTailSet.first();
499 */
500 //=== DAM: now
501         long nextKey = theFirst;
502 //>>> DAM: end
503         startNode = ((Annotation)
504                       ((ArrayList)annotationsByOffset.get(nextKey)).get(0)). //nextKey
505                     getStartNode();
506         startNodeOff = startNode.getOffset().longValue();
507 
508         //eliminate the possibility for infinite looping
            //NOTE(review): oldStartNodeOff is only updated in the progress-event
            //branch below, so this compares against the last reported offset
509         if(oldStartNodeOff == startNodeOff){
510 //Out.prln("");
511 //Out.pr("SKIP " + startNodeOff);
512           //we are about to step twice in the same place, ...skip ahead
513           lastAGPosition = startNodeOff + 1;
514           OffsetsTailSet = offsets.tailSet(lastAGPosition);
515 //<<< DAM: isEmpty speedup
516 /*
517           if(OffsetsTailSet.isEmpty()){
518 */
519 //=== DAM: now
520           theFirst = OffsetsTailSet.first();
521           if(theFirst < 0){
522 //>>> DAM: end
523             //no more input, phew! :)
524             startNodeOff = -1;
525             fireProcessFinished();
526           }else{
527 //<<< DAM: use long
528 //            nextKey = (Long)OffsetsTailSet.first();
529 //=== DAM: now
530             nextKey = theFirst;
531 //>>> DAM: end
532             startNode = ((Annotation)
533                           ((List)annotationsByOffset.get(theFirst)).get(0)).
534                         getStartNode();
535             startNodeOff =startNode.getOffset().longValue();
536           }
537 //Out.prln(" ->" + startNodeOff);
538         }//if(oldStartNodeOff == startNodeOff)
539 
540 
541         //fire the progress event once the start has moved more than 256 offsets on
542         if(startNodeOff - oldStartNodeOff > 256){
543           if(isInterrupted()) throw new ExecutionInterruptedException(
544             "The execution of the \"" + getName() +
545             "\" Jape transducer has been abruptly interrupted!");
546 
547           fireProgressChanged((int)(100 * startNodeOff / lastNodeOff));
548           oldStartNodeOff = startNodeOff;
549         }
550       }
551     }//while(startNodeOff != -1)
552     fireProcessFinished();
553   } // transduce
554 
555 
556 
557   /**
558     * Transduce a document using the annotation set provided and the current
559     * rule application style.
560     */
561   public void transduce1(Document doc, AnnotationSet annotationSet)
562                                                           throws JapeException {
563     fireProgressChanged(0);
564 
565     //the input annotations will be read from this set
566     AnnotationSet annotations = null;
567 
568     //select only the annotations of types specified in the input list
569     //Out.println("Input:" + input);
570     if(input.isEmpty()) annotations = annotationSet;
571     else{
572       Iterator typesIter = input.iterator();
573       AnnotationSet ofOneType = null;
574       while(typesIter.hasNext()){
575         ofOneType = annotationSet.get((String)typesIter.next());
576         if(ofOneType != null){
577     //Out.println("Adding " + ofOneType.getAllTypes());
578           if(annotations == null) annotations = ofOneType;
579           else annotations.addAll(ofOneType);
580         }
581       }
582     }
583     if(annotations == null) annotations = new AnnotationSetImpl(doc);
584     //Out.println("Actual input types: " + annotations.getAllTypes() + "\n"+
585     //         "Actual input values: " + annotations + "\n===================");
586     //INITIALISATION Should we move this someplace else?
587     //build the finite state machine transition graph
588     FSM fsm = new FSM(this);
589 
590 
591     //define data structures
592     //FSM instances that haven't blocked yet
593     java.util.LinkedList activeFSMInstances = new java.util.LinkedList();
594 
595     // FSM instances that have reached a final state
596     // This is a sorted set and the contained objects are sorted by the length
597     // of the document content covered by the matched annotations
598     java.util.SortedSet acceptingFSMInstances = new java.util.TreeSet();
599     FSMInstance currentFSM;
600 
601     // startNode: the node from the current matching attepmt starts.
602     // initially startNode = leftMost node
603     gate.Node startNode = annotations.firstNode();
604 
605     // if there are no annotations return
606     if(startNode == null) return;
607 
608     // The last node: where the parsing will stop
609     gate.Node lastNode = annotations.lastNode();
610     int oldStartNodeOff = 0;
611     int lastNodeOff = lastNode.getOffset().intValue();
612     int startNodeOff;
613     //the big while for the actual parsing
614     while(startNode != lastNode){
615       //System.out.println("Offset: " + startNode.getOffset());
616       //while there are more annotations to parse
617       //create initial active FSM instance starting parsing from new startNode
618       //currentFSM = FSMInstance.getNewInstance(
619       currentFSM = new FSMInstance(
620                   fsm,
621                   fsm.getInitialState(),//fresh start
622                   startNode,//the matching starts form the current startNode
623                   startNode,//current position in AG is the start position
624                   new java.util.HashMap(),//no bindings yet!
625                   doc
626                   );
627       // at this point ActiveFSMInstances should always be empty!
628       activeFSMInstances.addLast(currentFSM);
629         while(!activeFSMInstances.isEmpty()){
630         // while there are some "alive" FSM instances
631         // take the first active FSM instance
632         currentFSM = (FSMInstance)activeFSMInstances.removeFirst();
633         // process the current FSM instance
634         if(currentFSM.getFSMPosition().isFinal()){
635           // if the current FSM is in a final state
636           acceptingFSMInstances.add(currentFSM.clone());
637           //  Out.println("==========================\n" +
638           //                     "New Accepting FSM:\n" + currentFSM +
639           //                     "\n==========================");
640         }
641 
642         // this will (should) be optimised
643         State fsmPosition = currentFSM.getFSMPosition();
644         // System.out.println("Current FSM from:" +
645         //                 currentFSM.getStartAGPosition().getOffset() +
646         //                 " to " + currentFSM.getAGPosition().getOffset());
647 // >>> dam: was set
648 /*
649         java.util.Set transitions = fsmPosition.getTransitions();
650 */
651 // >>> dam: TransArray optimisation
652         gate.util.SimpleArraySet transitions = fsmPosition.getTransitions();
653 // >>> dam: end
654         java.util.Iterator transIter = transitions.iterator();
655         while(transIter.hasNext()){
656           // System.out.print("..");
657 
658           //for each transition, check if it is possible and "DO IT!"
659           Transition currentTrans = (Transition)transIter.next();
660           //holds all the matched annotations. In case of success all these
661           //annotations will be added to the bindings Map for the new
662           //FSMInstance
663           //...using LinkedList instead of HashSet because Annotation does not
664           //implement hashCode()
665 
666           //get an empty annotation set.
667           AnnotationSet matchedAnnots = new AnnotationSetImpl(doc);
668 
669           //maps String to gate.FeatureMap
670           java.util.Map constraintsByType = new java.util.HashMap();
671           Constraint[] currentConstraints =
672                        currentTrans.getConstraints().getConstraints();
673           String annType;
674           FeatureMap newAttributes, oldAttributes;
675 
676           for(int i=0; i < currentConstraints.length; i++){
677             annType = currentConstraints[i].getAnnotType();
678             newAttributes = currentConstraints[i].getAttributeSeq();
679             oldAttributes = (FeatureMap)constraintsByType.get(annType);
680             if(newAttributes == null){
681               if(oldAttributes == null){
682                 //no constraints about this type.
683                 constraintsByType.put(annType, Factory.newFeatureMap());
684               }
685             } else {
686               //newAttributes != null
687               if(oldAttributes != null) newAttributes.putAll(oldAttributes);
688               constraintsByType.put(annType, newAttributes);
689             }
690           }//for(int i=0; i < currentConstraints.length; i++)
691           //try to match all the constraints
692 
693           boolean success = true;
694           java.util.Iterator typesIter = constraintsByType.keySet().iterator();
695           AnnotationSet matchedHere = null;
696           Long offset;
697 
698           while(success && typesIter.hasNext()) {
699             //System.out.print("++");
700             //do a query for each annotation type
701             annType = (String)typesIter.next();
702             newAttributes = (FeatureMap)constraintsByType.get(annType);
703             offset = currentFSM.getAGPosition().getOffset();
704             matchedHere = annotations.get(annType,
705                                           newAttributes,
706                                           offset);
707             if(matchedHere == null || matchedHere.isEmpty()) success = false;
708             else{
709               // we have some matched annotations of the current type
710               // let's add them to the list of matched annotations
711               matchedAnnots.addAll(matchedHere);
712             }
713           } // while(success && typesIter.hasNext())
714           if(success){
715             // System.out.println("Success!");
716             // create a new FSMInstance, make it advance in AG and in FSM,
717             // take care of its bindings data structure and
718             // add it to the list of active FSMs.
719             FSMInstance newFSMI = (FSMInstance)currentFSM.clone();
720             newFSMI.setAGPosition(matchedAnnots.lastNode());
721             newFSMI.setFSMPosition(currentTrans.getTarget());
722             // do the bindings
723 
724             // all the annotations matched here will be added to the sets
725             // corresponding to the labels in this list in case of succesful
726             // matching
727             java.util.Iterator labelsIter =
728                                           currentTrans.getBindings().iterator();
729             AnnotationSet oldSet, newSet;
730             String label;
731             java.util.Map binds = newFSMI.getBindings();
732             while(labelsIter.hasNext()){
733               // for each label add the set of matched annotations to the set of
734               // annotations currently bound to that name
735               label = (String)labelsIter.next();
736               oldSet = (AnnotationSet)binds.get(label);
737               if(oldSet != null){
738                 newSet = new AnnotationSetImpl(oldSet);
739                 newSet.addAll(matchedAnnots);
740               }else{
741                 newSet = new AnnotationSetImpl(matchedAnnots);
742               }
743               binds.put(label, newSet);
744             } // while(labelsIter.hasNext())
745             activeFSMInstances.addLast(newFSMI);
746           }
747         } // while(transIter.hasNext())
748        // return currentFSM to the rightful owner :)
749        // FSMInstance.returnInstance(currentFSM);
750        } // while(!activeFSMInstances.isEmpty())
751 
752        //FIRE THE RULE
753       if(acceptingFSMInstances.isEmpty()){
754         // System.out.println("\nNo match...");
755         // advance to next relevant node in the Annotation Graph
756         startNode = annotations.nextNode(startNode);
757 
758         // System.out.println("111111111");
759 
760         // check to see if there are any annotations starting here
761         AnnotationSet annSet = annotations.get(startNode.getOffset());
762 
763         // System.out.println("22222222");
764 
765         if(annSet == null || annSet.isEmpty()){
766           // System.out.println("No way to advance... Bail!");
767           // no more starting annotations beyond this point
768           startNode = lastNode;
769         } else {
770           // System.out.print("Advancing...");
771           // advance to the next node that has starting annotations
772           startNode = ((Annotation)annSet.iterator().next()).getStartNode();
773           // System.out.println("done");
774         }
775 
776       /*
777         AnnotationSet res = annotations.get(startNode.getOffset());
778         if(!res.isEmpty())
779           startNode = ((Annotation)res.iterator().next()).getStartNode();
780         else startNode = lastNode;
781       */
782       } else if(ruleApplicationStyle == BRILL_STYLE) {
783         // fire the rules corresponding to all accepting FSM instances
784         java.util.Iterator accFSMs = acceptingFSMInstances.iterator();
785         FSMInstance currentAcceptor;
786         RightHandSide currentRHS;
787 //        long lastAGPosition = startNode.getOffset().longValue();
788         int lastAGPosition = startNode.getOffset().intValue();
789         //  Out.println("XXXXXXXXXXXXXXXXXXXX All the accepting FSMs are:");
790 
791         while(accFSMs.hasNext()){
792           currentAcceptor = (FSMInstance) accFSMs.next();
793           //  Out.println("==========================\n" +
794                   //                     currentAcceptor +
795                   //                     "\n==========================");
796 
797           currentRHS = currentAcceptor.getFSMPosition().getAction();
798           currentRHS.transduce(doc, annotations, currentAcceptor.getBindings());
799 
800 //          long currentAGPosition =
801 //               currentAcceptor.getAGPosition().getOffset().longValue();
802           int currentAGPosition =
803                currentAcceptor.getAGPosition().getOffset().intValue();
804           if(lastAGPosition <= currentAGPosition){
805             startNode = currentAcceptor.getAGPosition();
806             lastAGPosition = currentAGPosition;
807           }
808         }
809       // Out.println("XXXXXXXXXXXXXXXXXXXX");
810       } else if(ruleApplicationStyle == APPELT_STYLE) {
811         // AcceptingFSMInstances is an ordered structure:
812         // just execute the longest (last) rule.
813         FSMInstance currentAcceptor =
814                                     (FSMInstance)acceptingFSMInstances.last();
815         RightHandSide currentRHS = currentAcceptor.getFSMPosition().getAction();
816         currentRHS.transduce(doc, annotations, currentAcceptor.getBindings());
817         //advance in AG
818         startNode = currentAcceptor.getAGPosition();
819 
820       } else throw new RuntimeException("Unknown rule application style!");
821       //release all the accepting instances as they have done their job
822       /*
823         Iterator acceptors = acceptingFSMInstances.iterator();
824         while(acceptors.hasNext())
825         FSMInstance.returnInstance((FSMInstance)acceptors.next());
826       */
827       acceptingFSMInstances.clear();
828       startNodeOff = startNode.getOffset().intValue();
829 
830       if(startNodeOff - oldStartNodeOff > 1024){
831         fireProgressChanged(100 * startNodeOff / lastNodeOff);
832         oldStartNodeOff = startNodeOff;
833       }
834     } // while(startNode != lastNode)
835     // FSMInstance.clearInstances();
836     fireProcessFinished();
837   } // transduce
838 
839 
840 
841 
842 
843 
844 
845   /**
846     * Transduce a document using the annotation set provided and the current
847     * rule application style.
848     */
849   public void transduce2(Document doc, AnnotationSet inputAS,
850                         AnnotationSet outputAS) throws JapeException {
851     fireProgressChanged(0);
852 
853     //the input annotations will be read from this set
854     AnnotationSet annotations = null;
855 
856     //select only the annotations of types specified in the input list
857     //Out.println("Input:" + input);
858     if(input.isEmpty()) annotations = inputAS;
859     else{
860       annotations = new AnnotationSetImpl(doc);
861       Iterator typesIter = input.iterator();
862       AnnotationSet ofOneType = null;
863       while(typesIter.hasNext()){
864         ofOneType = inputAS.get((String)typesIter.next());
865         if(ofOneType != null){
866     //Out.println("Adding " + ofOneType.getAllTypes());
867           annotations.addAll(ofOneType);
868         }
869       }
870     }
871     if(annotations == null) annotations = new AnnotationSetImpl(doc);
872 
873     //define data structures
874     //FSM instances that haven't blocked yet
875     java.util.LinkedList activeFSMInstances = new java.util.LinkedList();
876 
877     // FSM instances that have reached a final state
878     // This is a sorted set and the contained objects are sorted by the length
879     // of the document content covered by the matched annotations
880     java.util.SortedSet acceptingFSMInstances = new java.util.TreeSet();
881     FSMInstance currentFSM;
882 
883     // startNode: the node from the current matching attepmt starts.
884     // initially startNode = leftMost node
885     gate.Node startNode = annotations.firstNode();
886 
887     // if there are no annotations return
888     if(startNode == null) return;
889 
890     // The last node: where the parsing will stop
891     gate.Node lastNode = annotations.lastNode();
892     int oldStartNodeOff = 0;
893     int lastNodeOff = lastNode.getOffset().intValue();
894     int startNodeOff;
895     //the big while for the actual parsing
896     while(startNode != lastNode){
897       //while there are more annotations to parse
898       //create initial active FSM instance starting parsing from new startNode
899       //currentFSM = FSMInstance.getNewInstance(
900       currentFSM = new FSMInstance(
901                   fsm,
902                   fsm.getInitialState(),//fresh start
903                   startNode,//the matching starts form the current startNode
904                   startNode,//current position in AG is the start position
905                   new java.util.HashMap(),//no bindings yet!
906                   doc
907                   );
908       // at this point ActiveFSMInstances should always be empty!
909       activeFSMInstances.clear();
910       acceptingFSMInstances.clear();
911       activeFSMInstances.addLast(currentFSM);
912       whileloop2:
913       while(!activeFSMInstances.isEmpty()){
914 //System.out.println("Active instances " + activeFSMInstances.size());
915         // while there are some "alive" FSM instances
916         // take the first active FSM instance
917         currentFSM = (FSMInstance)activeFSMInstances.removeFirst();
918         // process the current FSM instance
919         if(currentFSM.getFSMPosition().isFinal()){
920           // if the current FSM is in a final state
921           acceptingFSMInstances.add(currentFSM.clone());
922           //if we are in APPELT mode clear all the accepting instances
923           //apart from the longest one
924           if(ruleApplicationStyle == APPELT_STYLE &&
925              acceptingFSMInstances.size() > 1){
926             Object longestAcceptor = acceptingFSMInstances.last();
927             acceptingFSMInstances.clear();
928             acceptingFSMInstances.add(longestAcceptor);
929           }
930           //if we're only looking for the shortest stop here
931           if(ruleApplicationStyle == FIRST_STYLE) break whileloop2;
932         }
933         //all the annotations that start from the current node.
934         AnnotationSet paths = annotations.get(
935                                 currentFSM.getAGPosition().getOffset());
936         if(paths != null){
937           Iterator pathsIter = paths.iterator();
938           Annotation onePath;
939           State currentState = currentFSM.getFSMPosition();
940           Iterator transitionsIter;
941           FeatureMap features = Factory.newFeatureMap();
942           //foreach possible annotation
943           while(pathsIter.hasNext()){
944             onePath = (Annotation)pathsIter.next();
945             transitionsIter = currentState.getTransitions().iterator();
946             Transition currentTransition;
947             Constraint[] currentConstraints;
948             transitionsWhile:
949             while(transitionsIter.hasNext()){
950               currentTransition = (Transition)transitionsIter.next();
951               //check if the current transition can use the curent annotation (path)
952               currentConstraints =
953                            currentTransition.getConstraints().getConstraints();
954               String annType;
955               //we assume that all annotations in a contraint are of the same type
956               for(int i = 0; i<currentConstraints.length; i++){
957                 annType = currentConstraints[i].getAnnotType();
958                 //if wrong type try next transition
959                 if(!annType.equals(onePath.getType()))continue transitionsWhile;
960                 features.clear();
961                 features.putAll(currentConstraints[i].getAttributeSeq());
962               }
963               if(onePath.getFeatures().entrySet().containsAll(features.entrySet())){
964                 //we have a match
965   //System.out.println("Match!");
966                 //create a new FSMInstance, advance it over the current annotation
967                 //take care of the bindings  and add it to ActiveFSM
968                 FSMInstance newFSMI = (FSMInstance)currentFSM.clone();
969                 newFSMI.setAGPosition(onePath.getEndNode());
970                 newFSMI.setFSMPosition(currentTransition.getTarget());
971                 //bindings
972                 java.util.Map binds = newFSMI.getBindings();
973                 java.util.Iterator labelsIter =
974                                    currentTransition.getBindings().iterator();
975                 String oneLabel;
976                 AnnotationSet boundAnnots, newSet;
977                 while(labelsIter.hasNext()){
978                   oneLabel = (String)labelsIter.next();
979                   boundAnnots = (AnnotationSet)binds.get(oneLabel);
980                   if(boundAnnots != null){
981                     newSet = new AnnotationSetImpl(boundAnnots);
982                   }else{
983                     newSet = new AnnotationSetImpl(doc);
984                   }
985                   newSet.add(onePath);
986                   binds.put(oneLabel, newSet);
987                 }//while(labelsIter.hasNext())
988                 activeFSMInstances.addLast(newFSMI);
989               }//if match
990             }//while(transitionsIter.hasNext())
991           }//while(pathsIter.hasNext())
992         }//if(paths != null)
993       }//while(!activeFSMInstances.isEmpty())
994 
995       Node newStartNode = null;
996       //FIRE THE RULE
997       if(acceptingFSMInstances.isEmpty()){
998 //System.out.println("No acceptor");
999         //no rule to fire just advance to next relevant node in the
1000        //Annotation Graph
1001//System.out.print(startNode.getOffset());
1002        newStartNode = annotations.nextNode(startNode);
1003//System.out.println("->" + startNode.getOffset());
1004        // check to see if there are any annotations starting here
1005        AnnotationSet annSet = annotations.get(newStartNode.getOffset());
1006
1007        if(annSet == null || annSet.isEmpty()){
1008          // no more starting annotations beyond this point
1009          newStartNode = lastNode;
1010        } else {
1011          // advance to the next node that has starting annotations
1012          newStartNode = ((Annotation)annSet.iterator().next()).getStartNode();
1013        }
1014      } else if(ruleApplicationStyle == BRILL_STYLE) {
1015      //System.out.println("Brill acceptor");
1016        // fire the rules corresponding to all accepting FSM instances
1017        java.util.Iterator accFSMs = acceptingFSMInstances.iterator();
1018        FSMInstance currentAcceptor;
1019        RightHandSide currentRHS;
1020//        long lastAGPosition = startNode.getOffset().longValue();
1021        int lastAGPosition = startNode.getOffset().intValue();
1022        //  Out.println("XXXXXXXXXXXXXXXXXXXX All the accepting FSMs are:");
1023
1024        while(accFSMs.hasNext()){
1025          currentAcceptor = (FSMInstance) accFSMs.next();
1026          //  Out.println("==========================\n" +
1027                  //                     currentAcceptor +
1028                  //                     "\n==========================");
1029
1030          currentRHS = currentAcceptor.getFSMPosition().getAction();
1031          currentRHS.transduce(doc, outputAS, currentAcceptor.getBindings());
1032
1033//          long currentAGPosition =
1034//               currentAcceptor.getAGPosition().getOffset().longValue();
1035          int currentAGPosition =
1036               currentAcceptor.getAGPosition().getOffset().intValue();
1037          if(lastAGPosition <= currentAGPosition){
1038            newStartNode = currentAcceptor.getAGPosition();
1039            lastAGPosition = currentAGPosition;
1040          }
1041        }
1042      // Out.println("XXXXXXXXXXXXXXXXXXXX");
1043      } else if(ruleApplicationStyle == APPELT_STYLE) {
1044//System.out.println("Appelt acceptor");
1045        // AcceptingFSMInstances is an ordered structure:
1046        // just execute the longest (last) rule
1047
1048        FSMInstance currentAcceptor =(FSMInstance)acceptingFSMInstances.last();
1049        RightHandSide currentRHS = currentAcceptor.getFSMPosition().getAction();
1050        currentRHS.transduce(doc, outputAS, currentAcceptor.getBindings());
1051        //advance in AG
1052        newStartNode = currentAcceptor.getAGPosition();
1053
1054      } else if(ruleApplicationStyle == FIRST_STYLE) {
1055//System.out.println("Appelt acceptor");
1056        // AcceptingFSMInstances is an ordered structure:
1057        // just execute the shortest (first) rule
1058
1059        FSMInstance currentAcceptor =(FSMInstance)acceptingFSMInstances.first();
1060        RightHandSide currentRHS = currentAcceptor.getFSMPosition().getAction();
1061        currentRHS.transduce(doc, outputAS, currentAcceptor.getBindings());
1062        //advance in AG
1063//System.out.print(startNode.getOffset());
1064        newStartNode = currentAcceptor.getAGPosition();
1065//System.out.println("->" + startNode.getOffset());
1066      } else throw new RuntimeException("Unknown rule application style!");
1067       startNodeOff = newStartNode.getOffset().intValue();
1068
1069      //fire the progress event
1070      if(startNodeOff - oldStartNodeOff > 1024){
1071        fireProgressChanged(100 * startNodeOff / lastNodeOff);
1072        oldStartNodeOff = startNodeOff;
1073      }
1074      if(startNode == newStartNode){
1075        //no advance: we probably matched some annotations that cover no text
1076        //we should force the advance
1077        Err.prln("Infinite loop detected in grammar " + getName() +
1078                 " at position " + startNodeOff + " in " + doc.getSourceUrl() +
1079                 "!\nAdvancing forced!");
1080        newStartNode = annotations.nextNode(startNode);
1081        // check to see if there are any annotations starting here
1082        AnnotationSet annSet = annotations.get(newStartNode.getOffset());
1083        if(annSet == null || annSet.isEmpty()){
1084          // no more starting annotations beyond this point
1085          newStartNode = lastNode;
1086        } else {
1087          // advance to the next node that has starting annotations
1088          newStartNode = ((Annotation)annSet.iterator().next()).getStartNode();
1089        }
1090      }
1091      startNode = newStartNode;
1092//System.out.println("->" + startNodeOff);
1093    } // while(startNode != lastNode)
1094    // FSMInstance.clearInstances();
1095    fireProcessFinished();
1096  } // transduce
1097
1098
1099  //###############end modified versions
1100
  /**
    * Clean up (delete action class files, for e.g.).
    * Currently this does nothing: the loop that used to call cleanUp() on
    * every rule is commented out below.
    */
  public void cleanUp() {
//    for(DListIterator i = rules.begin(); ! i.atEnd(); i.advance())
//      ((Rule) i.get()).cleanUp();
  } // cleanUp
1106
1107  /** A string representation of this object. */
1108  public String toString() {
1109    return toString("");
1110  } // toString()
1111
1112  /** A string representation of this object. */
1113  public String toString(String pad) {
1114    String newline = Strings.getNl();
1115    String newPad = Strings.addPadding(pad, INDENT_PADDING);
1116
1117    StringBuffer buf =
1118      new StringBuffer(pad + "SPT: name(" + name + "); ruleApplicationStyle(");
1119
1120    switch(ruleApplicationStyle) {
1121      case APPELT_STYLE: buf.append("APPELT_STYLE); "); break;
1122      case BRILL_STYLE:  buf.append("BRILL_STYLE); ");  break;
1123      default: break;
1124    }
1125
1126    buf.append("rules(" + newline);
1127    Iterator rulesIterator = rules.iterator();
1128    while(rulesIterator.hasNext())
1129      buf.append(((Rule) rulesIterator.next()).toString(newPad) + " ");
1130
1131    buf.append(newline + pad + ")." + newline);
1132
1133    return buf.toString();
1134  } // toString(pad)
1135
1136  //needed by fsm
1137  public PrioritisedRuleList getRules() {
1138    return rules;
1139  }
1140
1141  /**
1142    * Adds a new type of input annotations used by this transducer.
1143    * If the list of input types is empty this transducer will parse all the
1144    * annotations in the document otherwise the types not found in the input
1145    * list will be completely ignored! To be used with caution!
1146    */
1147  public void addInput(String ident) {
1148    input.add(ident);
1149  }
1150  public synchronized void removeProgressListener(ProgressListener l) {
1151    if (progressListeners != null && progressListeners.contains(l)) {
1152      Vector v = (Vector) progressListeners.clone();
1153      v.removeElement(l);
1154      progressListeners = v;
1155    }
1156  }
1157  public synchronized void addProgressListener(ProgressListener l) {
1158    Vector v = progressListeners == null ? new Vector(2) : (Vector) progressListeners.clone();
1159    if (!v.contains(l)) {
1160      v.addElement(l);
1161      progressListeners = v;
1162    }
1163  }
1164
  /**
    * Defines the types of input annotations that this transducer reads. If this
    * set is empty the transducer will read all the annotations otherwise it
    * will only "see" the annotations of the types found in this set, ignoring
    * all other types of annotations.
    */
  java.util.Set input = new java.util.HashSet();

  /**
    * The registered progress listeners; null until the first listener is
    * added. The add and remove methods replace this vector with a modified
    * copy instead of changing it in place. Declared transient.
    */
  private transient Vector progressListeners;
1173
1174  protected void fireProgressChanged(int e) {
1175    if (progressListeners != null) {
1176      Vector listeners = progressListeners;
1177      int count = listeners.size();
1178      for (int i = 0; i < count; i++) {
1179        ((ProgressListener) listeners.elementAt(i)).progressChanged(e);
1180      }
1181    }
1182  }
1183  protected void fireProcessFinished() {
1184    if (progressListeners != null) {
1185      Vector listeners = progressListeners;
1186      int count = listeners.size();
1187      for (int i = 0; i < count; i++) {
1188        ((ProgressListener) listeners.elementAt(i)).processFinished();
1189      }
1190    }
1191  }
1192  public int getRuleApplicationStyle() {
1193    return ruleApplicationStyle;
1194  }
1195
1196  /*
1197  private void writeObject(ObjectOutputStream oos) throws IOException {
1198    Out.prln("writing spt");
1199    oos.defaultWriteObject();
1200    Out.prln("finished writing spt");
1201  } // writeObject
1202  */
1203
1204
1205} // class SinglePhaseTransducer
1206
1207/*
1208class SimpleSortedSet {
1209
1210    static final int INCREMENT = 1023;
1211    int[] theArray = new int[INCREMENT];
1212    Object[] theObject = new Object[INCREMENT];
1213    int tsindex = 0;
1214    int size = 0;
1215    public static int avesize = 0;
1216    public static int maxsize = 0;
1217    public static int avecount = 0;
1218    public SimpleSortedSet()
1219    {
1220        avecount++;
1221        java.util.Arrays.fill(theArray, Integer.MAX_VALUE);
1222    }
1223
1224    public Object get(int elValue)
1225    {
1226        int index = java.util.Arrays.binarySearch(theArray, elValue);
1227        if (index >=0)
1228            return theObject[index];
1229        return null;
1230    }
1231
1232    public boolean add(int elValue, Object o)
1233    {
1234        int index = java.util.Arrays.binarySearch(theArray, elValue);
1235        if (index >=0)
1236        {
1237            ((ArrayList)theObject[index]).add(o);
1238            return false;
1239        }
1240        if (size == theArray.length)
1241        {
1242            int[] temp = new int[theArray.length + INCREMENT];
1243            Object[] tempO = new Object[theArray.length + INCREMENT];
1244            System.arraycopy(theArray, 0, temp, 0, theArray.length);
1245            System.arraycopy(theObject, 0, tempO, 0, theArray.length);
1246            java.util.Arrays.fill(temp, theArray.length, temp.length , Integer.MAX_VALUE);
1247            theArray = temp;
1248            theObject = tempO;
1249        }
1250        index = ~index;
1251        System.arraycopy(theArray, index, theArray, index+1, size - index );
1252        System.arraycopy(theObject, index, theObject, index+1, size - index );
1253        theArray[index] = elValue;
1254        theObject[index] = new ArrayList();
1255        ((ArrayList)theObject[index]).add(o);
1256        size++;
1257        return true;
1258    }
1259    public int first()
1260    {
1261        if (tsindex >= size) return -1;
1262        return theArray[tsindex];
1263    }
1264
1265    public Object getFirst()
1266    {
1267        if (tsindex >= size) return null;
1268        return theObject[tsindex];
1269    }
1270
1271    public SimpleSortedSet tailSet(int elValue)
1272    {
1273        if (tsindex < theArray.length && elValue != theArray[tsindex])
1274        {
1275            if (tsindex<(size-1) && elValue > theArray[tsindex] &&
1276                elValue <= theArray[tsindex+1])
1277                {
1278                    tsindex++;
1279                   return this;
1280                }
1281            int index = java.util.Arrays.binarySearch(theArray, elValue);
1282            if (index < 0)
1283                index = ~index;
1284            tsindex = index;
1285        }
1286        return this;
1287    }
1288
1289    public boolean isEmpty()
1290    {
1291        return size ==0;
1292    }
1293};
1294*/