1   /*
2    *  TestJape2.java (Java Annotation Patterns Engine)
3    *
4    *  Copyright (c) 1998-2001, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Hamish Cunningham, 23/02/2000
12   *
13   *  $Id: TestJape2.java,v 1.10 2001/09/13 12:09:50 kalina Exp $
14   *
15   *  Description: Test class for JAPE.
16   */
17  
18  package gate.jape;
19  
20  import java.util.*;
21  import java.io.*;
22  
23  
24  import gate.jape.parser.*;
25  import gate.*;
26  import gate.annotation.*;
27  import gate.util.*;
28  import gate.creole.*;
29  
30  /**
31    * Second test harness for JAPE.
32    * Uses the Sheffield Tokeniser and Gazetteer, and must be run
33    * from the gate directory.
34    * @author Hamish Cunningham
35    */
36  public class TestJape2 {
37  
38    /** Debug flag */
39    private static final boolean DEBUG = false;
40  
41    /** How much noise to make. */
42    static private boolean verbose = false;
43  
44  
45    /** Take a list of text files and a collection name, and
46      * call tokeniser/gazetteer/jape on them, creating the
47      * collection.
48      */
49    static public void main(String[] args) {
50  
51      // turn debug output on/off
52      //Debug.setDebug(true);
53      //Debug.setDebug(AnnotationSet.class, true);
54      //Debug.setDebug(BasicPatternElement.class, true);
55      //Debug.setDebug(ComplexPatternElement.class, true);
56      //Debug.setDebug(ConstraintGroup.class, true);
57      //Debug.setDebug(SinglePhaseTransducer.class, true);
58  
59      // variables to parse the command line options into
60      String collName = null;
61      String japeName = null;
62      ArrayList fileNames = null;
63  
64      // process options
65      for(int i=0; i<args.length; i++) {
66        if(args[i].equals("-c") && ++i < args.length) // -c = coll name
67          collName = args[i];
68        else if(args[i].equals("-j") && ++i < args.length) // -j: .jape name
69          japeName = args[i];
70        else if(args[i].equals("-v")) // -v = verbose
71          verbose = true;
72        else { // a list of files
73          fileNames = new ArrayList();
74          do {
75            fileNames.add(args[i++]);
76          } while(i < args.length);
77        }
78      } // for each arg
79  
80      // did they give valid options?
81      message("checking options");
82      if(collName == null || japeName == null || fileNames == null)
83        usage("you must supply collection, transducer and file names");
84  
85      // create a collection and run the tokeniser
86      message("creating coll, tokenising and gazetteering");
87      Corpus coll = null;
88      try {
89        coll = tokAndGaz(collName, fileNames);
90      } catch(ResourceInstantiationException e) {
91        usage("couldn't open collection: " + e);
92      }
93  /*
94      // run the parser test
95      message("parsing the .jape file (or deserialising the .ser file)");
96      Batch batch = null;
97      try { batch = new Batch(japeName);
98      } catch(JapeException e) {
99        usage("can't create transducer " + e.getMessage());
100     }
101 */
102     /*Transducer transducer = parseJape(japeName);
103     //Out.println(transducer);
104     if(transducer == null)
105       System.exit(1);*/
106 
107     // test the transducers from the parser
108 /*
109     message("running the transducer");
110     try { batch.transduce(coll); } catch(JapeException e) {
111       usage("couldn't run transducer " + e.getMessage());
112     }
113     //runTransducer(transducer, coll);
114     //Out.println(transducer);
115 
116     message("done\n\r");
117     //System.exit(0);
118 */
119   } // main
120 
121 
122   /**
123     * Create a collection and put tokenised and gazetteered docs in it.
124     */
125   static public Corpus tokAndGaz(String collName, ArrayList fileNames)
126   throws ResourceInstantiationException {
127 
128     // create or overwrite the collection
129     Corpus collection = null;
130     File collDir = new File(collName);
131     collection = Factory.newCorpus(
132       collDir.getAbsolutePath()
133     );
134 
135     // add all the documents
136     for(Iterator i = fileNames.iterator(); i.hasNext(); ) {
137       String fname = (String) i.next();
138 
139       File f = new File(fname);
140       FeatureMap attrs = Factory.newFeatureMap();
141       Document doc = null;
142 
143       try {
144         AnnotationSet annots = new AnnotationSetImpl(doc);
145         collection.add(
146           Factory.newDocument(f.getAbsolutePath())
147         );
148       } catch(ResourceInstantiationException e) {
149         e.printStackTrace();
150       }
151 
152       /*
153       // Tokenise the document
154       Tokeniser tokeniser = new Tokeniser(doc, Tokeniser.HMM);
155       try { tokeniser.hmmTokenSequence(); }
156       catch(sheffield.creole.tokeniser.ParseException ex) {
157         ex.printStackTrace();
158         return null;
159       } catch (CreoleException ex) {
160         ex.printStackTrace();
161         return null;
162       }
163 
164       // Gazetteer the document
165       gate.creole.Annotator gazetteer = new GazetteerAnnotator();
166       gazetteer.annotate(doc, null);
167       */
168     } // for each doc name
169 
170     // return the annotated collection
171     return collection;
172 
173   } //tokAndGaz
174 
175 
176   /**
177     * Must be run from the gate directory.
178     * Parse the .jape file.
179     */
180     /*
181     static public Transducer parseJape(String japeName) {
182     Transducer transducer = null;
183 
184     if(japeName.endsWith(".ser")) { // it's compiled already
185       message("deserialising " + japeName);
186       File f = new File(japeName);
187       if(! f.exists())
188         Out.println(japeName + " not found");
189 
190       try {
191         FileInputStream fis = new FileInputStream(f.getPath());
192         ObjectInputStream ois = new ObjectInputStream(fis);
193         transducer = (Transducer) ois.readObject();
194         ois.close();
195       } catch (Exception ex) {
196         Err.println(
197           "Can't read from " + f.getName() + ": " + ex.toString()
198         );
199       }
200     } else { // parse it
201       message("parsing " + japeName);
202       try {
203         ParseCpsl cpslParser = new ParseCpsl(japeName);
204         transducer = cpslParser.MultiPhaseTransducer();
205       } catch(IOException e) {
206         e.printStackTrace();
207       } catch(gate.jape.parser.ParseException ee) {
208         Err.println("Error parsing transducer: " + ee.getMessage());
209       }
210     }
211 
212     return transducer;
213   } // parseJape
214 
215 
216   static public void runTransducer(
217     Transducer transducer, Corpus coll
218   ) {
219 
220     try {
221       Document doc = coll.firstDocument();
222       do {
223         message("doing document " + doc.getId());
224         transducer.transduce(doc);
225         // Out.println(transducer.toString());
226       } while( (doc = coll.nextDocument()) != null );
227     } catch(JdmException e) {
228       e.printStackTrace();
229     } catch(JapeException e) {
230       e.printStackTrace();
231     }
232   } // runTransducer
233   */
234 
235   /** You got something wrong, dumbo. */
236   public static void usage(String errorMessage) {
237     String usageMessage =
238       "usage: java gate.jape.TestJape2.main [-v] " +
239         "-j JapePatternFile -c CollectionName FileName(s)";
240 
241     Err.println(errorMessage);
242     Err.println(usageMessage);
243     //System.exit(1);
244 
245   } // usage
246 
247 
248   /** Hello? Anybody there?? */
249   public static void message(String mess) {
250     if(verbose) Out.println("TestJape2: " + mess);
251   } // message
252 
253 } // class TestJape2
254 
255 
256 // $Log: TestJape2.java,v $
257 // Revision 1.10  2001/09/13 12:09:50  kalina
258 // Removed completely the use of jgl.objectspace.Array and such.
259 // Instead all sources now use the new Collections, typically ArrayList.
260 // I ran the tests and I ran some documents and compared with keys.
261 // JAPE seems to work well (that's where it all was). If there are problems
262 // maybe look at those new structures first.
263 //
264 // Revision 1.9  2001/02/08 13:46:06  valyt
265 // Added full Unicode support for the gazetteer and Jape
266 // converted the gazetteer files to UTF-8
267 //
268 // Revision 1.8  2001/01/30 14:18:02  hamish
269 // fixed some hard-coded paths
270 //
271 // Revision 1.7  2000/11/08 16:35:04  hamish
272 // formatting
273 //
274 // Revision 1.6  2000/10/26 10:45:31  oana
275 // Modified in the code style
276 //
277 // Revision 1.5  2000/10/23 21:50:42  hamish
278 // cleaned up exception handling in gate.creole and added
279 // ResourceInstantiationException;
280 //
281 // changed Factory.newDocument(URL u) to use the new instantiation
282 // facilities;
283 //
284 // added COMMENT to resource metadata / ResourceData;
285 //
286 // changed Document and DocumentImpl to follow beans style, and moved
287 // constructor logic to init(); changed all the Factory newDocument methods to
288 // use the new resource creation stuff;
289 //
290 // added builtin document and corpus metadata to creole/creole.xml (copied from
291 // gate.ac.uk/tests/creole.xml);
292 //
293 // changed Corpus to the new style too;
294 //
295 // removed CreoleRegister.init()
296 //
297 // Revision 1.4  2000/10/18 13:26:48  hamish
298 // Factory.createResource now working, with a utility method that uses reflection (via java.beans.Introspector) to set properties on a resource from the
299 //     parameter list fed to createResource.
300 //     resources may now have both an interface and a class; they are indexed by interface type; the class is used to instantiate them
301 //     moved createResource from CR to Factory
302 //     removed Transients; use Factory instead
303 //
304 // Revision 1.3  2000/10/16 16:44:34  oana
305 // Changed the comment of DEBUG variable
306 //
307 // Revision 1.2  2000/10/10 15:36:37  oana
308 // Changed System.out in Out and System.err in Err;
309 // Added the DEBUG variable seted on false;
310 // Added in the header the licence;
311 //
312 // Revision 1.1  2000/02/23 13:46:12  hamish
313 // added
314 //
315 // Revision 1.1.1.1  1999/02/03 16:23:03  hamish
316 // added gate2
317 //
318 // Revision 1.9  1998/10/29 12:13:55  hamish
319 // reorganised to use Batch
320 //
321 // Revision 1.8  1998/10/01 16:06:41  hamish
322 // new appelt transduction style, replacing buggy version
323 //
324 // Revision 1.7  1998/09/26 09:19:21  hamish
325 // added cloning of PE macros
326 //
327 // Revision 1.6  1998/09/23 12:48:03  hamish
328 // negation added; noncontiguous BPEs disallowed
329 //
330 // Revision 1.5  1998/09/17 12:53:09  hamish
331 // fixed for new tok; new construction pattern
332 //
333 // Revision 1.4  1998/09/17 10:24:05  hamish
334 // added options support, and Appelt-style rule application
335 //
336 // Revision 1.3  1998/08/19 20:21:46  hamish
337 // new RHS assignment expression stuff added
338 //
339 // Revision 1.2  1998/08/18 14:37:45  hamish
340 // added some messages
341 //
342 // Revision 1.1  1998/08/18 12:43:11  hamish
343 // fixed SPT bug, not advancing newPosition
344