1   /*
2    *  Scratch.java
3    *
4    *  Copyright (c) 1998-2001, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Hamish Cunningham, 22/03/00
12   *
13   *  $Id: Scratch.java,v 1.64 2003/01/16 15:39:33 valyt Exp $
14   */
15  
16  
17  package gate.util;
18  
19  import java.util.*;
20  import java.net.*;
21  import java.io.*;
22  import java.util.zip.*;
23  
24  import gate.*;
25  import gate.creole.*;
26  import gate.creole.ir.*;
27  import gate.creole.tokeniser.*;
28  import gate.creole.gazetteer.*;
29  import gate.persist.*;
30  import gate.gui.*;
31  
32  import org.xml.sax.*;
33  import javax.xml.parsers.*;
34  import org.w3c.www.mime.*;
35  
36  /** A scratch pad for experimenting.
37    */
38  public class Scratch
39  {
40    /** Debug flag */
41    private static final boolean DEBUG = false;
42  
43    public static void main(String args[]) throws Exception {
44  //    Map charsets = java.nio.charset.Charset.availableCharsets();
45  //    Iterator namesIter = charsets.keySet().iterator();
46  //    while(namesIter.hasNext()){
47  //      String name = (String)namesIter.next();
48  //      System.out.println(name + " : " + charsets.get(name));
49  //    }
50      System.out.println(System.getProperty("file.encoding"));
51      System.out.println(java.nio.charset.Charset.forName(System.getProperty("file.encoding")).name());
52  //    System.out.println(new Character((char)0xa3));
53  //    Gate.init();
54  //
55  //    List classes = Tools.findSubclasses(gate.creole.ir.Search.class);
56  //    if(classes != null) for(int i = 0; i < classes.size(); i++){
57  //      Out.prln(classes.get(i).toString());
58  //    }
59  //    createIndex();
60  //    URL anURL = new URL("file:/z:/a/b/c/d.txt");
61  //    URL anotherURL = new URL("file:/z:/a/b/c/d.txt");
62  //    String relPath = gate.util.persistence.PersistenceManager.
63  //                     getRelativePath(anURL, anotherURL);
64  //    Out.prln("Context: " + anURL);
65  //    Out.prln("Target: " + anotherURL);
66  //    Out.prln("Relative path: " + relPath);
67  //    Out.prln("Result " + new URL(anURL, relPath));
68  //    javax.swing.text.FlowView fv;
69  //    javax.swing.UIManager.setLookAndFeel(javax.swing.UIManager.getSystemLookAndFeelClassName());
70  //    Map uidefaults  = (Map)javax.swing.UIManager.getDefaults();
71  //    List keys = new ArrayList(uidefaults.keySet());
72  //    Collections.sort(keys);
73  //    Iterator keyIter = keys.iterator();
74  //    while(keyIter.hasNext()){
75  //      Object key = keyIter.next();
76  //      System.out.println(key + " : " + uidefaults.get(key));
77  //    }
78  
79      // initialise the thing
80  //    Gate.setNetConnected(false);
81  //    Gate.setLocalWebServer(false);
82  //    Gate.init();
83  
84  //    Scratch oneOfMe = new Scratch();
85  //    try{
86  //      oneOfMe.runNerc();
87  //    } catch (Exception e) {
88  //      e.printStackTrace(Out.getPrintWriter());
89  //    }
90  
91  
92  //    CreoleRegister reg = Gate.getCreoleRegister();
93  //System.out.println("Instances for " + reg.getLrInstances("gate.creole.AnnotationSchema"));
94  //System.out.println("Instances for " + reg.getAllInstances ("gate.creole.AnnotationSchema"));
95  
96  //System.out.println("VRs for " + reg.getAnnotationVRs("Tree"));
97  //System.out.println("VRs for " + reg.getAnnotationVRs());
98  
99  //System.out.println(reg.getLargeVRsForResource("gate.corpora.DocumentImpl"));
100 
101   } // main
102 
103   /** Example of using an exit-time hook. */
104   public static void exitTimeHook() {
105     Runtime.getRuntime().addShutdownHook(new Thread() {
106       public void run() {
107         System.out.println("shutting down");
108         System.out.flush();
109 
110         // create a File to store the state in
111         File stateFile = new File("z:\\tmp", "GateGuiState.gzsr");
112 
113         // dump the state into the new File
114         try {
115           ObjectOutputStream oos = new ObjectOutputStream(
116             new GZIPOutputStream(new FileOutputStream(stateFile))
117           );
118           System.out.println("writing main frame");
119           System.out.flush();
120           oos.writeObject(Main.getMainFrame());
121           oos.close();
122         } catch(Exception e) {
123           System.out.println("Couldn't write to state file: " + e);
124         }
125 
126         System.out.println("done");
127         System.out.flush();
128       }
129     });
130   } // exitTimeHook()
131 
132   /**
133    * ***** <B>Failed</B> *****
134    * attempt to serialise whole gui state - various swing components
135    * don't like to be serialised :-(. might be worth trying again when
136    * jdk1.4 arrives.
137    */
138   public static void dumpGuiState() {
139     System.out.println("dumping gui state...");
140     System.out.flush();
141 
142     // create a File to store the state in
143     File stateFile = new File("z:\\tmp", "GateGuiState.gzsr");
144 
145     // dump the state into the new File
146     try {
147       ObjectOutputStream oos = new ObjectOutputStream(
148         new GZIPOutputStream(new FileOutputStream(stateFile))
149       );
150       MainFrame mf = Main.getMainFrame();
151 
152       // wait for 1 sec
153       long startTime = System.currentTimeMillis();
154       long timeNow = System.currentTimeMillis();
155       while(timeNow - startTime < 3000){
156         try {
157           Thread.sleep(150);
158           timeNow = System.currentTimeMillis();
159         } catch(InterruptedException ie) {}
160       }
161 
162       System.out.println("writing main frame");
163       System.out.flush();
164       oos.writeObject(mf);
165       oos.close();
166     } catch(Exception e) {
167       System.out.println("Couldn't write to state file: " + e);
168     }
169 
170     System.out.println("...done gui dump");
171     System.out.flush();
172   } // dumpGuiState
173 
174   /**
175    * Run NERC and print out the various stages (doesn't actually
176    * use Nerc but the individual bits), and serialise then deserialise
177    * the NERC system.
178    */
179   public void runNerc() throws Exception {
180     long startTime = System.currentTimeMillis();
181 
182     Out.prln("gate init");
183     Gate.setLocalWebServer(false);
184     Gate.setNetConnected(false);
185     Gate.init();
186 
187     Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
188     Out.prln("creating resources");
189 
190     // a controller
191     Controller c1 = (Controller) Factory.createResource(
192       "gate.creole.SerialController",
193       Factory.newFeatureMap()
194     );
195     c1.setName("Scratch controller");
196 
197     //get a document
198     FeatureMap params = Factory.newFeatureMap();
199     params.put(Document.DOCUMENT_URL_PARAMETER_NAME, Gate.getUrl("tests/doc0.html"));
200     params.put(Document.DOCUMENT_MARKUP_AWARE_PARAMETER_NAME, "false");
201     Document doc = (Document)Factory.createResource("gate.corpora.DocumentImpl",
202                                                     params);
203 
204     //create a default tokeniser
205     params = Factory.newFeatureMap();
206     params.put(DefaultTokeniser.DEF_TOK_TOKRULES_URL_PARAMETER_NAME,
207       "gate:/creole/tokeniser/DefaultTokeniser.rules");
208     params.put(DefaultTokeniser.DEF_TOK_ENCODING_PARAMETER_NAME, "UTF-8");
209     params.put(DefaultTokeniser.DEF_TOK_DOCUMENT_PARAMETER_NAME, doc);
210     ProcessingResource tokeniser = (ProcessingResource) Factory.createResource(
211       "gate.creole.tokeniser.DefaultTokeniser", params
212     );
213 
214     //create a default gazetteer
215     params = Factory.newFeatureMap();
216     params.put(DefaultGazetteer.DEF_GAZ_DOCUMENT_PARAMETER_NAME, doc);
217     params.put(DefaultGazetteer.DEF_GAZ_LISTS_URL_PARAMETER_NAME,
218       "gate:/creole/gazeteer/default/lists.def");
219     ProcessingResource gaz = (ProcessingResource) Factory.createResource(
220       "gate.creole.gazetteer.DefaultGazetteer", params
221     );
222 
223     //create a default transducer
224     params = Factory.newFeatureMap();
225     params.put(Transducer.TRANSD_DOCUMENT_PARAMETER_NAME, doc);
226     //params.put("grammarURL", new File("z:\\tmp\\main.jape").toURL());
227     ProcessingResource trans = (ProcessingResource) Factory.createResource(
228       "gate.creole.Transducer", params
229     );
230 
231     // get the controller to encapsulate the tok and gaz
232     c1.getPRs().add(tokeniser);
233     c1.getPRs().add(gaz);
234     c1.getPRs().add(trans);
235 
236     Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
237     Out.prln("dumping state");
238 
239     // create a File to store the state in
240     File stateFile = new File("z:\\tmp", "SerialisedGateState.gzsr");
241 
242     // dump the state into the new File
243     try {
244       ObjectOutputStream oos = new ObjectOutputStream(
245         new GZIPOutputStream(new FileOutputStream(stateFile))
246       );
247       oos.writeObject(new SessionState());
248       oos.close();
249     } catch(IOException e) {
250       throw new GateException("Couldn't write to state file: " + e);
251     }
252 
253     Out.prln(System.getProperty("user.home"));
254 
255     Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
256     Out.prln("reinstating");
257 
258     try {
259       FileInputStream fis = new FileInputStream(stateFile);
260       GZIPInputStream zis = new GZIPInputStream(fis);
261       ObjectInputStream ois = new ObjectInputStream(zis);
262       SessionState state = (SessionState) ois.readObject();
263       ois.close();
264     } catch(IOException e) {
265       throw
266         new GateException("Couldn't read file "+stateFile+": "+e);
267     } catch(ClassNotFoundException ee) {
268       throw
269         new GateException("Couldn't find class: "+ee);
270     }
271 
272     Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
273     Out.prln("done");
274   } // runNerc()
275 
276 
277   /** Inner class for holding CR and DSR for serialisation experiments */
278   class SessionState implements Serializable {
279     SessionState() {
280       cr = Gate.getCreoleRegister();
281       dsr = Gate.getDataStoreRegister();
282     }
283 
284     CreoleRegister cr;
285 
286     DataStoreRegister dsr;
287 
288     // other state from Gate? and elsewhere?
289   } // SessionState
290 
291   /** Generate a random integer for file naming. */
292   protected static int random() {
293     return randomiser.nextInt(9999);
294   } // random
295 
296   /**
297    * Generates an index for a corpus in a datastore on Valy's computer in order
298    * to have some test data.
299    */
300   public static void createIndex() throws Exception{
301     String dsURLString = "file:///d:/temp/ds";
302     String indexLocation = "d:/temp/ds.idx";
303 
304     Gate.init();
305 
306     //open the datastore
307     SerialDataStore sds = (SerialDataStore)Factory.openDataStore(
308                             "gate.persist.SerialDataStore", dsURLString);
309     sds.open();
310     List corporaIds = sds.getLrIds("gate.corpora.SerialCorpusImpl");
311     IndexedCorpus corpus = (IndexedCorpus)
312                            sds.getLr("gate.corpora.SerialCorpusImpl",
313 
314                                      corporaIds.get(0));
315     DefaultIndexDefinition did = new DefaultIndexDefinition();
316     did.setIrEngineClassName(gate.creole.ir.lucene.
317                              LuceneIREngine.class.getName());
318 
319     did.setIndexLocation(indexLocation);
320     did.addIndexField(new IndexField("body", new ContentPropertyReader(), false));
321 
322     corpus.setIndexDefinition(did);
323 
324     Out.prln("removing old index");
325     corpus.getIndexManager().deleteIndex();
326     Out.prln("building new index");
327     corpus.getIndexManager().createIndex();
328     Out.prln("optimising new index");
329     corpus.getIndexManager().optimizeIndex();
330     Out.prln("saving corpus");
331     sds.sync(corpus);
332     Out.prln("done!");
333   }
334 
335   /**
336    *
337    * @param file a TXT file containing the text
338    */
339   public static void tokeniseFile(File file) throws Exception{
340     //initialise GATE (only call it once!!)
341     Gate.init();
342     //create the document
343     Document doc = Factory.newDocument(file.toURL());
344     //create the tokeniser
345     DefaultTokeniser tokeniser = (DefaultTokeniser)Factory.createResource(
346       "gate.creole.tokeniser.DefaultTokeniser");
347 
348     //tokenise the document
349     tokeniser.setParameterValue(DefaultTokeniser.DEF_TOK_DOCUMENT_PARAMETER_NAME, doc);
350     tokeniser.execute();
351 
352     //extract data from document
353     //we need tokens and spaces
354     Set annotationTypes = new HashSet();
355     annotationTypes.add(ANNIEConstants.TOKEN_ANNOTATION_TYPE);
356     annotationTypes.add(ANNIEConstants.SPACE_TOKEN_ANNOTATION_TYPE);
357 
358     List tokenList = new ArrayList(doc.getAnnotations().get(annotationTypes));
359     Collections.sort(tokenList, new OffsetComparator());
360 
361     //iterate through the tokens
362     Iterator tokIter = tokenList.iterator();
363     while(tokIter.hasNext()){
364       Annotation anAnnotation = (Annotation)tokIter.next();
365       System.out.println("Annotation: (" +
366                         anAnnotation.getStartNode().getOffset().toString() +
367                         ", " + anAnnotation.getEndNode().getOffset().toString() +
368                         "[type: " + anAnnotation.getType() +
369                          ", features: " + anAnnotation.getFeatures().toString()+
370                          "]" );
371     }
372   }
373 
374 
375   public static class ContentPropertyReader implements PropertyReader{
376     public String getPropertyValue(gate.Document doc){
377       return doc.getContent().toString();
378     }
379   }
380 
381   /** Random number generator */
382   protected static Random randomiser = new Random();
383 
384 } // class Scratch
385 
386