|
Scratch |
|
1 /* 2 * Scratch.java 3 * 4 * Copyright (c) 1998-2001, The University of Sheffield. 5 * 6 * This file is part of GATE (see http://gate.ac.uk/), and is free 7 * software, licenced under the GNU Library General Public License, 8 * Version 2, June 1991 (in the distribution as file licence.html, 9 * and also available at http://gate.ac.uk/gate/licence.html). 10 * 11 * Hamish Cunningham, 22/03/00 12 * 13 * $Id: Scratch.java,v 1.64 2003/01/16 15:39:33 valyt Exp $ 14 */ 15 16 17 package gate.util; 18 19 import java.util.*; 20 import java.net.*; 21 import java.io.*; 22 import java.util.zip.*; 23 24 import gate.*; 25 import gate.creole.*; 26 import gate.creole.ir.*; 27 import gate.creole.tokeniser.*; 28 import gate.creole.gazetteer.*; 29 import gate.persist.*; 30 import gate.gui.*; 31 32 import org.xml.sax.*; 33 import javax.xml.parsers.*; 34 import org.w3c.www.mime.*; 35 36 /** A scratch pad for experimenting. 37 */ 38 public class Scratch 39 { 40 /** Debug flag */ 41 private static final boolean DEBUG = false; 42 43 public static void main(String args[]) throws Exception { 44 // Map charsets = java.nio.charset.Charset.availableCharsets(); 45 // Iterator namesIter = charsets.keySet().iterator(); 46 // while(namesIter.hasNext()){ 47 // String name = (String)namesIter.next(); 48 // System.out.println(name + " : " + charsets.get(name)); 49 // } 50 System.out.println(System.getProperty("file.encoding")); 51 System.out.println(java.nio.charset.Charset.forName(System.getProperty("file.encoding")).name()); 52 // System.out.println(new Character((char)0xa3)); 53 // Gate.init(); 54 // 55 // List classes = Tools.findSubclasses(gate.creole.ir.Search.class); 56 // if(classes != null) for(int i = 0; i < classes.size(); i++){ 57 // Out.prln(classes.get(i).toString()); 58 // } 59 // createIndex(); 60 // URL anURL = new URL("file:/z:/a/b/c/d.txt"); 61 // URL anotherURL = new URL("file:/z:/a/b/c/d.txt"); 62 // String relPath = gate.util.persistence.PersistenceManager. 63 // getRelativePath(anURL, anotherURL); 64 // Out.prln("Context: " + anURL); 65 // Out.prln("Target: " + anotherURL); 66 // Out.prln("Relative path: " + relPath); 67 // Out.prln("Result " + new URL(anURL, relPath)); 68 // javax.swing.text.FlowView fv; 69 // javax.swing.UIManager.setLookAndFeel(javax.swing.UIManager.getSystemLookAndFeelClassName()); 70 // Map uidefaults = (Map)javax.swing.UIManager.getDefaults(); 71 // List keys = new ArrayList(uidefaults.keySet()); 72 // Collections.sort(keys); 73 // Iterator keyIter = keys.iterator(); 74 // while(keyIter.hasNext()){ 75 // Object key = keyIter.next(); 76 // System.out.println(key + " : " + uidefaults.get(key)); 77 // } 78 79 // initialise the thing 80 // Gate.setNetConnected(false); 81 // Gate.setLocalWebServer(false); 82 // Gate.init(); 83 84 // Scratch oneOfMe = new Scratch(); 85 // try{ 86 // oneOfMe.runNerc(); 87 // } catch (Exception e) { 88 // e.printStackTrace(Out.getPrintWriter()); 89 // } 90 91 92 // CreoleRegister reg = Gate.getCreoleRegister(); 93 //System.out.println("Instances for " + reg.getLrInstances("gate.creole.AnnotationSchema")); 94 //System.out.println("Instances for " + reg.getAllInstances ("gate.creole.AnnotationSchema")); 95 96 //System.out.println("VRs for " + reg.getAnnotationVRs("Tree")); 97 //System.out.println("VRs for " + reg.getAnnotationVRs()); 98 99 //System.out.println(reg.getLargeVRsForResource("gate.corpora.DocumentImpl")); 100 101 } // main 102 103 /** Example of using an exit-time hook. */ 104 public static void exitTimeHook() { 105 Runtime.getRuntime().addShutdownHook(new Thread() { 106 public void run() { 107 System.out.println("shutting down"); 108 System.out.flush(); 109 110 // create a File to store the state in 111 File stateFile = new File("z:\\tmp", "GateGuiState.gzsr"); 112 113 // dump the state into the new File 114 try { 115 ObjectOutputStream oos = new ObjectOutputStream( 116 new GZIPOutputStream(new FileOutputStream(stateFile)) 117 ); 118 System.out.println("writing main frame"); 119 System.out.flush(); 120 oos.writeObject(Main.getMainFrame()); 121 oos.close(); 122 } catch(Exception e) { 123 System.out.println("Couldn't write to state file: " + e); 124 } 125 126 System.out.println("done"); 127 System.out.flush(); 128 } 129 }); 130 } // exitTimeHook() 131 132 /** 133 * ***** <B>Failed</B> ***** 134 * attempt to serialise whole gui state - various swing components 135 * don't like to be serialised :-(. might be worth trying again when 136 * jdk1.4 arrives. 137 */ 138 public static void dumpGuiState() { 139 System.out.println("dumping gui state..."); 140 System.out.flush(); 141 142 // create a File to store the state in 143 File stateFile = new File("z:\\tmp", "GateGuiState.gzsr"); 144 145 // dump the state into the new File 146 try { 147 ObjectOutputStream oos = new ObjectOutputStream( 148 new GZIPOutputStream(new FileOutputStream(stateFile)) 149 ); 150 MainFrame mf = Main.getMainFrame(); 151 152 // wait for 1 sec 153 long startTime = System.currentTimeMillis(); 154 long timeNow = System.currentTimeMillis(); 155 while(timeNow - startTime < 3000){ 156 try { 157 Thread.sleep(150); 158 timeNow = System.currentTimeMillis(); 159 } catch(InterruptedException ie) {} 160 } 161 162 System.out.println("writing main frame"); 163 System.out.flush(); 164 oos.writeObject(mf); 165 oos.close(); 166 } catch(Exception e) { 167 System.out.println("Couldn't write to state file: " + e); 168 } 169 170 System.out.println("...done gui dump"); 171 System.out.flush(); 172 } // dumpGuiState 173 174 /** 175 * Run NERC and print out the various stages (doesn't actually 176 * use Nerc but the individual bits), and serialise then deserialise 177 * the NERC system. 178 */ 179 public void runNerc() throws Exception { 180 long startTime = System.currentTimeMillis(); 181 182 Out.prln("gate init"); 183 Gate.setLocalWebServer(false); 184 Gate.setNetConnected(false); 185 Gate.init(); 186 187 Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); 188 Out.prln("creating resources"); 189 190 // a controller 191 Controller c1 = (Controller) Factory.createResource( 192 "gate.creole.SerialController", 193 Factory.newFeatureMap() 194 ); 195 c1.setName("Scratch controller"); 196 197 //get a document 198 FeatureMap params = Factory.newFeatureMap(); 199 params.put(Document.DOCUMENT_URL_PARAMETER_NAME, Gate.getUrl("tests/doc0.html")); 200 params.put(Document.DOCUMENT_MARKUP_AWARE_PARAMETER_NAME, "false"); 201 Document doc = (Document)Factory.createResource("gate.corpora.DocumentImpl", 202 params); 203 204 //create a default tokeniser 205 params = Factory.newFeatureMap(); 206 params.put(DefaultTokeniser.DEF_TOK_TOKRULES_URL_PARAMETER_NAME, 207 "gate:/creole/tokeniser/DefaultTokeniser.rules"); 208 params.put(DefaultTokeniser.DEF_TOK_ENCODING_PARAMETER_NAME, "UTF-8"); 209 params.put(DefaultTokeniser.DEF_TOK_DOCUMENT_PARAMETER_NAME, doc); 210 ProcessingResource tokeniser = (ProcessingResource) Factory.createResource( 211 "gate.creole.tokeniser.DefaultTokeniser", params 212 ); 213 214 //create a default gazetteer 215 params = Factory.newFeatureMap(); 216 params.put(DefaultGazetteer.DEF_GAZ_DOCUMENT_PARAMETER_NAME, doc); 217 params.put(DefaultGazetteer.DEF_GAZ_LISTS_URL_PARAMETER_NAME, 218 "gate:/creole/gazeteer/default/lists.def"); 219 ProcessingResource gaz = (ProcessingResource) Factory.createResource( 220 "gate.creole.gazetteer.DefaultGazetteer", params 221 ); 222 223 //create a default transducer 224 params = Factory.newFeatureMap(); 225 params.put(Transducer.TRANSD_DOCUMENT_PARAMETER_NAME, doc); 226 //params.put("grammarURL", new File("z:\\tmp\\main.jape").toURL()); 227 ProcessingResource trans = (ProcessingResource) Factory.createResource( 228 "gate.creole.Transducer", params 229 ); 230 231 // get the controller to encapsulate the tok and gaz 232 c1.getPRs().add(tokeniser); 233 c1.getPRs().add(gaz); 234 c1.getPRs().add(trans); 235 236 Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); 237 Out.prln("dumping state"); 238 239 // create a File to store the state in 240 File stateFile = new File("z:\\tmp", "SerialisedGateState.gzsr"); 241 242 // dump the state into the new File 243 try { 244 ObjectOutputStream oos = new ObjectOutputStream( 245 new GZIPOutputStream(new FileOutputStream(stateFile)) 246 ); 247 oos.writeObject(new SessionState()); 248 oos.close(); 249 } catch(IOException e) { 250 throw new GateException("Couldn't write to state file: " + e); 251 } 252 253 Out.prln(System.getProperty("user.home")); 254 255 Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); 256 Out.prln("reinstating"); 257 258 try { 259 FileInputStream fis = new FileInputStream(stateFile); 260 GZIPInputStream zis = new GZIPInputStream(fis); 261 ObjectInputStream ois = new ObjectInputStream(zis); 262 SessionState state = (SessionState) ois.readObject(); 263 ois.close(); 264 } catch(IOException e) { 265 throw 266 new GateException("Couldn't read file "+stateFile+": "+e); 267 } catch(ClassNotFoundException ee) { 268 throw 269 new GateException("Couldn't find class: "+ee); 270 } 271 272 Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); 273 Out.prln("done"); 274 } // runNerc() 275 276 277 /** Inner class for holding CR and DSR for serialisation experiments */ 278 class SessionState implements Serializable { 279 SessionState() { 280 cr = Gate.getCreoleRegister(); 281 dsr = Gate.getDataStoreRegister(); 282 } 283 284 CreoleRegister cr; 285 286 DataStoreRegister dsr; 287 288 // other state from Gate? and elsewhere? 289 } // SessionState 290 291 /** Generate a random integer for file naming. */ 292 protected static int random() { 293 return randomiser.nextInt(9999); 294 } // random 295 296 /** 297 * Generates an index for a corpus in a datastore on Valy's computer in order 298 * to have some test data. 299 */ 300 public static void createIndex() throws Exception{ 301 String dsURLString = "file:///d:/temp/ds"; 302 String indexLocation = "d:/temp/ds.idx"; 303 304 Gate.init(); 305 306 //open the datastore 307 SerialDataStore sds = (SerialDataStore)Factory.openDataStore( 308 "gate.persist.SerialDataStore", dsURLString); 309 sds.open(); 310 List corporaIds = sds.getLrIds("gate.corpora.SerialCorpusImpl"); 311 IndexedCorpus corpus = (IndexedCorpus) 312 sds.getLr("gate.corpora.SerialCorpusImpl", 313 314 corporaIds.get(0)); 315 DefaultIndexDefinition did = new DefaultIndexDefinition(); 316 did.setIrEngineClassName(gate.creole.ir.lucene. 317 LuceneIREngine.class.getName()); 318 319 did.setIndexLocation(indexLocation); 320 did.addIndexField(new IndexField("body", new ContentPropertyReader(), false)); 321 322 corpus.setIndexDefinition(did); 323 324 Out.prln("removing old index"); 325 corpus.getIndexManager().deleteIndex(); 326 Out.prln("building new index"); 327 corpus.getIndexManager().createIndex(); 328 Out.prln("optimising new index"); 329 corpus.getIndexManager().optimizeIndex(); 330 Out.prln("saving corpus"); 331 sds.sync(corpus); 332 Out.prln("done!"); 333 } 334 335 /** 336 * 337 * @param file a TXT file containing the text 338 */ 339 public static void tokeniseFile(File file) throws Exception{ 340 //initialise GATE (only call it once!!) 341 Gate.init(); 342 //create the document 343 Document doc = Factory.newDocument(file.toURL()); 344 //create the tokeniser 345 DefaultTokeniser tokeniser = (DefaultTokeniser)Factory.createResource( 346 "gate.creole.tokeniser.DefaultTokeniser"); 347 348 //tokenise the document 349 tokeniser.setParameterValue(DefaultTokeniser.DEF_TOK_DOCUMENT_PARAMETER_NAME, doc); 350 tokeniser.execute(); 351 352 //extract data from document 353 //we need tokens and spaces 354 Set annotationTypes = new HashSet(); 355 annotationTypes.add(ANNIEConstants.TOKEN_ANNOTATION_TYPE); 356 annotationTypes.add(ANNIEConstants.SPACE_TOKEN_ANNOTATION_TYPE); 357 358 List tokenList = new ArrayList(doc.getAnnotations().get(annotationTypes)); 359 Collections.sort(tokenList, new OffsetComparator()); 360 361 //iterate through the tokens 362 Iterator tokIter = tokenList.iterator(); 363 while(tokIter.hasNext()){ 364 Annotation anAnnotation = (Annotation)tokIter.next(); 365 System.out.println("Annotation: (" + 366 anAnnotation.getStartNode().getOffset().toString() + 367 ", " + anAnnotation.getEndNode().getOffset().toString() + 368 "[type: " + anAnnotation.getType() + 369 ", features: " + anAnnotation.getFeatures().toString()+ 370 "]" ); 371 } 372 } 373 374 375 public static class ContentPropertyReader implements PropertyReader{ 376 public String getPropertyValue(gate.Document doc){ 377 return doc.getContent().toString(); 378 } 379 } 380 381 /** Random number generator */ 382 protected static Random randomiser = new Random(); 383 384 } // class Scratch 385 386
|
Scratch |
|