|
Scratch |
|
1 /* 2 * Scratch.java 3 * 4 * Copyright (c) 1998-2001, The University of Sheffield. 5 * 6 * This file is part of GATE (see http://gate.ac.uk/), and is free 7 * software, licenced under the GNU Library General Public License, 8 * Version 2, June 1991 (in the distribution as file licence.html, 9 * and also available at http://gate.ac.uk/gate/licence.html). 10 * 11 * Hamish Cunningham, 22/03/00 12 * 13 * $Id: Scratch.java,v 1.60 2002/07/12 13:24:30 valyt Exp $ 14 */ 15 16 17 package gate.util; 18 19 import java.util.*; 20 import java.net.*; 21 import java.io.*; 22 import java.util.zip.*; 23 24 import gate.*; 25 import gate.creole.*; 26 import gate.creole.ir.*; 27 import gate.creole.tokeniser.*; 28 import gate.creole.gazetteer.*; 29 import gate.persist.*; 30 import gate.gui.*; 31 32 import org.xml.sax.*; 33 import javax.xml.parsers.*; 34 import org.w3c.www.mime.*; 35 36 /** A scratch pad for experimenting. 37 */ 38 public class Scratch 39 { 40 /** Debug flag */ 41 private static final boolean DEBUG = false; 42 43 public static void main(String args[]) throws Exception { 44 // Gate.init(); 45 // 46 // List classes = Tools.findSubclasses(gate.creole.ir.Search.class); 47 // if(classes != null) for(int i = 0; i < classes.size(); i++){ 48 // Out.prln(classes.get(i).toString()); 49 // } 50 // createIndex(); 51 // URL anURL = new URL("file:/z:/a/b/c/d.txt"); 52 // URL anotherURL = new URL("file:/z:/a/b/c/d.txt"); 53 // String relPath = gate.util.persistence.PersistenceManager. 54 // getRelativePath(anURL, anotherURL); 55 // Out.prln("Context: " + anURL); 56 // Out.prln("Target: " + anotherURL); 57 // Out.prln("Relative path: " + relPath); 58 // Out.prln("Result " + new URL(anURL, relPath)); 59 // javax.swing.text.FlowView fv; 60 // javax.swing.UIManager.setLookAndFeel(javax.swing.UIManager.getSystemLookAndFeelClassName()); 61 // Map uidefaults = (Map)javax.swing.UIManager.getDefaults(); 62 // List keys = new ArrayList(uidefaults.keySet()); 63 // Collections.sort(keys); 64 // Iterator keyIter = keys.iterator(); 65 // while(keyIter.hasNext()){ 66 // Object key = keyIter.next(); 67 // System.out.println(key + " : " + uidefaults.get(key)); 68 // } 69 70 // initialise the thing 71 // Gate.setNetConnected(false); 72 // Gate.setLocalWebServer(false); 73 // Gate.init(); 74 75 // Scratch oneOfMe = new Scratch(); 76 // try{ 77 // oneOfMe.runNerc(); 78 // } catch (Exception e) { 79 // e.printStackTrace(Out.getPrintWriter()); 80 // } 81 82 83 // CreoleRegister reg = Gate.getCreoleRegister(); 84 //System.out.println("Instances for " + reg.getLrInstances("gate.creole.AnnotationSchema")); 85 //System.out.println("Instances for " + reg.getAllInstances ("gate.creole.AnnotationSchema")); 86 87 //System.out.println("VRs for " + reg.getAnnotationVRs("Tree")); 88 //System.out.println("VRs for " + reg.getAnnotationVRs()); 89 90 //System.out.println(reg.getLargeVRsForResource("gate.corpora.DocumentImpl")); 91 92 } // main 93 94 /** Example of using an exit-time hook. */ 95 public static void exitTimeHook() { 96 Runtime.getRuntime().addShutdownHook(new Thread() { 97 public void run() { 98 System.out.println("shutting down"); 99 System.out.flush(); 100 101 // create a File to store the state in 102 File stateFile = new File("z:\\tmp", "GateGuiState.gzsr"); 103 104 // dump the state into the new File 105 try { 106 ObjectOutputStream oos = new ObjectOutputStream( 107 new GZIPOutputStream(new FileOutputStream(stateFile)) 108 ); 109 System.out.println("writing main frame"); 110 System.out.flush(); 111 oos.writeObject(Main.getMainFrame()); 112 oos.close(); 113 } catch(Exception e) { 114 System.out.println("Couldn't write to state file: " + e); 115 } 116 117 System.out.println("done"); 118 System.out.flush(); 119 } 120 }); 121 } // exitTimeHook() 122 123 /** 124 * ***** <B>Failed</B> ***** 125 * attempt to serialise whole gui state - various swing components 126 * don't like to be serialised :-(. might be worth trying again when 127 * jdk1.4 arrives. 128 */ 129 public static void dumpGuiState() { 130 System.out.println("dumping gui state..."); 131 System.out.flush(); 132 133 // create a File to store the state in 134 File stateFile = new File("z:\\tmp", "GateGuiState.gzsr"); 135 136 // dump the state into the new File 137 try { 138 ObjectOutputStream oos = new ObjectOutputStream( 139 new GZIPOutputStream(new FileOutputStream(stateFile)) 140 ); 141 MainFrame mf = Main.getMainFrame(); 142 143 // wait for 1 sec 144 long startTime = System.currentTimeMillis(); 145 long timeNow = System.currentTimeMillis(); 146 while(timeNow - startTime < 3000){ 147 try { 148 Thread.sleep(150); 149 timeNow = System.currentTimeMillis(); 150 } catch(InterruptedException ie) {} 151 } 152 153 System.out.println("writing main frame"); 154 System.out.flush(); 155 oos.writeObject(mf); 156 oos.close(); 157 } catch(Exception e) { 158 System.out.println("Couldn't write to state file: " + e); 159 } 160 161 System.out.println("...done gui dump"); 162 System.out.flush(); 163 } // dumpGuiState 164 165 /** 166 * Run NERC and print out the various stages (doesn't actually 167 * use Nerc but the individual bits), and serialise then deserialise 168 * the NERC system. 169 */ 170 public void runNerc() throws Exception { 171 long startTime = System.currentTimeMillis(); 172 173 Out.prln("gate init"); 174 Gate.setLocalWebServer(false); 175 Gate.setNetConnected(false); 176 Gate.init(); 177 178 Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); 179 Out.prln("creating resources"); 180 181 // a controller 182 Controller c1 = (Controller) Factory.createResource( 183 "gate.creole.SerialController", 184 Factory.newFeatureMap() 185 ); 186 c1.setName("Scratch controller"); 187 188 //get a document 189 FeatureMap params = Factory.newFeatureMap(); 190 params.put(Document.DOCUMENT_URL_PARAMETER_NAME, Gate.getUrl("tests/doc0.html")); 191 params.put(Document.DOCUMENT_MARKUP_AWARE_PARAMETER_NAME, "false"); 192 Document doc = (Document)Factory.createResource("gate.corpora.DocumentImpl", 193 params); 194 195 //create a default tokeniser 196 params = Factory.newFeatureMap(); 197 params.put(DefaultTokeniser.DEF_TOK_TOKRULES_URL_PARAMETER_NAME, 198 "gate:/creole/tokeniser/DefaultTokeniser.rules"); 199 params.put(DefaultTokeniser.DEF_TOK_ENCODING_PARAMETER_NAME, "UTF-8"); 200 params.put(DefaultTokeniser.DEF_TOK_DOCUMENT_PARAMETER_NAME, doc); 201 ProcessingResource tokeniser = (ProcessingResource) Factory.createResource( 202 "gate.creole.tokeniser.DefaultTokeniser", params 203 ); 204 205 //create a default gazetteer 206 params = Factory.newFeatureMap(); 207 params.put(DefaultGazetteer.DEF_GAZ_DOCUMENT_PARAMETER_NAME, doc); 208 params.put(DefaultGazetteer.DEF_GAZ_LISTS_URL_PARAMETER_NAME, 209 "gate:/creole/gazeteer/default/lists.def"); 210 ProcessingResource gaz = (ProcessingResource) Factory.createResource( 211 "gate.creole.gazetteer.DefaultGazetteer", params 212 ); 213 214 //create a default transducer 215 params = Factory.newFeatureMap(); 216 params.put(Transducer.TRANSD_DOCUMENT_PARAMETER_NAME, doc); 217 //params.put("grammarURL", new File("z:\\tmp\\main.jape").toURL()); 218 ProcessingResource trans = (ProcessingResource) Factory.createResource( 219 "gate.creole.Transducer", params 220 ); 221 222 // get the controller to encapsulate the tok and gaz 223 c1.getPRs().add(tokeniser); 224 c1.getPRs().add(gaz); 225 c1.getPRs().add(trans); 226 227 Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); 228 Out.prln("dumping state"); 229 230 // create a File to store the state in 231 File stateFile = new File("z:\\tmp", "SerialisedGateState.gzsr"); 232 233 // dump the state into the new File 234 try { 235 ObjectOutputStream oos = new ObjectOutputStream( 236 new GZIPOutputStream(new FileOutputStream(stateFile)) 237 ); 238 oos.writeObject(new SessionState()); 239 oos.close(); 240 } catch(IOException e) { 241 throw new GateException("Couldn't write to state file: " + e); 242 } 243 244 Out.prln(System.getProperty("user.home")); 245 246 Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); 247 Out.prln("reinstating"); 248 249 try { 250 FileInputStream fis = new FileInputStream(stateFile); 251 GZIPInputStream zis = new GZIPInputStream(fis); 252 ObjectInputStream ois = new ObjectInputStream(zis); 253 SessionState state = (SessionState) ois.readObject(); 254 ois.close(); 255 } catch(IOException e) { 256 throw 257 new GateException("Couldn't read file "+stateFile+": "+e); 258 } catch(ClassNotFoundException ee) { 259 throw 260 new GateException("Couldn't find class: "+ee); 261 } 262 263 Out.prln((System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); 264 Out.prln("done"); 265 } // runNerc() 266 267 268 /** Inner class for holding CR and DSR for serialisation experiments */ 269 class SessionState implements Serializable { 270 SessionState() { 271 cr = Gate.getCreoleRegister(); 272 dsr = Gate.getDataStoreRegister(); 273 } 274 275 CreoleRegister cr; 276 277 DataStoreRegister dsr; 278 279 // other state from Gate? and elsewhere? 280 } // SessionState 281 282 /** Generate a random integer for file naming. */ 283 protected static int random() { 284 return randomiser.nextInt(9999); 285 } // random 286 287 /** 288 * Generates an index for a corpus in a datastore on Valy's computer in order 289 * to have some test data. 290 */ 291 public static void createIndex() throws Exception{ 292 String dsURLString = "file:///d:/temp/ds"; 293 String indexLocation = "d:/temp/ds.idx"; 294 295 Gate.init(); 296 297 //open the datastore 298 SerialDataStore sds = (SerialDataStore)Factory.openDataStore( 299 "gate.persist.SerialDataStore", dsURLString); 300 sds.open(); 301 List corporaIds = sds.getLrIds("gate.corpora.SerialCorpusImpl"); 302 IndexedCorpus corpus = (IndexedCorpus) 303 sds.getLr("gate.corpora.SerialCorpusImpl", 304 305 corporaIds.get(0)); 306 DefaultIndexDefinition did = new DefaultIndexDefinition(); 307 did.setIrEngineClassName(gate.creole.ir.lucene. 308 LuceneIREngine.class.getName()); 309 310 did.setIndexLocation(indexLocation); 311 did.addIndexField(new IndexField("body", new ContentPropertyReader(), false)); 312 313 corpus.setIndexDefinition(did); 314 315 Out.prln("removing old index"); 316 corpus.getIndexManager().deleteIndex(); 317 Out.prln("building new index"); 318 corpus.getIndexManager().createIndex(); 319 Out.prln("optimising new index"); 320 corpus.getIndexManager().optimizeIndex(); 321 Out.prln("saving corpus"); 322 sds.sync(corpus); 323 Out.prln("done!"); 324 } 325 326 /** 327 * 328 * @param file a TXT file containing the text 329 */ 330 public static void tokeniseFile(File file) throws Exception{ 331 //initialise GATE (only call it once!!) 332 Gate.init(); 333 //create the document 334 Document doc = Factory.newDocument(file.toURL()); 335 //create the tokeniser 336 DefaultTokeniser tokeniser = (DefaultTokeniser)Factory.createResource( 337 "gate.creole.tokeniser.DefaultTokeniser"); 338 339 //tokenise the document 340 tokeniser.setParameterValue(DefaultTokeniser.DEF_TOK_DOCUMENT_PARAMETER_NAME, doc); 341 tokeniser.execute(); 342 343 //extract data from document 344 //we need tokens and spaces 345 Set annotationTypes = new HashSet(); 346 annotationTypes.add(ANNIEConstants.TOKEN_ANNOTATION_TYPE); 347 annotationTypes.add(ANNIEConstants.SPACE_TOKEN_ANNOTATION_TYPE); 348 349 List tokenList = new ArrayList(doc.getAnnotations().get(annotationTypes)); 350 Collections.sort(tokenList, new OffsetComparator()); 351 352 //iterate through the tokens 353 Iterator tokIter = tokenList.iterator(); 354 while(tokIter.hasNext()){ 355 Annotation anAnnotation = (Annotation)tokIter.next(); 356 System.out.println("Annotation: (" + 357 anAnnotation.getStartNode().getOffset().toString() + 358 ", " + anAnnotation.getEndNode().getOffset().toString() + 359 "[type: " + anAnnotation.getType() + 360 ", features: " + anAnnotation.getFeatures().toString()+ 361 "]" ); 362 } 363 } 364 365 366 public static class ContentPropertyReader implements PropertyReader{ 367 public String getPropertyValue(gate.Document doc){ 368 return doc.getContent().toString(); 369 } 370 } 371 372 /** Random number generator */ 373 protected static Random randomiser = new Random(); 374 375 } // class Scratch 376 377
|
Scratch |
|