|
DefaultTokeniser |
|
package gate.creole.tokeniser;

import gate.*;
import gate.util.*;
import gate.creole.*;
import gate.event.*;
import java.util.*;

/**
 * A composed tokeniser containing a {@link SimpleTokeniser} and a
 * {@link gate.creole.Transducer}.
 * The simple tokeniser tokenises the document and the transducer processes its
 * output.
 */
public class DefaultTokeniser extends AbstractLanguageAnalyser {

  public DefaultTokeniser() {
  }


  /**
   * Initialise this resource, and return it.
   * Creates the wrapped {@link SimpleTokeniser} and {@link Transducer} through
   * {@link Factory#createResource}, passing on the rules URL, grammar URL and
   * encoding that were set on this object (the URLs only when non-null).
   *
   * @return this resource
   * @throws ResourceInstantiationException if the super-class initialisation
   * or the creation of either component fails; any other exception raised
   * while initialising is wrapped in a ResourceInstantiationException
   */
  public Resource init() throws ResourceInstantiationException{
    try{
      //init super object
      super.init();
      //create all the components
      FeatureMap params;
      FeatureMap features;

      //tokeniser
      fireStatusChanged("Creating a tokeniser");
      params = Factory.newFeatureMap();
      if(tokeniserRulesURL != null) params.put("rulesURL",
                                               tokeniserRulesURL);
      params.put("encoding", encoding);
      if(DEBUG) Out.prln("Parameters for the tokeniser: \n" + params);
      features = Factory.newFeatureMap();
      Gate.setHiddenAttribute(features, true);
      tokeniser = (SimpleTokeniser)Factory.createResource(
                    "gate.creole.tokeniser.SimpleTokeniser",
                    params, features);
      tokeniser.setName("Tokeniser " + System.currentTimeMillis());

      fireProgressChanged(50);

      //transducer
      fireStatusChanged("Creating a Jape transducer");
      params.clear();
      if(transducerGrammarURL != null) params.put("grammarURL",
                                                  transducerGrammarURL);
      params.put("encoding", encoding);
      if(DEBUG) Out.prln("Parameters for the transducer: \n" + params);
      features.clear();
      Gate.setHiddenAttribute(features, true);
      transducer = (Transducer)Factory.createResource("gate.creole.Transducer",
                                                      params, features);
      //name the transducer before announcing completion, so that listeners
      //reacting to the "finished" event see a fully set up component
      transducer.setName("Transducer " + System.currentTimeMillis());
      fireProgressChanged(100);
      fireProcessFinished();
    }catch(ResourceInstantiationException rie){
      throw rie;
    }catch(Exception e){
      throw new ResourceInstantiationException(e);
    }
    return this;
  }

  /**
   * Runs the simple tokeniser and then the transducer over the current
   * document.
   * Both components are given the current document; the tokeniser gets the
   * annotation set name as "annotationSetName" and the transducer gets it as
   * both "inputASName" and "outputASName". Progress reported by the tokeniser
   * is forwarded through an IntervalProgressListener created with (5, 50) and
   * progress reported by the transducer through one created with (50, 100);
   * status messages are re-fired unchanged.
   *
   * @throws ExecutionException if the parameters cannot be set on a component
   * or a component fails; an ExecutionInterruptedException is thrown when an
   * interruption is detected before either stage or while the tokeniser runs
   */
  public void execute() throws ExecutionException{
    interrupted = false;
    //set the parameters
    try{
      FeatureMap params = Factory.newFeatureMap();
      fireProgressChanged(0);
      //tokeniser
      params.put("document", document);
      params.put("annotationSetName", annotationSetName);
      tokeniser.setParameterValues(params);

      //transducer
      params.clear();
      params.put("document", document);
      params.put("inputASName", annotationSetName);
      params.put("outputASName", annotationSetName);
      transducer.setParameterValues(params);
    }catch(ResourceInstantiationException rie){
      throw new ExecutionException(rie);
    }

    fireProgressChanged(5);
    ProgressListener tokeniserProgress = new IntervalProgressListener(5, 50);
    //re-fires the status messages of the components as our own
    StatusListener sListener = new StatusListener(){
      public void statusChanged(String text){
        fireStatusChanged(text);
      }
    };

    //tokeniser
    if(isInterrupted()) throw newInterruptedException();
    tokeniser.addProgressListener(tokeniserProgress);
    tokeniser.addStatusListener(sListener);
    try{
      tokeniser.execute();
    }catch(ExecutionInterruptedException eie){
      throw newInterruptedException();
    }finally{
      //always unregister: the tokeniser outlives this call, so listeners left
      //behind by a failed run would accumulate and keep firing on later runs
      tokeniser.removeProgressListener(tokeniserProgress);
      tokeniser.removeStatusListener(sListener);
    }

    //transducer
    if(isInterrupted()) throw newInterruptedException();
    ProgressListener transducerProgress = new IntervalProgressListener(50, 100);
    transducer.addProgressListener(transducerProgress);
    transducer.addStatusListener(sListener);
    try{
      transducer.execute();
    }finally{
      //same reasoning as for the tokeniser above
      transducer.removeProgressListener(transducerProgress);
      transducer.removeStatusListener(sListener);
    }
  }//execute

  /**
   * Builds the exception used to report that the execution was interrupted.
   *
   * @return a new ExecutionInterruptedException naming this resource
   */
  private ExecutionInterruptedException newInterruptedException(){
    return new ExecutionInterruptedException(
        "The execution of the \"" + getName() +
        "\" tokeniser has been abruptly interrupted!");
  }


  /**
   * Notifies all the PRs in this controller that they should stop their
   * execution as soon as possible.
   * A component that has not been created yet (that is, {@link #init()} has
   * not assigned it) is skipped.
   */
  public synchronized void interrupt(){
    interrupted = true;
    if(tokeniser != null) tokeniser.interrupt();
    if(transducer != null) transducer.interrupt();
  }

  /**
   * Sets the URL handed to the simple tokeniser as its "rulesURL" parameter
   * when {@link #init()} runs; a null value means the parameter is not set.
   *
   * @param tokeniserRulesURL the rules URL, may be null
   */
  public void setTokeniserRulesURL(java.net.URL tokeniserRulesURL) {
    this.tokeniserRulesURL = tokeniserRulesURL;
  }

  /**
   * @return the URL last set with {@link #setTokeniserRulesURL}, or null
   */
  public java.net.URL getTokeniserRulesURL() {
    return tokeniserRulesURL;
  }

  /**
   * Sets the value handed to both the tokeniser and the transducer as their
   * "encoding" parameter when {@link #init()} runs.
   *
   * @param encoding the encoding name
   */
  public void setEncoding(String encoding) {
    this.encoding = encoding;
  }

  /**
   * @return the encoding last set with {@link #setEncoding}
   */
  public String getEncoding() {
    return encoding;
  }

  /**
   * Sets the URL handed to the transducer as its "grammarURL" parameter when
   * {@link #init()} runs; a null value means the parameter is not set.
   *
   * @param transducerGrammarURL the grammar URL, may be null
   */
  public void setTransducerGrammarURL(java.net.URL transducerGrammarURL) {
    this.transducerGrammarURL = transducerGrammarURL;
  }

  /**
   * @return the URL last set with {@link #setTransducerGrammarURL}, or null
   */
  public java.net.URL getTransducerGrammarURL() {
    return transducerGrammarURL;
  }

  /**
   * Sets the annotation set name passed to the tokeniser (as
   * "annotationSetName") and to the transducer (as both "inputASName" and
   * "outputASName") on every {@link #execute()}.
   *
   * @param annotationSetName the annotation set name
   */
  public void setAnnotationSetName(String annotationSetName) {
    this.annotationSetName = annotationSetName;
  }

  /**
   * @return the name last set with {@link #setAnnotationSetName}
   */
  public String getAnnotationSetName() {
    return annotationSetName;
  }

  /** when true, init() prints the parameters used to create each component */
  private static final boolean DEBUG = false;

  /** the simple tokeniser used for tokenisation*/
  protected SimpleTokeniser tokeniser;

  /** the transducer used for post-processing*/
  protected Transducer transducer;

  /** rules URL for the simple tokeniser; read by init() */
  private java.net.URL tokeniserRulesURL;

  /** encoding passed to both components; read by init() */
  private String encoding;

  /** grammar URL for the transducer; read by init() */
  private java.net.URL transducerGrammarURL;

  /** annotation set name passed to both components; read by execute() */
  private String annotationSetName;
}
|
DefaultTokeniser |
|