|
DefaultTokeniser |
|
1 package gate.creole.tokeniser; 2 3 import gate.*; 4 import gate.util.*; 5 import gate.creole.*; 6 import gate.event.*; 7 import java.util.*; 8 9 /** 10 * A composed tokeniser containing a {@link SimpleTokeniser} and a 11 * {@link gate.creole.Transducer}. 12 * The simple tokeniser tokenises the document and the transducer processes its 13 * output. 14 */ 15 public class DefaultTokeniser extends AbstractLanguageAnalyser { 16 17 public static final String 18 DEF_TOK_DOCUMENT_PARAMETER_NAME = "document"; 19 20 public static final String 21 DEF_TOK_ANNOT_SET_PARAMETER_NAME = "annotationSetName"; 22 23 public static final String 24 DEF_TOK_TOKRULES_URL_PARAMETER_NAME = "tokeniserRulesURL"; 25 26 public static final String 27 DEF_TOK_GRAMRULES_URL_PARAMETER_NAME = "transducerGrammarURL"; 28 29 public static final String 30 DEF_TOK_ENCODING_PARAMETER_NAME = "encoding"; 31 32 public DefaultTokeniser() { 33 } 34 35 36 /** Initialise this resource, and return it. */ 37 public Resource init() throws ResourceInstantiationException{ 38 try{ 39 //init super object 40 super.init(); 41 //create all the componets 42 FeatureMap params; 43 FeatureMap features; 44 45 //tokeniser 46 fireStatusChanged("Creating a tokeniser"); 47 params = Factory.newFeatureMap(); 48 if(tokeniserRulesURL != null) 49 params.put(SimpleTokeniser.SIMP_TOK_RULES_URL_PARAMETER_NAME, 50 tokeniserRulesURL); 51 params.put(SimpleTokeniser.SIMP_TOK_ENCODING_PARAMETER_NAME, encoding); 52 if(DEBUG) Out.prln("Parameters for the tokeniser: \n" + params); 53 features = Factory.newFeatureMap(); 54 Gate.setHiddenAttribute(features, true); 55 tokeniser = (SimpleTokeniser)Factory.createResource( 56 "gate.creole.tokeniser.SimpleTokeniser", 57 params, features); 58 tokeniser.setName("Tokeniser " + System.currentTimeMillis()); 59 60 fireProgressChanged(50); 61 62 //transducer 63 fireStatusChanged("Creating a Jape transducer"); 64 params.clear(); 65 if(transducerGrammarURL != null) 66 params.put(Transducer.TRANSD_GRAMMAR_URL_PARAMETER_NAME, 67 transducerGrammarURL); 68 params.put(Transducer.TRANSD_ENCODING_PARAMETER_NAME, encoding); 69 if(DEBUG) Out.prln("Parameters for the transducer: \n" + params); 70 features.clear(); 71 Gate.setHiddenAttribute(features, true); 72 transducer = (Transducer)Factory.createResource("gate.creole.Transducer", 73 params, features); 74 fireProgressChanged(100); 75 fireProcessFinished(); 76 transducer.setName("Transducer " + System.currentTimeMillis()); 77 }catch(ResourceInstantiationException rie){ 78 throw rie; 79 }catch(Exception e){ 80 throw new ResourceInstantiationException(e); 81 } 82 return this; 83 } 84 85 public void execute() throws ExecutionException{ 86 interrupted = false; 87 //set the parameters 88 try{ 89 FeatureMap params = Factory.newFeatureMap(); 90 fireProgressChanged(0); 91 //tokeniser 92 params.put(SimpleTokeniser.SIMP_TOK_DOCUMENT_PARAMETER_NAME, document); 93 params.put( 94 SimpleTokeniser.SIMP_TOK_ANNOT_SET_PARAMETER_NAME, annotationSetName); 95 tokeniser.setParameterValues(params); 96 97 //transducer 98 params.clear(); 99 params.put(Transducer.TRANSD_DOCUMENT_PARAMETER_NAME, document); 100 params.put(Transducer.TRANSD_INPUT_AS_PARAMETER_NAME, annotationSetName); 101 params.put(Transducer.TRANSD_OUTPUT_AS_PARAMETER_NAME, annotationSetName); 102 transducer.setParameterValues(params); 103 }catch(ResourceInstantiationException rie){ 104 throw new ExecutionException(rie); 105 } 106 107 ProgressListener pListener = null; 108 StatusListener sListener = null; 109 fireProgressChanged(5); 110 pListener = new IntervalProgressListener(5, 50); 111 sListener = new StatusListener(){ 112 public void statusChanged(String text){ 113 fireStatusChanged(text); 114 } 115 }; 116 117 //tokeniser 118 if(isInterrupted()) throw new ExecutionInterruptedException( 119 "The execution of the \"" + getName() + 120 "\" tokeniser has been abruptly interrupted!"); 121 tokeniser.addProgressListener(pListener); 122 tokeniser.addStatusListener(sListener); 123 try{ 124 tokeniser.execute(); 125 }catch(ExecutionInterruptedException eie){ 126 throw new ExecutionInterruptedException( 127 "The execution of the \"" + getName() + 128 "\" tokeniser has been abruptly interrupted!"); 129 } 130 tokeniser.removeProgressListener(pListener); 131 tokeniser.removeStatusListener(sListener); 132 133 //transducer 134 if(isInterrupted()) throw new ExecutionInterruptedException( 135 "The execution of the \"" + getName() + 136 "\" tokeniser has been abruptly interrupted!"); 137 pListener = new IntervalProgressListener(50, 100); 138 transducer.addProgressListener(pListener); 139 transducer.addStatusListener(sListener); 140 141 transducer.execute(); 142 transducer.removeProgressListener(pListener); 143 transducer.removeStatusListener(sListener); 144 }//execute 145 146 147 /** 148 * Notifies all the PRs in this controller that they should stop their 149 * execution as soon as possible. 150 */ 151 public synchronized void interrupt(){ 152 interrupted = true; 153 tokeniser.interrupt(); 154 transducer.interrupt(); 155 } 156 157 public void setTokeniserRulesURL(java.net.URL tokeniserRulesURL) { 158 this.tokeniserRulesURL = tokeniserRulesURL; 159 } 160 public java.net.URL getTokeniserRulesURL() { 161 return tokeniserRulesURL; 162 } 163 public void setEncoding(String encoding) { 164 this.encoding = encoding; 165 } 166 public String getEncoding() { 167 return encoding; 168 } 169 public void setTransducerGrammarURL(java.net.URL transducerGrammarURL) { 170 this.transducerGrammarURL = transducerGrammarURL; 171 } 172 public java.net.URL getTransducerGrammarURL() { 173 return transducerGrammarURL; 174 } 175 // init() 176 177 private static final boolean DEBUG = false; 178 179 /** the simple tokeniser used for tokenisation*/ 180 protected SimpleTokeniser tokeniser; 181 182 /** the transducer used for post-processing*/ 183 protected Transducer transducer; 184 private java.net.URL tokeniserRulesURL; 185 private String encoding; 186 private java.net.URL transducerGrammarURL; 187 private String annotationSetName; 188 189 190 public void setAnnotationSetName(String annotationSetName) { 191 this.annotationSetName = annotationSetName; 192 } 193 public String getAnnotationSetName() { 194 return annotationSetName; 195 }/////////class CustomProgressListener implements ProgressListener 196 }
|
DefaultTokeniser |
|