1
13
14
15 package gate.creole.splitter;
16
17 import gate.*;
18 import gate.creole.*;
19 import gate.creole.gazetteer.DefaultGazetteer;
20 import gate.event.ProgressListener;
21 import gate.event.StatusListener;
22 import gate.util.InvalidOffsetException;
23
30 public class SentenceSplitter extends AbstractLanguageAnalyser{
31
32 public static final String
33 SPLIT_DOCUMENT_PARAMETER_NAME = "document";
34
35 public static final String
36 SPLIT_INPUT_AS_PARAMETER_NAME = "inputASName";
37
38 public static final String
39 SPLIT_OUTPUT_AS_PARAMETER_NAME = "outputASName";
40
41 public static final String
42 SPLIT_ENCODING_PARAMETER_NAME = "encoding";
43
44 public static final String
45 SPLIT_GAZ_URL_PARAMETER_NAME = "gazetteerListsURL";
46
47 public static final String
48 SPLIT_TRANSD_URL_PARAMETER_NAME = "transducerURL";
49
50 public Resource init()throws ResourceInstantiationException{
51 FeatureMap params;
53 FeatureMap features;
54
55 fireStatusChanged("Creating the gazetteer");
57 params = Factory.newFeatureMap();
58 if(gazetteerListsURL != null)
59 params.put(DefaultGazetteer.DEF_GAZ_LISTS_URL_PARAMETER_NAME,
60 gazetteerListsURL);
61 params.put(DefaultGazetteer.DEF_GAZ_ENCODING_PARAMETER_NAME, encoding);
62 features = Factory.newFeatureMap();
63 Gate.setHiddenAttribute(features, true);
64
65
66 gazetteer = (DefaultGazetteer)Factory.createResource(
67 "gate.creole.gazetteer.DefaultGazetteer",
68 params, features);
69 gazetteer.setName("Gazetteer " + System.currentTimeMillis());
70 fireProgressChanged(10);
71
72 fireStatusChanged("Creating the JAPE transducer");
74
75 params = Factory.newFeatureMap();
76 if(transducerURL != null)
77 params.put(Transducer.TRANSD_GRAMMAR_URL_PARAMETER_NAME, transducerURL);
78 params.put(Transducer.TRANSD_ENCODING_PARAMETER_NAME, encoding);
79 features = Factory.newFeatureMap();
80 Gate.setHiddenAttribute(features, true);
81
82 transducer = (Transducer)Factory.createResource(
83 "gate.creole.Transducer",
84 params, features);
85 transducer.setName("Transducer " + System.currentTimeMillis());
86
87 fireProgressChanged(100);
88 fireProcessFinished();
89
90 return this;
91 }
92
93 public void execute() throws ExecutionException{
94 interrupted = false;
95 FeatureMap params;
97 if(inputASName != null && inputASName.equals("")) inputASName = null;
98 if(outputASName != null && outputASName.equals("")) outputASName = null;
99 try{
100 fireProgressChanged(0);
101 params = Factory.newFeatureMap();
102 params.put(DefaultGazetteer.DEF_GAZ_DOCUMENT_PARAMETER_NAME, document);
103 params.put(DefaultGazetteer.DEF_GAZ_ANNOT_SET_PARAMETER_NAME, inputASName);
104 gazetteer.setParameterValues(params);
105
106 params = Factory.newFeatureMap();
107 params.put(Transducer.TRANSD_DOCUMENT_PARAMETER_NAME, document);
108 params.put(Transducer.TRANSD_INPUT_AS_PARAMETER_NAME, inputASName);
109 params.put(Transducer.TRANSD_OUTPUT_AS_PARAMETER_NAME, inputASName);
110 transducer.setParameterValues(params);
111 }catch(Exception e){
112 throw new ExecutionException(e);
113 }
114 ProgressListener pListener = null;
115 StatusListener sListener = null;
116 fireProgressChanged(5);
117
118 if(isInterrupted()) throw new ExecutionInterruptedException(
120 "The execution of the \"" + getName() +
121 "\" sentence splitter has been abruptly interrupted!");
122 pListener = new IntervalProgressListener(5, 10);
123 sListener = new StatusListener(){
124 public void statusChanged(String text){
125 fireStatusChanged(text);
126 }
127 };
128 gazetteer.addProgressListener(pListener);
129 gazetteer.addStatusListener(sListener);
130 gazetteer.execute();
131 gazetteer.removeProgressListener(pListener);
132 gazetteer.removeStatusListener(sListener);
133
134 if(isInterrupted()) throw new ExecutionInterruptedException(
136 "The execution of the \"" + getName() +
137 "\" sentence splitter has been abruptly interrupted!");
138 pListener = new IntervalProgressListener(11, 90);
139 transducer.addProgressListener(pListener);
140 transducer.addStatusListener(sListener);
141 transducer.execute();
142 transducer.removeProgressListener(pListener);
143 transducer.removeStatusListener(sListener);
144
145 AnnotationSet inputAS = (inputASName == null) ?
147 document.getAnnotations() :
148 document.getAnnotations(inputASName);
149
150 AnnotationSet outputAS = (outputASName == null) ?
151 document.getAnnotations() :
152 document.getAnnotations(outputASName);
153
154 if(inputAS != outputAS){
156 outputAS.addAll(inputAS.get(SENTENCE_ANNOTATION_TYPE));
157 }
158
159 AnnotationSet sentences = outputAS.get(SENTENCE_ANNOTATION_TYPE);
161 if(sentences == null || sentences.isEmpty()){
162 outputAS.add(outputAS.firstNode(), outputAS.lastNode(),
163 SENTENCE_ANNOTATION_TYPE,
164 Factory.newFeatureMap());;
165 }else{
166 Long endSentences = sentences.lastNode().getOffset();
168 AnnotationSet remainingTokens = inputAS.get(TOKEN_ANNOTATION_TYPE, endSentences,
169 inputAS.lastNode().getOffset());
170 if(remainingTokens != null && !remainingTokens.isEmpty()){
171 try{
172 outputAS.add(remainingTokens.firstNode().getOffset(),
173 remainingTokens.lastNode().getOffset(),
174 SENTENCE_ANNOTATION_TYPE,
175 Factory.newFeatureMap());
176 }catch(InvalidOffsetException ioe){
177 throw new ExecutionException(ioe);
178 }
179 }
180 }
181 fireProcessFinished();
182 }
184
188 public synchronized void interrupt(){
189 interrupted = true;
190 gazetteer.interrupt();
191 transducer.interrupt();
192 }
193
194 public void setTransducerURL(java.net.URL newTransducerURL) {
195 transducerURL = newTransducerURL;
196 }
197 public java.net.URL getTransducerURL() {
198 return transducerURL;
199 }
200 DefaultGazetteer gazetteer;
201 Transducer transducer;
202 private java.net.URL transducerURL;
203 private String encoding;
204 private java.net.URL gazetteerListsURL;
205
206
207 public void setEncoding(String newEncoding) {
208 encoding = newEncoding;
209 }
210 public String getEncoding() {
211 return encoding;
212 }
213 public void setGazetteerListsURL(java.net.URL newGazetteerListsURL) {
214 gazetteerListsURL = newGazetteerListsURL;
215 }
216 public java.net.URL getGazetteerListsURL() {
217 return gazetteerListsURL;
218 }
219 public void setInputASName(String newInputASName) {
220 inputASName = newInputASName;
221 }
222
223 public String getInputASName() {
224 return inputASName;
225 }
226 public void setOutputASName(String newOutputASName) {
227 outputASName = newOutputASName;
228 }
229 public String getOutputASName() {
230 return outputASName;
231 }
232
233
234
235 private static final boolean DEBUG = false;
236 private String inputASName;
237 private String outputASName;
238 }