1   /*
2    *  Copyright (c) 1998-2001, The University of Sheffield.
3    *
4    *  This file is part of GATE (see http://gate.ac.uk/), and is free
5    *  software, licenced under the GNU Library General Public License,
6    *  Version 2, June 1991 (in the distribution as file licence.html,
7    *  and also available at http://gate.ac.uk/gate/licence.html).
8    *
9    *  Valentin Tablan, 01 Feb 2000
10   *
11   *  $Id: SentenceSplitter.java,v 1.21 2001/12/03 10:57:01 valyt Exp $
12   */
13  
14  
15  package gate.creole.splitter;
16  
17  import gate.*;
18  import gate.util.*;
19  import gate.event.*;
20  import gate.creole.tokeniser.*;
21  import gate.creole.gazetteer.*;
22  import gate.creole.*;
23  
24  import java.util.*;
/**
 * A sentence splitter. This module is similar to a
 * {@link gate.creole.nerc.Nerc} in that it is built from several wrapped
 * components; this class itself creates and runs a gazetteer and a JAPE
 * grammar (transducer). This class is used so we can have a different
 * entry in the creole.xml file describing the default resources and to add
 * some minor processing after running the components in order to extract the
 * results in a usable form.
 */
33  public class SentenceSplitter extends AbstractLanguageAnalyser{
34  
35    public Resource init()throws ResourceInstantiationException{
36      //create all the componets
37      FeatureMap params;
38      FeatureMap features;
39  
40      //gazetteer
41      fireStatusChanged("Creating the gazetteer");
42      params = Factory.newFeatureMap();
43      if(gazetteerListsURL != null) params.put("listsURL",
44                                               gazetteerListsURL);
45      params.put("encoding", encoding);
46      features = Factory.newFeatureMap();
47      Gate.setHiddenAttribute(features, true);
48  
49  
50      gazetteer = (DefaultGazetteer)Factory.createResource(
51                      "gate.creole.gazetteer.DefaultGazetteer",
52                      params, features);
53      gazetteer.setName("Gazetteer " + System.currentTimeMillis());
54      fireProgressChanged(10);
55  
56      //transducer
57      fireStatusChanged("Creating the JAPE transducer");
58  
59      params = Factory.newFeatureMap();
60      if(transducerURL != null) params.put("grammarURL", transducerURL);
61      params.put("encoding", encoding);
62      features = Factory.newFeatureMap();
63      Gate.setHiddenAttribute(features, true);
64  
65      transducer = (Transducer)Factory.createResource(
66                      "gate.creole.Transducer",
67                      params, features);
68      transducer.setName("Transducer " + System.currentTimeMillis());
69  
70      fireProgressChanged(100);
71      fireProcessFinished();
72  
73      return this;
74    }
75  
76    public void execute() throws ExecutionException{
77      interrupted = false;
78      //set the runtime parameters
79      FeatureMap params;
80      if(inputASName != null && inputASName.equals("")) inputASName = null;
81      if(outputASName != null && outputASName.equals("")) outputASName = null;
82      try{
83        fireProgressChanged(0);
84        params = Factory.newFeatureMap();
85        params.put("document", document);
86        params.put("annotationSetName", inputASName);
87        gazetteer.setParameterValues(params);
88  
89        params = Factory.newFeatureMap();
90        params.put("document", document);
91        params.put("inputASName", inputASName);
92        params.put("outputASName", inputASName);
93        transducer.setParameterValues(params);
94      }catch(Exception e){
95        throw new ExecutionException(e);
96      }
97      ProgressListener pListener = null;
98      StatusListener sListener = null;
99      fireProgressChanged(5);
100 
101     //run the gazetteer
102     if(isInterrupted()) throw new ExecutionInterruptedException(
103         "The execution of the \"" + getName() +
104         "\" sentence splitter has been abruptly interrupted!");
105     pListener = new IntervalProgressListener(5, 10);
106     sListener = new StatusListener(){
107       public void statusChanged(String text){
108         fireStatusChanged(text);
109       }
110     };
111     gazetteer.addProgressListener(pListener);
112     gazetteer.addStatusListener(sListener);
113     gazetteer.execute();
114     gazetteer.removeProgressListener(pListener);
115     gazetteer.removeStatusListener(sListener);
116 
117     //run the transducer
118     if(isInterrupted()) throw new ExecutionInterruptedException(
119         "The execution of the \"" + getName() +
120         "\" sentence splitter has been abruptly interrupted!");
121     pListener = new IntervalProgressListener(11, 90);
122     transducer.addProgressListener(pListener);
123     transducer.addStatusListener(sListener);
124     transducer.execute();
125     transducer.removeProgressListener(pListener);
126     transducer.removeStatusListener(sListener);
127 
128     //get pointers to the annotation sets
129     AnnotationSet inputAS = (inputASName == null) ?
130                             document.getAnnotations() :
131                             document.getAnnotations(inputASName);
132 
133     AnnotationSet outputAS = (outputASName == null) ?
134                              document.getAnnotations() :
135                              document.getAnnotations(outputASName);
136 
137     //copy the results to the output set if they are different
138     if(inputAS != outputAS){
139       outputAS.addAll(inputAS.get("Sentence"));
140     }
141 
142     //create one big sentence if none were found
143     AnnotationSet sentences = outputAS.get("Sentence");
144     if(sentences == null || sentences.isEmpty()){
145       outputAS.add(outputAS.firstNode(), outputAS.lastNode(),
146                    "Sentence", Factory.newFeatureMap());;
147     }
148     fireProcessFinished();
149   }//execute()
150 
151   /**
152    * Notifies all the PRs in this controller that they should stop their
153    * execution as soon as possible.
154    */
155   public synchronized void interrupt(){
156     interrupted = true;
157     gazetteer.interrupt();
158     transducer.interrupt();
159   }
160 
161   public void setTransducerURL(java.net.URL newTransducerURL) {
162     transducerURL = newTransducerURL;
163   }
164   public java.net.URL getTransducerURL() {
165     return transducerURL;
166   }
167   DefaultGazetteer gazetteer;
168   Transducer transducer;
169   private java.net.URL transducerURL;
170   private String encoding;
171   private java.net.URL gazetteerListsURL;
172 
173 
174   public void setEncoding(String newEncoding) {
175     encoding = newEncoding;
176   }
177   public String getEncoding() {
178     return encoding;
179   }
180   public void setGazetteerListsURL(java.net.URL newGazetteerListsURL) {
181     gazetteerListsURL = newGazetteerListsURL;
182   }
183   public java.net.URL getGazetteerListsURL() {
184     return gazetteerListsURL;
185   }
186   public void setInputASName(String newInputASName) {
187     inputASName = newInputASName;
188   }
189 
190   public String getInputASName() {
191     return inputASName;
192   }
193   public void setOutputASName(String newOutputASName) {
194     outputASName = newOutputASName;
195   }
196   public String getOutputASName() {
197     return outputASName;
198   }
199 
200 
201 
202   private static final boolean DEBUG = false;
203   private String inputASName;
204   private String outputASName;
205 }//public class SentenceSplitter extends Nerc