|
SentenceSplitter |
|
1 /* 2 * Copyright (c) 1998-2001, The University of Sheffield. 3 * 4 * This file is part of GATE (see http://gate.ac.uk/), and is free 5 * software, licenced under the GNU Library General Public License, 6 * Version 2, June 1991 (in the distribution as file licence.html, 7 * and also available at http://gate.ac.uk/gate/licence.html). 8 * 9 * Valentin Tablan, 01 Feb 2000 10 * 11 * $Id: SentenceSplitter.java,v 1.21 2001/12/03 10:57:01 valyt Exp $ 12 */ 13 14 15 package gate.creole.splitter; 16 17 import gate.*; 18 import gate.util.*; 19 import gate.event.*; 20 import gate.creole.tokeniser.*; 21 import gate.creole.gazetteer.*; 22 import gate.creole.*; 23 24 import java.util.*; 25 /** 26 * A sentence splitter. This is module similar to a 27 * {@link gate.creole.nerc.Nerc} in the fact that it conatins a tokeniser, a 28 * gazetteer and a Jape grammar. This class is used so we can have a different 29 * entry in the creole.xml file describing the default resources and to add 30 * some minor processing after running the components in order to extract the 31 * results in a usable form. 32 */ 33 public class SentenceSplitter extends AbstractLanguageAnalyser{ 34 35 public Resource init()throws ResourceInstantiationException{ 36 //create all the componets 37 FeatureMap params; 38 FeatureMap features; 39 40 //gazetteer 41 fireStatusChanged("Creating the gazetteer"); 42 params = Factory.newFeatureMap(); 43 if(gazetteerListsURL != null) params.put("listsURL", 44 gazetteerListsURL); 45 params.put("encoding", encoding); 46 features = Factory.newFeatureMap(); 47 Gate.setHiddenAttribute(features, true); 48 49 50 gazetteer = (DefaultGazetteer)Factory.createResource( 51 "gate.creole.gazetteer.DefaultGazetteer", 52 params, features); 53 gazetteer.setName("Gazetteer " + System.currentTimeMillis()); 54 fireProgressChanged(10); 55 56 //transducer 57 fireStatusChanged("Creating the JAPE transducer"); 58 59 params = Factory.newFeatureMap(); 60 if(transducerURL != null) params.put("grammarURL", transducerURL); 61 params.put("encoding", encoding); 62 features = Factory.newFeatureMap(); 63 Gate.setHiddenAttribute(features, true); 64 65 transducer = (Transducer)Factory.createResource( 66 "gate.creole.Transducer", 67 params, features); 68 transducer.setName("Transducer " + System.currentTimeMillis()); 69 70 fireProgressChanged(100); 71 fireProcessFinished(); 72 73 return this; 74 } 75 76 public void execute() throws ExecutionException{ 77 interrupted = false; 78 //set the runtime parameters 79 FeatureMap params; 80 if(inputASName != null && inputASName.equals("")) inputASName = null; 81 if(outputASName != null && outputASName.equals("")) outputASName = null; 82 try{ 83 fireProgressChanged(0); 84 params = Factory.newFeatureMap(); 85 params.put("document", document); 86 params.put("annotationSetName", inputASName); 87 gazetteer.setParameterValues(params); 88 89 params = Factory.newFeatureMap(); 90 params.put("document", document); 91 params.put("inputASName", inputASName); 92 params.put("outputASName", inputASName); 93 transducer.setParameterValues(params); 94 }catch(Exception e){ 95 throw new ExecutionException(e); 96 } 97 ProgressListener pListener = null; 98 StatusListener sListener = null; 99 fireProgressChanged(5); 100 101 //run the gazetteer 102 if(isInterrupted()) throw new ExecutionInterruptedException( 103 "The execution of the \"" + getName() + 104 "\" sentence splitter has been abruptly interrupted!"); 105 pListener = new IntervalProgressListener(5, 10); 106 sListener = new StatusListener(){ 107 public void statusChanged(String text){ 108 fireStatusChanged(text); 109 } 110 }; 111 gazetteer.addProgressListener(pListener); 112 gazetteer.addStatusListener(sListener); 113 gazetteer.execute(); 114 gazetteer.removeProgressListener(pListener); 115 gazetteer.removeStatusListener(sListener); 116 117 //run the transducer 118 if(isInterrupted()) throw new ExecutionInterruptedException( 119 "The execution of the \"" + getName() + 120 "\" sentence splitter has been abruptly interrupted!"); 121 pListener = new IntervalProgressListener(11, 90); 122 transducer.addProgressListener(pListener); 123 transducer.addStatusListener(sListener); 124 transducer.execute(); 125 transducer.removeProgressListener(pListener); 126 transducer.removeStatusListener(sListener); 127 128 //get pointers to the annotation sets 129 AnnotationSet inputAS = (inputASName == null) ? 130 document.getAnnotations() : 131 document.getAnnotations(inputASName); 132 133 AnnotationSet outputAS = (outputASName == null) ? 134 document.getAnnotations() : 135 document.getAnnotations(outputASName); 136 137 //copy the results to the output set if they are different 138 if(inputAS != outputAS){ 139 outputAS.addAll(inputAS.get("Sentence")); 140 } 141 142 //create one big sentence if none were found 143 AnnotationSet sentences = outputAS.get("Sentence"); 144 if(sentences == null || sentences.isEmpty()){ 145 outputAS.add(outputAS.firstNode(), outputAS.lastNode(), 146 "Sentence", Factory.newFeatureMap());; 147 } 148 fireProcessFinished(); 149 }//execute() 150 151 /** 152 * Notifies all the PRs in this controller that they should stop their 153 * execution as soon as possible. 154 */ 155 public synchronized void interrupt(){ 156 interrupted = true; 157 gazetteer.interrupt(); 158 transducer.interrupt(); 159 } 160 161 public void setTransducerURL(java.net.URL newTransducerURL) { 162 transducerURL = newTransducerURL; 163 } 164 public java.net.URL getTransducerURL() { 165 return transducerURL; 166 } 167 DefaultGazetteer gazetteer; 168 Transducer transducer; 169 private java.net.URL transducerURL; 170 private String encoding; 171 private java.net.URL gazetteerListsURL; 172 173 174 public void setEncoding(String newEncoding) { 175 encoding = newEncoding; 176 } 177 public String getEncoding() { 178 return encoding; 179 } 180 public void setGazetteerListsURL(java.net.URL newGazetteerListsURL) { 181 gazetteerListsURL = newGazetteerListsURL; 182 } 183 public java.net.URL getGazetteerListsURL() { 184 return gazetteerListsURL; 185 } 186 public void setInputASName(String newInputASName) { 187 inputASName = newInputASName; 188 } 189 190 public String getInputASName() { 191 return inputASName; 192 } 193 public void setOutputASName(String newOutputASName) { 194 outputASName = newOutputASName; 195 } 196 public String getOutputASName() { 197 return outputASName; 198 } 199 200 201 202 private static final boolean DEBUG = false; 203 private String inputASName; 204 private String outputASName; 205 }//public class SentenceSplitter extends Nerc
|
SentenceSplitter |
|