|
WebAnnie |
|
1 package gate.util.web; 2 3 import java.util.*; 4 import java.io.*; 5 import java.net.*; 6 7 import gate.*; 8 import gate.creole.*; 9 import gate.util.*; 10 import gate.gui.*; 11 12 import javax.servlet.*; 13 14 /** 15 * This class is designed to demonstrate ANNIE in a web context. It should be 16 * called from either a servlet or a JSP. 17 */ 18 public class WebAnnie { 19 20 public static final String GATE_INIT_KEY = "gate.init"; 21 public static final String ANNIE_CONTROLLER_KEY = "annie.controller"; 22 23 /** The Corpus Pipeline application to contain ANNIE */ 24 private SerialAnalyserController annieController; 25 26 private String filePath = ""; 27 28 /** 29 * Initialise the ANNIE system. This creates a "corpus pipeline" 30 * application that can be used to run sets of documents through 31 * the extraction system. 32 */ 33 private void initAnnie() throws GateException { 34 35 // create a serial analyser controller to run ANNIE with 36 annieController = (SerialAnalyserController) 37 Factory.createResource("gate.creole.SerialAnalyserController", 38 Factory.newFeatureMap(), 39 Factory.newFeatureMap(), 40 "ANNIE_" + Gate.genSym() 41 ); 42 43 // Load tokenizer 44 ProcessingResource tokeniser = (ProcessingResource) 45 Factory.createResource("gate.creole.tokeniser.DefaultTokeniser", 46 Factory.newFeatureMap()); 47 48 annieController.add(tokeniser); 49 50 // Load sentence splitter 51 ProcessingResource split = (ProcessingResource) 52 Factory.createResource("gate.creole.splitter.SentenceSplitter", 53 Factory.newFeatureMap()); 54 55 annieController.add(split); 56 57 // Load POS tagger 58 ProcessingResource postagger = (ProcessingResource) 59 Factory.createResource("gate.creole.POSTagger", 60 Factory.newFeatureMap()); 61 62 annieController.add(postagger); 63 64 65 // Load Gazetteer -- this is a two step process 66 FeatureMap gazetteerFeatures = Factory.newFeatureMap(); 67 gazetteerFeatures.put("encoding","ISO-8859-1"); 68 69 // Step one: Locate the gazetteer file 70 try { 71 URL gazetteerURL = 72 new URL("jar:file:" + filePath + 73 "muse.jar!/muse/resources/gazetteer/lists.def"); 74 gazetteerFeatures.put("listsURL", gazetteerURL); 75 } catch(MalformedURLException e) { 76 e.printStackTrace(); 77 } 78 79 // Step two: Load the gazetteer from the file 80 ProcessingResource gazetteer = (ProcessingResource) 81 Factory.createResource("gate.creole.gazetteer.DefaultGazetteer", 82 gazetteerFeatures); 83 84 annieController.add(gazetteer); 85 86 // Load Grammar -- similar to gazetteer 87 FeatureMap grammarFeatures = Factory.newFeatureMap(); 88 89 try { 90 URL grammarURL = 91 new URL("jar:file:" + filePath + 92 "muse.jar!/muse/resources/grammar/main/main.jape"); 93 grammarFeatures.put("grammarURL", grammarURL); 94 } catch(MalformedURLException e) { 95 e.printStackTrace(); 96 } 97 98 ProcessingResource grammar = (ProcessingResource) 99 Factory.createResource("gate.creole.ANNIETransducer", 100 grammarFeatures); 101 102 annieController.add(grammar); 103 104 // Load Ortho Matcher 105 ProcessingResource orthoMatcher = (ProcessingResource) 106 Factory.createResource("gate.creole.orthomatcher.OrthoMatcher", 107 Factory.newFeatureMap()); 108 109 annieController.add(orthoMatcher); 110 111 } // initAnnie() 112 113 /** 114 * This method should be called from a servlet or JSP. 115 * @param app The current servlet context, eg the JSP implicit variable "application" 116 * @param url The url of the file to be analysed 117 * @param annotations An array of annotations 118 */ 119 public String process(ServletContext app, String url, String[] annotations) 120 throws GateException, IOException { 121 122 if (app.getAttribute(GATE_INIT_KEY) == null) { 123 Gate.setLocalWebServer(false); 124 Gate.setNetConnected(false); 125 126 System.setProperty("java.protocol.handler.pkgs", 127 "gate.util.protocols"); 128 129 // Do the deed 130 Gate.init(); 131 132 app.setAttribute(GATE_INIT_KEY, "true"); 133 } 134 135 if (app.getAttribute(ANNIE_CONTROLLER_KEY) == null) { 136 // initialise ANNIE (this may take several minutes) 137 138 filePath = app.getInitParameter("muse.path"); 139 this.initAnnie(); 140 141 app.setAttribute(ANNIE_CONTROLLER_KEY, annieController); 142 } 143 else { 144 annieController = (SerialAnalyserController) 145 app.getAttribute(ANNIE_CONTROLLER_KEY); 146 } 147 148 149 // create a GATE corpus and add a document from the URL specified 150 Corpus corpus = 151 (Corpus) Factory.createResource("gate.corpora.CorpusImpl"); 152 URL u = new URL(url); 153 FeatureMap params = Factory.newFeatureMap(); 154 params.put("sourceUrl", u); 155 156 Document doc = (Document) 157 Factory.createResource("gate.corpora.DocumentImpl", params); 158 corpus.add(doc); 159 160 161 // tell the pipeline about the corpus and run it 162 annieController.setCorpus(corpus); 163 annieController.execute(); 164 165 // Get XML marked up document 166 AnnotationSet defaultAnnotSet = doc.getAnnotations(); 167 Set annotTypesRequired = new HashSet(); 168 169 if (annotations != null) { 170 for (int i=0;i<annotations.length;i++) { 171 annotTypesRequired.add(annotations[i]); 172 } 173 AnnotationSet selectedAnnotations = 174 defaultAnnotSet.get(annotTypesRequired); 175 return doc.toXml(selectedAnnotations, true); 176 } 177 else { 178 return doc.toXml(); 179 } 180 181 } // process 182 183 } // class WebAnnie 184
|
WebAnnie |
|