1 package gate.util.web;
2
3 import java.io.IOException;
4 import java.net.MalformedURLException;
5 import java.net.URL;
6 import java.util.HashSet;
7 import java.util.Set;
8
9 import javax.servlet.ServletContext;
10
11 import gate.*;
12 import gate.creole.SerialAnalyserController;
13 import gate.util.GateException;
14
15
19 public class WebAnnie {
20
21 public static final String GATE_INIT_KEY = "gate.init";
22 public static final String ANNIE_CONTROLLER_KEY = "annie.controller";
23
24
25 private SerialAnalyserController annieController;
26
27 private String filePath = "";
28
29
34 private void initAnnie() throws GateException {
35
36 annieController = (SerialAnalyserController)
38 Factory.createResource("gate.creole.SerialAnalyserController",
39 Factory.newFeatureMap(),
40 Factory.newFeatureMap(),
41 "ANNIE_" + Gate.genSym()
42 );
43
44 ProcessingResource tokeniser = (ProcessingResource)
46 Factory.createResource("gate.creole.tokeniser.DefaultTokeniser",
47 Factory.newFeatureMap());
48
49 annieController.add(tokeniser);
50
51 ProcessingResource split = (ProcessingResource)
53 Factory.createResource("gate.creole.splitter.SentenceSplitter",
54 Factory.newFeatureMap());
55
56 annieController.add(split);
57
58 ProcessingResource postagger = (ProcessingResource)
60 Factory.createResource("gate.creole.POSTagger",
61 Factory.newFeatureMap());
62
63 annieController.add(postagger);
64
65
66 FeatureMap gazetteerFeatures = Factory.newFeatureMap();
68 gazetteerFeatures.put("encoding","ISO-8859-1");
69
70 try {
72 URL gazetteerURL =
73 new URL("jar:file:" + filePath +
74 "muse.jar!/muse/resources/gazetteer/lists.def");
75 gazetteerFeatures.put("listsURL", gazetteerURL);
76 } catch(MalformedURLException e) {
77 e.printStackTrace();
78 }
79
80 ProcessingResource gazetteer = (ProcessingResource)
82 Factory.createResource("gate.creole.gazetteer.DefaultGazetteer",
83 gazetteerFeatures);
84
85 annieController.add(gazetteer);
86
87 FeatureMap grammarFeatures = Factory.newFeatureMap();
89
90 try {
91 URL grammarURL =
92 new URL("jar:file:" + filePath +
93 "muse.jar!/muse/resources/grammar/main/main.jape");
94 grammarFeatures.put("grammarURL", grammarURL);
95 } catch(MalformedURLException e) {
96 e.printStackTrace();
97 }
98
99 ProcessingResource grammar = (ProcessingResource)
100 Factory.createResource("gate.creole.ANNIETransducer",
101 grammarFeatures);
102
103 annieController.add(grammar);
104
105 ProcessingResource orthoMatcher = (ProcessingResource)
107 Factory.createResource("gate.creole.orthomatcher.OrthoMatcher",
108 Factory.newFeatureMap());
109
110 annieController.add(orthoMatcher);
111
112 }
114
120 public String process(ServletContext app, String url, String[] annotations)
121 throws GateException, IOException {
122
123 if (app.getAttribute(GATE_INIT_KEY) == null) {
124 Gate.setLocalWebServer(false);
125 Gate.setNetConnected(false);
126
127 System.setProperty("java.protocol.handler.pkgs",
128 "gate.util.protocols");
129
130 Gate.init();
132
133 app.setAttribute(GATE_INIT_KEY, "true");
134 }
135
136 if (app.getAttribute(ANNIE_CONTROLLER_KEY) == null) {
137
139 filePath = app.getInitParameter("muse.path");
140 this.initAnnie();
141
142 app.setAttribute(ANNIE_CONTROLLER_KEY, annieController);
143 }
144 else {
145 annieController = (SerialAnalyserController)
146 app.getAttribute(ANNIE_CONTROLLER_KEY);
147 }
148
149
150 Corpus corpus =
152 (Corpus) Factory.createResource("gate.corpora.CorpusImpl");
153 URL u = new URL(url);
154 FeatureMap params = Factory.newFeatureMap();
155 params.put("sourceUrl", u);
156
157 Document doc = (Document)
158 Factory.createResource("gate.corpora.DocumentImpl", params);
159 corpus.add(doc);
160
161
162 annieController.setCorpus(corpus);
164 annieController.execute();
165
166 AnnotationSet defaultAnnotSet = doc.getAnnotations();
168 Set annotTypesRequired = new HashSet();
169
170 String output = null;
171 if (annotations != null) {
172 for (int i=0;i<annotations.length;i++) {
173 annotTypesRequired.add(annotations[i]);
174 }
175 AnnotationSet selectedAnnotations =
176 defaultAnnotSet.get(annotTypesRequired);
177 output = doc.toXml(selectedAnnotations, true);
178 }
179 else {
180 output = doc.toXml();
181 }
182 Factory.deleteResource(doc);
184 Factory.deleteResource(corpus);
185 return output;
186 }
188 }