1   /*
2    *  DumpingPR.java
3    *
4    *  Copyright (c) 1998-2001, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Kalina Bontcheva, 19/10/2001
12   *
13   *  $Id: DumpingPR.java,v 1.8 2002/03/06 17:15:42 kalina Exp $
14   */
15  
16  package gate.creole.dumpingPR;
17  
18  import java.util.*;
19  import gate.*;
20  import gate.creole.*;
21  import gate.util.*;
22  import java.net.URL;
23  import java.io.*;
24  
25  /**
26   * This class implements a DumpingPR which exports a given set of annotation
27   * types + the original markup, back into the document's native format.
28   * The export might also include the GATE features of those annotations or
29   * not (the default). One can also control whether the export files have a
30   * new suffix (useSuffixForDumpFiles) and what this suffix is
31   * (suffixForDumpFiles). By default, a suffix is used and it is .gate.
32   */
33  public class DumpingPR extends AbstractLanguageAnalyser
34    implements ProcessingResource {
35  
36    public static final String
37      DPR_DOCUMENT_PARAMETER_NAME = "document";
38  
39    public static final String
40      DPR_ANN_SET_PARAMETER_NAME = "annotationSetName";
41  
42    public static final String
43      DPR_ANN_TYPES_PARAMETER_NAME = "annotationTypes";
44  
45    public static final String
46      DPR_DUMP_TYPES_PARAMETER_NAME = "dumpTypes";
47  
48    public static final String
49      DPR_OUTPUR_URL_PARAMETER_NAME = "outputFileUrl";
50  
51    public static final String
52      DPR_INCLUDE_FEAT_PARAMETER_NAME = "includeFeatures";
53  
54    public static final String
55      DPR_USE_SUFFIX_PARAMETER_NAME = "useSuffixForDumpFiles";
56  
57    public static final String
58      DPR_FILE_SUFFIX_PARAMETER_NAME = "suffixForDumpFiles";
59  
60    private static final boolean DEBUG = false;
61  
62    /**
63     * A list of annotation types, which are to be dumped into the output file
64     */
65    protected List annotationTypes;
66  
67    /**
68     * A list of strings specifying new names to be used instead of the original
69     * annotation types given in the annotationTypes parameter. For example, if
70     * annotationTypes was set to [Location, Date], then if dumpTypes is set to
71     * [Place, Date-expr], then the labels <Place> and <Date-expr> will be inserted
72     * instead of <Location> and <Date>.
73     */
74    protected List dumpTypes;
75  
76    /**the name of the annotation set
77     * from which to take the annotations for dumping
78     */
79    protected String annotationSetName;
80  
81    /**
82     * Whether or not to include the annotation features during export
83     */
84    protected boolean includeFeatures = false;
85  
86    /**
87     * What suffix to use for the dump files. .gate by default, but can be
88     * changed via the set method.
89     */
90    protected String suffixForDumpFiles = ".gate";
91  
92    /**
93     * Whether or not to use the special suffix fo the dump files. True by
94     * default.
95     */
96    protected boolean useSuffixForDumpFiles = true;
97  
98    protected java.net.URL outputFileUrl;
99  
100   private static final String DUMPING_PR_SET = "DumpingPRTempSet";
101 
102   /** Initialise this resource, and return it. */
103   public Resource init() throws ResourceInstantiationException
104   {
105     return super.init();
106   } // init()
107 
108   /**
109   * Reinitialises the processing resource. After calling this method the
110   * resource should be in the state it is after calling init.
111   * If the resource depends on external resources (such as rules files) then
112   * the resource will re-read those resources. If the data used to create
113   * the resource has changed since the resource has been created then the
114   * resource will change too after calling reInit().
115   */
116   public void reInit() throws ResourceInstantiationException
117   {
118     init();
119   } // reInit()
120 
121   /** Run the resource. */
122   public void execute() throws ExecutionException {
123 
124     if(document == null)
125       throw new GateRuntimeException("No document to process!");
126 
127     AnnotationSet allAnnots;
128     // get the annotations from document
129     if ((annotationSetName == null)|| (annotationSetName.equals("")))
130       allAnnots = document.getAnnotations();
131     else
132       allAnnots = document.getAnnotations(annotationSetName);
133 
134     //if none found, print warning and exit
135     if ((allAnnots == null) || allAnnots.isEmpty()) {
136       Out.prln("DumpingPR Warning: No annotations found for export. "
137                + "Including only those from the Original markups set.");
138       write2File(null);
139       return;
140     }
141 
142     //first transfer the annotation types from a list to a set
143     //don't I just hate this!
144     Set types2Export = new HashSet();
145     for(int i=0; i<annotationTypes.size(); i++)
146       types2Export.add(annotationTypes.get(i));
147 
148     //then get the annotations for export
149     AnnotationSet annots2Export = allAnnots.get(types2Export);
150 
151     //check whether we want the annotations to be renamed before
152     //export (that's what dumpTypes is for)
153     if (dumpTypes != null && !dumpTypes.isEmpty()) {
154       HashMap renameMap = new HashMap();
155       for(int i=0; i<dumpTypes.size() && i<annotationTypes.size(); i++) {
156         //check if we have a corresponding annotationType and if yes,
157         //then add to the hash map for renaming
158         renameMap.put(annotationTypes.get(i), dumpTypes.get(i));
159       }//for
160       //if we have to rename annotations, then do so
161       if(!renameMap.isEmpty() && annots2Export != null)
162         annots2Export = renameAnnotations(annots2Export, renameMap);
163     }//if
164 
165     write2File(annots2Export);
166     document.removeAnnotationSet(this.DUMPING_PR_SET);
167 
168   } // execute()
169 
170   protected void write2File(AnnotationSet exportSet) {
171     File outputFile;
172     String source = (String) document.getFeatures().get("gate.SourceURL");
173     try {
174       URL sourceURL = new URL(source);
175       StringBuffer tempBuff = new StringBuffer(sourceURL.getFile());
176       //now append the special suffix if we want to use it
177       if (useSuffixForDumpFiles)
178         tempBuff.append(this.suffixForDumpFiles);
179       String outputPath = tempBuff.toString();
180       if (DEBUG)
181         Out.prln(outputPath);
182       outputFile = new File(outputPath);
183     } catch (java.net.MalformedURLException ex) {
184       if (outputFileUrl != null)
185         outputFile = new File(outputFileUrl.getFile());
186       else
187         throw new GateRuntimeException("Cannot export GATE annotations because"
188                      + "document does not have a valid source URL.");
189     }
190 
191     try {
192       // Prepare to write into the xmlFile using UTF-8 encoding
193       OutputStreamWriter writer = new OutputStreamWriter(
194                             new FileOutputStream(outputFile),"UTF-8");
195 
196       // Write (test the toXml() method)
197       // This Action is added only when a gate.Document is created.
198       // So, is for sure that the resource is a gate.Document
199       writer.write(document.toXml(exportSet, includeFeatures));
200       writer.flush();
201       writer.close();
202     } catch (IOException ex) {
203       throw new GateRuntimeException("Dumping PR: Error writing document "
204                                      + document.getName() + ": "
205                                      + ex.getMessage());
206     }
207 
208 
209   }//write2File
210 
211   protected AnnotationSet renameAnnotations(AnnotationSet annots2Export,
212                                    HashMap renameMap){
213     Iterator iter = annots2Export.iterator();
214     AnnotationSet as = document.getAnnotations(DUMPING_PR_SET);
215     if (!as.isEmpty())
216       as.clear();
217     while(iter.hasNext()) {
218       Annotation annot = (Annotation) iter.next();
219       //first check whether this type needs to be renamed
220       //if not, continue
221       if (!renameMap.containsKey(annot.getType()))
222         renameMap.put(annot.getType(), annot.getType());
223       try{
224         as.add(annot.getId(),
225             annot.getStartNode().getOffset(),
226             annot.getEndNode().getOffset(),
227             (String) renameMap.get(annot.getType()),
228             annot.getFeatures());
229       } catch (InvalidOffsetException ex) {
230         throw new GateRuntimeException("DumpingPR: " + ex.getMessage());
231       }
232     }//while
233     return as;
234   }//renameAnnotations
235 
236 
237   /**get the name of the annotation set*/
238   public String getAnnotationSetName() {
239     return annotationSetName;
240   }//getAnnotationSetName
241 
242   /** set the annotation set name*/
243   public void setAnnotationSetName(String newAnnotationSetName) {
244     annotationSetName = newAnnotationSetName;
245   }//setAnnotationSetName
246 
247   public List getAnnotationTypes() {
248     return this.annotationTypes;
249   }
250 
251   public void setAnnotationTypes(List newTypes) {
252     annotationTypes = newTypes;
253   }
254 
255   public List getDumpTypes() {
256     return this.dumpTypes;
257   }
258 
259   public void setDumpTypes(List newTypes) {
260     dumpTypes = newTypes;
261   }
262 
263   public URL getOutputFileUrl() {
264     return this.outputFileUrl;
265   }
266 
267   public void setOutputFileUrl(URL file) {
268     outputFileUrl = file;
269   }
270 
271   public void setIncludeFeatures(Boolean inclFeatures) {
272     if (inclFeatures != null)
273       includeFeatures = inclFeatures.booleanValue();
274   }
275 
276   public Boolean getIncludeFeatures() {
277     return new Boolean(includeFeatures);
278   }
279 
280   public String getSuffixForDumpFiles() {
281     return suffixForDumpFiles;
282   }
283 
284   public void setSuffixForDumpFiles(String newSuffix) {
285     this.suffixForDumpFiles = newSuffix;
286   }
287 
288   public Boolean getUseSuffixForDumpFiles() {
289     return new Boolean(this.useSuffixForDumpFiles);
290   }
291 
292   public void setUseSuffixForDumpFiles(Boolean useOrNot) {
293     if (useOrNot != null)
294       this.useSuffixForDumpFiles = useOrNot.booleanValue();
295   }
296 
297 } // class AnnotationSetTransfer
298