|
DumpingPR |
|
/*
 *  DumpingPR.java
 *
 *  Copyright (c) 1998-2001, The University of Sheffield.
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Kalina Bontcheva, 19/10/2001
 *
 *  $Id: DumpingPR.java,v 1.9 2002/04/12 14:35:21 kalina Exp $
 */

package gate.creole.dumpingPR;

import java.util.*;
import gate.*;
import gate.creole.*;
import gate.corpora.DocumentImpl;
import gate.util.*;
import java.net.URL;
import java.io.*;

/**
 * This class implements a DumpingPR which exports a given set of annotation
 * types + the original markup, back into the document's native format.
 * The export might also include the GATE features of those annotations or
 * not (the default). One can also control whether the export files have a
 * new suffix (useSuffixForDumpFiles) and what this suffix is
 * (suffixForDumpFiles). By default, a suffix is used and it is .gate.
 */
public class DumpingPR extends AbstractLanguageAnalyser
  implements ProcessingResource {

  public static final String
    DPR_DOCUMENT_PARAMETER_NAME = "document";

  public static final String
    DPR_ANN_SET_PARAMETER_NAME = "annotationSetName";

  public static final String
    DPR_ANN_TYPES_PARAMETER_NAME = "annotationTypes";

  public static final String
    DPR_DUMP_TYPES_PARAMETER_NAME = "dumpTypes";

  // NOTE: "OUTPUR" is a historical misspelling; the name is public and is
  // therefore kept unchanged for backward compatibility.
  public static final String
    DPR_OUTPUR_URL_PARAMETER_NAME = "outputFileUrl";

  public static final String
    DPR_INCLUDE_FEAT_PARAMETER_NAME = "includeFeatures";

  public static final String
    DPR_USE_SUFFIX_PARAMETER_NAME = "useSuffixForDumpFiles";

  public static final String
    DPR_FILE_SUFFIX_PARAMETER_NAME = "suffixForDumpFiles";

  private static final boolean DEBUG = false;

  /**
   * A list of annotation types, which are to be dumped into the output file
   */
  protected List annotationTypes;

  /**
   * A list of strings specifying new names to be used instead of the original
   * annotation types given in the annotationTypes parameter. For example, if
   * annotationTypes was set to [Location, Date], then if dumpTypes is set to
   * [Place, Date-expr], then the labels &lt;Place&gt; and &lt;Date-expr&gt;
   * will be inserted instead of &lt;Location&gt; and &lt;Date&gt;.
   */
  protected List dumpTypes;

  /**
   * The name of the annotation set from which to take the annotations for
   * dumping. When null or empty, the document's default set is used.
   */
  protected String annotationSetName;

  /**
   * Whether or not to include the annotation features during export
   */
  protected boolean includeFeatures = false;

  /**
   * What suffix to use for the dump files. .gate by default, but can be
   * changed via the set method.
   */
  protected String suffixForDumpFiles = ".gate";

  /**
   * Whether or not to use the special suffix for the dump files. True by
   * default.
   */
  protected boolean useSuffixForDumpFiles = true;

  /**
   * Fallback output location; only consulted by write2File when the
   * document's "gate.SourceURL" feature is not a valid URL.
   */
  protected java.net.URL outputFileUrl;

  /** Name of the temporary annotation set used to hold renamed annotations. */
  private static final String DUMPING_PR_SET = "DumpingPRTempSet";

  /** Initialise this resource, and return it. */
  public Resource init() throws ResourceInstantiationException
  {
    return super.init();
  } // init()

  /**
   * Reinitialises the processing resource. After calling this method the
   * resource should be in the state it is after calling init.
   * If the resource depends on external resources (such as rules files) then
   * the resource will re-read those resources. If the data used to create
   * the resource has changed since the resource has been created then the
   * resource will change too after calling reInit().
   */
  public void reInit() throws ResourceInstantiationException
  {
    init();
  } // reInit()

  /**
   * Run the resource: select the annotations of the requested types from the
   * configured annotation set, optionally rename them according to dumpTypes,
   * and write the document out via write2File.
   *
   * @throws GateRuntimeException if no document has been set, or if the
   *         export fails (see write2File and renameAnnotations)
   */
  public void execute() throws ExecutionException {

    if(document == null)
      throw new GateRuntimeException("No document to process!");

    AnnotationSet allAnnots;
    // get the annotations from document
    if ((annotationSetName == null) || (annotationSetName.equals("")))
      allAnnots = document.getAnnotations();
    else
      allAnnots = document.getAnnotations(annotationSetName);

    //if none found, print warning and export without any extra annotations
    if ((allAnnots == null) || allAnnots.isEmpty()) {
      Out.prln("DumpingPR Warning: No annotations found for export. "
               + "Including only those from the Original markups set.");
      write2File(null);
      return;
    }

    //transfer the annotation types from a list to a set; an unset
    //annotationTypes parameter is treated as an empty list rather than
    //failing with a NullPointerException
    Set types2Export = new HashSet();
    if (annotationTypes != null)
      types2Export.addAll(annotationTypes);

    //then get the annotations for export
    AnnotationSet annots2Export = allAnnots.get(types2Export);

    //check whether we want the annotations to be renamed before
    //export (that's what dumpTypes is for)
    if (dumpTypes != null && !dumpTypes.isEmpty() && annotationTypes != null) {
      HashMap renameMap = new HashMap();
      //pair up the i-th annotation type with the i-th dump type; surplus
      //entries in either list are ignored
      int pairs = Math.min(dumpTypes.size(), annotationTypes.size());
      for(int i = 0; i < pairs; i++)
        renameMap.put(annotationTypes.get(i), dumpTypes.get(i));

      //if we have to rename annotations, then do so
      if(!renameMap.isEmpty() && annots2Export != null)
        annots2Export = renameAnnotations(annots2Export, renameMap);
    }//if

    try {
      write2File(annots2Export);
    } finally {
      //always drop the temporary set, even when the export failed, so that
      //renamed copies do not linger in the document
      document.removeAnnotationSet(DUMPING_PR_SET);
    }

  } // execute()

  /**
   * Writes the document, as produced by
   * document.toXml(exportSet, includeFeatures), to the output file.
   * The file is written using the document's encoding when the document is a
   * DocumentImpl with a non-empty encoding, and the platform default encoding
   * otherwise.
   *
   * @param exportSet the annotations to export; may be null (execute() passes
   *        null when there is nothing to export besides the original markup)
   * @throws GateRuntimeException if no output file can be determined or if
   *         writing fails
   */
  protected void write2File(AnnotationSet exportSet) {
    File outputFile = resolveOutputFile();

    String encoding = null;
    if (document instanceof DocumentImpl)
      encoding = ((DocumentImpl) document).getEncoding();

    FileOutputStream out = null;
    try {
      out = new FileOutputStream(outputFile);
      // Prepare to write into the file using the doc's encoding if there
      OutputStreamWriter writer;
      if (encoding == null || "".equals(encoding))
        writer = new OutputStreamWriter(out);
      else
        writer = new OutputStreamWriter(out, encoding);

      writer.write(document.toXml(exportSet, includeFeatures));
      writer.flush();
      //closing the writer (not just the stream) lets the encoder emit any
      //trailing bytes; a failure here is reported like any write failure
      writer.close();
    } catch (IOException ex) {
      throw new GateRuntimeException("Dumping PR: Error writing document "
                                     + document.getName() + ": "
                                     + ex.getMessage());
    } finally {
      //release the file handle on every path (unsupported encoding, toXml
      //or write failure); harmless if the writer already closed the stream
      if (out != null) {
        try {
          out.close();
        } catch (IOException ignored) {
          //best effort: the primary failure, if any, is already propagating
        }
      }
    }

  }//write2File

  /**
   * Works out the file to dump into. The path is taken from the document's
   * "gate.SourceURL" feature, with suffixForDumpFiles appended when
   * useSuffixForDumpFiles is set. If that feature is not a valid URL, the
   * file part of outputFileUrl is used instead (without the suffix).
   *
   * @throws GateRuntimeException if neither location is available
   */
  private File resolveOutputFile() {
    String source = (String) document.getFeatures().get("gate.SourceURL");
    try {
      URL sourceURL = new URL(source);
      StringBuffer tempBuff = new StringBuffer(sourceURL.getFile());
      //now append the special suffix if we want to use it
      if (useSuffixForDumpFiles)
        tempBuff.append(suffixForDumpFiles);
      String outputPath = tempBuff.toString();
      if (DEBUG)
        Out.prln(outputPath);
      return new File(outputPath);
    } catch (java.net.MalformedURLException ex) {
      if (outputFileUrl != null)
        return new File(outputFileUrl.getFile());
      throw new GateRuntimeException("Cannot export GATE annotations because "
        + "document does not have a valid source URL.");
    }
  }//resolveOutputFile

  /**
   * Copies the given annotations into the temporary DumpingPRTempSet
   * annotation set of the document, giving each copy the type that renameMap
   * maps its original type to; types without a mapping are kept as they are.
   * The copies reuse the id, offsets and feature map of the originals.
   * Any previous content of the temporary set is cleared first, and
   * renameMap itself is not modified.
   *
   * @param annots2Export the annotations to copy
   * @param renameMap maps original annotation type to the type to export
   * @return the temporary annotation set holding the renamed copies
   * @throws GateRuntimeException if an annotation has invalid offsets
   */
  protected AnnotationSet renameAnnotations(AnnotationSet annots2Export,
                                            HashMap renameMap){
    AnnotationSet as = document.getAnnotations(DUMPING_PR_SET);
    if (!as.isEmpty())
      as.clear();

    Iterator iter = annots2Export.iterator();
    while(iter.hasNext()) {
      Annotation annot = (Annotation) iter.next();
      String oldType = annot.getType();
      //use the new name if one was requested, the original type otherwise
      String newType = renameMap.containsKey(oldType)
                       ? (String) renameMap.get(oldType)
                       : oldType;
      try{
        as.add(annot.getId(),
               annot.getStartNode().getOffset(),
               annot.getEndNode().getOffset(),
               newType,
               annot.getFeatures());
      } catch (InvalidOffsetException ex) {
        throw new GateRuntimeException("DumpingPR: " + ex.getMessage());
      }
    }//while
    return as;
  }//renameAnnotations


  /** get the name of the annotation set */
  public String getAnnotationSetName() {
    return annotationSetName;
  }//getAnnotationSetName

  /** set the annotation set name */
  public void setAnnotationSetName(String newAnnotationSetName) {
    annotationSetName = newAnnotationSetName;
  }//setAnnotationSetName

  /** get the list of annotation types to be dumped */
  public List getAnnotationTypes() {
    return this.annotationTypes;
  }

  /** set the list of annotation types to be dumped */
  public void setAnnotationTypes(List newTypes) {
    annotationTypes = newTypes;
  }

  /** get the list of replacement names for the dumped annotation types */
  public List getDumpTypes() {
    return this.dumpTypes;
  }

  /** set the list of replacement names for the dumped annotation types */
  public void setDumpTypes(List newTypes) {
    dumpTypes = newTypes;
  }

  /** get the fallback output file URL */
  public URL getOutputFileUrl() {
    return this.outputFileUrl;
  }

  /** set the fallback output file URL */
  public void setOutputFileUrl(URL file) {
    outputFileUrl = file;
  }

  /** set whether features are exported; a null value is ignored */
  public void setIncludeFeatures(Boolean inclFeatures) {
    if (inclFeatures != null)
      includeFeatures = inclFeatures.booleanValue();
  }

  /** get whether features are exported */
  public Boolean getIncludeFeatures() {
    return includeFeatures ? Boolean.TRUE : Boolean.FALSE;
  }

  /** get the suffix appended to dump file names */
  public String getSuffixForDumpFiles() {
    return suffixForDumpFiles;
  }

  /** set the suffix appended to dump file names */
  public void setSuffixForDumpFiles(String newSuffix) {
    this.suffixForDumpFiles = newSuffix;
  }

  /** get whether the suffix is appended to dump file names */
  public Boolean getUseSuffixForDumpFiles() {
    return this.useSuffixForDumpFiles ? Boolean.TRUE : Boolean.FALSE;
  }

  /** set whether the suffix is appended; a null value is ignored */
  public void setUseSuffixForDumpFiles(Boolean useOrNot) {
    if (useOrNot != null)
      this.useSuffixForDumpFiles = useOrNot.booleanValue();
  }

} // class DumpingPR
|
DumpingPR |
|