|
DumpingPR |
|
1 /* 2 * DumpingPR.java 3 * 4 * Copyright (c) 1998-2001, The University of Sheffield. 5 * 6 * This file is part of GATE (see http://gate.ac.uk/), and is free 7 * software, licenced under the GNU Library General Public License, 8 * Version 2, June 1991 (in the distribution as file licence.html, 9 * and also available at http://gate.ac.uk/gate/licence.html). 10 * 11 * Kalina Bontcheva, 19/10/2001 12 * 13 * $Id: DumpingPR.java,v 1.8 2002/03/06 17:15:42 kalina Exp $ 14 */ 15 16 package gate.creole.dumpingPR; 17 18 import java.util.*; 19 import gate.*; 20 import gate.creole.*; 21 import gate.util.*; 22 import java.net.URL; 23 import java.io.*; 24 25 /** 26 * This class implements a DumpingPR which exports a given set of annotation 27 * types + the original markup, back into the document's native format. 28 * The export might also include the GATE features of those annotations or 29 * not (the default). One can also control whether the export files have a 30 * new suffix (useSuffixForDumpFiles) and what this suffix is 31 * (suffixForDumpFiles). By default, a suffix is used and it is .gate. 32 */ 33 public class DumpingPR extends AbstractLanguageAnalyser 34 implements ProcessingResource { 35 36 public static final String 37 DPR_DOCUMENT_PARAMETER_NAME = "document"; 38 39 public static final String 40 DPR_ANN_SET_PARAMETER_NAME = "annotationSetName"; 41 42 public static final String 43 DPR_ANN_TYPES_PARAMETER_NAME = "annotationTypes"; 44 45 public static final String 46 DPR_DUMP_TYPES_PARAMETER_NAME = "dumpTypes"; 47 48 public static final String 49 DPR_OUTPUR_URL_PARAMETER_NAME = "outputFileUrl"; 50 51 public static final String 52 DPR_INCLUDE_FEAT_PARAMETER_NAME = "includeFeatures"; 53 54 public static final String 55 DPR_USE_SUFFIX_PARAMETER_NAME = "useSuffixForDumpFiles"; 56 57 public static final String 58 DPR_FILE_SUFFIX_PARAMETER_NAME = "suffixForDumpFiles"; 59 60 private static final boolean DEBUG = false; 61 62 /** 63 * A list of annotation types, which are to be dumped into the output file 64 */ 65 protected List annotationTypes; 66 67 /** 68 * A list of strings specifying new names to be used instead of the original 69 * annotation types given in the annotationTypes parameter. For example, if 70 * annotationTypes was set to [Location, Date], then if dumpTypes is set to 71 * [Place, Date-expr], then the labels <Place> and <Date-expr> will be inserted 72 * instead of <Location> and <Date>. 73 */ 74 protected List dumpTypes; 75 76 /**the name of the annotation set 77 * from which to take the annotations for dumping 78 */ 79 protected String annotationSetName; 80 81 /** 82 * Whether or not to include the annotation features during export 83 */ 84 protected boolean includeFeatures = false; 85 86 /** 87 * What suffix to use for the dump files. .gate by default, but can be 88 * changed via the set method. 89 */ 90 protected String suffixForDumpFiles = ".gate"; 91 92 /** 93 * Whether or not to use the special suffix fo the dump files. True by 94 * default. 95 */ 96 protected boolean useSuffixForDumpFiles = true; 97 98 protected java.net.URL outputFileUrl; 99 100 private static final String DUMPING_PR_SET = "DumpingPRTempSet"; 101 102 /** Initialise this resource, and return it. */ 103 public Resource init() throws ResourceInstantiationException 104 { 105 return super.init(); 106 } // init() 107 108 /** 109 * Reinitialises the processing resource. After calling this method the 110 * resource should be in the state it is after calling init. 111 * If the resource depends on external resources (such as rules files) then 112 * the resource will re-read those resources. If the data used to create 113 * the resource has changed since the resource has been created then the 114 * resource will change too after calling reInit(). 115 */ 116 public void reInit() throws ResourceInstantiationException 117 { 118 init(); 119 } // reInit() 120 121 /** Run the resource. */ 122 public void execute() throws ExecutionException { 123 124 if(document == null) 125 throw new GateRuntimeException("No document to process!"); 126 127 AnnotationSet allAnnots; 128 // get the annotations from document 129 if ((annotationSetName == null)|| (annotationSetName.equals(""))) 130 allAnnots = document.getAnnotations(); 131 else 132 allAnnots = document.getAnnotations(annotationSetName); 133 134 //if none found, print warning and exit 135 if ((allAnnots == null) || allAnnots.isEmpty()) { 136 Out.prln("DumpingPR Warning: No annotations found for export. " 137 + "Including only those from the Original markups set."); 138 write2File(null); 139 return; 140 } 141 142 //first transfer the annotation types from a list to a set 143 //don't I just hate this! 144 Set types2Export = new HashSet(); 145 for(int i=0; i<annotationTypes.size(); i++) 146 types2Export.add(annotationTypes.get(i)); 147 148 //then get the annotations for export 149 AnnotationSet annots2Export = allAnnots.get(types2Export); 150 151 //check whether we want the annotations to be renamed before 152 //export (that's what dumpTypes is for) 153 if (dumpTypes != null && !dumpTypes.isEmpty()) { 154 HashMap renameMap = new HashMap(); 155 for(int i=0; i<dumpTypes.size() && i<annotationTypes.size(); i++) { 156 //check if we have a corresponding annotationType and if yes, 157 //then add to the hash map for renaming 158 renameMap.put(annotationTypes.get(i), dumpTypes.get(i)); 159 }//for 160 //if we have to rename annotations, then do so 161 if(!renameMap.isEmpty() && annots2Export != null) 162 annots2Export = renameAnnotations(annots2Export, renameMap); 163 }//if 164 165 write2File(annots2Export); 166 document.removeAnnotationSet(this.DUMPING_PR_SET); 167 168 } // execute() 169 170 protected void write2File(AnnotationSet exportSet) { 171 File outputFile; 172 String source = (String) document.getFeatures().get("gate.SourceURL"); 173 try { 174 URL sourceURL = new URL(source); 175 StringBuffer tempBuff = new StringBuffer(sourceURL.getFile()); 176 //now append the special suffix if we want to use it 177 if (useSuffixForDumpFiles) 178 tempBuff.append(this.suffixForDumpFiles); 179 String outputPath = tempBuff.toString(); 180 if (DEBUG) 181 Out.prln(outputPath); 182 outputFile = new File(outputPath); 183 } catch (java.net.MalformedURLException ex) { 184 if (outputFileUrl != null) 185 outputFile = new File(outputFileUrl.getFile()); 186 else 187 throw new GateRuntimeException("Cannot export GATE annotations because" 188 + "document does not have a valid source URL."); 189 } 190 191 try { 192 // Prepare to write into the xmlFile using UTF-8 encoding 193 OutputStreamWriter writer = new OutputStreamWriter( 194 new FileOutputStream(outputFile),"UTF-8"); 195 196 // Write (test the toXml() method) 197 // This Action is added only when a gate.Document is created. 198 // So, is for sure that the resource is a gate.Document 199 writer.write(document.toXml(exportSet, includeFeatures)); 200 writer.flush(); 201 writer.close(); 202 } catch (IOException ex) { 203 throw new GateRuntimeException("Dumping PR: Error writing document " 204 + document.getName() + ": " 205 + ex.getMessage()); 206 } 207 208 209 }//write2File 210 211 protected AnnotationSet renameAnnotations(AnnotationSet annots2Export, 212 HashMap renameMap){ 213 Iterator iter = annots2Export.iterator(); 214 AnnotationSet as = document.getAnnotations(DUMPING_PR_SET); 215 if (!as.isEmpty()) 216 as.clear(); 217 while(iter.hasNext()) { 218 Annotation annot = (Annotation) iter.next(); 219 //first check whether this type needs to be renamed 220 //if not, continue 221 if (!renameMap.containsKey(annot.getType())) 222 renameMap.put(annot.getType(), annot.getType()); 223 try{ 224 as.add(annot.getId(), 225 annot.getStartNode().getOffset(), 226 annot.getEndNode().getOffset(), 227 (String) renameMap.get(annot.getType()), 228 annot.getFeatures()); 229 } catch (InvalidOffsetException ex) { 230 throw new GateRuntimeException("DumpingPR: " + ex.getMessage()); 231 } 232 }//while 233 return as; 234 }//renameAnnotations 235 236 237 /**get the name of the annotation set*/ 238 public String getAnnotationSetName() { 239 return annotationSetName; 240 }//getAnnotationSetName 241 242 /** set the annotation set name*/ 243 public void setAnnotationSetName(String newAnnotationSetName) { 244 annotationSetName = newAnnotationSetName; 245 }//setAnnotationSetName 246 247 public List getAnnotationTypes() { 248 return this.annotationTypes; 249 } 250 251 public void setAnnotationTypes(List newTypes) { 252 annotationTypes = newTypes; 253 } 254 255 public List getDumpTypes() { 256 return this.dumpTypes; 257 } 258 259 public void setDumpTypes(List newTypes) { 260 dumpTypes = newTypes; 261 } 262 263 public URL getOutputFileUrl() { 264 return this.outputFileUrl; 265 } 266 267 public void setOutputFileUrl(URL file) { 268 outputFileUrl = file; 269 } 270 271 public void setIncludeFeatures(Boolean inclFeatures) { 272 if (inclFeatures != null) 273 includeFeatures = inclFeatures.booleanValue(); 274 } 275 276 public Boolean getIncludeFeatures() { 277 return new Boolean(includeFeatures); 278 } 279 280 public String getSuffixForDumpFiles() { 281 return suffixForDumpFiles; 282 } 283 284 public void setSuffixForDumpFiles(String newSuffix) { 285 this.suffixForDumpFiles = newSuffix; 286 } 287 288 public Boolean getUseSuffixForDumpFiles() { 289 return new Boolean(this.useSuffixForDumpFiles); 290 } 291 292 public void setUseSuffixForDumpFiles(Boolean useOrNot) { 293 if (useOrNot != null) 294 this.useSuffixForDumpFiles = useOrNot.booleanValue(); 295 } 296 297 } // class AnnotationSetTransfer 298
|
DumpingPR |
|