|
Document |
|
1 /* 2 * Document.java 3 * 4 * Copyright (c) 1998-2001, The University of Sheffield. 5 * 6 * This file is part of GATE (see http://gate.ac.uk/), and is free 7 * software, licenced under the GNU Library General Public License, 8 * Version 2, June 1991 (in the distribution as file licence.html, 9 * and also available at http://gate.ac.uk/gate/licence.html). 10 * 11 * Hamish Cunningham, 19/Jan/2000 12 * 13 * $Id: Document.java,v 1.37 2002/03/06 17:15:37 kalina Exp $ 14 */ 15 16 package gate; 17 18 import java.util.*; 19 import java.net.*; 20 21 import gate.util.*; 22 import gate.event.*; 23 24 25 /** Represents the commonalities between all sorts of documents. 26 */ 27 public interface Document extends LanguageResource, Comparable { 28 29 /** 30 * The parameter name for the document URL 31 */ 32 public static final String 33 DOCUMENT_URL_PARAMETER_NAME = "sourceUrl"; 34 35 /** 36 * The parameter name that determines whether or not a document is markup aware 37 */ 38 public static final String 39 DOCUMENT_MARKUP_AWARE_PARAMETER_NAME = "markupAware"; 40 41 public static final String 42 DOCUMENT_ENCODING_PARAMETER_NAME = "encoding"; 43 44 public static final String 45 DOCUMENT_PRESERVE_CONTENT_PARAMETER_NAME = "preserveOriginalContent"; 46 47 public static final String 48 DOCUMENT_STRING_CONTENT_PARAMETER_NAME = "stringContent"; 49 50 public static final String 51 DOCUMENT_REPOSITIONING_PARAMETER_NAME = "collectRepositioningInfo"; 52 53 public static final String 54 DOCUMENT_START_OFFSET_PARAMETER_NAME = "sourceUrlStartOffset"; 55 56 public static final String 57 DOCUMENT_END_OFFSET_PARAMETER_NAME = "sourceUrlEndOffset"; 58 59 /** Documents are identified by URLs 60 */ 61 public URL getSourceUrl(); 62 63 /** Set method for the document's URL 64 */ 65 public void setSourceUrl(URL sourceUrl); 66 67 /** Documents may be packed within files; in this case an optional pair of 68 * offsets refer to the location of the document. 69 */ 70 public Long[] getSourceUrlOffsets(); 71 72 /** Documents may be packed within files; in this case an optional pair of 73 * offsets refer to the location of the document. This method gets the 74 * start offset. 75 */ 76 public Long getSourceUrlStartOffset(); 77 78 /** Documents may be packed within files; in this case an optional pair of 79 * offsets refer to the location of the document. This method gets the 80 * end offset. 81 */ 82 public Long getSourceUrlEndOffset(); 83 84 /** The content of the document: wraps e.g. String for text; MPEG for 85 * video; etc. 86 */ 87 public DocumentContent getContent(); 88 89 /** Set method for the document content 90 */ 91 public void setContent(DocumentContent newContent); 92 93 /** Get the default set of annotations. The set is created if it 94 * doesn't exist yet. 95 */ 96 public AnnotationSet getAnnotations(); 97 98 /** Get a named set of annotations. Creates a new set if one with this 99 * name doesn't exist yet. 100 */ 101 public AnnotationSet getAnnotations(String name); 102 103 /** Returns a map with the named annotation sets 104 */ 105 public Map getNamedAnnotationSets(); 106 107 /** 108 * Removes one of the named annotation sets. 109 * Note that the default annotation set cannot be removed. 110 * @param name the name of the annotation set to be removed 111 */ 112 public void removeAnnotationSet(String name); 113 114 /** Make the document markup-aware. This will trigger the creation 115 * of a DocumentFormat object at Document initialisation time; the 116 * DocumentFormat object will unpack the markup in the Document and 117 * add it as annotations. Documents are <B>not</B> markup-aware by default. 118 * 119 * @param b markup awareness status. 120 */ 121 public void setMarkupAware(Boolean b); 122 123 /** Get the markup awareness status of the Document. 124 * 125 * @return whether the Document is markup aware. 126 */ 127 public Boolean getMarkupAware(); 128 129 /** 130 * Allow/disallow preserving of the original document content. 131 * If is <B>true</B> the original content will be retrieved from 132 * the DocumentContent object and preserved as document feature. 133 */ 134 public void setPreserveOriginalContent(Boolean b); 135 136 /** Get the preserving of content status of the Document. 137 * 138 * @return whether the Document should preserve it's original content. 139 */ 140 public Boolean getPreserveOriginalContent(); 141 142 /** 143 * Allow/disallow collecting of repositioning information. 144 * If is <B>true</B> information will be retrieved and preserved 145 * as document feature.<BR> 146 * Preserving of repositioning information give the possibilities 147 * for converting of coordinates between the original document content and 148 * extracted from the document text. 149 */ 150 public void setCollectRepositioningInfo(Boolean b); 151 152 /** Get the collectiong and preserving of repositioning information 153 * for the Document. <BR> 154 * Preserving of repositioning information give the possibilities 155 * for converting of coordinates between the original document content and 156 * extracted from the document text. 157 * 158 * @return whether the Document should collect and preserve information. 159 */ 160 public Boolean getCollectRepositioningInfo(); 161 162 /** Returns a GateXml document. This document is actually a serialization of 163 * a Gate Document in XML. 164 * @return a string representing a Gate Xml document 165 */ 166 public String toXml(); 167 168 /** Returns an XML document aming to preserve the original markups( 169 * the original markup will be in the same place and format as it was 170 * before processing the document) and include (if possible) 171 * the annotations specified in the aSourceAnnotationSet. 172 * <b>Warning:</b> Annotations from the aSourceAnnotationSet will be lost 173 * if they will cause a crosed over situation. 174 * @param aSourceAnnotationSet is an annotation set containing all the 175 * annotations that will be combined with the original marup set. 176 * @param includeFeatures determines whether or not features and gate IDs 177 * of the annotations should be included as attributes on the tags or not. 178 * If false, then only the annotation types are exported as tags, with no 179 * attributes. 180 * @return a string representing an XML document containing the original 181 * markup + dumped annotations form the aSourceAnnotationSet 182 */ 183 public String toXml(Set aSourceAnnotationSet, boolean includeFeatures); 184 185 /** 186 * Equivalent to toXml(aSourceAnnotationSet, true). 187 */ 188 public String toXml(Set aSourceAnnotationSet); 189 190 /** Make changes to the content. 191 */ 192 public void edit(Long start, Long end, DocumentContent replacement) 193 throws InvalidOffsetException; 194 195 /** 196 * Adds a {@link gate.event.DocumentListener} to this document. 197 * All the registered listeners will be notified of changes occured to the 198 * document. 199 */ 200 public void addDocumentListener(DocumentListener l); 201 202 /** 203 * Removes one of the previously registered document listeners. 204 */ 205 public void removeDocumentListener(DocumentListener l); 206 207 208 /** Documents may be packed within files; in this case an optional pair of 209 * offsets refer to the location of the document. This method sets the 210 * end offset. 211 */ 212 public void setSourceUrlEndOffset(Long sourceUrlEndOffset); 213 214 215 /** Documents may be packed within files; in this case an optional pair of 216 * offsets refer to the location of the document. This method sets the 217 * start offset. 218 */ 219 public void setSourceUrlStartOffset(Long sourceUrlStartOffset); 220 221 } // interface Document 222 223
|
Document |
|