1   /*
2    *  Document.java
3    *
4    *  Copyright (c) 1998-2001, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Hamish Cunningham, 19/Jan/2000
12   *
13   *  $Id: Document.java,v 1.37 2002/03/06 17:15:37 kalina Exp $
14   */
15  
16  package gate;
17  
18  import java.util.*;
19  import java.net.*;
20  
21  import gate.util.*;
22  import gate.event.*;
23  
24  
25  /** Represents the commonalities between all sorts of documents.
26   */
27  public interface Document extends LanguageResource, Comparable {
28  
29    /**
30     * The parameter name for the document URL
31     */
32    public static final String
33      DOCUMENT_URL_PARAMETER_NAME = "sourceUrl";
34  
35    /**
36    * The parameter name that determines whether or not a document is markup aware
37    */
38    public static final String
39      DOCUMENT_MARKUP_AWARE_PARAMETER_NAME = "markupAware";
40  
41    public static final String
42      DOCUMENT_ENCODING_PARAMETER_NAME = "encoding";
43  
44    public static final String
45      DOCUMENT_PRESERVE_CONTENT_PARAMETER_NAME = "preserveOriginalContent";
46  
47    public static final String
48      DOCUMENT_STRING_CONTENT_PARAMETER_NAME = "stringContent";
49  
50    public static final String
51      DOCUMENT_REPOSITIONING_PARAMETER_NAME = "collectRepositioningInfo";
52  
53    public static final String
54      DOCUMENT_START_OFFSET_PARAMETER_NAME = "sourceUrlStartOffset";
55  
56    public static final String
57      DOCUMENT_END_OFFSET_PARAMETER_NAME = "sourceUrlEndOffset";
58  
59    /** Documents are identified by URLs
60     */
61    public URL getSourceUrl();
62  
63    /** Set method for the document's URL
64     */
65    public void setSourceUrl(URL sourceUrl);
66  
67    /** Documents may be packed within files; in this case an optional pair of
68     *  offsets refer to the location of the document.
69     */
70    public Long[] getSourceUrlOffsets();
71  
72    /** Documents may be packed within files; in this case an optional pair of
73     *  offsets refer to the location of the document. This method gets the
74     *  start offset.
75     */
76    public Long getSourceUrlStartOffset();
77  
78    /** Documents may be packed within files; in this case an optional pair of
79     *  offsets refer to the location of the document. This method gets the
80     *  end offset.
81     */
82    public Long getSourceUrlEndOffset();
83  
84    /** The content of the document: wraps e.g. String for text; MPEG for
85     *  video; etc.
86     */
87    public DocumentContent getContent();
88  
89    /** Set method for the document content
90     */
91    public void setContent(DocumentContent newContent);
92  
93    /** Get the default set of annotations. The set is created if it
94     *  doesn't exist yet.
95     */
96    public AnnotationSet getAnnotations();
97  
98    /** Get a named set of annotations. Creates a new set if one with this
99     *  name doesn't exist yet.
100    */
101   public AnnotationSet getAnnotations(String name);
102 
103   /** Returns a map with the named annotation sets
104     */
105   public Map getNamedAnnotationSets();
106 
107   /**
108    * Removes one of the named annotation sets.
109    * Note that the default annotation set cannot be removed.
110    * @param name the name of the annotation set to be removed
111    */
112   public void removeAnnotationSet(String name);
113 
114   /** Make the document markup-aware. This will trigger the creation
115    *  of a DocumentFormat object at Document initialisation time; the
116    *  DocumentFormat object will unpack the markup in the Document and
117    *  add it as annotations. Documents are <B>not</B> markup-aware by default.
118    *
119    *  @param b markup awareness status.
120    */
121   public void setMarkupAware(Boolean b);
122 
123   /** Get the markup awareness status of the Document.
124    *
125    *  @return whether the Document is markup aware.
126    */
127   public Boolean getMarkupAware();
128 
129   /**
130    * Allow/disallow preserving of the original document content.
131    * If is <B>true</B> the original content will be retrieved from
132    * the DocumentContent object and preserved as document feature.
133    */
134   public void setPreserveOriginalContent(Boolean b);
135 
136   /** Get the preserving of content status of the Document.
137    *
138    *  @return whether the Document should preserve it's original content.
139    */
140   public Boolean getPreserveOriginalContent();
141 
142   /**
143    *  Allow/disallow collecting of repositioning information.
144    *  If is <B>true</B> information will be retrieved and preserved
145    *  as document feature.<BR>
146    *  Preserving of repositioning information give the possibilities
147    *  for converting of coordinates between the original document content and
148    *  extracted from the document text.
149    */
150   public void setCollectRepositioningInfo(Boolean b);
151 
152   /** Get the collectiong and preserving of repositioning information
153    *  for the Document. <BR>
154    *  Preserving of repositioning information give the possibilities
155    *  for converting of coordinates between the original document content and
156    *  extracted from the document text.
157    *
158    *  @return whether the Document should collect and preserve information.
159    */
160   public Boolean getCollectRepositioningInfo();
161 
162   /** Returns a GateXml document. This document is actually a serialization of
163    *  a Gate Document in XML.
164     * @return a string representing a Gate Xml document
165     */
166   public String toXml();
167 
168   /** Returns an XML document aming to preserve the original markups(
169     * the original markup will be in the same place and format as it was
170     * before processing the document) and include (if possible)
171     * the annotations specified in the aSourceAnnotationSet.
172     * <b>Warning:</b> Annotations from the aSourceAnnotationSet will be lost
173     * if they will cause a crosed over situation.
174     * @param aSourceAnnotationSet is an annotation set containing all the
175     * annotations that will be combined with the original marup set.
176     * @param includeFeatures determines whether or not features and gate IDs
177     * of the annotations should be included as attributes on the tags or not.
178     * If false, then only the annotation types are exported as tags, with no
179     * attributes.
180     * @return a string representing an XML document containing the original
181     * markup + dumped annotations form the aSourceAnnotationSet
182     */
183   public String toXml(Set aSourceAnnotationSet, boolean includeFeatures);
184 
185   /**
186    * Equivalent to toXml(aSourceAnnotationSet, true).
187    */
188   public String toXml(Set aSourceAnnotationSet);
189 
190   /** Make changes to the content.
191    */
192   public void edit(Long start, Long end, DocumentContent replacement)
193     throws InvalidOffsetException;
194 
195   /**
196    * Adds a {@link gate.event.DocumentListener} to this document.
197    * All the registered listeners will be notified of changes occured to the
198    * document.
199    */
200   public void addDocumentListener(DocumentListener l);
201 
202   /**
203    * Removes one of the previously registered document listeners.
204    */
205   public void removeDocumentListener(DocumentListener l);
206 
207 
208   /** Documents may be packed within files; in this case an optional pair of
209     * offsets refer to the location of the document. This method sets the
210     * end offset.
211     */
212   public void setSourceUrlEndOffset(Long sourceUrlEndOffset);
213 
214 
215   /** Documents may be packed within files; in this case an optional pair of
216     * offsets refer to the location of the document. This method sets the
217     * start offset.
218     */
219   public void setSourceUrlStartOffset(Long sourceUrlStartOffset);
220 
221 } // interface Document
222 
223