1   /*
2    *  TestXml.java
3    *
4    *  Copyright (c) 1998-2004, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Cristian URSU,  8/May/2000
12   *
13   *  $Id: TestXml.java,v 1.57 2004/08/06 16:08:26 valyt Exp $
14   */
15  
16  package gate.xml;
17  
18  import java.io.File;
19  import java.net.URL;
20  import java.util.*;
21  
22  import junit.framework.*;
23  
24  import gate.*;
25  import gate.util.Files;
26  //import org.w3c.www.mime.*;
27  
28  
29  /** Test class for XML facilities
30    *
31    */
32  public class TestXml extends TestCase
33  {
34    /** Debug flag */
35    private static final boolean DEBUG = false;
36  
37    /** Construction */
38    public TestXml(String name) { super(name); }
39  
40    /** Fixture set up */
41    public void setUp() {
42    } // setUp
43  
44    public void testGateDocumentToAndFromXmlWithDifferentKindOfFormats()
45                                                                 throws Exception{
46      List urlList = new LinkedList();
47      List urlDescription = new LinkedList();
48      URL url = null;
49  
50      url = Gate.getUrl("tests/xml/xces.xml");
51      assertTrue("Coudn't create a URL object for tests/xml/xces.xml ", url != null);
52      urlList.add(url);
53      urlDescription.add(" an XML document ");
54  
55      url = Gate.getUrl("tests/xml/Sentence.xml");
56      assertTrue("Coudn't create a URL object for tests/xml/Sentence.xml",
57                                                           url != null);
58      urlList.add(url);
59      urlDescription.add(" an XML document ");
60  
61      url = Gate.getUrl("tests/html/test1.htm");
62      assertTrue("Coudn't create a URL object for tests/html/test.htm",url != null);
63      urlList.add(url);
64      urlDescription.add(" an HTML document ");
65  
66      url = Gate.getUrl("tests/rtf/Sample.rtf");
67      assertTrue("Coudn't create a URL object for defg ",url != null);
68      urlList.add(url);
69      urlDescription.add(" a RTF document ");
70  
71  
72      url = Gate.getUrl("tests/email/test2.eml");
73      assertTrue("Coudn't create a URL object for defg ",url != null);
74      urlList.add(url);
75      urlDescription.add(" an EMAIL document ");
76  
77      Iterator iter = urlList.iterator();
78      Iterator descrIter = urlDescription.iterator();
79      while(iter.hasNext()){
80        runCompleteTestWithAFormat((URL) iter.next(),(String)descrIter.next());
81      }// End While
82  
83  
84    }// testGateDocumentToAndFromXmlWithDifferentKindOfFormats
85  
86    private void runCompleteTestWithAFormat(URL url, String urlDescription)
87                                                               throws Exception{
88      // Load the xml Key Document and unpack it
89      gate.Document keyDocument = null;
90  
91      FeatureMap params = Factory.newFeatureMap();
92      params.put(Document.DOCUMENT_URL_PARAMETER_NAME, url);
93      params.put(Document.DOCUMENT_MARKUP_AWARE_PARAMETER_NAME, "false");
94      keyDocument = (Document)Factory.createResource("gate.corpora.DocumentImpl",
95                                                      params);
96  
97      assertTrue("Coudn't create a GATE document instance for " +
98              url.toString() +
99              " Can't continue." , keyDocument != null);
100 
101     gate.DocumentFormat keyDocFormat = null;
102     keyDocFormat = gate.DocumentFormat.getDocumentFormat(
103       keyDocument, keyDocument.getSourceUrl()
104     );
105 
106     assertTrue("Fail to recognize " +
107             url.toString() +
108             " as being " + urlDescription + " !", keyDocFormat != null);
109 
110     // Unpack the markup
111     keyDocFormat.unpackMarkup(keyDocument);
112     // Verfy if all annotations from the default annotation set are consistent
113     gate.corpora.TestDocument.verifyNodeIdConsistency(keyDocument);
114 
115     // Save the size of the document and the number of annotations
116     long keyDocumentSize = keyDocument.getContent().size().longValue();
117     int keyDocumentAnnotationSetSize = keyDocument.getAnnotations().size();
118 
119 
120     // Export the Gate document called keyDocument as  XML, into a temp file,
121     // using UTF-8 encoding
122     File xmlFile = null;
123     xmlFile = Files.writeTempFile(keyDocument.toXml(),"UTF-8");
124     assertTrue("The temp GATE XML file is null. Can't continue.",xmlFile != null);
125 /*
126     // Prepare to write into the xmlFile using UTF-8 encoding
127     OutputStreamWriter writer = new OutputStreamWriter(
128                     new FileOutputStream(xmlFile),"UTF-8");
129     // Write (test the toXml() method)
130     writer.write(keyDocument.toXml());
131     writer.flush();
132     writer.close();
133 */
134     // Load the XML Gate document form the tmp file into memory
135     gate.Document gateDoc = null;
136     gateDoc = gate.Factory.newDocument(xmlFile.toURL());
137 
138     assertTrue("Coudn't create a GATE document instance for " +
139                 xmlFile.toURL().toString() +
140                 " Can't continue." , gateDoc != null);
141 
142     gate.DocumentFormat gateDocFormat = null;
143     gateDocFormat =
144             DocumentFormat.getDocumentFormat(gateDoc,gateDoc.getSourceUrl());
145 
146     assertTrue("Fail to recognize " +
147       xmlFile.toURL().toString() +
148       " as being a GATE XML document !", gateDocFormat != null);
149 
150     gateDocFormat.unpackMarkup(gateDoc);
151     // Verfy if all annotations from the default annotation set are consistent
152     gate.corpora.TestDocument.verifyNodeIdConsistency(gateDoc);
153 
154     // Save the size of the document snd the number of annotations
155     long gateDocSize = keyDocument.getContent().size().longValue();
156     int gateDocAnnotationSetSize = keyDocument.getAnnotations().size();
157 
158     assertTrue("Exporting as GATE XML resulted in document content size lost." +
159       " Something went wrong.", keyDocumentSize == gateDocSize);
160 
161     assertTrue("Exporting as GATE XML resulted in annotation lost." +
162       " No. of annotations missing =  " +
163       Math.abs(keyDocumentAnnotationSetSize - gateDocAnnotationSetSize),
164       keyDocumentAnnotationSetSize == gateDocAnnotationSetSize);
165 
166     //Don't need tmp Gate XML file.
167     xmlFile.delete();
168   }//runCompleteTestWithAFormat
169 
170   /** A test */
171   public void testUnpackMarkup() throws Exception{
172     // create the markupElementsMap map
173     Map markupElementsMap = null;
174     gate.Document doc = null;
175     /*
176     markupElementsMap = new HashMap();
177     // populate it
178     markupElementsMap.put ("S","Sentence");
179     markupElementsMap.put ("s","Sentence");
180     */
181     // Create the element2String map
182     Map anElement2StringMap = null;
183     anElement2StringMap = new HashMap();
184     // Populate it
185     anElement2StringMap.put("S","\n");
186     anElement2StringMap.put("s","\n");
187 
188     doc = gate.Factory.newDocument(Gate.getUrl("tests/xml/xces.xml"));
189  //doc = gate.Factory.newDocument(new URL("file:///z:/gu.xml"));
190 
191     AnnotationSet annotSet = doc.getAnnotations(
192                         GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
193     assertEquals("For "+doc.getSourceUrl()+" the number of annotations"+
194     " should be:758",758,annotSet.size());
195 
196     gate.corpora.TestDocument.verifyNodeIdConsistency(doc);
197   } // testUnpackMarkup()
198 
199   /** Test suite routine for the test runner */
200   public static Test suite() {
201     return new TestSuite(TestXml.class);
202   } // suite
203 
204 } // class TestXml
205