1   package gate.creole.morph;
2   
3   import java.net.MalformedURLException;
4   import java.net.URL;
5   import java.util.*;
6   
7   import junit.framework.*;
8   
9   import gate.*;
10  import gate.creole.*;
11  import gate.creole.tokeniser.DefaultTokeniser;
12  import gate.util.*;
13  import gate.util.Files;
14  import gate.util.OffsetComparator;
15  
16  /**
17   * <p>Title: TestMorph </p>
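 * <p>Description: JUnit tests that run the Morph analyser over verb and noun
 * word lists and compare the roots it finds with the expected answers.</p>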
19   * <p>Copyright: Copyright (c) 2000</p>
20   * <p>Company: University Of Sheffield</p>
21   * @author not attributable
22   * @version 1.0
23   */
24  
25  public class TestMorph
26      extends TestCase {
27  
28    private Morph morpher;
29    private Document verbDocumentToTest, verbDocumentWithAnswers,
30        nounDocumentToTest, nounDocumentWithAnswers;
31    private FeatureMap params;
32    private DefaultTokeniser tokeniser;
33    private int counter = 0;
34    private int outOf = 0;
35  
36    public TestMorph(String dummy) {
37      super(dummy);
38    }
39  
40    /**
41     * This method sets up the parameters for the files to be testes
42     * It initialises the Tokenizer and sets up the other parameters for
43     * the morph program
44     */
45    protected void setUp() {
46      try{
47        // creating documents
48        verbDocumentToTest = Factory.newDocument(
49          Gate.class.getResource(Files.getResourcePath() +
50          "/gate.ac.uk/tests/morph/verbTest.dat"));
51        verbDocumentWithAnswers = Factory.newDocument(
52                Gate.class.getResource(Files.getResourcePath() +
53                "/gate.ac.uk/tests/morph/verbAnswer.dat"));
54        nounDocumentToTest = Factory.newDocument(
55                Gate.class.getResource(Files.getResourcePath() +
56                "/gate.ac.uk/tests/morph/nounTest.dat"));
57        nounDocumentWithAnswers = Factory.newDocument(
58                Gate.class.getResource(Files.getResourcePath() +
59                "/gate.ac.uk/tests/morph/nounAnswer.dat"));
60        // create the instance of (Morphological analyzer)
61        morpher = (Morph)Factory.createResource("gate.creole.morph.Morph");
62      }catch (ResourceInstantiationException rie) {
63        throw new GateRuntimeException(rie);
64  //      fail("Resources cannot be created for the test and the answer file");
65      }
66  
67  
68  
69      // set the parameters for the morpher, feature names
70      morpher.setAffixFeatureName("affix");
71      morpher.setRootFeatureName("root");
72  
73  
74      try {
75        // finally create the Tokenizer
76        tokeniser = (DefaultTokeniser) Factory.createResource(
77            "gate.creole.tokeniser.DefaultTokeniser");
78      }
79      catch (ResourceInstantiationException rie) {
80        fail("Resources cannot be created fpr tokenizers");
81      }
82    }
83  
84    /**
85     * Test the morpher on verbs, if their roots are identified correctly or not
86     */
87    public void testVerbs() {
88  
89      // run the tokenizer on the verbTestDocument
90      tokeniser.setDocument(verbDocumentToTest);
91      tokeniser.setAnnotationSetName("TokeniserAS");
92      try {
93        tokeniser.execute();
94      }
95      catch (ExecutionException ee) {
96        fail("Error while executing Tokenizer on the test document");
97      }
98  
99      // run the tokenizer on the verbAnswerDocument
100     tokeniser.setDocument(verbDocumentWithAnswers);
101     tokeniser.setAnnotationSetName("TokeniserAS");
102     try {
103       tokeniser.execute();
104     }
105     catch (ExecutionException ee) {
106       fail("Error while executing Tokenizer on the test document");
107     }
108 
109     // check both documents are processed correctly by tokeniser
110     assertTrue(!verbDocumentToTest.getAnnotations("TokeniserAS").isEmpty());
111     assertTrue(!verbDocumentWithAnswers.getAnnotations("TokeniserAS").isEmpty());
112 
113 
114     // so we have finished running the tokenizer, now we need to test the
115     // morph program to test the document
116     morpher.setDocument(verbDocumentToTest);
117 
118     // compile the rules
119     // and check that the resource is being created successfully
120     try {
121       ProcessingResource pr = (ProcessingResource) (morpher.init());
122       assertTrue(pr != null);
123     }
124     catch (ResourceInstantiationException rie) {
125       fail("Error occured while compiling rules for morphological analyser" +
126            " using the default.rul file");
127     }
128 
129     // now check if the tokenizer was run properly on the document
130     AnnotationSet inputAs = verbDocumentToTest.getAnnotations("TokeniserAS");
131     List queryTokens = new ArrayList(inputAs.get(ANNIEConstants.
132                                                  TOKEN_ANNOTATION_TYPE));
133     Comparator offsetComparator = new OffsetComparator();
134     Collections.sort(queryTokens, offsetComparator);
135 
136     // same procedure with the answer document
137     AnnotationSet inputAs1 = verbDocumentWithAnswers.getAnnotations(
138         "TokeniserAS");
139     List answerTokens = new ArrayList(inputAs1.get(ANNIEConstants.
140         TOKEN_ANNOTATION_TYPE));
141     Collections.sort(answerTokens, offsetComparator);
142 
143     // create iterator to get access to each and every individual token
144     Iterator queryTokensIter = queryTokens.iterator();
145     Iterator answerTokensIter = answerTokens.iterator();
146 
147     while (queryTokensIter.hasNext() && answerTokensIter.hasNext()) {
148 
149       // get the word to test
150       Annotation currentQueryToken = (Annotation) queryTokensIter.next();
151       String queryTokenValue = (String) (currentQueryToken.getFeatures().
152                                          get(ANNIEConstants.
153                                              TOKEN_STRING_FEATURE_NAME));
154 
155       // get the answer of this word
156       Annotation currentAnswerToken = (Annotation) answerTokensIter.next();
157       String answerTokenValue = (String) (currentAnswerToken.getFeatures().
158                                           get(ANNIEConstants.
159                                               TOKEN_STRING_FEATURE_NAME));
160       // run the morpher
161       String rootWord = morpher.findBaseWord(queryTokenValue, "VB");
162 
163       // compare it with the answerTokenValue
164       assertEquals(rootWord, answerTokenValue);
165     }
166   }
167 
168   /**
169    * Test the morpher on nouns, if their roots are identified correctly or not
170    */
171   public void testNouns() {
172 
173     // run the tokenizer on the nounTestDocument
174     tokeniser.setDocument(nounDocumentToTest);
175     tokeniser.setAnnotationSetName("TokeniserAS");
176     try {
177       tokeniser.execute();
178     }
179     catch (ExecutionException ee) {
180       fail("Error while executing Tokenizer on the test document");
181     }
182 
183     // run the tokenizer on the nounAnswerDocument
184     tokeniser.setDocument(nounDocumentWithAnswers);
185     tokeniser.setAnnotationSetName("TokeniserAS");
186     try {
187       tokeniser.execute();
188     }
189     catch (ExecutionException ee) {
190       fail("Error while executing Tokenizer on the test document");
191     }
192 
193     // check both documents are processed correctly by tokeniser
194     assertTrue(!nounDocumentToTest.getAnnotations("TokeniserAS").isEmpty());
195     assertTrue(!nounDocumentWithAnswers.getAnnotations("TokeniserAS").isEmpty());
196 
197     // so we have finished running the tokenizer
198     // now we need to test the morph program
199 
200     // document to test
201     morpher.setDocument(nounDocumentToTest);
202 
203     // compile the rules
204     // and check that the resource is being created successfully
205     try {
206       ProcessingResource pr = (ProcessingResource) (morpher.init());
207       assertTrue(pr != null);
208     }
209     catch (ResourceInstantiationException rie) {
210       fail("Error occured while compiling rules for morphological analyser" +
211            " using the default.rul file");
212     }
213 
214     // now check if the tokenizer was run properly on the document
215     AnnotationSet inputAs = nounDocumentToTest.getAnnotations("TokeniserAS");
216     List queryTokens = new ArrayList(inputAs.get(ANNIEConstants.
217                                                  TOKEN_ANNOTATION_TYPE));
218     Comparator offsetComparator = new OffsetComparator();
219     Collections.sort(queryTokens, offsetComparator);
220 
221     // same procedure with the answer document
222     AnnotationSet inputAs1 = nounDocumentWithAnswers.getAnnotations(
223         "TokeniserAS");
224     List answerTokens = new ArrayList(inputAs1.get(ANNIEConstants.
225         TOKEN_ANNOTATION_TYPE));
226     Collections.sort(answerTokens, offsetComparator);
227 
228     // create iterator to get access to each and every individual token
229     Iterator queryTokensIter = queryTokens.iterator();
230     Iterator answerTokensIter = answerTokens.iterator();
231 
232     while (queryTokensIter.hasNext() && answerTokensIter.hasNext()) {
233 
234       // get the word to test
235       Annotation currentQueryToken = (Annotation) queryTokensIter.next();
236       String queryTokenValue = (String) (currentQueryToken.getFeatures().
237                                          get(ANNIEConstants.
238                                              TOKEN_STRING_FEATURE_NAME));
239 
240       // get the answer of this word
241       Annotation currentAnswerToken = (Annotation) answerTokensIter.next();
242       String answerTokenValue = (String) (currentAnswerToken.getFeatures().
243                                           get(ANNIEConstants.
244                                               TOKEN_STRING_FEATURE_NAME));
245       //String category = (String) (currentAnswerToken.getFeatures().get(ANNIEConstants.TOKEN_CATEGORY_FEATURE_NAME));
246 //      System.out.println(morpher+"  "+queryTokenValue);
247       // run the morpher
248       String rootWord = morpher.findBaseWord(queryTokenValue, "NN");
249 
250       // compare it with the answerTokenValue
251       assertEquals(rootWord, answerTokenValue);
252     }
253 
254   }
255 
256   public static Test suite() {
257     return new TestSuite(TestMorph.class);
258   }
259 }