1   /*
2    *  Coreferencer.java
3    *
4    *  Copyright (c) 1998-2001, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Marin Dimitrov, 18/Dec/2001
12   *
13   *  $Id: Coreferencer.java,v 1.15 2002/08/29 16:36:51 kalina Exp $
14   */
15  
16  package gate.creole.coref;
17  
18  import java.util.*;
19  
20  import junit.framework.*;
21  
22  import gate.*;
23  import gate.creole.*;
24  import gate.util.*;
25  
26  public class Coreferencer extends AbstractLanguageAnalyser
27                            implements ProcessingResource{
28  
29    public static final String COREF_DOCUMENT_PARAMETER_NAME = "document";
30  
31    public static final String COREF_ANN_SET_PARAMETER_NAME = "annotationSetName";
32  
33    public static final String COREF_TYPE_FEATURE_NAME = "ENTITY_MENTION_TYPE";
34    public static final String COREF_ANTECEDENT_FEATURE_NAME = "antecedent_offset";
35  
36    /** --- */
37    private static final boolean DEBUG = false;
38    /** --- */
39    private PronominalCoref pronominalModule;
40  
41    /** --- */
42    public Coreferencer() {
43      this.pronominalModule = new PronominalCoref();
44    }
45  
46  
47    /** Initialise this resource, and return it. */
48    public Resource init() throws ResourceInstantiationException {
49  
50      Resource result = super.init();
51  
52      //load all submodules
53      this.pronominalModule.init();
54  
55      return result;
56    } // init()
57  
58  
59    /**
60     * Reinitialises the processing resource. After calling this method the
61     * resource should be in the state it is after calling init.
62     * If the resource depends on external resources (such as rules files) then
63     * the resource will re-read those resources. If the data used to create
64     * the resource has changed since the resource has been created then the
65     * resource will change too after calling reInit().
66    */
67    public void reInit() throws ResourceInstantiationException {
68      init();
69    } // reInit()
70  
71  
72    /** Set the document to run on. */
73    public void setDocument(Document newDocument) {
74  
75  //    Assert.assertNotNull(newDocument);
76  
77      this.pronominalModule.setDocument(newDocument);
78      super.setDocument(newDocument);
79    }
80  
81  
82    /** --- */
83    public void setAnnotationSetName(String annotationSetName) {
84      this.pronominalModule.setAnnotationSetName(annotationSetName);
85    }
86  
87    /** --- */
88    public String getAnnotationSetName() {
89      return this.pronominalModule.getAnnotationSetName();
90    }
91  
92    /** --- */
93    public void setResolveIt(Boolean newValue) {
94      this.pronominalModule.setResolveIt(newValue);
95    }
96  
97    /** --- */
98    public Boolean getResolveIt() {
99      return this.pronominalModule.getResolveIt();
100   }
101 
102   /**
103    * This method runs the coreferencer. It assumes that all the needed parameters
104    * are set. If they are not, an exception will be fired.
105    */
106   public void execute() throws ExecutionException {
107 
108     this.pronominalModule.execute();
109     generateCorefChains();
110   }
111 
112   /** --- */
113   private void generateCorefChains() throws GateRuntimeException{
114 
115     //1. get the resolved corefs
116     HashMap ana2ant = this.pronominalModule.getResolvedAnaphora();
117 
118     //2. get the outout annotation set
119     String asName = getAnnotationSetName();
120     AnnotationSet outputSet = null;
121 
122     if (null == asName || asName.equals("")) {
123       outputSet = getDocument().getAnnotations();
124     }
125     else {
126       outputSet = getDocument().getAnnotations(asName);
127     }
128 
129     //3. generate new annotations
130     Iterator it = ana2ant.entrySet().iterator();
131     while (it.hasNext()) {
132       Map.Entry currLink = (Map.Entry)it.next();
133       Annotation anaphor = (Annotation)currLink.getKey();
134       Annotation antecedent = (Annotation)currLink.getValue();
135 
136       if (DEBUG) {
137         AnnotationSet corefSet = getDocument().getAnnotations("COREF");
138         Long antOffset = new Long(0);
139 
140         if (null != antecedent) {
141           antOffset = antecedent.getStartNode().getOffset();
142         }
143 
144         FeatureMap features = new SimpleFeatureMapImpl();
145         features.put("antecedent",antOffset);
146         corefSet.add(anaphor.getStartNode(),anaphor.getEndNode(),"COREF",features);
147       }
148 
149       //do we have antecedent?
150       if (null == antecedent) {
151         continue;
152       }
153 
154       //get the ortho-matches of the antecedent
155       List matches = (List)antecedent.getFeatures().
156         get(ANNOTATION_COREF_FEATURE_NAME);
157       if (null == matches) {
158         matches = new ArrayList();
159         matches.add(antecedent.getId());
160         antecedent.getFeatures().
161           put(ANNOTATION_COREF_FEATURE_NAME,matches);
162         //check if the document has a list of matches
163         //if yes, simply add the new list to it
164         //if not, create it and add the list of matches to it
165         if (document.getFeatures().containsKey(
166             DOCUMENT_COREF_FEATURE_NAME)) {
167           Map matchesMap = (Map) document.getFeatures().get(
168                                 DOCUMENT_COREF_FEATURE_NAME);
169           List matchesList = (List) matchesMap.get(getAnnotationSetName());
170           if (matchesList == null) {
171             matchesList = new ArrayList();
172             matchesMap.put(getAnnotationSetName(), matchesList);
173           }
174           matchesList.add(matches);
175         } else {
176           Map matchesMap = new HashMap();
177             List matchesList = new ArrayList();
178             matchesMap.put(getAnnotationSetName(), matchesList);
179             matchesList.add(matches);
180         }//if else
181       }//if matches == null
182 
183       FeatureMap features = new SimpleFeatureMapImpl();
184       features.put(COREF_TYPE_FEATURE_NAME,"PRONOUN");
185       features.put(ANNOTATION_COREF_FEATURE_NAME,matches);
186       features.put(COREF_ANTECEDENT_FEATURE_NAME,
187                    antecedent.getStartNode().getOffset());
188 
189       Integer annID = outputSet.add(anaphor.getStartNode(),
190                                     anaphor.getEndNode(),
191                                     antecedent.getType(),
192                                     features);
193       matches.add(annID);
194     }
195   }
196 
197 }