1   /*
2    *  Coreferencer.java
3    *
4    *  Copyright (c) 1998-2004, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Marin Dimitrov, 18/Dec/2001
12   *
13   *  $Id: Coreferencer.java,v 1.17 2004/07/21 17:10:04 akshay Exp $
14   */
15  
16  package gate.creole.coref;
17  
18  import java.util.*;
19  
20  import gate.*;
21  import gate.creole.*;
22  import gate.util.GateRuntimeException;
23  import gate.util.SimpleFeatureMapImpl;
24  
25  public class Coreferencer extends AbstractLanguageAnalyser
26                            implements ProcessingResource{
27  
28    public static final String COREF_DOCUMENT_PARAMETER_NAME = "document";
29  
30    public static final String COREF_ANN_SET_PARAMETER_NAME = "annotationSetName";
31  
32    public static final String COREF_TYPE_FEATURE_NAME = "ENTITY_MENTION_TYPE";
33    public static final String COREF_ANTECEDENT_FEATURE_NAME = "antecedent_offset";
34  
35    /** --- */
36    private static final boolean DEBUG = false;
37    /** --- */
38    private PronominalCoref pronominalModule;
39  
40    /** --- */
41    public Coreferencer() {
42      this.pronominalModule = new PronominalCoref();
43    }
44  
45  
46    /** Initialise this resource, and return it. */
47    public Resource init() throws ResourceInstantiationException {
48  
49      Resource result = super.init();
50  
51      //load all submodules
52      this.pronominalModule.init();
53  
54      return result;
55    } // init()
56  
57  
58    /**
59     * Reinitialises the processing resource. After calling this method the
60     * resource should be in the state it is after calling init.
61     * If the resource depends on external resources (such as rules files) then
62     * the resource will re-read those resources. If the data used to create
63     * the resource has changed since the resource has been created then the
64     * resource will change too after calling reInit().
65    */
66    public void reInit() throws ResourceInstantiationException {
67      init();
68    } // reInit()
69  
70  
71    /** Set the document to run on. */
72    public void setDocument(Document newDocument) {
73  
74  //    Assert.assertNotNull(newDocument);
75  
76      this.pronominalModule.setDocument(newDocument);
77      super.setDocument(newDocument);
78    }
79  
80  
81    /** --- */
82    public void setAnnotationSetName(String annotationSetName) {
83      this.pronominalModule.setAnnotationSetName(annotationSetName);
84    }
85  
86    /** --- */
87    public String getAnnotationSetName() {
88      return this.pronominalModule.getAnnotationSetName();
89    }
90  
91    /** --- */
92    public void setResolveIt(Boolean newValue) {
93      this.pronominalModule.setResolveIt(newValue);
94    }
95  
96    /** --- */
97    public Boolean getResolveIt() {
98      return this.pronominalModule.getResolveIt();
99    }
100 
101   /**
102    * This method runs the coreferencer. It assumes that all the needed parameters
103    * are set. If they are not, an exception will be fired.
104    */
105   public void execute() throws ExecutionException {
106 
107     this.pronominalModule.execute();
108     generateCorefChains();
109   }
110 
111   /** --- */
112   private void generateCorefChains() throws GateRuntimeException{
113 
114     //1. get the resolved corefs
115     HashMap ana2ant = this.pronominalModule.getResolvedAnaphora();
116 
117     //2. get the outout annotation set
118     String asName = getAnnotationSetName();
119     AnnotationSet outputSet = null;
120 
121     if (null == asName || asName.equals("")) {
122       outputSet = getDocument().getAnnotations();
123     }
124     else {
125       outputSet = getDocument().getAnnotations(asName);
126     }
127 
128     //3. generate new annotations
129     Iterator it = ana2ant.entrySet().iterator();
130     while (it.hasNext()) {
131       Map.Entry currLink = (Map.Entry)it.next();
132       Annotation anaphor = (Annotation)currLink.getKey();
133       Annotation antecedent = (Annotation)currLink.getValue();
134 
135       if (DEBUG) {
136         AnnotationSet corefSet = getDocument().getAnnotations("COREF");
137         Long antOffset = new Long(0);
138 
139         if (null != antecedent) {
140           antOffset = antecedent.getStartNode().getOffset();
141         }
142 
143         FeatureMap features = new SimpleFeatureMapImpl();
144         features.put("antecedent",antOffset);
145         corefSet.add(anaphor.getStartNode(),anaphor.getEndNode(),"COREF",features);
146       }
147 
148       //do we have antecedent?
149       if (null == antecedent) {
150         continue;
151       }
152 
153       //get the ortho-matches of the antecedent
154       List matches = (List)antecedent.getFeatures().
155         get(ANNOTATION_COREF_FEATURE_NAME);
156       if (null == matches) {
157         matches = new ArrayList();
158         matches.add(antecedent.getId());
159         antecedent.getFeatures().
160           put(ANNOTATION_COREF_FEATURE_NAME,matches);
161         //check if the document has a list of matches
162         //if yes, simply add the new list to it
163         //if not, create it and add the list of matches to it
164         if (document.getFeatures().containsKey(
165             DOCUMENT_COREF_FEATURE_NAME)) {
166           Map matchesMap = (Map) document.getFeatures().get(
167                                 DOCUMENT_COREF_FEATURE_NAME);
168           List matchesList = (List) matchesMap.get(getAnnotationSetName());
169           if (matchesList == null) {
170             matchesList = new ArrayList();
171             matchesMap.put(getAnnotationSetName(), matchesList);
172           }
173           matchesList.add(matches);
174         } else {
175           Map matchesMap = new HashMap();
176             List matchesList = new ArrayList();
177             matchesMap.put(getAnnotationSetName(), matchesList);
178             matchesList.add(matches);
179         }//if else
180       }//if matches == null
181 
182       FeatureMap features = new SimpleFeatureMapImpl();
183       features.put(COREF_TYPE_FEATURE_NAME,"PRONOUN");
184       features.put(ANNOTATION_COREF_FEATURE_NAME,matches);
185       features.put(COREF_ANTECEDENT_FEATURE_NAME,
186                    antecedent.getStartNode().getOffset());
187 
188       Integer annID = outputSet.add(anaphor.getStartNode(),
189                                     anaphor.getEndNode(),
190                                     antecedent.getType(),
191                                     features);
192       matches.add(annID);
193     }
194   }
195 
196 }