|
Coreferencer |
|
1 /* 2 * Coreferencer.java 3 * 4 * Copyright (c) 1998-2001, The University of Sheffield. 5 * 6 * This file is part of GATE (see http://gate.ac.uk/), and is free 7 * software, licenced under the GNU Library General Public License, 8 * Version 2, June 1991 (in the distribution as file licence.html, 9 * and also available at http://gate.ac.uk/gate/licence.html). 10 * 11 * Marin Dimitrov, 18/Dec/2001 12 * 13 * $Id: Coreferencer.java,v 1.15 2002/08/29 16:36:51 kalina Exp $ 14 */ 15 16 package gate.creole.coref; 17 18 import java.util.*; 19 20 import junit.framework.*; 21 22 import gate.*; 23 import gate.creole.*; 24 import gate.util.*; 25 26 public class Coreferencer extends AbstractLanguageAnalyser 27 implements ProcessingResource{ 28 29 public static final String COREF_DOCUMENT_PARAMETER_NAME = "document"; 30 31 public static final String COREF_ANN_SET_PARAMETER_NAME = "annotationSetName"; 32 33 public static final String COREF_TYPE_FEATURE_NAME = "ENTITY_MENTION_TYPE"; 34 public static final String COREF_ANTECEDENT_FEATURE_NAME = "antecedent_offset"; 35 36 /** --- */ 37 private static final boolean DEBUG = false; 38 /** --- */ 39 private PronominalCoref pronominalModule; 40 41 /** --- */ 42 public Coreferencer() { 43 this.pronominalModule = new PronominalCoref(); 44 } 45 46 47 /** Initialise this resource, and return it. */ 48 public Resource init() throws ResourceInstantiationException { 49 50 Resource result = super.init(); 51 52 //load all submodules 53 this.pronominalModule.init(); 54 55 return result; 56 } // init() 57 58 59 /** 60 * Reinitialises the processing resource. After calling this method the 61 * resource should be in the state it is after calling init. 62 * If the resource depends on external resources (such as rules files) then 63 * the resource will re-read those resources. If the data used to create 64 * the resource has changed since the resource has been created then the 65 * resource will change too after calling reInit(). 66 */ 67 public void reInit() throws ResourceInstantiationException { 68 init(); 69 } // reInit() 70 71 72 /** Set the document to run on. */ 73 public void setDocument(Document newDocument) { 74 75 // Assert.assertNotNull(newDocument); 76 77 this.pronominalModule.setDocument(newDocument); 78 super.setDocument(newDocument); 79 } 80 81 82 /** --- */ 83 public void setAnnotationSetName(String annotationSetName) { 84 this.pronominalModule.setAnnotationSetName(annotationSetName); 85 } 86 87 /** --- */ 88 public String getAnnotationSetName() { 89 return this.pronominalModule.getAnnotationSetName(); 90 } 91 92 /** --- */ 93 public void setResolveIt(Boolean newValue) { 94 this.pronominalModule.setResolveIt(newValue); 95 } 96 97 /** --- */ 98 public Boolean getResolveIt() { 99 return this.pronominalModule.getResolveIt(); 100 } 101 102 /** 103 * This method runs the coreferencer. It assumes that all the needed parameters 104 * are set. If they are not, an exception will be fired. 105 */ 106 public void execute() throws ExecutionException { 107 108 this.pronominalModule.execute(); 109 generateCorefChains(); 110 } 111 112 /** --- */ 113 private void generateCorefChains() throws GateRuntimeException{ 114 115 //1. get the resolved corefs 116 HashMap ana2ant = this.pronominalModule.getResolvedAnaphora(); 117 118 //2. get the outout annotation set 119 String asName = getAnnotationSetName(); 120 AnnotationSet outputSet = null; 121 122 if (null == asName || asName.equals("")) { 123 outputSet = getDocument().getAnnotations(); 124 } 125 else { 126 outputSet = getDocument().getAnnotations(asName); 127 } 128 129 //3. generate new annotations 130 Iterator it = ana2ant.entrySet().iterator(); 131 while (it.hasNext()) { 132 Map.Entry currLink = (Map.Entry)it.next(); 133 Annotation anaphor = (Annotation)currLink.getKey(); 134 Annotation antecedent = (Annotation)currLink.getValue(); 135 136 if (DEBUG) { 137 AnnotationSet corefSet = getDocument().getAnnotations("COREF"); 138 Long antOffset = new Long(0); 139 140 if (null != antecedent) { 141 antOffset = antecedent.getStartNode().getOffset(); 142 } 143 144 FeatureMap features = new SimpleFeatureMapImpl(); 145 features.put("antecedent",antOffset); 146 corefSet.add(anaphor.getStartNode(),anaphor.getEndNode(),"COREF",features); 147 } 148 149 //do we have antecedent? 150 if (null == antecedent) { 151 continue; 152 } 153 154 //get the ortho-matches of the antecedent 155 List matches = (List)antecedent.getFeatures(). 156 get(ANNOTATION_COREF_FEATURE_NAME); 157 if (null == matches) { 158 matches = new ArrayList(); 159 matches.add(antecedent.getId()); 160 antecedent.getFeatures(). 161 put(ANNOTATION_COREF_FEATURE_NAME,matches); 162 //check if the document has a list of matches 163 //if yes, simply add the new list to it 164 //if not, create it and add the list of matches to it 165 if (document.getFeatures().containsKey( 166 DOCUMENT_COREF_FEATURE_NAME)) { 167 Map matchesMap = (Map) document.getFeatures().get( 168 DOCUMENT_COREF_FEATURE_NAME); 169 List matchesList = (List) matchesMap.get(getAnnotationSetName()); 170 if (matchesList == null) { 171 matchesList = new ArrayList(); 172 matchesMap.put(getAnnotationSetName(), matchesList); 173 } 174 matchesList.add(matches); 175 } else { 176 Map matchesMap = new HashMap(); 177 List matchesList = new ArrayList(); 178 matchesMap.put(getAnnotationSetName(), matchesList); 179 matchesList.add(matches); 180 }//if else 181 }//if matches == null 182 183 FeatureMap features = new SimpleFeatureMapImpl(); 184 features.put(COREF_TYPE_FEATURE_NAME,"PRONOUN"); 185 features.put(ANNOTATION_COREF_FEATURE_NAME,matches); 186 features.put(COREF_ANTECEDENT_FEATURE_NAME, 187 antecedent.getStartNode().getOffset()); 188 189 Integer annID = outputSet.add(anaphor.getStartNode(), 190 anaphor.getEndNode(), 191 antecedent.getType(), 192 features); 193 matches.add(annID); 194 } 195 } 196 197 }
|
Coreferencer |
|