|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: INNER | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object | +--gate.util.AbstractFeatureBearer | +--gate.creole.AbstractResource | +--gate.creole.AbstractProcessingResource | +--gate.creole.namematch.Namematch
Field Summary | |
protected HashMap |
alias
|
protected String |
annotationSetName
the name of the annotation set |
protected String |
annotationType
the type of annotation |
protected Set |
annotationTypes
the types of the annotation |
private static int |
BUFF_SIZE
the size of the buffer |
private char[] |
cbuffer
a buffer in order to read an array of char |
protected HashMap |
cdg
|
protected HashMap |
connector
|
protected HashMap |
def_art
|
protected Document |
document
the document for namematch |
protected ExecutionException |
executionException
|
protected Boolean |
extLists
internal or external list |
protected List |
matchesDocument
the set with all the matches from document |
protected AnnotationSet |
nameAnnots
the annotation set for the document |
protected String |
organizationType
the organization type |
protected String |
personType
the person type |
protected HashMap |
prepos
|
protected HashMap |
spur_match
|
Fields inherited from class gate.creole.AbstractResource |
serialVersionUID |
Fields inherited from class gate.util.AbstractFeatureBearer |
features |
Constructor Summary | |
Namematch()
|
Method Summary | |
private boolean |
apply_rules_namematch(String shortName,
String longName)
apply_rules_namematch: apply rules similarly to lasie1.5's namematch |
private void |
buildTables(Document doc)
Tables for namematch info (used by the namematch rules) |
void |
check()
Trigger any exception that was caught when run() was invoked. |
void |
createAnnotList(String nameFile,
String nameList)
creates the lookup tables |
void |
createLists()
if (extLists == false) then reads the names of files in order to create the lookup tables |
void |
determineMatchesDocument()
all the matches from the current document are placed in a list |
String |
getAnnotationSetName()
get the name of the annotation set |
Set |
getAnnotationTypes()
get the types of the annotation |
Document |
getDocument()
Gets the document currently set as target for this namematch. |
Boolean |
getExtList()
|
List |
getMatchesDocument()
|
String |
getOrganizationType()
|
String |
getPersonType()
|
Resource |
init()
Initialise this resource, and return it. |
boolean |
matchRule0(String s1,
String s2)
RULE #0: If the two names are listed in the table of spurious matches then they do NOT match Condition(s): - Applied to: all name annotations |
boolean |
matchRule1(String s1,
String s2,
boolean MatchCase)
RULE #1: If the two names are identical then they are the same Condition(s): depend on case Applied to: all name annotations |
boolean |
matchRule10(String s1,
String s2)
RULE #10: is one name the reverse of the other reversing around prepositions only? e.g. |
boolean |
matchRule11(String s1,
String s2)
RULE #11: does one name consist of contractions of the first two tokens of the other name? e.g. |
boolean |
matchRule12(String s1,
String s2)
RULE #12: do the first and last tokens of one name match the first and last tokens of the other? Condition(s): case-sensitive match Applied to: organisation annotations only |
boolean |
matchRule13(String s1,
String s2)
RULE #13: do multi-word names match except for one token e.g. |
boolean |
matchRule2(String s1,
String s2)
RULE #2: if the two names are listed as equivalent in the lookup table (alias) then they match Condition(s): - Applied to: all name annotations |
boolean |
matchRule3(String s1,
String s2)
RULE #3: adding a possessive at the end of one name causes a match e.g. |
boolean |
matchRule4(String s1,
String s2)
RULE #4: Do all tokens other than the punctuation marks , and . |
boolean |
matchRule5(String s1,
String s2)
RULE #5: if the 1st token of one name matches the second name e.g. |
boolean |
matchRule6(String s1,
String s2)
RULE #6: if one name is the acronym of the other e.g. |
boolean |
matchRule7(String s1,
String s2)
RULE #7: if one of the tokens in one of the names is in the list of separators e.g. |
boolean |
matchRule8(String s1,
String s2)
RULE #8: if the names match after stripping off "The" and trailing company designator e.g. |
boolean |
matchRule9(String s1,
String s2)
RULE #9: does one of the names match the token just before a trailing company designator in the other name? e.g. |
String |
regularExpressions(String text,
String replacement,
String regEx)
substitute all multiple spaces, tabs and newlines with a single space |
void |
run()
Run the resource. |
void |
setAnnotationSetName(String newAnnotationSetName)
set the annotation set name |
void |
setAnnotationTypes(Set newType)
set the types of the annotations |
void |
setDocument(Document newDocument)
set the document |
void |
setExtLists(Boolean newExtLists)
set the extLists flag (internal or external list) |
void |
setOrganizationType(String newOrganizationType)
|
void |
setPersonType(String newPersonType)
|
Methods inherited from class gate.creole.AbstractProcessingResource |
reInit |
Methods inherited from class gate.creole.AbstractResource |
getName, setName |
Methods inherited from class gate.util.AbstractFeatureBearer |
getFeatures, setFeatures |
Methods inherited from class java.lang.Object |
|
Methods inherited from interface gate.ProcessingResource |
reInit |
Methods inherited from interface gate.util.FeatureBearer |
getFeatures, getName, setFeatures, setName |
Field Detail |
protected Document document
protected String annotationSetName
protected Set annotationTypes
protected String organizationType
protected String personType
protected String annotationType
protected Boolean extLists
protected AnnotationSet nameAnnots
protected List matchesDocument
protected ExecutionException executionException
protected HashMap alias
protected HashMap cdg
protected HashMap spur_match
protected HashMap def_art
protected HashMap connector
protected HashMap prepos
private char[] cbuffer
private static final int BUFF_SIZE
Constructor Detail |
public Namematch()
Method Detail |
public Resource init() throws ResourceInstantiationException
init
in interface Resource
init
in class AbstractProcessingResource
public void run()
run
in interface Runnable
run
in class AbstractProcessingResource
public void determineMatchesDocument()
public void check() throws ExecutionException
ProcessingResource
check
in interface ProcessingResource
check
in class AbstractProcessingResource
public void createLists() throws IOException
public void createAnnotList(String nameFile, String nameList) throws IOException
private boolean apply_rules_namematch(String shortName, String longName)
public void setDocument(Document newDocument)
public void setExtLists(Boolean newExtLists)
public void setAnnotationSetName(String newAnnotationSetName)
public void setAnnotationTypes(Set newType)
public void setOrganizationType(String newOrganizationType)
public void setPersonType(String newPersonType)
public Document getDocument()
Document
public String getAnnotationSetName()
public Set getAnnotationTypes()
public String getOrganizationType()
public String getPersonType()
public Boolean getExtList()
public List getMatchesDocument()
public boolean matchRule0(String s1, String s2)
public boolean matchRule1(String s1, String s2, boolean MatchCase)
public boolean matchRule2(String s1, String s2)
public boolean matchRule3(String s1, String s2)
public boolean matchRule4(String s1, String s2)
public boolean matchRule5(String s1, String s2)
public boolean matchRule6(String s1, String s2)
public boolean matchRule7(String s1, String s2)
public boolean matchRule8(String s1, String s2)
public boolean matchRule9(String s1, String s2)
public boolean matchRule10(String s1, String s2)
public boolean matchRule11(String s1, String s2)
public boolean matchRule12(String s1, String s2)
public boolean matchRule13(String s1, String s2)
private void buildTables(Document doc)
public String regularExpressions(String text, String replacement, String regEx)
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: INNER | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |