|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: INNER | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object | +--gate.util.CorpusBenchmarkTool
Field Summary | |
private String |
annotSetName
|
private static List |
annotTypes
|
private static String |
CLEAN_DIR_NAME
|
private Properties |
configs
|
private File |
currDir
|
private static String |
CVS_DIR_NAME
|
private static boolean |
DEBUG
|
private int |
docNumber
|
private DefaultGazetteer |
gazetteer
|
private boolean |
isGenerateMode
If true, the corpus tool will generate the corpus; otherwise it will run in evaluate mode. |
private boolean |
isMarkedClean
|
private boolean |
isMarkedStored
If true, the corpus tool will evaluate the stored documents against the human-marked documents. |
private boolean |
isVerboseMode
|
private static String |
MARKED_DIR_NAME
|
private OrthoMatcher |
orthomatcher
|
private HashMap |
prCountByType
|
private HashMap |
precisionByType
|
private double |
precisionSum
|
private static String |
PROCESSED_DIR_NAME
|
private HashMap |
recallByType
|
private double |
recallSum
|
private HashMap |
recCountByType
|
private AnnotationSetTransfer |
setTransfer
|
private SentenceSplitter |
splitter
|
private File |
startDir
The directory from which we should generate/evaluate the corpus |
private POSTagger |
tagger
|
private double |
threshold
|
private DefaultTokeniser |
tokeniser
|
private ANNIETransducer |
transducer
|
private static String |
usage
String to print when the command-line arguments are wrong. |
Constructor Summary | |
CorpusBenchmarkTool()
|
Method Summary | |
protected void |
evaluateAllThree(Document persDoc,
Document cleanDoc,
Document markedDoc)
|
protected void |
evaluateCorpus(File fileDir,
File processedDir,
File markedDir)
|
protected void |
evaluateDocuments(Document persDoc,
Document cleanDoc,
Document markedDoc)
|
protected void |
evaluateMarkedClean(File markedDir,
File cleanDir)
|
protected void |
evaluateMarkedStored(File markedDir,
File storedDir)
|
protected void |
evaluateTwoDocs(Document keyDoc,
Document respDoc)
|
void |
execute()
|
void |
execute(File dir)
|
protected void |
generateCorpus(File fileDir,
File outputDir)
|
boolean |
getGenerateMode()
|
boolean |
getMarkedClean()
|
boolean |
getMarkedStored()
|
double |
getPrecisionAverage()
Returns the average precision over the entire set of processed documents. |
double |
getRecallAverage()
Returns the average recall over the entire set of processed documents. |
File |
getStartDirectory()
|
double |
getThreshold()
|
boolean |
getVerboseMode()
|
void |
init()
|
void |
initPRs()
|
boolean |
isGenerateMode()
|
static void |
main(String[] args)
|
protected AnnotationDiff |
measureDocs(Document keyDoc,
Document respDoc,
String annotType)
|
protected void |
printAnnotations(AnnotationDiff annotDiff,
Document keyDoc,
Document respDoc)
|
protected void |
printAnnotations(Set set,
Document doc)
|
protected void |
printStatistics()
|
protected void |
printTableHeader()
|
protected void |
processDocument(Document doc)
|
void |
setGenerateMode(boolean mode)
|
void |
setMarkedClean(boolean mode)
|
void |
setMarkedStored(boolean mode)
|
void |
setStartDirectory(File dir)
|
void |
setThreshold(double newValue)
|
void |
setVerboseMode(boolean mode)
|
protected void |
updateStatistics(AnnotationDiff annotDiff,
String annotType)
|
Methods inherited from class java.lang.Object |
|
Field Detail |
private static final String MARKED_DIR_NAME
private static final String CLEAN_DIR_NAME
private static final String CVS_DIR_NAME
private static final String PROCESSED_DIR_NAME
private static final boolean DEBUG
private File startDir
private File currDir
private static List annotTypes
private DefaultTokeniser tokeniser
private DefaultGazetteer gazetteer
private SentenceSplitter splitter
private POSTagger tagger
private ANNIETransducer transducer
private OrthoMatcher orthomatcher
private AnnotationSetTransfer setTransfer
private double precisionSum
private double recallSum
private HashMap precisionByType
private HashMap prCountByType
private HashMap recallByType
private HashMap recCountByType
private int docNumber
private boolean isGenerateMode
private boolean isVerboseMode
private boolean isMarkedStored
private boolean isMarkedClean
private String annotSetName
private double threshold
private Properties configs
private static String usage
Constructor Detail |
public CorpusBenchmarkTool()
Method Detail |
public void initPRs()
public void execute()
public void init()
public void execute(File dir)
public static void main(String[] args) throws GateException
public void setGenerateMode(boolean mode)
public boolean getGenerateMode()
public boolean getVerboseMode()
public void setVerboseMode(boolean mode)
public void setMarkedStored(boolean mode)
public boolean getMarkedStored()
public void setMarkedClean(boolean mode)
public boolean getMarkedClean()
public double getPrecisionAverage()
If the tool has been evaluating the original documents against the previously-stored automatically annotated ones, then the precision will be the average precision on those two sets.
If the tool was run in -marked mode, i.e., was evaluating the stored automatically processed ones against the human-annotated ones, then the precision will be the average precision on those two sets of documents.
public double getRecallAverage()
If the tool has been evaluating the original documents against the previously-stored automatically annotated ones, then the recall will be the average recall on those two sets.
If the tool was run in -marked mode, i.e., was evaluating the stored automatically processed ones against the human-annotated ones, then the recall will be the average recall on those two sets of documents.
public boolean isGenerateMode()
public double getThreshold()
public void setThreshold(double newValue)
public File getStartDirectory()
public void setStartDirectory(File dir)
protected void generateCorpus(File fileDir, File outputDir)
protected void evaluateCorpus(File fileDir, File processedDir, File markedDir)
protected void evaluateMarkedStored(File markedDir, File storedDir)
protected void evaluateMarkedClean(File markedDir, File cleanDir)
protected void processDocument(Document doc)
protected void evaluateDocuments(Document persDoc, Document cleanDoc, Document markedDoc) throws ResourceInstantiationException
protected void evaluateAllThree(Document persDoc, Document cleanDoc, Document markedDoc) throws ResourceInstantiationException
protected void evaluateTwoDocs(Document keyDoc, Document respDoc) throws ResourceInstantiationException
protected void printTableHeader()
protected void updateStatistics(AnnotationDiff annotDiff, String annotType)
protected void printStatistics()
protected AnnotationDiff measureDocs(Document keyDoc, Document respDoc, String annotType) throws ResourceInstantiationException
protected void printAnnotations(AnnotationDiff annotDiff, Document keyDoc, Document respDoc)
protected void printAnnotations(Set set, Document doc)
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: INNER | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |