gate.util
Class CorpusBenchmarkTool

java.lang.Object
  |
  +--gate.util.CorpusBenchmarkTool

public class CorpusBenchmarkTool
extends Object


Field Summary
private  String annotSetName
           
private static List annotTypes
           
private static String CLEAN_DIR_NAME
           
private  Properties configs
           
private  File currDir
           
private static String CVS_DIR_NAME
           
private static boolean DEBUG
           
private  int docNumber
           
private  DefaultGazetteer gazetteer
           
private  boolean isGenerateMode
          If true, the corpus tool will generate the corpus; otherwise it will run in evaluate mode.
private  boolean isMarkedClean
           
private  boolean isMarkedStored
          If true, the corpus tool will evaluate the stored documents against the human-marked documents.
private  boolean isVerboseMode
           
private static String MARKED_DIR_NAME
           
private  OrthoMatcher orthomatcher
           
private  HashMap prCountByType
           
private  HashMap precisionByType
           
private  double precisionSum
           
private static String PROCESSED_DIR_NAME
           
private  HashMap recallByType
           
private  double recallSum
           
private  HashMap recCountByType
           
private  AnnotationSetTransfer setTransfer
           
private  SentenceSplitter splitter
           
private  File startDir
          The directory from which we should generate/evaluate the corpus
private  POSTagger tagger
           
private  double threshold
           
private  DefaultTokeniser tokeniser
           
private  ANNIETransducer transducer
           
private static String usage
          String to print when the command-line arguments are wrong.
 
Constructor Summary
CorpusBenchmarkTool()
           
 
Method Summary
protected  void evaluateAllThree(Document persDoc, Document cleanDoc, Document markedDoc)
           
protected  void evaluateCorpus(File fileDir, File processedDir, File markedDir)
           
protected  void evaluateDocuments(Document persDoc, Document cleanDoc, Document markedDoc)
           
protected  void evaluateMarkedClean(File markedDir, File cleanDir)
           
protected  void evaluateMarkedStored(File markedDir, File storedDir)
           
protected  void evaluateTwoDocs(Document keyDoc, Document respDoc)
           
 void execute()
           
 void execute(File dir)
           
protected  void generateCorpus(File fileDir, File outputDir)
           
 boolean getGenerateMode()
           
 boolean getMarkedClean()
           
 boolean getMarkedStored()
           
 double getPrecisionAverage()
          Returns the average precision over the entire set of processed documents.
 double getRecallAverage()
          Returns the average recall over the entire set of processed documents.
 File getStartDirectory()
           
 double getThreshold()
           
 boolean getVerboseMode()
           
 void init()
           
 void initPRs()
           
 boolean isGenerateMode()
           
static void main(String[] args)
           
protected  AnnotationDiff measureDocs(Document keyDoc, Document respDoc, String annotType)
           
protected  void printAnnotations(AnnotationDiff annotDiff, Document keyDoc, Document respDoc)
           
protected  void printAnnotations(Set set, Document doc)
           
protected  void printStatistics()
           
protected  void printTableHeader()
           
protected  void processDocument(Document doc)
           
 void setGenerateMode(boolean mode)
           
 void setMarkedClean(boolean mode)
           
 void setMarkedStored(boolean mode)
           
 void setStartDirectory(File dir)
           
 void setThreshold(double newValue)
           
 void setVerboseMode(boolean mode)
           
 void unloadPRs()
           
protected  void updateStatistics(AnnotationDiff annotDiff, String annotType)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, registerNatives, toString, wait, wait, wait
 

Field Detail

MARKED_DIR_NAME

private static final String MARKED_DIR_NAME

CLEAN_DIR_NAME

private static final String CLEAN_DIR_NAME

CVS_DIR_NAME

private static final String CVS_DIR_NAME

PROCESSED_DIR_NAME

private static final String PROCESSED_DIR_NAME

DEBUG

private static final boolean DEBUG

startDir

private File startDir
The directory from which we should generate/evaluate the corpus

currDir

private File currDir

annotTypes

private static List annotTypes

tokeniser

private DefaultTokeniser tokeniser

gazetteer

private DefaultGazetteer gazetteer

splitter

private SentenceSplitter splitter

tagger

private POSTagger tagger

transducer

private ANNIETransducer transducer

orthomatcher

private OrthoMatcher orthomatcher

setTransfer

private AnnotationSetTransfer setTransfer

precisionSum

private double precisionSum

recallSum

private double recallSum

precisionByType

private HashMap precisionByType

prCountByType

private HashMap prCountByType

recallByType

private HashMap recallByType

recCountByType

private HashMap recCountByType

docNumber

private int docNumber

isGenerateMode

private boolean isGenerateMode
If true, the corpus tool will generate the corpus; otherwise it will run in evaluate mode.

isVerboseMode

private boolean isVerboseMode

isMarkedStored

private boolean isMarkedStored
If true, the corpus tool will evaluate the stored documents against the human-marked documents.

isMarkedClean

private boolean isMarkedClean

annotSetName

private String annotSetName

threshold

private double threshold

configs

private Properties configs

usage

private static String usage
String to print when the command-line arguments are wrong.
Constructor Detail

CorpusBenchmarkTool

public CorpusBenchmarkTool()
Method Detail

initPRs

public void initPRs()

unloadPRs

public void unloadPRs()

execute

public void execute()

init

public void init()

execute

public void execute(File dir)

main

public static void main(String[] args)
                 throws GateException

setGenerateMode

public void setGenerateMode(boolean mode)

getGenerateMode

public boolean getGenerateMode()

getVerboseMode

public boolean getVerboseMode()

setVerboseMode

public void setVerboseMode(boolean mode)

setMarkedStored

public void setMarkedStored(boolean mode)

getMarkedStored

public boolean getMarkedStored()

setMarkedClean

public void setMarkedClean(boolean mode)

getMarkedClean

public boolean getMarkedClean()

getPrecisionAverage

public double getPrecisionAverage()
Returns the average precision over the entire set of processed documents.

If the tool has been evaluating the original documents against the previously-stored automatically annotated ones, then the precision will be the average precision on those two sets.

If the tool was run in -marked mode, i.e., was evaluating the stored automatically processed ones against the human-annotated ones, then the precision will be the average precision on those two sets of documents.


getRecallAverage

public double getRecallAverage()
Returns the average recall over the entire set of processed documents.

If the tool has been evaluating the original documents against the previously-stored automatically annotated ones, then the recall will be the average recall on those two sets.

If the tool was run in -marked mode, i.e., was evaluating the stored automatically processed ones against the human-annotated ones, then the recall will be the average recall on those two sets of documents.


isGenerateMode

public boolean isGenerateMode()

getThreshold

public double getThreshold()

setThreshold

public void setThreshold(double newValue)

getStartDirectory

public File getStartDirectory()

setStartDirectory

public void setStartDirectory(File dir)

generateCorpus

protected void generateCorpus(File fileDir,
                              File outputDir)

evaluateCorpus

protected void evaluateCorpus(File fileDir,
                              File processedDir,
                              File markedDir)

evaluateMarkedStored

protected void evaluateMarkedStored(File markedDir,
                                    File storedDir)

evaluateMarkedClean

protected void evaluateMarkedClean(File markedDir,
                                   File cleanDir)

processDocument

protected void processDocument(Document doc)

evaluateDocuments

protected void evaluateDocuments(Document persDoc,
                                 Document cleanDoc,
                                 Document markedDoc)
                          throws ResourceInstantiationException

evaluateAllThree

protected void evaluateAllThree(Document persDoc,
                                Document cleanDoc,
                                Document markedDoc)
                         throws ResourceInstantiationException

evaluateTwoDocs

protected void evaluateTwoDocs(Document keyDoc,
                               Document respDoc)
                        throws ResourceInstantiationException

printTableHeader

protected void printTableHeader()

updateStatistics

protected void updateStatistics(AnnotationDiff annotDiff,
                                String annotType)

printStatistics

protected void printStatistics()

measureDocs

protected AnnotationDiff measureDocs(Document keyDoc,
                                     Document respDoc,
                                     String annotType)
                              throws ResourceInstantiationException

printAnnotations

protected void printAnnotations(AnnotationDiff annotDiff,
                                Document keyDoc,
                                Document respDoc)

printAnnotations

protected void printAnnotations(Set set,
                                Document doc)