gate.util
Class CorpusBenchmarkTool

java.lang.Object
  |
  +--gate.util.CorpusBenchmarkTool

public class CorpusBenchmarkTool
extends Object


Field Summary
private  String annotSetName
           
private static List annotTypes
           
private  Controller application
           
private  File applicationFile
           
(package private)  double beta
           
private static String CLEAN_DIR_NAME
           
private  Properties configs
           
private  HashMap correctByType
           
private  File currDir
           
private static String CVS_DIR_NAME
           
private static boolean DEBUG
           
private  int docNumber
           
private  HashMap fMeasureByType
           
private  HashMap fMeasureCountByType
           
private  double fMeasureSum
           
private  boolean isGenerateMode
          If true, the corpus tool will generate the corpus; otherwise it will run in evaluate mode.
private  boolean isMarkedClean
           
private  boolean isMarkedDS
           
private  boolean isMarkedStored
          If true, the corpus tool will evaluate the stored documents against the human-marked documents.
private  boolean isVerboseMode
           
private static String MARKED_DIR_NAME
           
private  HashMap missingByType
           
private  String outputSetName
           
private  HashMap partialByType
           
private  HashMap prCountByType
           
private  HashMap precisionByType
           
private  double precisionSum
           
private static String PROCESSED_DIR_NAME
           
private  HashMap recallByType
           
private  double recallSum
           
private  HashMap recCountByType
           
private  HashMap spurByType
           
private  File startDir
          The directory from which we should generate/evaluate the corpus
private  double threshold
           
private static String usage
          String to print when the command-line arguments are wrong.
 
Constructor Summary
CorpusBenchmarkTool()
           
 
Method Summary
protected  void evaluateAllThree(Document persDoc, Document cleanDoc, Document markedDoc)
           
protected  void evaluateCorpus(File fileDir, File processedDir, File markedDir)
           
protected  void evaluateDocuments(Document persDoc, Document cleanDoc, Document markedDoc)
           
protected  void evaluateMarkedClean(File markedDir, File cleanDir)
           
protected  void evaluateMarkedStored(File markedDir, File storedDir)
           
protected  void evaluateTwoDocs(Document keyDoc, Document respDoc)
           
 void execute()
           
 void execute(File dir)
           
protected  void generateCorpus(File fileDir, File outputDir)
           
 boolean getGenerateMode()
           
 boolean getMarkedClean()
           
 boolean getMarkedDS()
           
 boolean getMarkedStored()
           
 double getPrecisionAverage()
          Returns the average precision over the entire set of processed documents.
 double getRecallAverage()
          Returns the average recall over the entire set of processed documents.
 File getStartDirectory()
           
 double getThreshold()
           
 boolean getVerboseMode()
           
 void init()
           
 void initPRs()
           
 boolean isGenerateMode()
           
static void main(String[] args)
           
protected  AnnotationDiff measureDocs(Document keyDoc, Document respDoc, String annotType)
           
protected  void printAnnotations(AnnotationDiff annotDiff, Document keyDoc, Document respDoc)
           
protected  void printAnnotations(Set set, Document doc)
           
 void printStatistics()
           
protected  void printStatsForType(String annotType)
           
protected  void printTableHeader()
           
protected  void processDocument(Document doc)
           
 void setApplicationFile(File newAppFile)
           
 void setGenerateMode(boolean mode)
           
 void setMarkedClean(boolean mode)
           
 void setMarkedDS(boolean mode)
           
 void setMarkedStored(boolean mode)
           
 void setStartDirectory(File dir)
           
 void setThreshold(double newValue)
           
 void setVerboseMode(boolean mode)
           
 void unloadPRs()
           
protected  void updateStatistics(AnnotationDiff annotDiff, String annotType)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

MARKED_DIR_NAME

private static final String MARKED_DIR_NAME
See Also:
Constant Field Values

CLEAN_DIR_NAME

private static final String CLEAN_DIR_NAME
See Also:
Constant Field Values

CVS_DIR_NAME

private static final String CVS_DIR_NAME
See Also:
Constant Field Values

PROCESSED_DIR_NAME

private static final String PROCESSED_DIR_NAME
See Also:
Constant Field Values

DEBUG

private static final boolean DEBUG
See Also:
Constant Field Values

startDir

private File startDir
The directory from which we should generate/evaluate the corpus


currDir

private File currDir

annotTypes

private static List annotTypes

application

private Controller application

applicationFile

private File applicationFile

precisionSum

private double precisionSum

recallSum

private double recallSum

fMeasureSum

private double fMeasureSum

precisionByType

private HashMap precisionByType

prCountByType

private HashMap prCountByType

recallByType

private HashMap recallByType

recCountByType

private HashMap recCountByType

fMeasureByType

private HashMap fMeasureByType

fMeasureCountByType

private HashMap fMeasureCountByType

missingByType

private HashMap missingByType

spurByType

private HashMap spurByType

correctByType

private HashMap correctByType

partialByType

private HashMap partialByType

beta

double beta

docNumber

private int docNumber

isGenerateMode

private boolean isGenerateMode
If true, the corpus tool will generate the corpus; otherwise it will run in evaluate mode.


isVerboseMode

private boolean isVerboseMode

isMarkedStored

private boolean isMarkedStored
If true, the corpus tool will evaluate the stored documents against the human-marked documents.


isMarkedClean

private boolean isMarkedClean

isMarkedDS

private boolean isMarkedDS

annotSetName

private String annotSetName

outputSetName

private String outputSetName

threshold

private double threshold

configs

private Properties configs

usage

private static String usage
String to print when the command-line arguments are wrong.

Constructor Detail

CorpusBenchmarkTool

public CorpusBenchmarkTool()
Method Detail

initPRs

public void initPRs()

unloadPRs

public void unloadPRs()

execute

public void execute()

init

public void init()

execute

public void execute(File dir)

main

public static void main(String[] args)
                 throws GateException
Throws: GateException

setGenerateMode

public void setGenerateMode(boolean mode)

getGenerateMode

public boolean getGenerateMode()

getVerboseMode

public boolean getVerboseMode()

setVerboseMode

public void setVerboseMode(boolean mode)

setMarkedStored

public void setMarkedStored(boolean mode)

getMarkedStored

public boolean getMarkedStored()

setMarkedClean

public void setMarkedClean(boolean mode)

getMarkedClean

public boolean getMarkedClean()

setMarkedDS

public void setMarkedDS(boolean mode)

getMarkedDS

public boolean getMarkedDS()

setApplicationFile

public void setApplicationFile(File newAppFile)

getPrecisionAverage

public double getPrecisionAverage()
Returns the average precision over the entire set of processed documents.

If the tool has been evaluating the original documents against the previously stored, automatically annotated ones, then the precision will be the average precision on those two sets.

If the tool was run in -marked mode, i.e., it was evaluating the stored, automatically processed documents against the human-annotated ones, then the precision will be the average precision on those two sets of documents.


getRecallAverage

public double getRecallAverage()
Returns the average recall over the entire set of processed documents.

If the tool has been evaluating the original documents against the previously stored, automatically annotated ones, then the recall will be the average recall on those two sets.

If the tool was run in -marked mode, i.e., it was evaluating the stored, automatically processed documents against the human-annotated ones, then the recall will be the average recall on those two sets of documents.


isGenerateMode

public boolean isGenerateMode()

getThreshold

public double getThreshold()

setThreshold

public void setThreshold(double newValue)

getStartDirectory

public File getStartDirectory()

setStartDirectory

public void setStartDirectory(File dir)

generateCorpus

protected void generateCorpus(File fileDir,
                              File outputDir)

evaluateCorpus

protected void evaluateCorpus(File fileDir,
                              File processedDir,
                              File markedDir)

evaluateMarkedStored

protected void evaluateMarkedStored(File markedDir,
                                    File storedDir)

evaluateMarkedClean

protected void evaluateMarkedClean(File markedDir,
                                   File cleanDir)

processDocument

protected void processDocument(Document doc)

evaluateDocuments

protected void evaluateDocuments(Document persDoc,
                                 Document cleanDoc,
                                 Document markedDoc)
                          throws ResourceInstantiationException
Throws: ResourceInstantiationException

evaluateAllThree

protected void evaluateAllThree(Document persDoc,
                                Document cleanDoc,
                                Document markedDoc)
                         throws ResourceInstantiationException
Throws: ResourceInstantiationException

evaluateTwoDocs

protected void evaluateTwoDocs(Document keyDoc,
                               Document respDoc)
                        throws ResourceInstantiationException
Throws: ResourceInstantiationException

printTableHeader

protected void printTableHeader()

updateStatistics

protected void updateStatistics(AnnotationDiff annotDiff,
                                String annotType)

printStatistics

public void printStatistics()

printStatsForType

protected void printStatsForType(String annotType)

measureDocs

protected AnnotationDiff measureDocs(Document keyDoc,
                                     Document respDoc,
                                     String annotType)
                              throws ResourceInstantiationException
Throws: ResourceInstantiationException

printAnnotations

protected void printAnnotations(AnnotationDiff annotDiff,
                                Document keyDoc,
                                Document respDoc)

printAnnotations

protected void printAnnotations(Set set,
                                Document doc)