|
|||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
  gate.util.CorpusBenchmarkTool
Field Summary | |
private String |
annotSetName
|
private static List |
annotTypes
|
private Controller |
application
|
private File |
applicationFile
|
(package private) double |
beta
|
private static String |
CLEAN_DIR_NAME
|
private Properties |
configs
|
private static int |
corpusWordCount
|
private HashMap |
correctByType
|
private File |
currDir
|
private static String |
CVS_DIR_NAME
|
private static boolean |
DEBUG
|
private Set |
diffFeaturesSet
The list of features used in the AnnotationDiff, separated by comma. Example: "class;inst" |
private int |
docNumber
|
private String |
documentEncoding
|
private static String |
ERROR_DIR_NAME
|
private HashMap |
fMeasureByType
|
private HashMap |
fMeasureCountByType
|
private double |
fMeasureSum
|
(package private) static boolean |
hasProcessed
|
private boolean |
isGenerateMode
If true, the corpus tool will generate the corpus; otherwise it will run in evaluate mode. |
private boolean |
isMarkedClean
|
private boolean |
isMarkedDS
|
private boolean |
isMarkedStored
If true, the corpus tool will evaluate stored against the human-marked documents |
private boolean |
isMoreInfoMode
If true - show more info in document table |
private boolean |
isVerboseMode
If true - show annotations for docs below threshold |
private static String |
MARKED_DIR_NAME
|
private HashMap |
missingByType
|
private String |
outputSetName
|
private HashMap |
partialByType
|
private HashMap |
prCountByType
|
private HashMap |
precisionByType
|
private double |
precisionSum
|
private HashMap |
proc_correctByType
|
private HashMap |
proc_fMeasureByType
|
private HashMap |
proc_fMeasureCountByType
|
private double |
proc_fMeasureSum
|
private HashMap |
proc_missingByType
|
private HashMap |
proc_partialByType
|
private HashMap |
proc_prCountByType
|
private HashMap |
proc_precisionByType
|
private double |
proc_precisionSum
|
private HashMap |
proc_recallByType
|
private double |
proc_recallSum
|
private HashMap |
proc_recCountByType
|
private HashMap |
proc_spurByType
|
private static String |
PROCESSED_DIR_NAME
|
private HashMap |
recallByType
|
private double |
recallSum
|
private HashMap |
recCountByType
|
private HashMap |
spurByType
|
private File |
startDir
The directory from which we should generate/evaluate the corpus |
private double |
threshold
|
private static String |
usage
String to print when the command-line arguments are wrong. |
Constructor Summary | |
CorpusBenchmarkTool()
|
Method Summary | |
protected int |
countWords(Document annotDoc)
Count all Token.kind=word annotations in the document |
protected void |
evaluateAllThree(Document persDoc,
Document cleanDoc,
Document markedDoc,
File errDir)
|
protected void |
evaluateCorpus(File fileDir,
File processedDir,
File markedDir,
File errorDir)
|
protected void |
evaluateDocuments(Document persDoc,
Document cleanDoc,
Document markedDoc,
File errDir)
|
protected void |
evaluateMarkedClean(File markedDir,
File cleanDir,
File errDir)
|
protected void |
evaluateMarkedStored(File markedDir,
File storedDir,
File errDir)
|
protected void |
evaluateTwoDocs(Document keyDoc,
Document respDoc,
File errDir)
|
void |
execute()
|
void |
execute(File dir)
|
protected void |
generateCorpus(File fileDir,
File outputDir)
|
Set |
getDiffFeaturesList()
|
boolean |
getGenerateMode()
|
boolean |
getMarkedClean()
|
boolean |
getMarkedDS()
|
boolean |
getMarkedStored()
|
boolean |
getMoreInfo()
|
double |
getPrecisionAverage()
Returns the average precision over the entire set of processed documents. |
double |
getPrecisionAverageProc()
For processed documents |
double |
getRecallAverage()
Returns the average recall over the entire set of processed documents. |
double |
getRecallAverageProc()
|
File |
getStartDirectory()
|
double |
getThreshold()
|
boolean |
getVerboseMode()
|
void |
init()
|
void |
initPRs()
|
boolean |
isGenerateMode()
|
static void |
main(String[] args)
|
protected AnnotationDiff |
measureDocs(Document keyDoc,
Document respDoc,
String annotType)
|
protected void |
printAnnotations(AnnotationDiff annotDiff,
Document keyDoc,
Document respDoc)
|
protected void |
printAnnotations(Set set,
Document doc)
|
void |
printStatistics()
|
protected void |
printStatsForType(String annotType)
|
protected void |
printTableHeader()
|
protected void |
processDocument(Document doc)
|
void |
setApplicationFile(File newAppFile)
|
void |
setDiffFeaturesList(Set features)
|
void |
setGenerateMode(boolean mode)
|
void |
setMarkedClean(boolean mode)
|
void |
setMarkedDS(boolean mode)
|
void |
setMarkedStored(boolean mode)
|
void |
setMoreInfo(boolean mode)
|
void |
setStartDirectory(File dir)
|
void |
setThreshold(double newValue)
|
void |
setVerboseMode(boolean mode)
|
protected void |
storeAnnotations(String type,
AnnotationDiff annotDiff,
Document keyDoc,
Document respDoc,
FileWriter errFileWriter)
|
protected void |
storeAnnotations(String type,
Set set,
Document doc,
FileWriter file)
|
void |
unloadPRs()
|
protected void |
updateStatistics(AnnotationDiff annotDiff,
String annotType)
|
protected void |
updateStatisticsProc(AnnotationDiff annotDiff,
String annotType)
Update statistics for processed documents. The same procedure as updateStatistics, but with different hash tables. |
Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
private static final String MARKED_DIR_NAME
private static final String CLEAN_DIR_NAME
private static final String CVS_DIR_NAME
private static final String PROCESSED_DIR_NAME
private static final String ERROR_DIR_NAME
private static final boolean DEBUG
private File startDir
private File currDir
private static List annotTypes
private Controller application
private File applicationFile
private double precisionSum
private double recallSum
private double fMeasureSum
private HashMap precisionByType
private HashMap prCountByType
private HashMap recallByType
private HashMap recCountByType
private HashMap fMeasureByType
private HashMap fMeasureCountByType
private HashMap missingByType
private HashMap spurByType
private HashMap correctByType
private HashMap partialByType
static boolean hasProcessed
private double proc_precisionSum
private double proc_recallSum
private double proc_fMeasureSum
private HashMap proc_precisionByType
private HashMap proc_prCountByType
private HashMap proc_recallByType
private HashMap proc_recCountByType
private HashMap proc_fMeasureByType
private HashMap proc_fMeasureCountByType
private HashMap proc_missingByType
private HashMap proc_spurByType
private HashMap proc_correctByType
private HashMap proc_partialByType
double beta
private int docNumber
private boolean isGenerateMode
private boolean isVerboseMode
private boolean isMoreInfoMode
private Set diffFeaturesSet
private boolean isMarkedStored
private boolean isMarkedClean
private boolean isMarkedDS
private String annotSetName
private String outputSetName
private double threshold
private Properties configs
private static int corpusWordCount
private String documentEncoding
private static String usage
Constructor Detail |
public CorpusBenchmarkTool()
Method Detail |
public void initPRs()
public void unloadPRs()
public void execute()
public void init()
public void execute(File dir)
public static void main(String[] args) throws GateException
Throws: GateException
public void setGenerateMode(boolean mode)
public boolean getGenerateMode()
public boolean getVerboseMode()
public void setVerboseMode(boolean mode)
public void setMoreInfo(boolean mode)
public boolean getMoreInfo()
public void setDiffFeaturesList(Set features)
public Set getDiffFeaturesList()
public void setMarkedStored(boolean mode)
public boolean getMarkedStored()
public void setMarkedClean(boolean mode)
public boolean getMarkedClean()
public void setMarkedDS(boolean mode)
public boolean getMarkedDS()
public void setApplicationFile(File newAppFile)
public double getPrecisionAverage()
If the tool has been evaluating the original documents against the previously-stored automatically annotated ones, then the precision will be the average precision on those two sets.
If the tool was run in -marked mode, i.e., was evaluating the stored automatically processed ones against the human-annotated ones, then the precision will be the average precision on those two sets of documents.
public double getRecallAverage()
If the tool has been evaluating the original documents against the previously-stored automatically annotated ones, then the recall will be the average recall on those two sets.
If the tool was run in -marked mode, i.e., was evaluating the stored automatically processed ones against the human-annotated ones, then the recall will be the average recall on those two sets of documents.
public double getPrecisionAverageProc()
public double getRecallAverageProc()
public boolean isGenerateMode()
public double getThreshold()
public void setThreshold(double newValue)
public File getStartDirectory()
public void setStartDirectory(File dir)
protected void generateCorpus(File fileDir, File outputDir)
protected void evaluateCorpus(File fileDir, File processedDir, File markedDir, File errorDir)
protected void evaluateMarkedStored(File markedDir, File storedDir, File errDir)
protected void evaluateMarkedClean(File markedDir, File cleanDir, File errDir)
protected void processDocument(Document doc)
protected void evaluateDocuments(Document persDoc, Document cleanDoc, Document markedDoc, File errDir) throws ResourceInstantiationException
Throws: ResourceInstantiationException
protected int countWords(Document annotDoc)
protected void evaluateAllThree(Document persDoc, Document cleanDoc, Document markedDoc, File errDir) throws ResourceInstantiationException
Throws: ResourceInstantiationException
protected void evaluateTwoDocs(Document keyDoc, Document respDoc, File errDir) throws ResourceInstantiationException
Throws: ResourceInstantiationException
protected void printTableHeader()
protected void updateStatistics(AnnotationDiff annotDiff, String annotType)
protected void updateStatisticsProc(AnnotationDiff annotDiff, String annotType)
public void printStatistics()
protected void printStatsForType(String annotType)
protected AnnotationDiff measureDocs(Document keyDoc, Document respDoc, String annotType) throws ResourceInstantiationException
Throws: ResourceInstantiationException
protected void storeAnnotations(String type, AnnotationDiff annotDiff, Document keyDoc, Document respDoc, FileWriter errFileWriter)
protected void storeAnnotations(String type, Set set, Document doc, FileWriter file) throws IOException
Throws: IOException
protected void printAnnotations(AnnotationDiff annotDiff, Document keyDoc, Document respDoc)
protected void printAnnotations(Set set, Document doc)
|
|||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |