# eval.py -- forked from microsoft/DynSP (executable file; 62 lines, 49 loc, 1.78 KB)
import csv, sys
def readTsv(fnTsv):
    """Read a tab-separated annotation file into a nested dictionary.

    The file must start with a header row that names at least the four
    essential columns: ``id``, ``annotator``, ``position`` and
    ``answer_coordinates``.

    Args:
        fnTsv: Path of the TSV file to read.

    Returns:
        A dict ``sid -> pos -> ansCord`` where ``sid`` is the sequence id
        (the ``id`` and ``annotator`` columns joined by a tab), ``pos`` is
        the integer question position and ``ansCord`` is the set of answer
        coordinates.  If the same (sid, pos) pair appears in several rows,
        the last row wins.

    Raises:
        KeyError: if one of the essential columns is missing from the header.
        ValueError: if a ``position`` value is not an integer literal.
    """
    dt = {}
    # Open through a context manager so the handle is closed even when a
    # malformed row raises part-way through the file.
    with open(fnTsv, 'r') as fin:
        for row in csv.DictReader(fin, delimiter='\t'):
            sid = row['id'] + '\t' + row['annotator']  # sequence id
            pos = int(row['position'])                 # position
            # SECURITY NOTE: eval() executes whatever expression the column
            # contains, so this function must only be given trusted files.
            # Consider ast.literal_eval if the column is always a plain
            # Python literal.
            ansCord = set(eval(row['answer_coordinates']))  # answer coordinates
            dt.setdefault(sid, {})[pos] = ansCord
    return dt
def evaluate(fnGold, fnPred):
    """Score a prediction TSV against a gold TSV and print the accuracies.

    Both files are loaded with ``readTsv``.  A question counts as correct
    when the prediction file contains the same sequence id and position and
    its answer-coordinate set equals the gold set.  A sequence counts as
    correct only when every one of its gold questions is correct.  Gold
    sequences that are absent from the predictions count as wrong at the
    sequence level, the answer level and in the per-position break-down.

    The sequence accuracy, answer accuracy and a per-position break-down are
    printed to standard output.  An accuracy whose denominator is zero (for
    example with an empty gold file) is reported as 0.00%.

    Args:
        fnGold: Path of the gold-standard TSV file.
        fnPred: Path of the prediction TSV file.

    Returns:
        The list ``[seqCor, seqCnt, ansCor, ansCnt]``: number of correct
        sequences, total sequences, correct answers and total answers.
    """
    def percent(correct, total):
        # Guard against an empty gold file instead of raising
        # ZeroDivisionError.
        return 100.0 * correct / total if total else 0.0

    dtGold = readTsv(fnGold)
    dtPred = readTsv(fnPred)

    # Calculate both sequence-level accuracy and question-level accuracy
    seqCnt = seqCor = 0
    ansCnt = ansCor = 0
    breakCorrect, breakTotal = {}, {}

    for sid, qa in dtGold.items():
        seqCnt += 1
        ansCnt += len(qa)
        # A sequence missing from the predictions has no answers at all; an
        # empty mapping makes each of its questions count as wrong in the
        # break-down too, keeping the break-down totals equal to ansCnt.
        predQA = dtPred.get(sid, {})
        allQCorrect = sid in dtPred
        for q, a in qa.items():
            breakTotal[q] = breakTotal.get(q, 0) + 1
            breakCorrect.setdefault(q, 0)
            if q in predQA and a == predQA[q]:
                ansCor += 1  # correctly answered question
                breakCorrect[q] += 1
            else:
                allQCorrect = False
        if allQCorrect:
            seqCor += 1

    # Single-argument print(...) behaves the same as a Python 2 print
    # statement and as the Python 3 print function.
    print("Sequence Accuracy = %0.2f%% (%d/%d)" % (percent(seqCor, seqCnt), seqCor, seqCnt))
    print("Answer Accuracy = %0.2f%% (%d/%d)" % (percent(ansCor, ansCnt), ansCor, ansCnt))

    print("Break-down:")
    for q in sorted(breakTotal):
        print("Position %d Accuracy = %0.2f%% (%d/%d)" % (q, percent(breakCorrect[q], breakTotal[q]), breakCorrect[q], breakTotal[q]))

    return [seqCor, seqCnt, ansCor, ansCnt]
if __name__ == '__main__':
    # Command-line entry point: exactly two arguments are expected, the gold
    # TSV followed by the prediction TSV.  Anything else prints the usage
    # line to stderr and exits with status -1.
    if len(sys.argv) == 3:
        fnGold, fnPred = sys.argv[1:]
        evaluate(fnGold, fnPred)
    else:
        sys.stderr.write("Usage: %s goldTsv predTsv\n" % sys.argv[0])
        sys.exit(-1)