# auto_grader.py
import datetime
import csv
from analyze import match

def eval_matching(your_matching, mapping_path='Amzon_GoogleProducts_perfectMapping.csv'):
    """Score a proposed matching against the ground-truth mapping CSV.

    Args:
        your_matching: Iterable of hashable pairs, compared for equality
            against ``(row[0], row[1])`` tuples built from the CSV rows.
        mapping_path: Path of the ground-truth CSV. Defaults to the file the
            grader has always read, so existing callers are unaffected.

    Returns:
        A dict with keys ``'precision'``, ``'recall'`` and ``'accuracy'``.
        The ``'accuracy'`` value is the harmonic mean of precision and
        recall (``2 * p * r / (p + r)``).

    Edge cases:
        * No proposed matches -> precision is 1.0.
        * No ground-truth pairs -> recall is 1.0 (nothing could be missed).
        * Precision and recall both 0 -> ``'accuracy'`` is 0.0 instead of
          raising ZeroDivisionError.

    NOTE(review): every CSV row with at least two fields is treated as a true
    match. If the file starts with a header row, that row is counted as a
    match too -- confirm whether that is intended.
    """
    # The context manager closes the file even if parsing fails; newline=''
    # is the mode the csv module documents for files handed to csv.reader.
    with open(mapping_path, 'r', encoding="ISO-8859-1", newline='') as f:
        reader = csv.reader(f, delimiter=',', quotechar='"')
        # Blank lines come back as [] from csv.reader; skip them (and any
        # one-field row) instead of failing with IndexError.
        matches = {(row[0], row[1]) for row in reader if len(row) >= 2}

    # Materialize once so generators and other non-sized iterables work too.
    proposed_matches = set(your_matching)

    tp = proposed_matches & matches   # proposed and correct
    fp = proposed_matches - matches   # proposed but not in the ground truth
    fn = matches - proposed_matches   # in the ground truth but not proposed

    if not proposed_matches:
        prec = 1.0
    else:
        prec = len(tp) / (len(tp) + len(fp))

    if not matches:
        # Mirrors the precision convention above: nothing to find, so
        # nothing was missed.
        rec = 1.0
    else:
        rec = len(tp) / (len(tp) + len(fn))

    # With zero true positives both terms are 0; report 0.0 rather than
    # dividing by zero.
    if prec + rec == 0:
        f_score = 0.0
    else:
        f_score = 2 * (prec * rec) / (prec + rec)

    return {'precision': prec,
            'recall': rec,
            'accuracy': f_score}

# Run the matcher, score its output, and report quality and elapsed time.
# The timed interval covers both match() and the scoring in eval_matching().
now = datetime.datetime.now()
out = eval_matching(match())
elapsed = datetime.datetime.now() - now
timing = elapsed.total_seconds()

print("----Accuracy----")
# Printed in the order: accuracy, precision, recall (space-separated).
print(*(out[key] for key in ('accuracy', 'precision', 'recall')))
print("---- Timing ----")
print(timing, "seconds")