# core.py
'''
The core module sets up the data structures and
references for this programming assignment.

2010
'''

import platform
import csv

# Refuse to run on Windows: print a notice and stop the interpreter as soon as
# this module is imported there.
if platform.system() == 'Windows':
    print("This assignment will not work on a windows computer")
    # exit() is a helper injected by the site module for interactive sessions
    # and may be absent (e.g. under `python -S`); raising SystemExit directly
    # ends the process with the same exit status.
    raise SystemExit


class Catalog():
    """Iterable over the product rows of a catalog CSV file.

    Every call to ``iter()`` re-opens ``filename`` (decoded as ISO-8859-1),
    discards the first row (presumably the column header) and then yields one
    dict per remaining row with the keys ``'id'``, ``'title'``,
    ``'description'``, ``'mfg'`` and ``'price'`` taken from the first five
    columns. All values are the raw strings produced by ``csv.reader``.

    The object is its own iterator, so one instance supports a single pass at
    a time; calling ``iter()`` again restarts from the top of the file.
    """

    def __init__(self, filename):
        self.filename = filename
        # Handle of the pass currently in progress; None while nothing is open.
        self._file = None

    def __iter__(self):
        """Open the file, skip its first row and return ``self``."""
        # Restarting in the middle of a pass must not leak the previous handle.
        self._close()
        self._file = open(self.filename, 'r', encoding="ISO-8859-1")
        self.reader = csv.reader(self._file, delimiter=',', quotechar='"')
        # The default keeps an empty file from raising StopIteration out of
        # iter() itself; it simply yields no rows.
        next(self.reader, None)
        return self

    def __next__(self):
        """Return the next row as a dict; raise StopIteration at end of file."""
        try:
            row = next(self.reader)
        except StopIteration:
            # End of data: release the file, and swap in an empty iterator so
            # further next() calls keep raising StopIteration instead of
            # failing on the closed handle.
            self._close()
            self.reader = iter(())
            raise
        return {'id': row[0],
                'title': row[1],
                'description': row[2],
                'mfg': row[3],
                'price': row[4]
                }

    def _close(self):
        """Close the file of the current pass, if one is open."""
        if self._file is not None:
            self._file.close()
            self._file = None

def google_catalog():
    """Return a Catalog bound to the relative path 'GoogleProducts.csv'."""
    google_products = Catalog('GoogleProducts.csv')
    return google_products

def amazon_catalog():
    """Return a Catalog bound to the relative path 'Amazon.csv'."""
    amazon_products = Catalog('Amazon.csv')
    return amazon_products


def eval_matching(matching):
    """Score a proposed matching against the perfect-mapping CSV file.

    The ground truth is read from 'Amzon_GoogleProducts_perfectMapping.csv'
    (relative path, ISO-8859-1): the first two columns of each row form one
    true match, stored as a 2-tuple of strings.

    Args:
        matching: iterable of hashable items, normally 2-tuples of id strings
            in the same column order as the mapping file. Duplicates are
            counted once.

    Returns:
        A dict with three floats:
          'false positive': 1 - precision (share of proposed pairs that are
              not true matches),
          'false negative': 1 - recall (share of true matches that were not
              proposed),
          'accuracy': the harmonic mean of precision and recall (F1 score).

        Degenerate inputs do not raise: with nothing proposed the precision
        is taken as 1.0 (no false positives were made), with an empty ground
        truth the recall is taken as 1.0, and the F1 score is 0.0 when
        precision and recall are both 0.
    """
    with open('Amzon_GoogleProducts_perfectMapping.csv', 'r',
              encoding="ISO-8859-1") as f:
        reader = csv.reader(f, delimiter=',', quotechar='"')
        # Blank lines come back from csv.reader as [] and are skipped, as is
        # any row with fewer than two columns.
        # NOTE(review): every remaining row counts as a true match, including
        # the first one. If the mapping file starts with a header row it
        # should be skipped here - confirm against the data file.
        matches = {(row[0], row[1]) for row in reader if len(row) >= 2}

    proposed_matches = set(matching)

    tp = proposed_matches & matches   # proposed and correct
    fp = proposed_matches - matches   # proposed but wrong
    fn = matches - proposed_matches   # correct but never proposed

    # Guard each ratio so empty inputs cannot divide by zero.
    prec = len(tp) / (len(tp) + len(fp)) if proposed_matches else 1.0
    rec = len(tp) / (len(tp) + len(fn)) if matches else 1.0
    f1 = 2 * (prec * rec) / (prec + rec) if prec + rec else 0.0

    return {'false positive': 1 - prec,
            'false negative': 1 - rec,
            'accuracy': f1}