Commit 92fd772c by Sanjay Krishnan
parents 7da0fcb6 5144136c
Showing with 20 additions and 6 deletions
......@@ -8,7 +8,7 @@ import os
import json
def imdb_title_words():
f = open('title.csv','r')
f = open('title.csv','r', errors='replace')
line = f.readline()
while line != "":
......@@ -22,7 +22,7 @@ def imdb_title_words():
f.close()
def imdb_years():
f = open('title.csv','r')
f = open('title.csv','r', errors='replace')
line = f.readline()
while line != "":
......
from core import *
from ooc import *
"""
Get the dataset first: download title.csv and put it in the pa1 folder.
https://www.dropbox.com/s/zl7yt8cl0lvajxg/title.csv?dl=0
......
......@@ -66,13 +66,20 @@ class MemoryLimitedHashMap(object):
return set([k for k in self._data])
def flushed(self):
def flushed(self, returnSubKeys=False):
'''
Returns the set of keys that have been flushed to disk, built from the
file names listed in the self.diskfile directory.

If returnSubKeys is False (the default), each element is
self.path2Key(name) for a file name in that directory.
If returnSubKeys is True, each element is self.path2Subkey(name)
instead, i.e. a tuple of the "_"-separated parts of the file name,
such as (key, subkey).

NOTE(review): earlier wording described the elements as
(key, location) tuples, but nothing here attaches a location to the
returned values -- confirm the intended contract.
'''
return set([self.path2Key(k) for k in os.listdir(self.diskfile)])
if not returnSubKeys:
return set([self.path2Key(k) for k in os.listdir(self.diskfile)])
else:
return set([self.path2Subkey(k) for k in os.listdir(self.diskfile)])
def keyPath(self, k, subkey):
    '''
    Builds the on-disk path for a (key, subkey) pair.

    The result is self.diskfile + "/" + key + "_" + subkey. Both k and
    subkey are converted with str(), so non-string values (e.g. ints) can
    be used for either part without raising TypeError.

    Note: the file name is parsed back by splitting on "_", so a key that
    itself contains "_" cannot be recovered unambiguously.
    '''
    return self.diskfile + "/" + str(k) + "_" + str(subkey)
......@@ -81,7 +88,11 @@ class MemoryLimitedHashMap(object):
key = k.split("_")[0]
return key
def flushKey(self, k, subkey):
def path2Subkey(self, k):
    '''
    Converts a flushed file name back into a tuple of its parts.

    The name is split at the first "_" only: the text before it is the
    key and everything after it is the subkey, so a subkey that contains
    "_" stays in one piece. A name without "_" yields the 1-tuple (name,),
    and a name ending in "_" yields (key, "").
    '''
    # maxsplit=1 keeps any further underscores inside the subkey part.
    return tuple(k.split("_", 1))
def flushKey(self, k, subkey=""):
'''
Removes the key from the dictionary and
persists it to disk.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment