Commit 92fd772c by Sanjay Krishnan
parents 7da0fcb6 5144136c
Showing with 18 additions and 4 deletions
...@@ -8,7 +8,7 @@ import os ...@@ -8,7 +8,7 @@ import os
import json import json
def imdb_title_words(): def imdb_title_words():
f = open('title.csv','r') f = open('title.csv','r', errors='replace')
line = f.readline() line = f.readline()
while line != "": while line != "":
...@@ -22,7 +22,7 @@ def imdb_title_words(): ...@@ -22,7 +22,7 @@ def imdb_title_words():
f.close() f.close()
def imdb_years(): def imdb_years():
f = open('title.csv','r') f = open('title.csv','r', errors='replace')
line = f.readline() line = f.readline()
while line != "": while line != "":
......
from core import *
from ooc import *
""" """
Get the dataset first, download title.csv put it in the pa1 folder Get the dataset first, download title.csv put it in the pa1 folder
https://www.dropbox.com/s/zl7yt8cl0lvajxg/title.csv?dl=0 https://www.dropbox.com/s/zl7yt8cl0lvajxg/title.csv?dl=0
......
...@@ -66,13 +66,20 @@ class MemoryLimitedHashMap(object): ...@@ -66,13 +66,20 @@ class MemoryLimitedHashMap(object):
return set([k for k in self._data]) return set([k for k in self._data])
def flushed(self): def flushed(self, returnSubKeys=False):
''' '''
Returns a set over keys that have been flushed. Returns a set over keys that have been flushed.
Tuple is (key, location) Tuple is (key, location)
if returnSubKeys=True
The tuple is ((key,subkey), location), if no subkey is provided it's just
(key)
''' '''
if not returnSubKeys:
return set([self.path2Key(k) for k in os.listdir(self.diskfile)]) return set([self.path2Key(k) for k in os.listdir(self.diskfile)])
else:
return set([self.path2Subkey(k) for k in os.listdir(self.diskfile)])
def keyPath(self, k, subkey): def keyPath(self, k, subkey):
return self.diskfile+"/"+str(k)+ "_" + subkey return self.diskfile+"/"+str(k)+ "_" + subkey
...@@ -81,7 +88,11 @@ class MemoryLimitedHashMap(object): ...@@ -81,7 +88,11 @@ class MemoryLimitedHashMap(object):
key = k.split("_")[0] key = k.split("_")[0]
return key return key
def flushKey(self, k, subkey): def path2Subkey(self, k):
key = tuple(k.split("_"))
return key
def flushKey(self, k, subkey=""):
''' '''
Removes the key from the dictionary and Removes the key from the dictionary and
persists it to disk. persists it to disk.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment