Merge branch 'master' of https://mit.cs.uchicago.edu/skr/cmsc13600-public

92fd772c · Sanjay Krishnan · 7da0fcb6 · 5144136c · 92fd772c · 92fd772c
Commit 92fd772c authored Mar 26, 2021 by Sanjay Krishnan
Showing with 18 additions and 4 deletions
hw6/core.py
hw6/countD.py
hw6/ooc.py
--- a/hw6/core.py
+++ b/hw6/core.py
@@ -8,7 +8,7 @@ import os
 import json

 def imdb_title_words():
-    f = open('title.csv','r')
+    f = open('title.csv','r', errors='replace')
    line = f.readline()

    while line != "":
@@ -22,7 +22,7 @@ def imdb_title_words():
    f.close()

 def imdb_years():
-    f = open('title.csv','r')
+    f = open('title.csv','r', errors='replace')
    line = f.readline()

    while line != "":

--- a/hw6/countD.py
+++ b/hw6/countD.py
+from core import *
+from ooc import *
+
 """
 Get the dataset first, download title.csv put it in the pa1 folder
 https://www.dropbox.com/s/zl7yt8cl0lvajxg/title.csv?dl=0

--- a/hw6/ooc.py
+++ b/hw6/ooc.py
@@ -66,13 +66,20 @@ class MemoryLimitedHashMap(object):
    return set([k for k in self._data])


-  def flushed(self):
+  def flushed(self, returnSubKeys=False):
    '''
    Returns a set over keys that have been flushed. 
    Tuple is (key, location)
+    
+    If returnSubKeys is True, each element is instead the file name split on
+    "_": (key, subkey), or (key, "") when the subkey is empty. Keys or subkeys
+    that themselves contain "_" yield longer tuples.
    '''
    
+    if not returnSubKeys:    
        return set([self.path2Key(k) for k in os.listdir(self.diskfile)])
+    else:
+        return set([self.path2Subkey(k) for k in os.listdir(self.diskfile)])

  def keyPath(self, k, subkey):
    return self.diskfile+"/"+str(k)+ "_" + subkey
@@ -81,7 +88,11 @@ class MemoryLimitedHashMap(object):
    key = k.split("_")[0]
    return key

-  def flushKey(self, k, subkey):
+  def path2Subkey(self, k):
+    key = tuple(k.split("_"))
+    return key
+
+  def flushKey(self, k, subkey=""):
    '''
    Removes the key from the dictionary and 
    persists it to disk.