# ooc.py (2.58 KB) - committed by Sanjay Krishnan
import os
import json

class MemoryLimitedHashMap(object):
    '''
    A MemoryLimitedHashMap simulates a hardware memory limit for a
    key-value data structure. It will raise an exception if the
    limit is exceeded.

    In-memory entries live in a plain dict. Entries can be flushed to
    a directory on disk, one file per (key, subkey) pair, to free up
    room in memory.

    Keys must be strings. Flushed values are stored as JSON, so they
    must be JSON-serializable.
    '''

    def __init__(self, diskfile='disk.file', limit=1000):
        '''
        The constructor takes the path of a directory used as the
        persistent store and a memory limit (maximum number of keys
        held in memory at once).

        Anything already present at `diskfile` is deleted and replaced
        by a fresh, empty directory.
        '''

        if os.path.exists(diskfile):
            print("[Warning] Overwriting the Disk File", diskfile)

            import shutil

            # rmtree only works on real directories; a stale regular
            # file (or a symlink) at the same path must be unlinked.
            if os.path.isdir(diskfile) and not os.path.islink(diskfile):
                shutil.rmtree(diskfile)
            else:
                os.remove(diskfile)

        os.mkdir(diskfile)
        self.diskfile = diskfile
        self._data = {}
        self.limit = limit

    def size(self):
        '''
        Returns the number of keys currently held in memory.
        '''

        return len(self._data)

    def put(self, k, v):
        '''
        Basically works like dict put.

        Overwriting an existing key is always allowed. Inserting a new
        key into a full map raises ValueError.
        '''

        # >= rather than == so the check still holds if `limit` was
        # lowered after entries were inserted.
        if k not in self._data and len(self._data) >= self.limit:
            raise ValueError(
                "[Error] Attempting to Insert Into a Full Map: " + str((k, v)))

        self._data[k] = v

    def get(self, k):
        '''
        Basically works like dict get (raises KeyError if the key is
        not in memory).
        '''

        return self._data[k]

    def contains(self, k):
        '''
        Basically works like hash map contains (in-memory keys only).
        '''

        return k in self._data

    def keys(self):
        '''
        Returns a set of the keys currently in memory.

        The set is a snapshot, so it is safe to modify the map while
        iterating over it.
        '''

        return set(self._data)

    def flushed(self):
        '''
        Returns a set of the keys that have been flushed to disk,
        recovered from the file names in the disk directory.
        '''

        return {self.path2Key(name) for name in os.listdir(self.diskfile)}

    def keyPath(self, k, subkey):
        '''
        Returns the path of the file that stores key `k` under the
        given `subkey`: "<diskfile>/<k>_<subkey>".
        '''

        return self.diskfile + "/" + str(k) + "_" + subkey

    def path2Key(self, k):
        '''
        Recovers the key from a file name produced by keyPath (the
        name only, without the directory part).

        NOTE: the key is everything before the first underscore, so a
        key that itself contains "_" is not recovered correctly.
        '''

        return k.split("_")[0]

    def flushKey(self, k, subkey):
        '''
        Removes the key from the dictionary and
        persists it to disk.

        The value is appended as one JSON line to the file for
        (k, subkey), so flushing the same pair repeatedly accumulates
        lines. Raises ValueError if the key is not in memory.
        '''

        if not self.contains(k):
            raise ValueError("[Error] Map Does Not Contain " + str(k))

        with open(self.keyPath(k, subkey), 'a') as f:
            f.write(json.dumps(self.get(k)) + "\n")

        del self._data[k]  # free up the space

    def load(self, k, subkey=""):
        '''
        Streams all of the data from a persisted key as (k, value)
        tuples, one per line stored in the file for (k, subkey).

        This is a generator: the ValueError for a missing file is
        raised when iteration starts, not when load() is called.
        '''

        fname = self.keyPath(k, subkey)

        if not os.path.exists(fname):
            raise ValueError("[Error] Disk Does Not Contain " + str(k))

        # The with-block closes the file once the stream is exhausted
        # or the generator is closed early.
        with open(fname, 'r') as f:
            for line in f:
                yield (k, json.loads(line.strip()))

    def loadAll(self, subkey=""):
        '''
        Streams all of the data from all keys as (key, value) tuples:
        first the in-memory entries, then the entries flushed to disk
        under `subkey`.

        A key that is both in memory and on disk is yielded from both
        places. Raises ValueError (from load) if a flushed key has no
        file for the requested `subkey`.
        '''

        for k in self.keys():
            yield (k, self.get(k))

        for k in self.flushed():
            for _, v in self.load(k, subkey):
                yield (k, v)