ooc.py 2.95 KB
Newer Older
Sanjay Krishnan committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
import os
import json

class MemoryLimitedHashMap(object):
  '''
  A MemoryLimitedHashMap simulates a hardware memory limit for a
  key-value data structure. It will raise an exception if the
  limit is exceeded.

  In-memory entries live in a dict capped at `limit` keys. An entry can
  be evicted with flushKey(), which appends its value as one JSON line to
  the file "<diskfile>/<key>_<subkey>", and streamed back with load() or
  loadAll().

  Keys must be strings. File names are split on "_" to recover the key,
  so a key (or subkey) that itself contains "_" is not recovered intact
  by flushed().
  '''

  def __init__(self, diskfile='disk.file', limit=1000):
    '''
    The constructor takes a reference to a persistent location
    and a memory limit.

    diskfile: path of the directory that will hold flushed keys. Whatever
              already exists at this path is deleted (a warning is
              printed) and a fresh, empty directory is created.
    limit:    maximum number of keys held in memory at once.
    '''

    if os.path.exists(diskfile):
      print("[Warning] Overwriting the Disk File", diskfile)

      import shutil

      # rmtree only accepts real directories; a stale regular file (or a
      # symlink) at the same path has to be unlinked instead.
      if os.path.isdir(diskfile) and not os.path.islink(diskfile):
        shutil.rmtree(diskfile)
      else:
        os.remove(diskfile)

    os.mkdir(diskfile)
    self.diskfile = diskfile
    self._data = {}
    self.limit = limit

  def size(self):
    '''
    Returns the number of keys currently held in memory.
    '''

    return len(self._data)

  def put(self, k, v):
    '''
    Basically works like dict put.

    Overwriting an existing key is always allowed. Inserting a new key
    raises ValueError once the map already holds `limit` keys.
    '''

    # ">=" rather than "==" so the cap still holds if the map is ever
    # above the limit (e.g. the limit attribute was lowered afterwards).
    if not self.contains(k) and len(self._data) >= self.limit:
      raise ValueError("[Error] Attempting to Insert Into a Full Map: " + str((k,v)))

    self._data[k] = v

  def get(self, k):
    '''
    Basically works like dict get: returns the in-memory value for k and
    raises KeyError if k is not in memory.
    '''

    return self._data[k]

  def contains(self, k):
    '''
    Basically works like hash map contains (in-memory keys only).
    '''

    return (k in self._data)

  def keys(self):
    '''
    Returns a set of the keys currently held in memory.
    '''

    return set(self._data)

  def flushed(self, returnSubKeys=False):
    '''
    Returns a set describing the files that have been flushed to disk.

    if returnSubKeys=False
    each element is the key part of a file name (see path2Key), so a key
    flushed under several subkeys appears once.

    if returnSubKeys=True
    each element is a tuple of the "_"-separated parts of a file name,
    e.g. (key, subkey); a key flushed without a subkey gives (key, "").
    '''

    names = os.listdir(self.diskfile)

    if not returnSubKeys:
      return set([self.path2Key(name) for name in names])

    # path2Subkey returns a list, which is unhashable; convert to a tuple
    # so the parts can be stored in a set.
    return set([tuple(self.path2Subkey(name)) for name in names])

  def keyPath(self, k, subkey):
    '''
    Returns the path of the file that stores key k under subkey.
    '''

    return self.diskfile+"/"+str(k)+ "_" + subkey

  def path2Key(self, k):
    '''
    Returns the key part of a flushed file name: everything before the
    first "_".
    '''

    return k.split("_")[0]

  def path2Subkey(self, k):
    '''
    Returns the list of "_"-separated parts of a flushed file name,
    e.g. "a_x" -> ["a", "x"] and "a_" -> ["a", ""].
    '''

    return k.split("_")

  def flushKey(self, k, subkey=""):
    '''
    Removes the key from the dictionary and
    persists it to disk.

    The value is appended as one JSON line to the file for (k, subkey),
    so flushing the same key repeatedly accumulates lines. Raises
    ValueError if k is not in memory. If the value is not JSON
    serialisable the error from json.dumps propagates and neither the
    map nor the disk is modified.
    '''

    if not self.contains(k):
      raise ValueError("[Error] Map Does Not Contain " + str(k))

    # Serialise before touching the disk so a failure cannot leave an
    # empty file behind.
    payload = json.dumps(self.get(k)) + "\n"

    with open(self.keyPath(k, subkey), 'a') as f:
      f.write(payload)

    del self._data[k] #free up the space

  def load(self, k, subkey=""):
    '''
    Streams all of the data from a persisted key.

    This is a generator yielding (k, value) for every JSON line stored
    for (k, subkey). Because it is a generator, the ValueError for a
    missing file is raised when iteration starts, not when load() is
    called.
    '''

    fname = self.keyPath(k, subkey)

    if not os.path.exists(fname):
      raise ValueError("[Error] Disk Does Not Contain " + str(k))

    # The with-block closes the file once the stream is exhausted or the
    # generator is closed.
    with open(fname, 'r') as f:
      for line in f:
        yield (k, json.loads(line.strip()))

  def loadAll(self, subkey="", inMemory=False):
    '''
    Streams all of the data from all keys.

    If inMemory is true, first yields (key, value) for every key still
    in memory. Then, for every key reported by flushed(), yields each
    value stored for (key, subkey). A flushed key that has no file for
    the requested subkey makes load() raise ValueError.
    '''

    if inMemory:
      for k in self.keys():
        yield (k, self.get(k))

    for k in self.flushed():
      for _, v in self.load(k, subkey):
        yield (k, v)