# ooc.py -- key-value map with a simulated memory limit and on-disk spill.
import os
import json

class MemoryLimitedHashMap(object):
  '''
  A MemoryLimitedHashMap simulates a hardware memory limit for a
  key-value data structure. It will raise an exception if the
  limit is exceeded.

  Entries live in an in-memory dict that holds at most `limit` keys.
  A key can be flushed to disk, which appends its JSON-encoded value as
  one line to the file "<diskfile>/<key>_<subkey>" and removes the key
  from memory.

  Keys must be strings. The key of a flushed file is recovered by
  splitting the file name on its first "_", so keys must not contain
  "_" (subkeys may).
  '''

  def __init__(self, diskfile='disk.file', limit=1000):
    '''
    The constructor takes a reference to a persistent location
    and a memory limit.

    diskfile: path of the directory that holds flushed keys. Whatever
              already exists at this path is deleted and replaced by an
              empty directory.
    limit:    maximum number of keys held in memory at once.
    '''

    if os.path.exists(diskfile):
      print("[Warning] Overwriting the Disk File", diskfile)

      # rmtree only works on real directories; a plain file (or a symlink)
      # left at this path has to be unlinked instead.
      if os.path.isdir(diskfile) and not os.path.islink(diskfile):
        import shutil
        shutil.rmtree(diskfile)
      else:
        os.remove(diskfile)

    os.mkdir(diskfile)
    self.diskfile = diskfile
    self._data = {}
    self.limit = limit

  def size(self):
    '''
    Returns the number of keys currently held in memory.
    '''

    return len(self._data)

  def put(self, k, v):
    '''
    Basically works like dict put.

    Overwriting an existing key is always allowed. Inserting a new key
    raises ValueError once the map already holds `limit` keys.
    '''

    # >= rather than == so a zero/negative or lowered limit still blocks.
    if not self.contains(k) and len(self._data) >= self.limit:
      raise ValueError("[Error] Attempting to Insert Into a Full Map: " + str((k,v)))

    self._data[k] = v

  def get(self, k):
    '''
    Basically works like dict get: returns the in-memory value for k
    and raises KeyError if k is not in memory.
    '''

    return self._data[k]

  def contains(self, k):
    '''
    Basically works like hash map contains (in-memory keys only).
    '''

    return (k in self._data)

  def keys(self):
    '''
    Returns a set of the keys currently held in memory.
    '''

    return set(self._data)

  def flushed(self):
    '''
    Returns a set of the keys that have been flushed to disk.

    A key flushed under several subkeys appears once.
    '''

    return set([self.path2Key(name) for name in os.listdir(self.diskfile)])

  def keyPath(self, k, subkey):
    '''
    Returns the path of the file that stores key k under subkey.
    '''

    return self.diskfile+"/"+str(k)+ "_" + subkey

  def path2Key(self, k):
    '''
    Recovers the key from a file name produced by keyPath.

    Everything before the first "_" is taken as the key, which is why
    keys themselves must not contain "_".
    '''

    return k.split("_")[0]

  def flushKey(self, k, subkey=""):
    '''
    Removes the key from the dictionary and
    persists it to disk.

    The value is appended as one JSON line, so flushing the same
    key/subkey repeatedly accumulates records. Raises ValueError if k is
    not in memory; the key stays in memory if the value cannot be
    serialized or written.
    '''

    if not self.contains(k):
      raise ValueError("[Error] Map Does Not Contain " + str(k))

    # Serialize before touching the disk: opening the file first would
    # leave an empty file behind (and a phantom entry in flushed()) when
    # the value is not JSON-serializable.
    record = json.dumps(self.get(k)) + "\n"

    with open(self.keyPath(k, subkey), 'a') as f:
      f.write(record)

    del self._data[k] #free up the space

  def load(self, k, subkey=""):
    '''
    Streams all of the data from a persisted key.

    Generator yielding (k, value) for every record stored for k under
    subkey. Raises ValueError (on first iteration) if no such file exists.
    '''

    fname = self.keyPath(k, subkey)

    if not os.path.exists(fname):
      raise ValueError("[Error] Disk Does Not Contain " + str(k))

    # The with-block closes the file once the generator is exhausted,
    # closed, or garbage collected.
    with open(fname, 'r') as f:
      for line in f:
        yield (k, json.loads(line.strip()))

  def loadAll(self, subkey="", inMemory=False):
    '''
    Streams all of the data from all keys.

    Generator yielding (key, value) pairs: first the in-memory entries
    (only when inMemory is True), then every record flushed under subkey.
    '''

    if inMemory:
      for k in self.keys():
        yield (k, self.get(k))

    for k in self.flushed():
      # flushed() lists keys stored under any subkey; skip the ones that
      # have nothing under the requested subkey instead of failing in load.
      if not os.path.exists(self.keyPath(k, subkey)):
        continue

      for _, v in self.load(k, subkey):
        yield (k, v)