# ooc.py
# Committed by Sanjay Krishnan
import os
import json

class MemoryLimitedHashMap(object):
  '''
  A MemoryLimitedHashMap simulates a hardware memory limit for a
  key-value data structure. It will raise an exception if the
  limit is exceeded.

  Entries live in an in-memory dict until they are flushed; flushing
  appends the value as one JSON line to a file inside the `diskfile`
  directory and removes the key from memory.

  Keys must be strings. Flushed files are named "<key>_<subkey>" and
  the key is recovered by cutting the file name at its first "_", so
  keys that themselves contain "_" do not round-trip through
  fKeys() / loadAll().
  '''

  def __init__(self, diskfile='disk.file', limit=1000):
    '''
    Creates an empty map backed by a fresh directory.

    diskfile -- path of the directory that holds flushed keys. Anything
                already at this path (directory or plain file) is
                deleted and replaced by an empty directory.
    limit    -- maximum number of keys held in memory at once.
    '''

    if os.path.exists(diskfile):
      print("[Warning] Overwriting the Disk File", diskfile)

      import shutil

      # rmtree only works on real directories; a plain file (or a
      # symlink) left at this path has to be unlinked instead.
      if os.path.isdir(diskfile) and not os.path.islink(diskfile):
        shutil.rmtree(diskfile)
      else:
        os.remove(diskfile)

    os.mkdir(diskfile)
    self.diskfile = diskfile
    self._data = {}
    self.limit = limit

  def size(self):
    '''
    Returns the number of keys currently held in memory.
    '''
    return len(self._data)

  def put(self, k, v):
    '''
    Basically works like dict put.

    Overwriting an existing key is always allowed. Raises ValueError
    when k is a new key and the map already holds `limit` keys.
    '''

    # ">=" rather than "==" so the guard still fires if the map is
    # ever at or beyond the limit (e.g. the limit was lowered).
    if not self.contains(k) and len(self._data) >= self.limit:
      raise ValueError("[Error] Attempting to Insert Into a Full Map: " + str((k,v)))

    self._data[k] = v

  def get(self, k):
    '''
    Basically works like dict get: returns the in-memory value for k
    and raises KeyError if k is not in memory.
    '''

    return self._data[k]

  def contains(self, k):
    '''
    Returns True if k is currently held in memory.
    '''

    return k in self._data

  def keys(self):
    '''
    Returns the set of keys currently held in memory.
    '''

    return set(self._data)

  def fKeys(self):
    '''
    Returns the set of keys that have at least one flushed file on
    disk, under any subkey.
    '''

    return {self.path2Key(name) for name in os.listdir(self.diskfile)}

  def keyPath(self, k, subkey):
    '''
    Returns the path of the file that stores key k under subkey.
    '''
    return self.diskfile+"/"+str(k)+ "_" + subkey

  def path2Key(self, k):
    '''
    Recovers the key from a flushed file name: everything before the
    first "_".
    '''
    return k.split("_")[0]

  def flushKey(self, k, subkey):
    '''
    Removes the key from the dictionary and persists it to disk.

    The value is appended as one JSON line to the file for
    (k, subkey). Raises ValueError if k is not in memory.
    '''
    if not self.contains(k):
      raise ValueError("[Error] Map Does Not Contain " + str(k))

    # "with" guarantees the handle is closed even if serialisation or
    # the write fails; the key is only dropped after a successful write.
    with open(self.keyPath(k, subkey), 'a') as f:
      f.write(json.dumps(self.get(k)) + "\n")

    del self._data[k] #free up the space

  def load(self, k, subkey=""):
    '''
    Streams all of the data from a persisted key.

    Generator yielding (k, value) for every JSON line stored in the
    file for (k, subkey). Raises ValueError if that file does not
    exist; because this is a generator, the error surfaces when
    iteration starts rather than when load() is called.
    '''
    fname = self.keyPath(k, subkey)

    if not os.path.exists(fname):
      raise ValueError("[Error] Disk Does Not Contain " + str(k))

    # The file is closed once the generator is exhausted or discarded.
    with open(fname, 'r') as f:
      for line in f:
        yield (k, json.loads(line.strip()))

  def loadAll(self, subkey=""):
    '''
    Streams all of the data from all keys.

    Yields (key, value) first for every key held in memory, then for
    every value that was flushed to disk under `subkey`.
    '''

    for k in self.keys():
      yield (k, self.get(k))

    for k in self.fKeys():
      # fKeys() reports keys flushed under any subkey; skip those with
      # no file for the requested subkey instead of failing mid-stream.
      if not os.path.exists(self.keyPath(k, subkey)):
        continue

      for _, v in self.load(k, subkey):
        yield (k, v)