summary refs log tree commit diff stats
path: root/Lib/dumbdbm.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/dumbdbm.py')
-rw-r--r-- Lib/dumbdbm.py 97
1 files changed, 69 insertions, 28 deletions
diff --git a/Lib/dumbdbm.py b/Lib/dumbdbm.py
index b932f84..1af22b1 100644
--- a/Lib/dumbdbm.py
+++ b/Lib/dumbdbm.py
@@ -33,11 +33,26 @@ error = IOError # For anydbm
class _Database(UserDict.DictMixin):
- def __init__(self, file, mode):
+ def __init__(self, filebasename, mode):
self._mode = mode
- self._dirfile = file + _os.extsep + 'dir'
- self._datfile = file + _os.extsep + 'dat'
- self._bakfile = file + _os.extsep + 'bak'
+
+ # The directory file is a text file. Each line looks like
+ # "%r, (%d, %d)\n" % (key, pos, siz)
+ # where key is the string key, pos is the offset into the dat
+ # file of the associated value's first byte, and siz is the number
+ # of bytes in the associated value.
+ self._dirfile = filebasename + _os.extsep + 'dir'
+
+ # The data file is a binary file pointed into by the directory
+ # file, and holds the values associated with keys. Each value
+ # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
+ # binary 8-bit string value.
+ self._datfile = filebasename + _os.extsep + 'dat'
+ self._bakfile = filebasename + _os.extsep + 'bak'
+
+ # The index is an in-memory dict, mirroring the directory file.
+ self._index = None # maps keys to (pos, siz) pairs
+
# Mod by Jack: create data file if needed
try:
f = _open(self._datfile, 'r')
@@ -46,6 +61,7 @@ class _Database(UserDict.DictMixin):
f.close()
self._update()
+ # Read directory file into the in-memory index dict.
def _update(self):
self._index = {}
try:
@@ -53,21 +69,28 @@ class _Database(UserDict.DictMixin):
except IOError:
pass
else:
- while 1:
- line = f.readline().rstrip()
- if not line: break
- key, (pos, siz) = eval(line)
- self._index[key] = (pos, siz)
+ for line in f:
+ key, pos_and_siz_pair = eval(line)
+ self._index[key] = pos_and_siz_pair
f.close()
+ # Write the index dict to the directory file. The original directory
+ # file (if any) is renamed with a .bak extension first. If a .bak
+ # file currently exists, it's deleted.
def _commit(self):
- try: _os.unlink(self._bakfile)
- except _os.error: pass
- try: _os.rename(self._dirfile, self._bakfile)
- except _os.error: pass
+ try:
+ _os.unlink(self._bakfile)
+ except _os.error:
+ pass
+
+ try:
+ _os.rename(self._dirfile, self._bakfile)
+ except _os.error:
+ pass
+
f = _open(self._dirfile, 'w', self._mode)
for key, (pos, siz) in self._index.items():
- f.write("%s, (%s, %s)\n" % (`key`, `pos`, `siz`))
+ f.write("%r, (%d, %d)\n" % (key, pos, siz))
f.close()
def __getitem__(self, key):
@@ -78,21 +101,25 @@ class _Database(UserDict.DictMixin):
f.close()
return dat
+ # Append val to the data file, starting at a _BLOCKSIZE-aligned
+ # offset. The data file is first padded with NUL bytes (if needed)
+ # to get to an aligned offset. Return pair
+ # (starting offset of val, len(val))
def _addval(self, val):
f = _open(self._datfile, 'rb+')
f.seek(0, 2)
pos = int(f.tell())
-## Does not work under MW compiler
-## pos = ((pos + _BLOCKSIZE - 1) / _BLOCKSIZE) * _BLOCKSIZE
-## f.seek(pos)
npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
f.write('\0'*(npos-pos))
pos = npos
-
f.write(val)
f.close()
return (pos, len(val))
+ # Write val to the data file, starting at offset pos. The caller
+ # is responsible for ensuring that there's enough room starting at
+ # pos to hold val, without overwriting some other value. Return
+ # pair (pos, len(val)).
def _setval(self, pos, val):
f = _open(self._datfile, 'rb+')
f.seek(pos)
@@ -100,31 +127,45 @@ class _Database(UserDict.DictMixin):
f.close()
return (pos, len(val))
- def _addkey(self, key, (pos, siz)):
- self._index[key] = (pos, siz)
+ # key is a new key whose associated value starts in the data file
+ # at offset pos and with length siz. Add an index record to
+ # the in-memory index dict, and append one to the directory file.
+ def _addkey(self, key, pos_and_siz_pair):
+ self._index[key] = pos_and_siz_pair
f = _open(self._dirfile, 'a', self._mode)
- f.write("%s, (%s, %s)\n" % (`key`, `pos`, `siz`))
+ f.write("%r, %r\n" % (key, pos_and_siz_pair))
f.close()
def __setitem__(self, key, val):
if not type(key) == type('') == type(val):
raise TypeError, "keys and values must be strings"
- if not key in self._index:
- (pos, siz) = self._addval(val)
- self._addkey(key, (pos, siz))
+ if key not in self._index:
+ self._addkey(key, self._addval(val))
else:
+ # See whether the new value is small enough to fit in the
+ # (padded) space currently occupied by the old value.
pos, siz = self._index[key]
oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
if newblocks <= oldblocks:
- pos, siz = self._setval(pos, val)
- self._index[key] = pos, siz
+ self._index[key] = self._setval(pos, val)
else:
- pos, siz = self._addval(val)
- self._index[key] = pos, siz
+ # The new value doesn't fit in the (padded) space used
+ # by the old value. The blocks used by the old value are
+ # forever lost.
+ self._index[key] = self._addval(val)
+
+ # Note that _index may be out of synch with the directory
+ # file now: _setval() and _addval() don't update the directory
+ # file.
def __delitem__(self, key):
+ # The blocks used by the associated value are lost.
del self._index[key]
+ # XXX It's unclear why we do a _commit() here (the code always
+ # XXX has, so I'm not changing it). __setitem__ doesn't try to
+ # XXX keep the directory file in synch. Why should we? Or
+ # XXX why shouldn't __setitem__?
self._commit()
def keys(self):