summaryrefslogtreecommitdiffstats
path: root/Lib/dbm/dumb.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/dbm/dumb.py')
-rw-r--r--Lib/dbm/dumb.py257
1 files changed, 257 insertions, 0 deletions
diff --git a/Lib/dbm/dumb.py b/Lib/dbm/dumb.py
new file mode 100644
index 0000000..76f4a63
--- /dev/null
+++ b/Lib/dbm/dumb.py
@@ -0,0 +1,257 @@
+"""A dumb and slow but simple dbm clone.
+
+For database spam, spam.dir contains the index (a text file),
+spam.bak *may* contain a backup of the index (also a text file),
+while spam.dat contains the data (a binary file).
+
+XXX TO DO:
+
+- seems to contain a bug when updating...
+
+- reclaim free space (currently, space once occupied by deleted or expanded
+items is never reused)
+
+- support concurrent access (currently, if two processes take turns making
+updates, they can mess up the index)
+
+- support efficient access to large databases (currently, the whole index
+is read when the database is opened, and some updates rewrite the whole index)
+
+- support opening for read-only (flag = 'm')
+
+"""
+
+import io as _io
+import os as _os
+import collections
+
+__all__ = ["error", "open"]
+
+_BLOCKSIZE = 512
+
+error = IOError
+
+class _Database(collections.MutableMapping):
+
+ # The on-disk directory and data files can remain in mutually
+ # inconsistent states for an arbitrarily long time (see comments
+ # at the end of __setitem__). This is only repaired when _commit()
+ # gets called. One place _commit() gets called is from __del__(),
+ # and if that occurs at program shutdown time, module globals may
+ # already have gotten rebound to None. Since it's crucial that
+ # _commit() finish successfully, we can't ignore shutdown races
+ # here, and _commit() must not reference any globals.
+ _os = _os # for _commit()
+ _io = _io # for _commit()
+
+ def __init__(self, filebasename, mode):
+ self._mode = mode
+
+ # The directory file is a text file. Each line looks like
+ # "%r, (%d, %d)\n" % (key, pos, siz)
+ # where key is the string key, pos is the offset into the dat
+ # file of the associated value's first byte, and siz is the number
+ # of bytes in the associated value.
+ self._dirfile = filebasename + '.dir'
+
+ # The data file is a binary file pointed into by the directory
+ # file, and holds the values associated with keys. Each value
+ # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
+ # binary 8-bit string value.
+ self._datfile = filebasename + '.dat'
+ self._bakfile = filebasename + '.bak'
+
+ # The index is an in-memory dict, mirroring the directory file.
+ self._index = None # maps keys to (pos, siz) pairs
+
+ # Mod by Jack: create data file if needed
+ try:
+ f = _io.open(self._datfile, 'r')
+ except IOError:
+ f = _io.open(self._datfile, 'w')
+ self._chmod(self._datfile)
+ f.close()
+ self._update()
+
+ # Read directory file into the in-memory index dict.
+ def _update(self):
+ self._index = {}
+ try:
+ f = _io.open(self._dirfile, 'r')
+ except IOError:
+ pass
+ else:
+ for line in f:
+ line = line.rstrip()
+ key, pos_and_siz_pair = eval(line)
+ self._index[key] = pos_and_siz_pair
+ f.close()
+
+ # Write the index dict to the directory file. The original directory
+ # file (if any) is renamed with a .bak extension first. If a .bak
+ # file currently exists, it's deleted.
+ def _commit(self):
+ # CAUTION: It's vital that _commit() succeed, and _commit() can
+ # be called from __del__(). Therefore we must never reference a
+ # global in this routine.
+ if self._index is None:
+ return # nothing to do
+
+ try:
+ self._os.unlink(self._bakfile)
+ except self._os.error:
+ pass
+
+ try:
+ self._os.rename(self._dirfile, self._bakfile)
+ except self._os.error:
+ pass
+
+ f = self._io.open(self._dirfile, 'w')
+ self._chmod(self._dirfile)
+ for key, pos_and_siz_pair in self._index.items():
+ f.write("%r, %r\n" % (key, pos_and_siz_pair))
+ f.close()
+
+ sync = _commit
+
+ def __getitem__(self, key):
+ key = key.decode("latin-1")
+ pos, siz = self._index[key] # may raise KeyError
+ f = _io.open(self._datfile, 'rb')
+ f.seek(pos)
+ dat = f.read(siz)
+ f.close()
+ return dat
+
+ # Append val to the data file, starting at a _BLOCKSIZE-aligned
+ # offset. The data file is first padded with NUL bytes (if needed)
+ # to get to an aligned offset. Return pair
+ # (starting offset of val, len(val))
+ def _addval(self, val):
+ f = _io.open(self._datfile, 'rb+')
+ f.seek(0, 2)
+ pos = int(f.tell())
+ npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
+ f.write(b'\0'*(npos-pos))
+ pos = npos
+ f.write(val)
+ f.close()
+ return (pos, len(val))
+
+ # Write val to the data file, starting at offset pos. The caller
+ # is responsible for ensuring that there's enough room starting at
+ # pos to hold val, without overwriting some other value. Return
+ # pair (pos, len(val)).
+ def _setval(self, pos, val):
+ f = _io.open(self._datfile, 'rb+')
+ f.seek(pos)
+ f.write(val)
+ f.close()
+ return (pos, len(val))
+
+ # key is a new key whose associated value starts in the data file
+ # at offset pos and with length siz. Add an index record to
+ # the in-memory index dict, and append one to the directory file.
+ def _addkey(self, key, pos_and_siz_pair):
+ self._index[key] = pos_and_siz_pair
+ f = _io.open(self._dirfile, 'a')
+ self._chmod(self._dirfile)
+ f.write("%r, %r\n" % (key, pos_and_siz_pair))
+ f.close()
+
+ def __setitem__(self, key, val):
+ if not isinstance(key, bytes):
+ raise TypeError("keys must be bytes")
+ key = key.decode("latin-1") # hashable bytes
+ if not isinstance(val, (bytes, bytearray)):
+ raise TypeError("values must be byte strings")
+ if key not in self._index:
+ self._addkey(key, self._addval(val))
+ else:
+ # See whether the new value is small enough to fit in the
+ # (padded) space currently occupied by the old value.
+ pos, siz = self._index[key]
+ oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
+ newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
+ if newblocks <= oldblocks:
+ self._index[key] = self._setval(pos, val)
+ else:
+ # The new value doesn't fit in the (padded) space used
+ # by the old value. The blocks used by the old value are
+ # forever lost.
+ self._index[key] = self._addval(val)
+
+ # Note that _index may be out of synch with the directory
+ # file now: _setval() and _addval() don't update the directory
+ # file. This also means that the on-disk directory and data
+ # files are in a mutually inconsistent state, and they'll
+ # remain that way until _commit() is called. Note that this
+ # is a disaster (for the database) if the program crashes
+ # (so that _commit() never gets called).
+
+ def __delitem__(self, key):
+ key = key.decode("latin-1")
+ # The blocks used by the associated value are lost.
+ del self._index[key]
+ # XXX It's unclear why we do a _commit() here (the code always
+ # XXX has, so I'm not changing it). _setitem__ doesn't try to
+ # XXX keep the directory file in synch. Why should we? Or
+ # XXX why shouldn't __setitem__?
+ self._commit()
+
+ def keys(self):
+ return [key.encode("latin-1") for key in self._index.keys()]
+
+ def items(self):
+ return [(key.encode("latin-1"), self[key.encode("latin-1")])
+ for key in self._index.keys()]
+
+ def __contains__(self, key):
+ key = key.decode("latin-1")
+ return key in self._index
+
+ def iterkeys(self):
+ return iter(self._index.keys())
+ __iter__ = iterkeys
+
+ def __len__(self):
+ return len(self._index)
+
+ def close(self):
+ self._commit()
+ self._index = self._datfile = self._dirfile = self._bakfile = None
+
+ __del__ = close
+
+ def _chmod (self, file):
+ if hasattr(self._os, 'chmod'):
+ self._os.chmod(file, self._mode)
+
+
+def open(file, flag=None, mode=0o666):
+ """Open the database file, filename, and return corresponding object.
+
+ The flag argument, used to control how the database is opened in the
+ other DBM implementations, is ignored in the dbm.dumb module; the
+ database is always opened for update, and will be created if it does
+ not exist.
+
+ The optional mode argument is the UNIX mode of the file, used only when
+ the database has to be created. It defaults to octal code 0o666 (and
+ will be modified by the prevailing umask).
+
+ """
+ # flag argument is currently ignored
+
+ # Modify mode depending on the umask
+ try:
+ um = _os.umask(0)
+ _os.umask(um)
+ except AttributeError:
+ pass
+ else:
+ # Turn off any bits that are set in the umask
+ mode = mode & (~um)
+
+ return _Database(file, mode)