summaryrefslogtreecommitdiffstats
path: root/Lib/dbm
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/dbm')
-rw-r--r--Lib/dbm/__init__.py189
-rw-r--r--Lib/dbm/dumb.py316
-rw-r--r--Lib/dbm/gnu.py3
-rw-r--r--Lib/dbm/ndbm.py3
4 files changed, 0 insertions, 511 deletions
diff --git a/Lib/dbm/__init__.py b/Lib/dbm/__init__.py
deleted file mode 100644
index f65da52..0000000
--- a/Lib/dbm/__init__.py
+++ /dev/null
@@ -1,189 +0,0 @@
-"""Generic interface to all dbm clones.
-
-Use
-
- import dbm
- d = dbm.open(file, 'w', 0o666)
-
-The returned object is a dbm.gnu, dbm.ndbm or dbm.dumb object, dependent on the
-type of database being opened (determined by the whichdb function) in the case
-of an existing dbm. If the dbm does not exist and the create or new flag ('c'
-or 'n') was specified, the dbm type will be determined by the availability of
-the modules (tested in the above order).
-
-It has the following interface (key and data are strings):
-
- d[key] = data # store data at key (may override data at
- # existing key)
- data = d[key] # retrieve data at key (raise KeyError if no
- # such key)
- del d[key] # delete data stored at key (raises KeyError
- # if no such key)
- flag = key in d # true if the key exists
- list = d.keys() # return a list of all existing keys (slow!)
-
-Future versions may change the order in which implementations are
-tested for existence, and add interfaces to other dbm-like
-implementations.
-"""
-
-__all__ = ['open', 'whichdb', 'error']
-
-import io
-import os
-import struct
-import sys
-
-
-class error(Exception):
- pass
-
-_names = ['dbm.gnu', 'dbm.ndbm', 'dbm.dumb']
-_defaultmod = None
-_modules = {}
-
-error = (error, OSError)
-
-try:
- from dbm import ndbm
-except ImportError:
- ndbm = None
-
-
-def open(file, flag='r', mode=0o666):
- """Open or create database at path given by *file*.
-
- Optional argument *flag* can be 'r' (default) for read-only access, 'w'
- for read-write access of an existing database, 'c' for read-write access
- to a new or existing database, and 'n' for read-write access to a new
- database.
-
- Note: 'r' and 'w' fail if the database doesn't exist; 'c' creates it
- only if it doesn't exist; and 'n' always creates a new database.
- """
- global _defaultmod
- if _defaultmod is None:
- for name in _names:
- try:
- mod = __import__(name, fromlist=['open'])
- except ImportError:
- continue
- if not _defaultmod:
- _defaultmod = mod
- _modules[name] = mod
- if not _defaultmod:
- raise ImportError("no dbm clone found; tried %s" % _names)
-
- # guess the type of an existing database, if not creating a new one
- result = whichdb(file) if 'n' not in flag else None
- if result is None:
- # db doesn't exist or 'n' flag was specified to create a new db
- if 'c' in flag or 'n' in flag:
- # file doesn't exist and the new flag was used so use default type
- mod = _defaultmod
- else:
- raise error[0]("db file doesn't exist; "
- "use 'c' or 'n' flag to create a new db")
- elif result == "":
- # db type cannot be determined
- raise error[0]("db type could not be determined")
- elif result not in _modules:
- raise error[0]("db type is {0}, but the module is not "
- "available".format(result))
- else:
- mod = _modules[result]
- return mod.open(file, flag, mode)
-
-
-def whichdb(filename):
- """Guess which db package to use to open a db file.
-
- Return values:
-
- - None if the database file can't be read;
- - empty string if the file can be read but can't be recognized
- - the name of the dbm submodule (e.g. "ndbm" or "gnu") if recognized.
-
- Importing the given module may still fail, and opening the
- database using that module may still fail.
- """
-
- # Check for ndbm first -- this has a .pag and a .dir file
- try:
- f = io.open(filename + ".pag", "rb")
- f.close()
- f = io.open(filename + ".dir", "rb")
- f.close()
- return "dbm.ndbm"
- except OSError:
- # some dbm emulations based on Berkeley DB generate a .db file
- # some do not, but they should be caught by the bsd checks
- try:
- f = io.open(filename + ".db", "rb")
- f.close()
- # guarantee we can actually open the file using dbm
- # kind of overkill, but since we are dealing with emulations
- # it seems like a prudent step
- if ndbm is not None:
- d = ndbm.open(filename)
- d.close()
- return "dbm.ndbm"
- except OSError:
- pass
-
- # Check for dumbdbm next -- this has a .dir and a .dat file
- try:
- # First check for presence of files
- os.stat(filename + ".dat")
- size = os.stat(filename + ".dir").st_size
- # dumbdbm files with no keys are empty
- if size == 0:
- return "dbm.dumb"
- f = io.open(filename + ".dir", "rb")
- try:
- if f.read(1) in (b"'", b'"'):
- return "dbm.dumb"
- finally:
- f.close()
- except OSError:
- pass
-
- # See if the file exists, return None if not
- try:
- f = io.open(filename, "rb")
- except OSError:
- return None
-
- with f:
- # Read the start of the file -- the magic number
- s16 = f.read(16)
- s = s16[0:4]
-
- # Return "" if not at least 4 bytes
- if len(s) != 4:
- return ""
-
- # Convert to 4-byte int in native byte order -- return "" if impossible
- try:
- (magic,) = struct.unpack("=l", s)
- except struct.error:
- return ""
-
- # Check for GNU dbm
- if magic in (0x13579ace, 0x13579acd, 0x13579acf):
- return "dbm.gnu"
-
- # Later versions of Berkeley db hash file have a 12-byte pad in
- # front of the file type
- try:
- (magic,) = struct.unpack("=l", s16[-4:])
- except struct.error:
- return ""
-
- # Unknown
- return ""
-
-
-if __name__ == "__main__":
- for filename in sys.argv[1:]:
- print(whichdb(filename) or "UNKNOWN", filename)
diff --git a/Lib/dbm/dumb.py b/Lib/dbm/dumb.py
deleted file mode 100644
index 864ad37..0000000
--- a/Lib/dbm/dumb.py
+++ /dev/null
@@ -1,316 +0,0 @@
-"""A dumb and slow but simple dbm clone.
-
-For database spam, spam.dir contains the index (a text file),
-spam.bak *may* contain a backup of the index (also a text file),
-while spam.dat contains the data (a binary file).
-
-XXX TO DO:
-
-- seems to contain a bug when updating...
-
-- reclaim free space (currently, space once occupied by deleted or expanded
-items is never reused)
-
-- support concurrent access (currently, if two processes take turns making
-updates, they can mess up the index)
-
-- support efficient access to large databases (currently, the whole index
-is read when the database is opened, and some updates rewrite the whole index)
-
-- support opening for read-only (flag = 'm')
-
-"""
-
-import ast as _ast
-import io as _io
-import os as _os
-import collections.abc
-
-__all__ = ["error", "open"]
-
-_BLOCKSIZE = 512
-
-error = OSError
-
-class _Database(collections.abc.MutableMapping):
-
- # The on-disk directory and data files can remain in mutually
- # inconsistent states for an arbitrarily long time (see comments
- # at the end of __setitem__). This is only repaired when _commit()
- # gets called. One place _commit() gets called is from __del__(),
- # and if that occurs at program shutdown time, module globals may
- # already have gotten rebound to None. Since it's crucial that
- # _commit() finish successfully, we can't ignore shutdown races
- # here, and _commit() must not reference any globals.
- _os = _os # for _commit()
- _io = _io # for _commit()
-
- def __init__(self, filebasename, mode, flag='c'):
- self._mode = mode
- self._readonly = (flag == 'r')
-
- # The directory file is a text file. Each line looks like
- # "%r, (%d, %d)\n" % (key, pos, siz)
- # where key is the string key, pos is the offset into the dat
- # file of the associated value's first byte, and siz is the number
- # of bytes in the associated value.
- self._dirfile = filebasename + '.dir'
-
- # The data file is a binary file pointed into by the directory
- # file, and holds the values associated with keys. Each value
- # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
- # binary 8-bit string value.
- self._datfile = filebasename + '.dat'
- self._bakfile = filebasename + '.bak'
-
- # The index is an in-memory dict, mirroring the directory file.
- self._index = None # maps keys to (pos, siz) pairs
-
- # Handle the creation
- self._create(flag)
- self._update(flag)
-
- def _create(self, flag):
- if flag == 'n':
- for filename in (self._datfile, self._bakfile, self._dirfile):
- try:
- _os.remove(filename)
- except OSError:
- pass
- # Mod by Jack: create data file if needed
- try:
- f = _io.open(self._datfile, 'r', encoding="Latin-1")
- except OSError:
- if flag not in ('c', 'n'):
- raise
- with _io.open(self._datfile, 'w', encoding="Latin-1") as f:
- self._chmod(self._datfile)
- else:
- f.close()
-
- # Read directory file into the in-memory index dict.
- def _update(self, flag):
- self._modified = False
- self._index = {}
- try:
- f = _io.open(self._dirfile, 'r', encoding="Latin-1")
- except OSError:
- if flag not in ('c', 'n'):
- raise
- self._modified = True
- else:
- with f:
- for line in f:
- line = line.rstrip()
- key, pos_and_siz_pair = _ast.literal_eval(line)
- key = key.encode('Latin-1')
- self._index[key] = pos_and_siz_pair
-
- # Write the index dict to the directory file. The original directory
- # file (if any) is renamed with a .bak extension first. If a .bak
- # file currently exists, it's deleted.
- def _commit(self):
- # CAUTION: It's vital that _commit() succeed, and _commit() can
- # be called from __del__(). Therefore we must never reference a
- # global in this routine.
- if self._index is None or not self._modified:
- return # nothing to do
-
- try:
- self._os.unlink(self._bakfile)
- except OSError:
- pass
-
- try:
- self._os.rename(self._dirfile, self._bakfile)
- except OSError:
- pass
-
- with self._io.open(self._dirfile, 'w', encoding="Latin-1") as f:
- self._chmod(self._dirfile)
- for key, pos_and_siz_pair in self._index.items():
- # Use Latin-1 since it has no qualms with any value in any
- # position; UTF-8, though, does care sometimes.
- entry = "%r, %r\n" % (key.decode('Latin-1'), pos_and_siz_pair)
- f.write(entry)
-
- sync = _commit
-
- def _verify_open(self):
- if self._index is None:
- raise error('DBM object has already been closed')
-
- def __getitem__(self, key):
- if isinstance(key, str):
- key = key.encode('utf-8')
- self._verify_open()
- pos, siz = self._index[key] # may raise KeyError
- with _io.open(self._datfile, 'rb') as f:
- f.seek(pos)
- dat = f.read(siz)
- return dat
-
- # Append val to the data file, starting at a _BLOCKSIZE-aligned
- # offset. The data file is first padded with NUL bytes (if needed)
- # to get to an aligned offset. Return pair
- # (starting offset of val, len(val))
- def _addval(self, val):
- with _io.open(self._datfile, 'rb+') as f:
- f.seek(0, 2)
- pos = int(f.tell())
- npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
- f.write(b'\0'*(npos-pos))
- pos = npos
- f.write(val)
- return (pos, len(val))
-
- # Write val to the data file, starting at offset pos. The caller
- # is responsible for ensuring that there's enough room starting at
- # pos to hold val, without overwriting some other value. Return
- # pair (pos, len(val)).
- def _setval(self, pos, val):
- with _io.open(self._datfile, 'rb+') as f:
- f.seek(pos)
- f.write(val)
- return (pos, len(val))
-
- # key is a new key whose associated value starts in the data file
- # at offset pos and with length siz. Add an index record to
- # the in-memory index dict, and append one to the directory file.
- def _addkey(self, key, pos_and_siz_pair):
- self._index[key] = pos_and_siz_pair
- with _io.open(self._dirfile, 'a', encoding="Latin-1") as f:
- self._chmod(self._dirfile)
- f.write("%r, %r\n" % (key.decode("Latin-1"), pos_and_siz_pair))
-
- def __setitem__(self, key, val):
- if self._readonly:
- raise error('The database is opened for reading only')
- if isinstance(key, str):
- key = key.encode('utf-8')
- elif not isinstance(key, (bytes, bytearray)):
- raise TypeError("keys must be bytes or strings")
- if isinstance(val, str):
- val = val.encode('utf-8')
- elif not isinstance(val, (bytes, bytearray)):
- raise TypeError("values must be bytes or strings")
- self._verify_open()
- self._modified = True
- if key not in self._index:
- self._addkey(key, self._addval(val))
- else:
- # See whether the new value is small enough to fit in the
- # (padded) space currently occupied by the old value.
- pos, siz = self._index[key]
- oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
- newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
- if newblocks <= oldblocks:
- self._index[key] = self._setval(pos, val)
- else:
- # The new value doesn't fit in the (padded) space used
- # by the old value. The blocks used by the old value are
- # forever lost.
- self._index[key] = self._addval(val)
-
- # Note that _index may be out of synch with the directory
- # file now: _setval() and _addval() don't update the directory
- # file. This also means that the on-disk directory and data
- # files are in a mutually inconsistent state, and they'll
- # remain that way until _commit() is called. Note that this
- # is a disaster (for the database) if the program crashes
- # (so that _commit() never gets called).
-
- def __delitem__(self, key):
- if self._readonly:
- raise error('The database is opened for reading only')
- if isinstance(key, str):
- key = key.encode('utf-8')
- self._verify_open()
- self._modified = True
- # The blocks used by the associated value are lost.
- del self._index[key]
- # XXX It's unclear why we do a _commit() here (the code always
- # XXX has, so I'm not changing it). __setitem__ doesn't try to
- # XXX keep the directory file in synch. Why should we? Or
- # XXX why shouldn't __setitem__?
- self._commit()
-
- def keys(self):
- try:
- return list(self._index)
- except TypeError:
- raise error('DBM object has already been closed') from None
-
- def items(self):
- self._verify_open()
- return [(key, self[key]) for key in self._index.keys()]
-
- def __contains__(self, key):
- if isinstance(key, str):
- key = key.encode('utf-8')
- try:
- return key in self._index
- except TypeError:
- if self._index is None:
- raise error('DBM object has already been closed') from None
- else:
- raise
-
- def iterkeys(self):
- try:
- return iter(self._index)
- except TypeError:
- raise error('DBM object has already been closed') from None
- __iter__ = iterkeys
-
- def __len__(self):
- try:
- return len(self._index)
- except TypeError:
- raise error('DBM object has already been closed') from None
-
- def close(self):
- try:
- self._commit()
- finally:
- self._index = self._datfile = self._dirfile = self._bakfile = None
-
- __del__ = close
-
- def _chmod(self, file):
- self._os.chmod(file, self._mode)
-
- def __enter__(self):
- return self
-
- def __exit__(self, *args):
- self.close()
-
-
-def open(file, flag='c', mode=0o666):
- """Open the database file, filename, and return corresponding object.
-
- The flag argument, used to control how the database is opened in the
- other DBM implementations, supports only the semantics of 'c' and 'n'
- values. Other values will default to the semantics of 'c' value:
- the database will always opened for update and will be created if it
- does not exist.
-
- The optional mode argument is the UNIX mode of the file, used only when
- the database has to be created. It defaults to octal code 0o666 (and
- will be modified by the prevailing umask).
-
- """
-
- # Modify mode depending on the umask
- try:
- um = _os.umask(0)
- _os.umask(um)
- except AttributeError:
- pass
- else:
- # Turn off any bits that are set in the umask
- mode = mode & (~um)
- if flag not in ('r', 'w', 'c', 'n'):
- raise ValueError("Flag must be one of 'r', 'w', 'c', or 'n'")
- return _Database(file, mode, flag=flag)
diff --git a/Lib/dbm/gnu.py b/Lib/dbm/gnu.py
deleted file mode 100644
index b07a1de..0000000
--- a/Lib/dbm/gnu.py
+++ /dev/null
@@ -1,3 +0,0 @@
-"""Provide the _gdbm module as a dbm submodule."""
-
-from _gdbm import *
diff --git a/Lib/dbm/ndbm.py b/Lib/dbm/ndbm.py
deleted file mode 100644
index 23056a2..0000000
--- a/Lib/dbm/ndbm.py
+++ /dev/null
@@ -1,3 +0,0 @@
-"""Provide the _dbm module as a dbm submodule."""
-
-from _dbm import *