diff options
Diffstat (limited to 'Lib/dbm')
-rw-r--r--  Lib/dbm/__init__.py  189
-rw-r--r--  Lib/dbm/dumb.py      316
-rw-r--r--  Lib/dbm/gnu.py         3
-rw-r--r--  Lib/dbm/ndbm.py        3
4 files changed, 0 insertions, 511 deletions
"""Generic interface to all dbm clones.

Use

        import dbm
        d = dbm.open(file, 'w', 0o666)

The returned object is a dbm.gnu, dbm.ndbm or dbm.dumb object, dependent on the
type of database being opened (determined by the whichdb function) in the case
of an existing dbm. If the dbm does not exist and the create or new flag ('c'
or 'n') was specified, the dbm type will be determined by the availability of
the modules (tested in the above order).

It has the following interface (key and data are strings):

        d[key] = data   # store data at key (may override data at
                        # existing key)
        data = d[key]   # retrieve data at key (raise KeyError if no
                        # such key)
        del d[key]      # delete data stored at key (raises KeyError
                        # if no such key)
        flag = key in d # true if the key exists
        list = d.keys() # return a list of all existing keys (slow!)

Future versions may change the order in which implementations are
tested for existence, and add interfaces to other dbm-like
implementations.
"""

__all__ = ['open', 'whichdb', 'error']

import io
import os
import struct
import sys


class error(Exception):
    pass

# Candidate implementations, probed in this order; the first importable one
# becomes the default for newly created databases.
_names = ['dbm.gnu', 'dbm.ndbm', 'dbm.dumb']
_defaultmod = None
_modules = {}

# Rebind ``error`` to a tuple so ``except dbm.error`` also catches OSError
# raised by the concrete implementations.  The exception class itself is
# reachable as ``error[0]``.
error = (error, OSError)

try:
    from dbm import ndbm
except ImportError:
    ndbm = None


def open(file, flag='r', mode=0o666):
    """Open or create database at path given by *file*.

    Optional argument *flag* can be 'r' (default) for read-only access, 'w'
    for read-write access of an existing database, 'c' for read-write access
    to a new or existing database, and 'n' for read-write access to a new
    database.

    Note: 'r' and 'w' fail if the database doesn't exist; 'c' creates it
    only if it doesn't exist; and 'n' always creates a new database.
    """
    global _defaultmod
    if _defaultmod is None:
        # Lazily import whichever clones are available and remember them.
        for name in _names:
            try:
                mod = __import__(name, fromlist=['open'])
            except ImportError:
                continue
            if not _defaultmod:
                _defaultmod = mod
            _modules[name] = mod
        if not _defaultmod:
            raise ImportError("no dbm clone found; tried %s" % _names)

    # Guess the type of an existing database, unless a new one was requested.
    result = whichdb(file) if 'n' not in flag else None
    if result is None:
        # db doesn't exist or 'n' flag was specified to create a new db
        if 'c' in flag or 'n' in flag:
            # file doesn't exist and the new flag was used so use default type
            mod = _defaultmod
        else:
            raise error[0]("db file doesn't exist; "
                           "use 'c' or 'n' flag to create a new db")
    elif result == "":
        # db type cannot be determined
        raise error[0]("db type could not be determined")
    elif result not in _modules:
        raise error[0]("db type is {0}, but the module is not "
                       "available".format(result))
    else:
        mod = _modules[result]
    return mod.open(file, flag, mode)


def whichdb(filename):
    """Guess which db package to use to open a db file.

    Return values:

    - None if the database file can't be read;
    - empty string if the file can be read but can't be recognized
    - the name of the dbm submodule (e.g. "ndbm" or "gnu") if recognized.

    Importing the given module may still fail, and opening the
    database using that module may still fail.
    """
    # Check for ndbm first -- this has a .pag and a .dir file
    try:
        with io.open(filename + ".pag", "rb"):
            pass
        with io.open(filename + ".dir", "rb"):
            pass
        return "dbm.ndbm"
    except OSError:
        # some dbm emulations based on Berkeley DB generate a .db file
        # some do not, but they should be caught by the bsd checks
        try:
            with io.open(filename + ".db", "rb"):
                pass
            # guarantee we can actually open the file using dbm
            # kind of overkill, but since we are dealing with emulations
            # it seems like a prudent step
            if ndbm is not None:
                d = ndbm.open(filename)
                d.close()
            return "dbm.ndbm"
        except OSError:
            pass

    # Check for dumbdbm next -- this has a .dir and a .dat file
    try:
        # First check for presence of files
        os.stat(filename + ".dat")
        size = os.stat(filename + ".dir").st_size
        # dumbdbm files with no keys are empty
        if size == 0:
            return "dbm.dumb"
        with io.open(filename + ".dir", "rb") as f:
            # A dumbdbm index starts with a repr()'d (quoted) key.
            if f.read(1) in (b"'", b'"'):
                return "dbm.dumb"
    except OSError:
        pass

    # See if the file exists, return None if not
    try:
        f = io.open(filename, "rb")
    except OSError:
        return None

    with f:
        # Read the start of the file -- the magic number
        s16 = f.read(16)
        s = s16[0:4]

        # Return "" if not at least 4 bytes
        if len(s) != 4:
            return ""

        # Convert to 4-byte int in native byte order -- return "" if impossible
        try:
            (magic,) = struct.unpack("=l", s)
        except struct.error:
            return ""

        # Check for GNU dbm
        if magic in (0x13579ace, 0x13579acd, 0x13579acf):
            return "dbm.gnu"

        # Later versions of Berkeley db hash file have a 12-byte pad in
        # front of the file type
        try:
            (magic,) = struct.unpack("=l", s16[-4:])
        except struct.error:
            return ""

        # Unknown
        return ""


if __name__ == "__main__":
    for filename in sys.argv[1:]:
        print(whichdb(filename) or "UNKNOWN", filename)
"""A dumb and slow but simple dbm clone.

For database spam, spam.dir contains the index (a text file),
spam.bak *may* contain a backup of the index (also a text file),
while spam.dat contains the data (a binary file).

XXX TO DO:

- seems to contain a bug when updating...

- reclaim free space (currently, space once occupied by deleted or expanded
items is never reused)

- support concurrent access (currently, if two processes take turns making
updates, they can mess up the index)

- support efficient access to large databases (currently, the whole index
is read when the database is opened, and some updates rewrite the whole index)

- support opening for read-only (flag = 'm')

"""

import ast as _ast
import io as _io
import os as _os
import collections.abc

__all__ = ["error", "open"]

# Values are stored at _BLOCKSIZE-aligned offsets in the .dat file.
_BLOCKSIZE = 512

error = OSError


class _Database(collections.abc.MutableMapping):

    # The on-disk directory and data files can remain in mutually
    # inconsistent states for an arbitrarily long time (see comments
    # at the end of __setitem__).  This is only repaired when _commit()
    # gets called.  One place _commit() gets called is from __del__(),
    # and if that occurs at program shutdown time, module globals may
    # already have gotten rebound to None.  Since it's crucial that
    # _commit() finish successfully, we can't ignore shutdown races
    # here, and _commit() must not reference any globals.
    _os = _os       # for _commit()
    _io = _io       # for _commit()

    def __init__(self, filebasename, mode, flag='c'):
        self._mode = mode
        self._readonly = (flag == 'r')

        # The directory file is a text file.  Each line looks like
        #    "%r, (%d, %d)\n" % (key, pos, siz)
        # where key is the string key, pos is the offset into the dat
        # file of the associated value's first byte, and siz is the number
        # of bytes in the associated value.
        self._dirfile = filebasename + '.dir'

        # The data file is a binary file pointed into by the directory
        # file, and holds the values associated with keys.  Each value
        # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
        # binary 8-bit string value.
        self._datfile = filebasename + '.dat'
        self._bakfile = filebasename + '.bak'

        # The index is an in-memory dict, mirroring the directory file.
        self._index = None      # maps keys to (pos, siz) pairs

        # Handle the creation
        self._create(flag)
        self._update(flag)

    def _create(self, flag):
        # Wipe any pre-existing files when a brand-new database is requested.
        if flag == 'n':
            for filename in (self._datfile, self._bakfile, self._dirfile):
                try:
                    _os.remove(filename)
                except OSError:
                    pass
        # Mod by Jack: create data file if needed
        try:
            f = _io.open(self._datfile, 'r', encoding="Latin-1")
        except OSError:
            if flag not in ('c', 'n'):
                raise
            with _io.open(self._datfile, 'w', encoding="Latin-1") as f:
                self._chmod(self._datfile)
        else:
            f.close()

    # Read directory file into the in-memory index dict.
    def _update(self, flag):
        self._modified = False
        self._index = {}
        try:
            f = _io.open(self._dirfile, 'r', encoding="Latin-1")
        except OSError:
            if flag not in ('c', 'n'):
                raise
            self._modified = True
        else:
            with f:
                for line in f:
                    line = line.rstrip()
                    key, pair = _ast.literal_eval(line)
                    key = key.encode('Latin-1')
                    self._index[key] = pair

    # Write the index dict to the directory file.  The original directory
    # file (if any) is renamed with a .bak extension first.  If a .bak
    # file currently exists, it's deleted.
    def _commit(self):
        # CAUTION: It's vital that _commit() succeed, and _commit() can
        # be called from __del__().  Therefore we must never reference a
        # global in this routine.
        if self._index is None or not self._modified:
            return  # nothing to do

        try:
            self._os.unlink(self._bakfile)
        except OSError:
            pass

        try:
            self._os.rename(self._dirfile, self._bakfile)
        except OSError:
            pass

        with self._io.open(self._dirfile, 'w', encoding="Latin-1") as f:
            self._chmod(self._dirfile)
            for key, pair in self._index.items():
                # Use Latin-1 since it has no qualms with any value in any
                # position; UTF-8, though, does care sometimes.
                entry = "%r, %r\n" % (key.decode('Latin-1'), pair)
                f.write(entry)

    sync = _commit

    def _verify_open(self):
        if self._index is None:
            raise error('DBM object has already been closed')

    def __getitem__(self, key):
        if isinstance(key, str):
            key = key.encode('utf-8')
        self._verify_open()
        pos, siz = self._index[key]     # may raise KeyError
        with _io.open(self._datfile, 'rb') as f:
            f.seek(pos)
            dat = f.read(siz)
        return dat

    # Append val to the data file, starting at a _BLOCKSIZE-aligned
    # offset.  The data file is first padded with NUL bytes (if needed)
    # to get to an aligned offset.  Return pair
    #     (starting offset of val, len(val))
    def _addval(self, val):
        with _io.open(self._datfile, 'rb+') as f:
            f.seek(0, 2)
            pos = int(f.tell())
            aligned = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            f.write(b'\0' * (aligned - pos))
            pos = aligned
            f.write(val)
        return (pos, len(val))

    # Write val to the data file, starting at offset pos.  The caller
    # is responsible for ensuring that there's enough room starting at
    # pos to hold val, without overwriting some other value.  Return
    # pair (pos, len(val)).
    def _setval(self, pos, val):
        with _io.open(self._datfile, 'rb+') as f:
            f.seek(pos)
            f.write(val)
        return (pos, len(val))

    # key is a new key whose associated value starts in the data file
    # at offset pos and with length siz.  Add an index record to
    # the in-memory index dict, and append one to the directory file.
    def _addkey(self, key, pair):
        self._index[key] = pair
        with _io.open(self._dirfile, 'a', encoding="Latin-1") as f:
            self._chmod(self._dirfile)
            f.write("%r, %r\n" % (key.decode("Latin-1"), pair))

    def __setitem__(self, key, val):
        if self._readonly:
            raise error('The database is opened for reading only')
        if isinstance(key, str):
            key = key.encode('utf-8')
        elif not isinstance(key, (bytes, bytearray)):
            raise TypeError("keys must be bytes or strings")
        if isinstance(val, str):
            val = val.encode('utf-8')
        elif not isinstance(val, (bytes, bytearray)):
            raise TypeError("values must be bytes or strings")
        self._verify_open()
        self._modified = True
        if key not in self._index:
            self._addkey(key, self._addval(val))
        else:
            # See whether the new value is small enough to fit in the
            # (padded) space currently occupied by the old value.
            pos, siz = self._index[key]
            oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
            newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
            if newblocks <= oldblocks:
                self._index[key] = self._setval(pos, val)
            else:
                # The new value doesn't fit in the (padded) space used
                # by the old value.  The blocks used by the old value are
                # forever lost.
                self._index[key] = self._addval(val)

        # Note that _index may be out of synch with the directory
        # file now:  _setval() and _addval() don't update the directory
        # file.  This also means that the on-disk directory and data
        # files are in a mutually inconsistent state, and they'll
        # remain that way until _commit() is called.  Note that this
        # is a disaster (for the database) if the program crashes
        # (so that _commit() never gets called).

    def __delitem__(self, key):
        if self._readonly:
            raise error('The database is opened for reading only')
        if isinstance(key, str):
            key = key.encode('utf-8')
        self._verify_open()
        self._modified = True
        # The blocks used by the associated value are lost.
        del self._index[key]
        # XXX It's unclear why we do a _commit() here (the code always
        # XXX has, so I'm not changing it).  __setitem__ doesn't try to
        # XXX keep the directory file in synch.  Why should we?  Or
        # XXX why shouldn't __setitem__?
        self._commit()

    def keys(self):
        try:
            return list(self._index)
        except TypeError:
            raise error('DBM object has already been closed') from None

    def items(self):
        self._verify_open()
        return [(key, self[key]) for key in self._index.keys()]

    def __contains__(self, key):
        if isinstance(key, str):
            key = key.encode('utf-8')
        try:
            return key in self._index
        except TypeError:
            if self._index is None:
                raise error('DBM object has already been closed') from None
            else:
                raise

    def iterkeys(self):
        try:
            return iter(self._index)
        except TypeError:
            raise error('DBM object has already been closed') from None
    __iter__ = iterkeys

    def __len__(self):
        try:
            return len(self._index)
        except TypeError:
            raise error('DBM object has already been closed') from None

    def close(self):
        try:
            self._commit()
        finally:
            self._index = self._datfile = self._dirfile = self._bakfile = None

    __del__ = close

    def _chmod(self, file):
        self._os.chmod(file, self._mode)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()


def open(file, flag='c', mode=0o666):
    """Open the database file, filename, and return corresponding object.

    The flag argument, used to control how the database is opened in the
    other DBM implementations, supports only the semantics of 'c' and 'n'
    values.  Other values will default to the semantics of 'c' value:
    the database will always opened for update and will be created if it
    does not exist.

    The optional mode argument is the UNIX mode of the file, used only when
    the database has to be created.  It defaults to octal code 0o666 (and
    will be modified by the prevailing umask).

    """

    # Modify mode depending on the umask
    try:
        um = _os.umask(0)
        _os.umask(um)
    except AttributeError:
        pass
    else:
        # Turn off any bits that are set in the umask
        mode = mode & (~um)
    if flag not in ('r', 'w', 'c', 'n'):
        raise ValueError("Flag must be one of 'r', 'w', 'c', or 'n'")
    return _Database(file, mode, flag=flag)