From 7317c1ef7aaba7deda66b7fa917d09e68d83635b Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Tue, 25 Nov 2008 19:19:17 +0000 Subject: dbm.gnu and dbm.ndbm accept both strings and bytes as keys and values. For the former they are converted to bytes before being written to the DB. Closes issue 3799. Reviewed by Skip Montanaro. --- Doc/library/dbm.rst | 39 ++++++++++++++++++++++++--------------- Lib/test/test_dbm_dumb.py | 2 ++ Lib/test/test_dbm_gnu.py | 2 ++ Lib/test/test_dbm_ndbm.py | 2 ++ Misc/NEWS | 6 ++++++ Modules/_dbmmodule.c | 4 ++-- Modules/_gdbmmodule.c | 4 ++-- 7 files changed, 40 insertions(+), 19 deletions(-) diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index ed05921..84edbbe 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -52,7 +52,9 @@ The object returned by :func:`open` supports most of the same functionality as dictionaries; keys and their corresponding values can be stored, retrieved, and deleted, and the :keyword:`in` operator and the :meth:`keys` method are -available. Keys and values must always be strings. +available. Keys and values are always stored as bytes. This means that when +strings are used they are implicitly converted to the default encoding before +being stored. The following example records some hostnames and a corresponding title, and then prints out the contents of the database:: @@ -63,9 +65,15 @@ then prints out the contents of the database:: db = dbm.open('cache', 'c') # Record some values + db[b'hello'] = b'there' db['www.python.org'] = 'Python Website' db['www.cnn.com'] = 'Cable News Network' + # Note that the keys are considered bytes now. + assert db[b'www.python.org'] == b'Python Website' + # Notice how the value is now in bytes. + assert db['www.cnn.com'] == b'Cable News Network' + # Loop through contents. Other dictionary methods # such as .keys(), .values() also work. for k, v in db.iteritems(): @@ -98,17 +106,18 @@ The individual submodules are described in the following sections. 
This module is quite similar to the :mod:`dbm` module, but uses the GNU library ``gdbm`` instead to provide some additional functionality. Please note that the -file formats created by ``gdbm`` and ``dbm`` are incompatible. +file formats created by :mod:`dbm.gnu` and :mod:`dbm.ndbm` are incompatible. The :mod:`dbm.gnu` module provides an interface to the GNU DBM library. -``gdbm`` objects behave like mappings (dictionaries), except that keys and -values are always strings. Printing a :mod:`dbm.gnu` object doesn't print the +``dbm.gnu.gdbm`` objects behave like mappings (dictionaries), except that keys and +values are always converted to bytes before storing. Printing a ``gdbm`` +object doesn't print the keys and values, and the :meth:`items` and :meth:`values` methods are not supported. .. exception:: error - Raised on ``gdbm``\ -specific errors, such as I/O errors. :exc:`KeyError` is + Raised on :mod:`dbm.gnu`-specific errors, such as I/O errors. :exc:`KeyError` is raised for general mapping errors like specifying an incorrect key. @@ -183,7 +192,7 @@ supported. If you have carried out a lot of deletions and would like to shrink the space used by the ``gdbm`` file, this routine will reorganize the database. ``gdbm`` - will not shorten the length of a database file except by using this + objects will not shorten the length of a database file except by using this reorganization; otherwise, deleted file space will be kept and reused as new (key, value) pairs are added. @@ -203,8 +212,8 @@ supported. The :mod:`dbm.ndbm` module provides an interface to the Unix "(n)dbm" library. Dbm objects behave like mappings (dictionaries), except that keys and values are -always strings. Printing a dbm object doesn't print the keys and values, and the -:meth:`items` and :meth:`values` methods are not supported. +always stored as bytes. Printing a ``dbm`` object doesn't print the keys and +values, and the :meth:`items` and :meth:`values` methods are not supported. 
This module can be used with the "classic" ndbm interface, the BSD DB compatibility interface, or the GNU GDBM compatibility interface. On Unix, the @@ -213,7 +222,7 @@ to simplify building this module. .. exception:: error - Raised on dbm-specific errors, such as I/O errors. :exc:`KeyError` is raised + Raised on :mod:`dbm.ndbm`-specific errors, such as I/O errors. :exc:`KeyError` is raised for general mapping errors like specifying an incorrect key. @@ -224,7 +233,7 @@ to simplify building this module. .. function:: open(filename[, flag[, mode]]) - Open a dbm database and return a dbm object. The *filename* argument is the + Open a dbm database and return a ``dbm`` object. The *filename* argument is the name of the database file (without the :file:`.dir` or :file:`.pag` extensions; note that the BSD DB implementation of the interface will append the extension :file:`.db` and only create one file). @@ -264,27 +273,27 @@ to simplify building this module. .. note:: The :mod:`dbm.dumb` module is intended as a last resort fallback for the - :mod:`dbm` module when no more robust module is available. The :mod:`dbm.dumb` + :mod:`dbm` module when a more robust module is not available. The :mod:`dbm.dumb` module is not written for speed and is not nearly as heavily used as the other database modules. The :mod:`dbm.dumb` module provides a persistent dictionary-like interface which -is written entirely in Python. Unlike other modules such as :mod:`gdbm` no +is written entirely in Python. Unlike other modules such as :mod:`dbm.gnu` no external library is required. As with other persistent mappings, the keys and -values must always be strings. +values are always stored as bytes. The module defines the following: .. exception:: error - Raised on dbm.dumb-specific errors, such as I/O errors. :exc:`KeyError` is + Raised on :mod:`dbm.dumb`-specific errors, such as I/O errors. :exc:`KeyError` is raised for general mapping errors like specifying an incorrect key. .. 
function:: open(filename[, flag[, mode]]) - Open a dumbdbm database and return a dumbdbm object. The *filename* argument is + Open a ``dumbdbm`` database and return a dumbdbm object. The *filename* argument is the basename of the database file (without any specific extensions). When a dumbdbm database is created, files with :file:`.dat` and :file:`.dir` extensions are created. diff --git a/Lib/test/test_dbm_dumb.py b/Lib/test/test_dbm_dumb.py index 940991d..e2964aa 100644 --- a/Lib/test/test_dbm_dumb.py +++ b/Lib/test/test_dbm_dumb.py @@ -115,11 +115,13 @@ class DumbDBMTestCase(unittest.TestCase): self.init_db() f = dumbdbm.open(_fname) f['\u00fc'] = b'!' + f['1'] = 'a' f.close() f = dumbdbm.open(_fname, 'r') self.assert_('\u00fc' in f) self.assertEqual(f['\u00fc'.encode('utf-8')], self._dict['\u00fc'.encode('utf-8')]) + self.assertEqual(f[b'1'], b'a') def test_line_endings(self): # test for bug #1172763: dumbdbm would die if the line endings diff --git a/Lib/test/test_dbm_gnu.py b/Lib/test/test_dbm_gnu.py index eddb970..0049aaa 100755 --- a/Lib/test/test_dbm_gnu.py +++ b/Lib/test/test_dbm_gnu.py @@ -20,9 +20,11 @@ class TestGdbm(unittest.TestCase): self.assertEqual(self.g.keys(), []) self.g['a'] = 'b' self.g['12345678910'] = '019237410982340912840198242' + self.g[b'bytes'] = b'data' key_set = set(self.g.keys()) self.assertEqual(key_set, set([b'a', b'12345678910'])) self.assert_(b'a' in self.g) + self.assertEqual(self.g[b'bytes'], b'data') key = self.g.firstkey() while key: self.assert_(key in key_set) diff --git a/Lib/test/test_dbm_ndbm.py b/Lib/test/test_dbm_ndbm.py index 74d3238..6d0a36d 100755 --- a/Lib/test/test_dbm_ndbm.py +++ b/Lib/test/test_dbm_ndbm.py @@ -20,9 +20,11 @@ class DbmTestCase(unittest.TestCase): self.d = dbm.ndbm.open(self.filename, 'c') self.assert_(self.d.keys() == []) self.d['a'] = 'b' + self.d[b'bytes'] = b'data' self.d['12345678910'] = '019237410982340912840198242' self.d.keys() self.assert_(b'a' in self.d) + 
self.assertEqual(self.d[b'bytes'], b'data') self.d.close() def test_modes(self): diff --git a/Misc/NEWS b/Misc/NEWS index afc3982..bc127e7 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -28,6 +28,12 @@ Library - Issue #4383: When IDLE cannot make the connection to its subprocess, it would fail to properly display the error message. +Docs +---- + +- Issue #3799: Document that dbm.gnu and dbm.ndbm will accept string arguments + for keys and values which will be converted to bytes before committal. + What's New in Python 3.0 release candidate 3? ============================================= diff --git a/Modules/_dbmmodule.c b/Modules/_dbmmodule.c index d43acdc..1aef3d9 100644 --- a/Modules/_dbmmodule.c +++ b/Modules/_dbmmodule.c @@ -122,7 +122,7 @@ dbm_ass_sub(dbmobject *dp, PyObject *v, PyObject *w) if ( !PyArg_Parse(v, "s#", &krec.dptr, &tmp_size) ) { PyErr_SetString(PyExc_TypeError, - "dbm mappings have string keys only"); + "dbm mappings have bytes or string keys only"); return -1; } krec.dsize = tmp_size; @@ -140,7 +140,7 @@ dbm_ass_sub(dbmobject *dp, PyObject *v, PyObject *w) } else { if ( !PyArg_Parse(w, "s#", &drec.dptr, &tmp_size) ) { PyErr_SetString(PyExc_TypeError, - "dbm mappings have byte string elements only"); + "dbm mappings have byte or string elements only"); return -1; } drec.dsize = tmp_size; diff --git a/Modules/_gdbmmodule.c b/Modules/_gdbmmodule.c index 590ef21..c6817fe 100644 --- a/Modules/_gdbmmodule.c +++ b/Modules/_gdbmmodule.c @@ -142,7 +142,7 @@ dbm_ass_sub(dbmobject *dp, PyObject *v, PyObject *w) if (!PyArg_Parse(v, "s#", &krec.dptr, &krec.dsize) ) { PyErr_SetString(PyExc_TypeError, - "gdbm mappings have string indices only"); + "gdbm mappings have bytes or string indices only"); return -1; } if (dp->di_dbm == NULL) { @@ -160,7 +160,7 @@ dbm_ass_sub(dbmobject *dp, PyObject *v, PyObject *w) else { if (!PyArg_Parse(w, "s#", &drec.dptr, &drec.dsize)) { PyErr_SetString(PyExc_TypeError, - "gdbm mappings have byte string elements only"); + "gdbm 
mappings have byte or string elements only"); return -1; } errno = 0; -- cgit v0.12