From 707137b8637feef37b2e06a851fdca9d1b945861 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henry-Joseph=20Aud=C3=A9oud?= Date: Fri, 10 Sep 2021 14:26:16 +0200 Subject: bpo-40563: Support pathlike objects on dbm/shelve (GH-21849) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Hakan Çelik --- Doc/library/dbm.rst | 14 ++++++ Doc/library/shelve.rst | 3 ++ Lib/dbm/__init__.py | 13 ++--- Lib/dbm/dumb.py | 7 +-- Lib/test/test_dbm.py | 57 ++++++++++++++-------- Lib/test/test_dbm_dumb.py | 9 ++++ Lib/test/test_dbm_gnu.py | 11 ++++- Lib/test/test_dbm_ndbm.py | 9 ++++ Lib/test/test_shelve.py | 32 ++++++------ .../2020-05-21-01-42-32.bpo-40563.fDn5bP.rst | 1 + Modules/_dbmmodule.c | 9 ++-- Modules/_gdbmmodule.c | 9 ++-- Modules/clinic/_dbmmodule.c.h | 9 +--- Modules/clinic/_gdbmmodule.c.h | 9 +--- 14 files changed, 124 insertions(+), 68 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-05-21-01-42-32.bpo-40563.fDn5bP.rst diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index ff01ae9..2be4993 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -33,6 +33,8 @@ the Oracle Berkeley DB. file's format can't be guessed; or a string containing the required module name, such as ``'dbm.ndbm'`` or ``'dbm.gnu'``. +.. versionchanged:: 3.11 + Accepts :term:`path-like object` for filename. .. function:: open(file, flag='r', mode=0o666) @@ -77,6 +79,9 @@ available, as well as :meth:`get` and :meth:`setdefault`. Deleting a key from a read-only database raises database module specific error instead of :exc:`KeyError`. +.. versionchanged:: 3.11 + Accepts :term:`path-like object` for file. + Key and values are always stored as bytes. This means that when strings are used they are implicitly converted to the default encoding before being stored. @@ -202,6 +207,9 @@ supported. In addition to the dictionary-like methods, ``gdbm`` objects have the following methods: + .. versionchanged:: 3.11 + Accepts :term:`path-like object` for filename. + .. method:: gdbm.firstkey() It's possible to loop over every key in the database using this method and the @@ -298,6 +306,9 @@ to locate the appropriate header file to simplify building this module. In addition to the dictionary-like methods, ``ndbm`` objects provide the following method: + .. versionchanged:: 3.11 + Accepts :term:`path-like object` for filename. + .. method:: ndbm.close() Close the ``ndbm`` database. @@ -379,6 +390,9 @@ The module defines the following: flags ``'r'`` and ``'w'`` no longer creates a database if it does not exist. + .. versionchanged:: 3.11 + Accepts :term:`path-like object` for filename. + In addition to the methods provided by the :class:`collections.abc.MutableMapping` class, :class:`dumbdbm` objects provide the following methods: diff --git a/Doc/library/shelve.rst b/Doc/library/shelve.rst index 684f239..a50fc6f 100644 --- a/Doc/library/shelve.rst +++ b/Doc/library/shelve.rst @@ -45,6 +45,9 @@ lots of shared sub-objects. The keys are ordinary strings. :data:`pickle.DEFAULT_PROTOCOL` is now used as the default pickle protocol. + .. versionchanged:: 3.11 + Accepts :term:`path-like object` for filename. + .. note:: Do not rely on the shelf being closed automatically; always call diff --git a/Lib/dbm/__init__.py b/Lib/dbm/__init__.py index f65da52..8055d37 100644 --- a/Lib/dbm/__init__.py +++ b/Lib/dbm/__init__.py @@ -109,17 +109,18 @@ def whichdb(filename): """ # Check for ndbm first -- this has a .pag and a .dir file + filename = os.fsencode(filename) try: - f = io.open(filename + ".pag", "rb") + f = io.open(filename + b".pag", "rb") f.close() - f = io.open(filename + ".dir", "rb") + f = io.open(filename + b".dir", "rb") f.close() return "dbm.ndbm" except OSError: # some dbm emulations based on Berkeley DB generate a .db file # some do not, but they should be caught by the bsd checks try: - f = io.open(filename + ".db", "rb") + f = io.open(filename + b".db", "rb") f.close() # guarantee we can actually open the file using dbm # kind of overkill, but since we are dealing with emulations @@ -134,12 +135,12 @@ def whichdb(filename): # Check for dumbdbm next -- this has a .dir and a .dat file try: # First check for presence of files - os.stat(filename + ".dat") - size = os.stat(filename + ".dir").st_size + os.stat(filename + b".dat") + size = os.stat(filename + b".dir").st_size # dumbdbm files with no keys are empty if size == 0: return "dbm.dumb" - f = io.open(filename + ".dir", "rb") + f = io.open(filename + b".dir", "rb") try: if f.read(1) in (b"'", b'"'): return "dbm.dumb" diff --git a/Lib/dbm/dumb.py b/Lib/dbm/dumb.py index 864ad37..754624c 100644 --- a/Lib/dbm/dumb.py +++ b/Lib/dbm/dumb.py @@ -46,6 +46,7 @@ class _Database(collections.abc.MutableMapping): _io = _io # for _commit() def __init__(self, filebasename, mode, flag='c'): + filebasename = self._os.fsencode(filebasename) self._mode = mode self._readonly = (flag == 'r') @@ -54,14 +55,14 @@ class _Database(collections.abc.MutableMapping): # where key is the string key, pos is the offset into the dat # file of the associated value's first byte, and siz is the number # of bytes in the associated value. - self._dirfile = filebasename + '.dir' + self._dirfile = filebasename + b'.dir' # The data file is a binary file pointed into by the directory # file, and holds the values associated with keys. Each value # begins at a _BLOCKSIZE-aligned byte offset, and is a raw # binary 8-bit string value. - self._datfile = filebasename + '.dat' - self._bakfile = filebasename + '.bak' + self._datfile = filebasename + b'.dat' + self._bakfile = filebasename + b'.bak' # The index is an in-memory dict, mirroring the directory file. self._index = None # maps keys to (pos, siz) pairs diff --git a/Lib/test/test_dbm.py b/Lib/test/test_dbm.py index e02d1e1..0404e06 100644 --- a/Lib/test/test_dbm.py +++ b/Lib/test/test_dbm.py @@ -2,6 +2,7 @@ import unittest import glob +import os from test.support import import_helper from test.support import os_helper @@ -129,6 +130,15 @@ class AnyDBMTestCase: assert(f[key] == b"Python:") f.close() + def test_open_with_bytes(self): + dbm.open(os.fsencode(_fname), "c").close() + + def test_open_with_pathlib_path(self): + dbm.open(os_helper.FakePath(_fname), "c").close() + + def test_open_with_pathlib_path_bytes(self): + dbm.open(os_helper.FakePath(os.fsencode(_fname)), "c").close() + def read_helper(self, f): keys = self.keys_helper(f) for key in self._dict: @@ -144,26 +154,29 @@ class AnyDBMTestCase: class WhichDBTestCase(unittest.TestCase): def test_whichdb(self): - for module in dbm_iterator(): - # Check whether whichdb correctly guesses module name - # for databases opened with "module" module. - # Try with empty files first - name = module.__name__ - if name == 'dbm.dumb': - continue # whichdb can't support dbm.dumb - delete_files() - f = module.open(_fname, 'c') - f.close() - self.assertEqual(name, self.dbm.whichdb(_fname)) - # Now add a key - f = module.open(_fname, 'w') - f[b"1"] = b"1" - # and test that we can find it - self.assertIn(b"1", f) - # and read it - self.assertEqual(f[b"1"], b"1") - f.close() - self.assertEqual(name, self.dbm.whichdb(_fname)) + _bytes_fname = os.fsencode(_fname) + for path in [_fname, os_helper.FakePath(_fname), + _bytes_fname, os_helper.FakePath(_bytes_fname)]: + for module in dbm_iterator(): + # Check whether whichdb correctly guesses module name + # for databases opened with "module" module. + # Try with empty files first + name = module.__name__ + if name == 'dbm.dumb': + continue # whichdb can't support dbm.dumb + delete_files() + f = module.open(path, 'c') + f.close() + self.assertEqual(name, self.dbm.whichdb(path)) + # Now add a key + f = module.open(path, 'w') + f[b"1"] = b"1" + # and test that we can find it + self.assertIn(b"1", f) + # and read it + self.assertEqual(f[b"1"], b"1") + f.close() + self.assertEqual(name, self.dbm.whichdb(path)) @unittest.skipUnless(ndbm, reason='Test requires ndbm') def test_whichdb_ndbm(self): @@ -171,7 +184,11 @@ class WhichDBTestCase(unittest.TestCase): db_file = '{}_ndbm.db'.format(_fname) with open(db_file, 'w'): self.addCleanup(os_helper.unlink, db_file) + db_file_bytes = os.fsencode(db_file) self.assertIsNone(self.dbm.whichdb(db_file[:-3])) + self.assertIsNone(self.dbm.whichdb(os_helper.FakePath(db_file[:-3]))) + self.assertIsNone(self.dbm.whichdb(db_file_bytes[:-3])) + self.assertIsNone(self.dbm.whichdb(os_helper.FakePath(db_file_bytes[:-3]))) def tearDown(self): delete_files() diff --git a/Lib/test/test_dbm_dumb.py b/Lib/test/test_dbm_dumb.py index ddaffb4..73cff63 100644 --- a/Lib/test/test_dbm_dumb.py +++ b/Lib/test/test_dbm_dumb.py @@ -294,6 +294,15 @@ class DumbDBMTestCase(unittest.TestCase): self.assertTrue(b'key' in db) self.assertEqual(db[b'key'], b'value') + def test_open_with_pathlib_path(self): + dumbdbm.open(os_helper.FakePath(_fname), "c").close() + + def test_open_with_bytes_path(self): + dumbdbm.open(os.fsencode(_fname), "c").close() + + def test_open_with_pathlib_bytes_path(self): + dumbdbm.open(os_helper.FakePath(os.fsencode(_fname)), "c").close() + def tearDown(self): _delete_files() diff --git a/Lib/test/test_dbm_gnu.py b/Lib/test/test_dbm_gnu.py index f39b002..4eaa0f4 100644 --- a/Lib/test/test_dbm_gnu.py +++ b/Lib/test/test_dbm_gnu.py @@ -3,7 +3,7 @@ from test.support import import_helper, cpython_only gdbm = import_helper.import_module("dbm.gnu") #skip if not supported import unittest import os -from test.support.os_helper import TESTFN, TESTFN_NONASCII, unlink +from test.support.os_helper import TESTFN, TESTFN_NONASCII, unlink, FakePath filename = TESTFN @@ -169,6 +169,15 @@ class TestGdbm(unittest.TestCase): self.assertIn(nonexisting_file, str(cm.exception)) self.assertEqual(cm.exception.filename, nonexisting_file) + def test_open_with_pathlib_path(self): + gdbm.open(FakePath(filename), "c").close() + + def test_open_with_bytes_path(self): + gdbm.open(os.fsencode(filename), "c").close() + + def test_open_with_pathlib_bytes_path(self): + gdbm.open(FakePath(os.fsencode(filename)), "c").close() + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_dbm_ndbm.py b/Lib/test/test_dbm_ndbm.py index 639c833..e57d9ca 100644 --- a/Lib/test/test_dbm_ndbm.py +++ b/Lib/test/test_dbm_ndbm.py @@ -124,6 +124,15 @@ class DbmTestCase(unittest.TestCase): self.assertIn(nonexisting_file, str(cm.exception)) self.assertEqual(cm.exception.filename, nonexisting_file) + def test_open_with_pathlib_path(self): + dbm.ndbm.open(os_helper.FakePath(self.filename), "c").close() + + def test_open_with_bytes_path(self): + dbm.ndbm.open(os.fsencode(self.filename), "c").close() + + def test_open_with_pathlib_bytes_path(self): + dbm.ndbm.open(os_helper.FakePath(os.fsencode(self.filename)), "c").close() + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_shelve.py b/Lib/test/test_shelve.py index cfdd67c..03c0347 100644 --- a/Lib/test/test_shelve.py +++ b/Lib/test/test_shelve.py @@ -2,6 +2,7 @@ import unittest import shelve import glob import pickle +import os from test import support from test.support import os_helper @@ -65,29 +66,32 @@ class TestCase(unittest.TestCase): else: self.fail('Closed shelf should not find a key') - def test_ascii_file_shelf(self): - s = shelve.open(self.fn, protocol=0) + def test_open_template(self, filename=None, protocol=None): + s = shelve.open(filename=filename if filename is not None else self.fn, + protocol=protocol) try: s['key1'] = (1,2,3,4) self.assertEqual(s['key1'], (1,2,3,4)) finally: s.close() + def test_ascii_file_shelf(self): + self.test_open_template(protocol=0) + def test_binary_file_shelf(self): - s = shelve.open(self.fn, protocol=1) - try: - s['key1'] = (1,2,3,4) - self.assertEqual(s['key1'], (1,2,3,4)) - finally: - s.close() + self.test_open_template(protocol=1) def test_proto2_file_shelf(self): - s = shelve.open(self.fn, protocol=2) - try: - s['key1'] = (1,2,3,4) - self.assertEqual(s['key1'], (1,2,3,4)) - finally: - s.close() + self.test_open_template(protocol=2) + + def test_pathlib_path_file_shelf(self): + self.test_open_template(filename=os_helper.FakePath(self.fn)) + + def test_bytes_path_file_shelf(self): + self.test_open_template(filename=os.fsencode(self.fn)) + + def test_pathlib_bytes_path_file_shelf(self): + self.test_open_template(filename=os_helper.FakePath(os.fsencode(self.fn))) def test_in_memory_shelf(self): d1 = byteskeydict() diff --git a/Misc/NEWS.d/next/Library/2020-05-21-01-42-32.bpo-40563.fDn5bP.rst b/Misc/NEWS.d/next/Library/2020-05-21-01-42-32.bpo-40563.fDn5bP.rst new file mode 100644 index 0000000..f206646 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-05-21-01-42-32.bpo-40563.fDn5bP.rst @@ -0,0 +1 @@ +Support pathlike objects on dbm/shelve. Patch by Hakan Çelik and Henry-Joseph Audéoud. diff --git a/Modules/_dbmmodule.c b/Modules/_dbmmodule.c index 3fe97ef..4cbbac3 100644 --- a/Modules/_dbmmodule.c +++ b/Modules/_dbmmodule.c @@ -433,7 +433,7 @@ static PyType_Spec dbmtype_spec = { _dbm.open as dbmopen - filename: unicode + filename: object The filename to open. flags: str="r" @@ -452,7 +452,7 @@ Return a database object. static PyObject * dbmopen_impl(PyObject *module, PyObject *filename, const char *flags, int mode) -/*[clinic end generated code: output=9527750f5df90764 input=376a9d903a50df59]*/ +/*[clinic end generated code: output=9527750f5df90764 input=d8cf50a9f81218c8]*/ { int iflags; _dbm_state *state = get_dbm_state(module); @@ -479,10 +479,11 @@ dbmopen_impl(PyObject *module, PyObject *filename, const char *flags, return NULL; } - PyObject *filenamebytes = PyUnicode_EncodeFSDefault(filename); - if (filenamebytes == NULL) { + PyObject *filenamebytes; + if (!PyUnicode_FSConverter(filename, &filenamebytes)) { return NULL; } + const char *name = PyBytes_AS_STRING(filenamebytes); if (strlen(name) != (size_t)PyBytes_GET_SIZE(filenamebytes)) { Py_DECREF(filenamebytes); diff --git a/Modules/_gdbmmodule.c b/Modules/_gdbmmodule.c index 3c9a0e9..efbf331 100644 --- a/Modules/_gdbmmodule.c +++ b/Modules/_gdbmmodule.c @@ -590,7 +590,7 @@ static PyType_Spec gdbmtype_spec = { /*[clinic input] _gdbm.open as dbmopen - filename: unicode + filename: object flags: str="r" mode: int(py_default="0o666") = 0o666 / @@ -622,7 +622,7 @@ when the database has to be created. It defaults to octal 0o666. static PyObject * dbmopen_impl(PyObject *module, PyObject *filename, const char *flags, int mode) -/*[clinic end generated code: output=9527750f5df90764 input=812b7d74399ceb0e]*/ +/*[clinic end generated code: output=9527750f5df90764 input=bca6ec81dc49292c]*/ { int iflags; _gdbm_state *state = get_gdbm_state(module); @@ -672,10 +672,11 @@ dbmopen_impl(PyObject *module, PyObject *filename, const char *flags, } } - PyObject *filenamebytes = PyUnicode_EncodeFSDefault(filename); - if (filenamebytes == NULL) { + PyObject *filenamebytes; + if (!PyUnicode_FSConverter(filename, &filenamebytes)) { return NULL; } + const char *name = PyBytes_AS_STRING(filenamebytes); if (strlen(name) != (size_t)PyBytes_GET_SIZE(filenamebytes)) { Py_DECREF(filenamebytes); diff --git a/Modules/clinic/_dbmmodule.c.h b/Modules/clinic/_dbmmodule.c.h index b50db5d..f0b8220 100644 --- a/Modules/clinic/_dbmmodule.c.h +++ b/Modules/clinic/_dbmmodule.c.h @@ -149,13 +149,6 @@ dbmopen(PyObject *module, PyObject *const *args, Py_ssize_t nargs) if (!_PyArg_CheckPositional("open", nargs, 1, 3)) { goto exit; } - if (!PyUnicode_Check(args[0])) { - _PyArg_BadArgument("open", "argument 1", "str", args[0]); - goto exit; - } - if (PyUnicode_READY(args[0]) == -1) { - goto exit; - } filename = args[0]; if (nargs < 2) { goto skip_optional; @@ -186,4 +179,4 @@ skip_optional: exit: return return_value; } -/*[clinic end generated code: output=13b6d821416be228 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=32ef6c0f8f2d3db9 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_gdbmmodule.c.h b/Modules/clinic/_gdbmmodule.c.h index 15baf52..a40e80d 100644 --- a/Modules/clinic/_gdbmmodule.c.h +++ b/Modules/clinic/_gdbmmodule.c.h @@ -303,13 +303,6 @@ dbmopen(PyObject *module, PyObject *const *args, Py_ssize_t nargs) if (!_PyArg_CheckPositional("open", nargs, 1, 3)) { goto exit; } - if (!PyUnicode_Check(args[0])) { - _PyArg_BadArgument("open", "argument 1", "str", args[0]); - goto exit; - } - if (PyUnicode_READY(args[0]) == -1) { - goto exit; - } filename = args[0]; if (nargs < 2) { goto skip_optional; @@ -340,4 +333,4 @@ skip_optional: exit: return return_value; } -/*[clinic end generated code: output=1fed9ed50ad23551 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=63c507f93d84a3a4 input=a9049054013a1b77]*/ -- cgit v0.12