From df50938f583b6abd9f31f1ff1f5ad52d7b04ecbb Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 8 Sep 2022 19:32:40 -0700 Subject: GH-46412: More efficient bool() for ndbm/_gdbmmodule (#96692) --- Lib/test/test_dbm_gnu.py | 14 ++++++++++ Lib/test/test_dbm_ndbm.py | 14 ++++++++++ .../2022-09-08-20-12-48.gh-issue-46412.r_cfTh.rst | 1 + Modules/_dbmmodule.c | 32 ++++++++++++++++++++++ Modules/_gdbmmodule.c | 30 ++++++++++++++++++++ 5 files changed, 91 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2022-09-08-20-12-48.gh-issue-46412.r_cfTh.rst diff --git a/Lib/test/test_dbm_gnu.py b/Lib/test/test_dbm_gnu.py index 4eaa0f4..73602ca 100644 --- a/Lib/test/test_dbm_gnu.py +++ b/Lib/test/test_dbm_gnu.py @@ -118,6 +118,20 @@ class TestGdbm(unittest.TestCase): self.assertEqual(str(cm.exception), "GDBM object has already been closed") + def test_bool_empty(self): + with gdbm.open(filename, 'c') as db: + self.assertFalse(bool(db)) + + def test_bool_not_empty(self): + with gdbm.open(filename, 'c') as db: + db['a'] = 'b' + self.assertTrue(bool(db)) + + def test_bool_on_closed_db_raises(self): + with gdbm.open(filename, 'c') as db: + db['a'] = 'b' + self.assertRaises(gdbm.error, bool, db) + def test_bytes(self): with gdbm.open(filename, 'c') as db: db[b'bytes key \xbd'] = b'bytes value \xbd' diff --git a/Lib/test/test_dbm_ndbm.py b/Lib/test/test_dbm_ndbm.py index e57d9ca..8f37e3c 100644 --- a/Lib/test/test_dbm_ndbm.py +++ b/Lib/test/test_dbm_ndbm.py @@ -133,6 +133,20 @@ class DbmTestCase(unittest.TestCase): def test_open_with_pathlib_bytes_path(self): dbm.ndbm.open(os_helper.FakePath(os.fsencode(self.filename)), "c").close() + def test_bool_empty(self): + with dbm.ndbm.open(self.filename, 'c') as db: + self.assertFalse(bool(db)) + + def test_bool_not_empty(self): + with dbm.ndbm.open(self.filename, 'c') as db: + db['a'] = 'b' + self.assertTrue(bool(db)) + + def test_bool_on_closed_db_raises(self): + with dbm.ndbm.open(self.filename, 'c') as db: + db['a'] = 'b' + 
self.assertRaises(dbm.ndbm.error, bool, db) + if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Library/2022-09-08-20-12-48.gh-issue-46412.r_cfTh.rst b/Misc/NEWS.d/next/Library/2022-09-08-20-12-48.gh-issue-46412.r_cfTh.rst new file mode 100644 index 0000000..27fcd03 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-09-08-20-12-48.gh-issue-46412.r_cfTh.rst @@ -0,0 +1 @@ +Improve performance of ``bool(db)`` for large ndbm/gdbm databases. Previously this would call ``len(db)`` which would iterate over all keys -- the answer (empty or not) is known after the first key. diff --git a/Modules/_dbmmodule.c b/Modules/_dbmmodule.c index 5913b03..9c83e38 100644 --- a/Modules/_dbmmodule.c +++ b/Modules/_dbmmodule.c @@ -130,6 +130,37 @@ dbm_length(dbmobject *dp) return dp->di_size; } +static int +dbm_bool(dbmobject *dp) +{ + _dbm_state *state = PyType_GetModuleState(Py_TYPE(dp)); + assert(state != NULL); + + if (dp->di_dbm == NULL) { + PyErr_SetString(state->dbm_error, "DBM object has already been closed"); + return -1; + } + + if (dp->di_size > 0) { + /* Known non-zero size. */ + return 1; + } + if (dp->di_size == 0) { + /* Known zero size. */ + return 0; + } + + /* Unknown size. Ensure DBM object has an entry. */ + datum key = dbm_firstkey(dp->di_dbm); + if (key.dptr == NULL) { + /* Empty. Cache this fact. */ + dp->di_size = 0; + return 0; + } + /* Non-empty. Don't cache the length since we don't know. 
*/ + return 1; +} + static PyObject * dbm_subscript(dbmobject *dp, PyObject *key) { @@ -416,6 +447,7 @@ static PyType_Slot dbmtype_spec_slots[] = { {Py_mp_length, dbm_length}, {Py_mp_subscript, dbm_subscript}, {Py_mp_ass_subscript, dbm_ass_sub}, + {Py_nb_bool, dbm_bool}, {0, 0} }; diff --git a/Modules/_gdbmmodule.c b/Modules/_gdbmmodule.c index e6440fa..a96d323 100644 --- a/Modules/_gdbmmodule.c +++ b/Modules/_gdbmmodule.c @@ -162,6 +162,35 @@ gdbm_length(gdbmobject *dp) return dp->di_size; } +static int +gdbm_bool(gdbmobject *dp) +{ + _gdbm_state *state = PyType_GetModuleState(Py_TYPE(dp)); + if (dp->di_dbm == NULL) { + PyErr_SetString(state->gdbm_error, "GDBM object has already been closed"); + return -1; + } + if (dp->di_size > 0) { + /* Known non-zero size. */ + return 1; + } + if (dp->di_size == 0) { + /* Known zero size. */ + return 0; + } + /* Unknown size. Ensure DBM object has an entry. */ + datum key = gdbm_firstkey(dp->di_dbm); + if (key.dptr == NULL) { + /* Empty. Cache this fact. */ + dp->di_size = 0; + return 0; + } + + /* Non-empty. Don't cache the length since we don't know. */ + free(key.dptr); + return 1; +} + // Wrapper function for PyArg_Parse(o, "s#", &d.dptr, &d.size). // This function is needed to support PY_SSIZE_T_CLEAN. // Return 1 on success, same to PyArg_Parse(). @@ -569,6 +598,7 @@ static PyType_Slot gdbmtype_spec_slots[] = { {Py_mp_length, gdbm_length}, {Py_mp_subscript, gdbm_subscript}, {Py_mp_ass_subscript, gdbm_ass_sub}, + {Py_nb_bool, gdbm_bool}, {Py_tp_doc, (char*)gdbm_object__doc__}, {0, 0} }; -- cgit v0.12