summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>2022-09-09 02:32:40 (GMT)
committerGitHub <noreply@github.com>2022-09-09 02:32:40 (GMT)
commitdf50938f583b6abd9f31f1ff1f5ad52d7b04ecbb (patch)
treeaa1fbb0e797563aae648fb1499b3297dcca59b50
parent95d6330a3edacacd081f55699bd6f0a787908924 (diff)
downloadcpython-df50938f583b6abd9f31f1ff1f5ad52d7b04ecbb.zip
cpython-df50938f583b6abd9f31f1ff1f5ad52d7b04ecbb.tar.gz
cpython-df50938f583b6abd9f31f1ff1f5ad52d7b04ecbb.tar.bz2
GH-46412: More efficient bool() for ndbm/_gdbmmodule (#96692)
-rw-r--r--Lib/test/test_dbm_gnu.py14
-rw-r--r--Lib/test/test_dbm_ndbm.py14
-rw-r--r--Misc/NEWS.d/next/Library/2022-09-08-20-12-48.gh-issue-46412.r_cfTh.rst1
-rw-r--r--Modules/_dbmmodule.c32
-rw-r--r--Modules/_gdbmmodule.c30
5 files changed, 91 insertions, 0 deletions
diff --git a/Lib/test/test_dbm_gnu.py b/Lib/test/test_dbm_gnu.py
index 4eaa0f4..73602ca 100644
--- a/Lib/test/test_dbm_gnu.py
+++ b/Lib/test/test_dbm_gnu.py
@@ -118,6 +118,20 @@ class TestGdbm(unittest.TestCase):
self.assertEqual(str(cm.exception),
"GDBM object has already been closed")
+ def test_bool_empty(self):
+ with gdbm.open(filename, 'c') as db:
+ self.assertFalse(bool(db))
+
+ def test_bool_not_empty(self):
+ with gdbm.open(filename, 'c') as db:
+ db['a'] = 'b'
+ self.assertTrue(bool(db))
+
+ def test_bool_on_closed_db_raises(self):
+ with gdbm.open(filename, 'c') as db:
+ db['a'] = 'b'
+ self.assertRaises(gdbm.error, bool, db)
+
def test_bytes(self):
with gdbm.open(filename, 'c') as db:
db[b'bytes key \xbd'] = b'bytes value \xbd'
diff --git a/Lib/test/test_dbm_ndbm.py b/Lib/test/test_dbm_ndbm.py
index e57d9ca..8f37e3c 100644
--- a/Lib/test/test_dbm_ndbm.py
+++ b/Lib/test/test_dbm_ndbm.py
@@ -133,6 +133,20 @@ class DbmTestCase(unittest.TestCase):
def test_open_with_pathlib_bytes_path(self):
dbm.ndbm.open(os_helper.FakePath(os.fsencode(self.filename)), "c").close()
+ def test_bool_empty(self):
+ with dbm.ndbm.open(self.filename, 'c') as db:
+ self.assertFalse(bool(db))
+
+ def test_bool_not_empty(self):
+ with dbm.ndbm.open(self.filename, 'c') as db:
+ db['a'] = 'b'
+ self.assertTrue(bool(db))
+
+ def test_bool_on_closed_db_raises(self):
+ with dbm.ndbm.open(self.filename, 'c') as db:
+ db['a'] = 'b'
+ self.assertRaises(dbm.ndbm.error, bool, db)
+
if __name__ == '__main__':
unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2022-09-08-20-12-48.gh-issue-46412.r_cfTh.rst b/Misc/NEWS.d/next/Library/2022-09-08-20-12-48.gh-issue-46412.r_cfTh.rst
new file mode 100644
index 0000000..27fcd03
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-09-08-20-12-48.gh-issue-46412.r_cfTh.rst
@@ -0,0 +1 @@
+Improve performance of ``bool(db)`` for large ndbm/gdbm databases. Previously this called ``len(db)``, which iterates over all keys, even though the answer (empty or not) is known after the first key.
diff --git a/Modules/_dbmmodule.c b/Modules/_dbmmodule.c
index 5913b03..9c83e38 100644
--- a/Modules/_dbmmodule.c
+++ b/Modules/_dbmmodule.c
@@ -130,6 +130,37 @@ dbm_length(dbmobject *dp)
return dp->di_size;
}
+static int
+dbm_bool(dbmobject *dp)
+{
+    /* Truth value of a DBM object: 1 if it holds at least one key, 0 if
+       it is empty, -1 (with dbm_error set) if it was already closed. */
+    _dbm_state *state = PyType_GetModuleState(Py_TYPE(dp));
+    assert(state != NULL);
+
+    if (dp->di_dbm == NULL) {
+        PyErr_SetString(state->dbm_error, "DBM object has already been closed");
+        return -1;
+    }
+
+    /* A non-negative di_size is a cached count; trust it. */
+    if (dp->di_size >= 0) {
+        return dp->di_size != 0;
+    }
+
+    /* Size not cached: probing for a first key is enough, no need to
+       iterate over every key the way len(db) does. */
+    datum first = dbm_firstkey(dp->di_dbm);
+    if (first.dptr != NULL) {
+        /* Non-empty, but the real count is still unknown: leave
+           di_size alone. */
+        return 1;
+    }
+    /* Empty database: remember that so later calls skip the probe. */
+    dp->di_size = 0;
+    return 0;
+}
+
static PyObject *
dbm_subscript(dbmobject *dp, PyObject *key)
{
@@ -416,6 +447,7 @@ static PyType_Slot dbmtype_spec_slots[] = {
{Py_mp_length, dbm_length},
{Py_mp_subscript, dbm_subscript},
{Py_mp_ass_subscript, dbm_ass_sub},
+ {Py_nb_bool, dbm_bool},
{0, 0}
};
diff --git a/Modules/_gdbmmodule.c b/Modules/_gdbmmodule.c
index e6440fa..a96d323 100644
--- a/Modules/_gdbmmodule.c
+++ b/Modules/_gdbmmodule.c
@@ -162,6 +162,35 @@ gdbm_length(gdbmobject *dp)
return dp->di_size;
}
+static int
+gdbm_bool(gdbmobject *dp)
+{
+    /* Truth value of a GDBM object: 1 if it holds at least one key, 0 if
+       it is empty, -1 (with gdbm_error set) if it was already closed. */
+    _gdbm_state *state = PyType_GetModuleState(Py_TYPE(dp));
+    if (dp->di_dbm == NULL) {
+        PyErr_SetString(state->gdbm_error, "GDBM object has already been closed");
+        return -1;
+    }
+    /* A non-negative di_size is a cached count; trust it. */
+    if (dp->di_size >= 0) {
+        return dp->di_size != 0;
+    }
+
+    /* Size not cached: probing for a first key is enough, no need to
+       iterate over every key the way len(db) does. */
+    datum first = gdbm_firstkey(dp->di_dbm);
+    if (first.dptr != NULL) {
+        /* Non-empty, real count still unknown: leave di_size alone and
+           release the key buffer handed back by gdbm_firstkey(). */
+        free(first.dptr);
+        return 1;
+    }
+    /* Empty database: remember that so later calls skip the probe. */
+    dp->di_size = 0;
+    return 0;
+}
+
// Wrapper function for PyArg_Parse(o, "s#", &d.dptr, &d.size).
// This function is needed to support PY_SSIZE_T_CLEAN.
// Return 1 on success, same to PyArg_Parse().
@@ -569,6 +598,7 @@ static PyType_Slot gdbmtype_spec_slots[] = {
{Py_mp_length, gdbm_length},
{Py_mp_subscript, gdbm_subscript},
{Py_mp_ass_subscript, gdbm_ass_sub},
+ {Py_nb_bool, gdbm_bool},
{Py_tp_doc, (char*)gdbm_object__doc__},
{0, 0}
};