summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVictor Stinner <vstinner@python.org>2023-08-24 13:59:12 (GMT)
committerGitHub <noreply@github.com>2023-08-24 13:59:12 (GMT)
commit67266266469fe0e817736227f39537182534c1a5 (patch)
tree1ae99ebf16335cbd67678f3911702e9819cbe039
parentc163d7f0b67a568e9b64eeb9c1cbbaa127818596 (diff)
downloadcpython-67266266469fe0e817736227f39537182534c1a5.zip
cpython-67266266469fe0e817736227f39537182534c1a5.tar.gz
cpython-67266266469fe0e817736227f39537182534c1a5.tar.bz2
gh-108314: Add PyDict_ContainsString() function (#108323)
-rw-r--r--Doc/c-api/dict.rst20
-rw-r--r--Doc/whatsnew/3.13.rst5
-rw-r--r--Include/cpython/dictobject.h1
-rw-r--r--Lib/test/test_capi/test_dict.py24
-rw-r--r--Misc/NEWS.d/next/C API/2023-08-22-18-45-20.gh-issue-108314.nOlmwq.rst4
-rw-r--r--Modules/_ctypes/_ctypes.c34
-rw-r--r--Modules/_testcapi/dict.c14
-rw-r--r--Objects/dictobject.c12
-rw-r--r--Python/pylifecycle.c9
-rw-r--r--Python/pythonrun.c23
10 files changed, 111 insertions, 35 deletions
diff --git a/Doc/c-api/dict.rst b/Doc/c-api/dict.rst
index e4c1d71..7810d05 100644
--- a/Doc/c-api/dict.rst
+++ b/Doc/c-api/dict.rst
@@ -55,6 +55,15 @@ Dictionary Objects
This is equivalent to the Python expression ``key in p``.
+.. c:function:: int PyDict_ContainsString(PyObject *p, const char *key)
+
+ This is the same as :c:func:`PyDict_Contains`, but *key* is specified as a
+ :c:expr:`const char*` UTF-8 encoded bytes string, rather than a
+ :c:expr:`PyObject*`.
+
+ .. versionadded:: 3.13
+
+
.. c:function:: PyObject* PyDict_Copy(PyObject *p)
Return a new dictionary that contains the same key-value pairs as *p*.
@@ -73,7 +82,7 @@ Dictionary Objects
.. index:: single: PyUnicode_FromString()
Insert *val* into the dictionary *p* using *key* as a key. *key* should
- be a :c:expr:`const char*`. The key object is created using
+ be a :c:expr:`const char*` UTF-8 encoded bytes string. The key object is created using
``PyUnicode_FromString(key)``. Return ``0`` on success or ``-1`` on
failure. This function *does not* steal a reference to *val*.
@@ -88,7 +97,8 @@ Dictionary Objects
.. c:function:: int PyDict_DelItemString(PyObject *p, const char *key)
- Remove the entry in dictionary *p* which has a key specified by the string *key*.
+ Remove the entry in dictionary *p* which has a key specified by the UTF-8
+ encoded bytes string *key*.
If *key* is not in the dictionary, :exc:`KeyError` is raised.
Return ``0`` on success or ``-1`` on failure.
@@ -136,7 +146,8 @@ Dictionary Objects
.. c:function:: PyObject* PyDict_GetItemString(PyObject *p, const char *key)
This is the same as :c:func:`PyDict_GetItem`, but *key* is specified as a
- :c:expr:`const char*`, rather than a :c:expr:`PyObject*`.
+ :c:expr:`const char*` UTF-8 encoded bytes string, rather than a
+ :c:expr:`PyObject*`.
.. note::
@@ -150,7 +161,8 @@ Dictionary Objects
.. c:function:: int PyDict_GetItemStringRef(PyObject *p, const char *key, PyObject **result)
Similar to :c:func:`PyDict_GetItemRef`, but *key* is specified as a
- :c:expr:`const char*`, rather than a :c:expr:`PyObject*`.
+ :c:expr:`const char*` UTF-8 encoded bytes string, rather than a
+ :c:expr:`PyObject*`.
.. versionadded:: 3.13
diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst
index 8509e18..25eb5e9 100644
--- a/Doc/whatsnew/3.13.rst
+++ b/Doc/whatsnew/3.13.rst
@@ -862,6 +862,11 @@ New Features
not needed.
(Contributed by Victor Stinner in :gh:`106004`.)
+* Added :c:func:`PyDict_ContainsString` function: same as
+ :c:func:`PyDict_Contains`, but *key* is specified as a :c:expr:`const char*`
+ UTF-8 encoded bytes string, rather than a :c:expr:`PyObject*`.
+ (Contributed by Victor Stinner in :gh:`108314`.)
+
* Add :c:func:`Py_IsFinalizing` function: check if the main Python interpreter is
:term:`shutting down <interpreter shutdown>`.
(Contributed by Victor Stinner in :gh:`108014`.)
diff --git a/Include/cpython/dictobject.h b/Include/cpython/dictobject.h
index 470f594..f448809 100644
--- a/Include/cpython/dictobject.h
+++ b/Include/cpython/dictobject.h
@@ -55,6 +55,7 @@ static inline Py_ssize_t PyDict_GET_SIZE(PyObject *op) {
}
#define PyDict_GET_SIZE(op) PyDict_GET_SIZE(_PyObject_CAST(op))
+PyAPI_FUNC(int) PyDict_ContainsString(PyObject *mp, const char *key);
PyAPI_FUNC(int) _PyDict_ContainsId(PyObject *, _Py_Identifier *);
PyAPI_FUNC(PyObject *) _PyDict_NewPresized(Py_ssize_t minused);
diff --git a/Lib/test/test_capi/test_dict.py b/Lib/test/test_capi/test_dict.py
index 9da6efd..b22fa20 100644
--- a/Lib/test/test_capi/test_dict.py
+++ b/Lib/test/test_capi/test_dict.py
@@ -8,6 +8,7 @@ import _testcapi
NULL = None
+INVALID_UTF8 = b'\xff'
class DictSubclass(dict):
def __getitem__(self, key):
@@ -137,7 +138,7 @@ class CAPITest(unittest.TestCase):
self.assertEqual(getitemstring(dct2, b'a'), 1)
self.assertIs(getitemstring(dct2, b'b'), KeyError)
- self.assertIs(getitemstring({}, b'\xff'), KeyError)
+ self.assertIs(getitemstring({}, INVALID_UTF8), KeyError)
self.assertIs(getitemstring(42, b'a'), KeyError)
self.assertIs(getitemstring([], b'a'), KeyError)
# CRASHES getitemstring({}, NULL)
@@ -173,7 +174,7 @@ class CAPITest(unittest.TestCase):
self.assertIs(getitemstring(dct2, b'b'), KeyError)
self.assertRaises(SystemError, getitemstring, 42, b'a')
- self.assertRaises(UnicodeDecodeError, getitemstring, {}, b'\xff')
+ self.assertRaises(UnicodeDecodeError, getitemstring, {}, INVALID_UTF8)
self.assertRaises(SystemError, getitemstring, [], b'a')
# CRASHES getitemstring({}, NULL)
# CRASHES getitemstring(NULL, b'a')
@@ -213,6 +214,21 @@ class CAPITest(unittest.TestCase):
# CRASHES contains(42, 'a')
# CRASHES contains(NULL, 'a')
+ def test_dict_contains_string(self):  # covers PyDict_ContainsString() via the _testcapi wrapper
+ contains_string = _testcapi.dict_containsstring
+ dct = {'a': 1, '\U0001f40d': 2}
+ self.assertTrue(contains_string(dct, b'a'))
+ self.assertFalse(contains_string(dct, b'b'))
+ self.assertTrue(contains_string(dct, '\U0001f40d'.encode()))  # non-ASCII key passed as UTF-8 bytes
+ self.assertRaises(UnicodeDecodeError, contains_string, dct, INVALID_UTF8)
+
+ dct2 = DictSubclass(dct)  # repeat the basic checks on a dict subclass
+ self.assertTrue(contains_string(dct2, b'a'))
+ self.assertFalse(contains_string(dct2, b'b'))
+
+ # CRASHES contains_string({}, NULL)
+ # CRASHES contains_string(NULL, b'a')
+
def test_dict_setitem(self):
setitem = _testcapi.dict_setitem
dct = {}
@@ -245,7 +261,7 @@ class CAPITest(unittest.TestCase):
setitemstring(dct2, b'a', 5)
self.assertEqual(dct2, {'a': 5})
- self.assertRaises(UnicodeDecodeError, setitemstring, {}, b'\xff', 5)
+ self.assertRaises(UnicodeDecodeError, setitemstring, {}, INVALID_UTF8, 5)
self.assertRaises(SystemError, setitemstring, UserDict(), b'a', 5)
self.assertRaises(SystemError, setitemstring, 42, b'a', 5)
# CRASHES setitemstring({}, NULL, 5)
@@ -287,7 +303,7 @@ class CAPITest(unittest.TestCase):
self.assertEqual(dct2, {'c': 2})
self.assertRaises(KeyError, delitemstring, dct2, b'b')
- self.assertRaises(UnicodeDecodeError, delitemstring, {}, b'\xff')
+ self.assertRaises(UnicodeDecodeError, delitemstring, {}, INVALID_UTF8)
self.assertRaises(SystemError, delitemstring, UserDict({'a': 1}), b'a')
self.assertRaises(SystemError, delitemstring, 42, b'a')
# CRASHES delitemstring({}, NULL)
diff --git a/Misc/NEWS.d/next/C API/2023-08-22-18-45-20.gh-issue-108314.nOlmwq.rst b/Misc/NEWS.d/next/C API/2023-08-22-18-45-20.gh-issue-108314.nOlmwq.rst
new file mode 100644
index 0000000..90ae50a
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2023-08-22-18-45-20.gh-issue-108314.nOlmwq.rst
@@ -0,0 +1,4 @@
+Add :c:func:`PyDict_ContainsString` function: same as
+:c:func:`PyDict_Contains`, but *key* is specified as a :c:expr:`const char*`
+UTF-8 encoded bytes string, rather than a :c:expr:`PyObject*`.
+Patch by Victor Stinner.
diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c
index dc80291..ed9efca 100644
--- a/Modules/_ctypes/_ctypes.c
+++ b/Modules/_ctypes/_ctypes.c
@@ -5139,8 +5139,6 @@ static PyObject *
Pointer_get_contents(CDataObject *self, void *closure)
{
StgDictObject *stgdict;
- PyObject *keep, *ptr_probe;
- CDataObject *ptr2ptr;
if (*(void **)self->b_ptr == NULL) {
PyErr_SetString(PyExc_ValueError,
@@ -5151,29 +5149,37 @@ Pointer_get_contents(CDataObject *self, void *closure)
stgdict = PyObject_stgdict((PyObject *)self);
assert(stgdict); /* Cannot be NULL for pointer instances */
- keep = GetKeepedObjects(self);
+ PyObject *keep = GetKeepedObjects(self);
if (keep != NULL) {
// check if it's a pointer to a pointer:
// pointers will have '0' key in the _objects
- ptr_probe = PyDict_GetItemString(keep, "0");
-
- if (ptr_probe != NULL) {
- ptr2ptr = (CDataObject*) PyDict_GetItemString(keep, "1");
- if (ptr2ptr == NULL) {
+ int ptr_probe = PyDict_ContainsString(keep, "0");
+ if (ptr_probe < 0) {
+ return NULL;
+ }
+ if (ptr_probe) {
+ PyObject *item;
+ if (PyDict_GetItemStringRef(keep, "1", &item) < 0) {
+ return NULL;
+ }
+ if (item == NULL) {
PyErr_SetString(PyExc_ValueError,
- "Unexpected NULL pointer in _objects");
+ "Unexpected NULL pointer in _objects");
return NULL;
}
- // don't construct a new object,
- // return existing one instead to preserve refcount
+#ifndef NDEBUG
+ CDataObject *ptr2ptr = (CDataObject *)item;
+ // Don't construct a new object,
+ // return existing one instead to preserve refcount.
+ // Double-check that we are returning the same thing.
assert(
*(void**) self->b_ptr == ptr2ptr->b_ptr ||
*(void**) self->b_value.c == ptr2ptr->b_ptr ||
*(void**) self->b_ptr == ptr2ptr->b_value.c ||
*(void**) self->b_value.c == ptr2ptr->b_value.c
- ); // double-check that we are returning the same thing
- Py_INCREF(ptr2ptr);
- return (PyObject *) ptr2ptr;
+ );
+#endif
+ return item;
}
}
diff --git a/Modules/_testcapi/dict.c b/Modules/_testcapi/dict.c
index b1dfcf4..6c3f9cd 100644
--- a/Modules/_testcapi/dict.c
+++ b/Modules/_testcapi/dict.c
@@ -75,6 +75,19 @@ dict_contains(PyObject *self, PyObject *args)
}
static PyObject *
+dict_containsstring(PyObject *self, PyObject *args)  /* _testcapi wrapper: calls PyDict_ContainsString(obj, key) */
+{
+ PyObject *obj;
+ const char *key;
+ Py_ssize_t size;  /* filled in by the "z#" format unit; not used afterwards */
+ if (!PyArg_ParseTuple(args, "Oz#", &obj, &key, &size)) {
+ return NULL;
+ }
+ NULLABLE(obj);  /* NOTE(review): presumably maps None to NULL (the tests define NULL = None) -- confirm against the macro */
+ RETURN_INT(PyDict_ContainsString(obj, key));  /* NOTE(review): RETURN_INT is defined outside this view; presumably turns the int result or a pending error into the Python return value -- confirm */
+}
+
+static PyObject *
dict_size(PyObject *self, PyObject *obj)
{
NULLABLE(obj);
@@ -349,6 +362,7 @@ static PyMethodDef test_methods[] = {
{"dict_getitemref", dict_getitemref, METH_VARARGS},
{"dict_getitemstringref", dict_getitemstringref, METH_VARARGS},
{"dict_contains", dict_contains, METH_VARARGS},
+ {"dict_containsstring", dict_containsstring, METH_VARARGS},
{"dict_setitem", dict_setitem, METH_VARARGS},
{"dict_setitemstring", dict_setitemstring, METH_VARARGS},
{"dict_delitem", dict_delitem, METH_VARARGS},
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index f9701f6..10205e3 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -3741,6 +3741,18 @@ PyDict_Contains(PyObject *op, PyObject *key)
return (ix != DKIX_EMPTY && value != NULL);
}
+int
+PyDict_ContainsString(PyObject *op, const char *key)  /* Like PyDict_Contains(), but the key is a UTF-8 encoded C string. */
+{
+ PyObject *key_obj = PyUnicode_FromString(key);  /* temporary str key built from the C string */
+ if (key_obj == NULL) {
+ return -1;  /* key could not be converted (e.g. invalid UTF-8) */
+ }
+ int res = PyDict_Contains(op, key_obj);  /* result is passed through unchanged */
+ Py_DECREF(key_obj);  /* release the temporary key before returning */
+ return res;
+}
+
/* Internal version of PyDict_Contains used when the hash value is already known */
int
_PyDict_Contains_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 1861426..7d362af 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -2220,10 +2220,11 @@ add_main_module(PyInterpreterState *interp)
}
Py_DECREF(ann_dict);
- if (_PyDict_GetItemStringWithError(d, "__builtins__") == NULL) {
- if (PyErr_Occurred()) {
- return _PyStatus_ERR("Failed to test __main__.__builtins__");
- }
+ int has_builtins = PyDict_ContainsString(d, "__builtins__");
+ if (has_builtins < 0) {
+ return _PyStatus_ERR("Failed to test __main__.__builtins__");
+ }
+ if (!has_builtins) {
PyObject *bimod = PyImport_ImportModule("builtins");
if (bimod == NULL) {
return _PyStatus_ERR("Failed to retrieve builtins module");
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index 05b7dfa..0e118b0 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -413,10 +413,11 @@ _PyRun_SimpleFileObject(FILE *fp, PyObject *filename, int closeit,
PyObject *dict = PyModule_GetDict(main_module); // borrowed ref
int set_file_name = 0;
- if (_PyDict_GetItemStringWithError(dict, "__file__") == NULL) {
- if (PyErr_Occurred()) {
- goto done;
- }
+ int has_file = PyDict_ContainsString(dict, "__file__");
+ if (has_file < 0) {
+ goto done;
+ }
+ if (!has_file) {
if (PyDict_SetItemString(dict, "__file__", filename) < 0) {
goto done;
}
@@ -1713,13 +1714,17 @@ run_eval_code_obj(PyThreadState *tstate, PyCodeObject *co, PyObject *globals, Py
_PyRuntime.signals.unhandled_keyboard_interrupt = 0;
/* Set globals['__builtins__'] if it doesn't exist */
- if (globals != NULL && _PyDict_GetItemStringWithError(globals, "__builtins__") == NULL) {
- if (PyErr_Occurred() ||
- PyDict_SetItemString(globals, "__builtins__",
- tstate->interp->builtins) < 0)
- {
+ if (globals != NULL) {
+ int has_builtins = PyDict_ContainsString(globals, "__builtins__");
+ if (has_builtins < 0) {
return NULL;
}
+ if (!has_builtins) {
+ if (PyDict_SetItemString(globals, "__builtins__",
+ tstate->interp->builtins) < 0) {
+ return NULL;
+ }
+ }
}
v = PyEval_EvalCode((PyObject*)co, globals, locals);