summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVictor Stinner <vstinner@python.org>2020-06-19 09:45:31 (GMT)
committerGitHub <noreply@github.com>2020-06-19 09:45:31 (GMT)
commit37bb2895561d3e63a631f10875567b4e33b30c07 (patch)
tree25afc8f4882caeaef5f14e09499a5e949f218b0c
parent01ece63d42b830df106948db0aefa6c1ba24416a (diff)
downloadcpython-37bb2895561d3e63a631f10875567b4e33b30c07.zip
cpython-37bb2895561d3e63a631f10875567b4e33b30c07.tar.gz
cpython-37bb2895561d3e63a631f10875567b4e33b30c07.tar.bz2
bpo-40943: PY_SSIZE_T_CLEAN required for '#' formats (GH-20784)
The PY_SSIZE_T_CLEAN macro must now be defined to use PyArg_ParseTuple() and Py_BuildValue() "#" formats: "es#", "et#", "s#", "u#", "y#", "z#", "U#" and "Z#". See the PEP 353. Update _testcapi.test_buildvalue_issue38913().
-rw-r--r--Doc/c-api/arg.rst36
-rw-r--r--Doc/whatsnew/3.10.rst7
-rw-r--r--Misc/NEWS.d/next/C API/2020-06-10-18-37-26.bpo-40943.i4q7rK.rst5
-rw-r--r--Modules/_testcapimodule.c19
-rw-r--r--Python/getargs.c89
-rw-r--r--Python/modsupport.c33
6 files changed, 87 insertions, 102 deletions
diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst
index b7baad5..26e872c 100644
--- a/Doc/c-api/arg.rst
+++ b/Doc/c-api/arg.rst
@@ -55,13 +55,11 @@ which disallows mutable objects such as :class:`bytearray`.
.. note::
- For all ``#`` variants of formats (``s#``, ``y#``, etc.), the type of
- the length argument (int or :c:type:`Py_ssize_t`) is controlled by
- defining the macro :c:macro:`PY_SSIZE_T_CLEAN` before including
- :file:`Python.h`. If the macro was defined, length is a
- :c:type:`Py_ssize_t` rather than an :c:type:`int`. This behavior will change
- in a future Python version to only support :c:type:`Py_ssize_t` and
- drop :c:type:`int` support. It is best to always define :c:macro:`PY_SSIZE_T_CLEAN`.
+ For all ``#`` variants of formats (``s#``, ``y#``, etc.), the macro
+ :c:macro:`PY_SSIZE_T_CLEAN` must be defined before including
+ :file:`Python.h`. On Python 3.9 and older, the type of the length argument
+ is :c:type:`Py_ssize_t` if the :c:macro:`PY_SSIZE_T_CLEAN` macro is defined,
+ or int otherwise.
``s`` (:class:`str`) [const char \*]
@@ -90,7 +88,7 @@ which disallows mutable objects such as :class:`bytearray`.
In this case the resulting C string may contain embedded NUL bytes.
Unicode objects are converted to C strings using ``'utf-8'`` encoding.
-``s#`` (:class:`str`, read-only :term:`bytes-like object`) [const char \*, int or :c:type:`Py_ssize_t`]
+``s#`` (:class:`str`, read-only :term:`bytes-like object`) [const char \*, :c:type:`Py_ssize_t`]
Like ``s*``, except that it doesn't accept mutable objects.
The result is stored into two C variables,
the first one a pointer to a C string, the second one its length.
@@ -105,7 +103,7 @@ which disallows mutable objects such as :class:`bytearray`.
Like ``s*``, but the Python object may also be ``None``, in which case the
``buf`` member of the :c:type:`Py_buffer` structure is set to ``NULL``.
-``z#`` (:class:`str`, read-only :term:`bytes-like object` or ``None``) [const char \*, int or :c:type:`Py_ssize_t`]
+``z#`` (:class:`str`, read-only :term:`bytes-like object` or ``None``) [const char \*, :c:type:`Py_ssize_t`]
Like ``s#``, but the Python object may also be ``None``, in which case the C
pointer is set to ``NULL``.
@@ -124,7 +122,7 @@ which disallows mutable objects such as :class:`bytearray`.
bytes-like objects. **This is the recommended way to accept
binary data.**
-``y#`` (read-only :term:`bytes-like object`) [const char \*, int or :c:type:`Py_ssize_t`]
+``y#`` (read-only :term:`bytes-like object`) [const char \*, :c:type:`Py_ssize_t`]
This variant on ``s#`` doesn't accept Unicode objects, only bytes-like
objects.
@@ -155,7 +153,7 @@ which disallows mutable objects such as :class:`bytearray`.
Part of the old-style :c:type:`Py_UNICODE` API; please migrate to using
:c:func:`PyUnicode_AsWideCharString`.
-``u#`` (:class:`str`) [const Py_UNICODE \*, int or :c:type:`Py_ssize_t`]
+``u#`` (:class:`str`) [const Py_UNICODE \*, :c:type:`Py_ssize_t`]
This variant on ``u`` stores into two C variables, the first one a pointer to a
Unicode data buffer, the second one its length. This variant allows
null code points.
@@ -172,7 +170,7 @@ which disallows mutable objects such as :class:`bytearray`.
Part of the old-style :c:type:`Py_UNICODE` API; please migrate to using
:c:func:`PyUnicode_AsWideCharString`.
-``Z#`` (:class:`str` or ``None``) [const Py_UNICODE \*, int or :c:type:`Py_ssize_t`]
+``Z#`` (:class:`str` or ``None``) [const Py_UNICODE \*, :c:type:`Py_ssize_t`]
Like ``u#``, but the Python object may also be ``None``, in which case the
:c:type:`Py_UNICODE` pointer is set to ``NULL``.
@@ -213,7 +211,7 @@ which disallows mutable objects such as :class:`bytearray`.
recoding them. Instead, the implementation assumes that the byte string object uses
the encoding passed in as parameter.
-``es#`` (:class:`str`) [const char \*encoding, char \*\*buffer, int or :c:type:`Py_ssize_t` \*buffer_length]
+``es#`` (:class:`str`) [const char \*encoding, char \*\*buffer, :c:type:`Py_ssize_t` \*buffer_length]
This variant on ``s#`` is used for encoding Unicode into a character buffer.
Unlike the ``es`` format, this variant allows input data which contains NUL
characters.
@@ -244,7 +242,7 @@ which disallows mutable objects such as :class:`bytearray`.
In both cases, *\*buffer_length* is set to the length of the encoded data
without the trailing NUL byte.
-``et#`` (:class:`str`, :class:`bytes` or :class:`bytearray`) [const char \*encoding, char \*\*buffer, int or :c:type:`Py_ssize_t` \*buffer_length]
+``et#`` (:class:`str`, :class:`bytes` or :class:`bytearray`) [const char \*encoding, char \*\*buffer, :c:type:`Py_ssize_t` \*buffer_length]
Same as ``es#`` except that byte string objects are passed through without recoding
them. Instead, the implementation assumes that the byte string object uses the
encoding passed in as parameter.
@@ -549,7 +547,7 @@ Building values
Convert a null-terminated C string to a Python :class:`str` object using ``'utf-8'``
encoding. If the C string pointer is ``NULL``, ``None`` is used.
- ``s#`` (:class:`str` or ``None``) [const char \*, int or :c:type:`Py_ssize_t`]
+ ``s#`` (:class:`str` or ``None``) [const char \*, :c:type:`Py_ssize_t`]
Convert a C string and its length to a Python :class:`str` object using ``'utf-8'``
encoding. If the C string pointer is ``NULL``, the length is ignored and
``None`` is returned.
@@ -558,14 +556,14 @@ Building values
This converts a C string to a Python :class:`bytes` object. If the C
string pointer is ``NULL``, ``None`` is returned.
- ``y#`` (:class:`bytes`) [const char \*, int or :c:type:`Py_ssize_t`]
+ ``y#`` (:class:`bytes`) [const char \*, :c:type:`Py_ssize_t`]
This converts a C string and its lengths to a Python object. If the C
string pointer is ``NULL``, ``None`` is returned.
``z`` (:class:`str` or ``None``) [const char \*]
Same as ``s``.
- ``z#`` (:class:`str` or ``None``) [const char \*, int or :c:type:`Py_ssize_t`]
+ ``z#`` (:class:`str` or ``None``) [const char \*, :c:type:`Py_ssize_t`]
Same as ``s#``.
``u`` (:class:`str`) [const wchar_t \*]
@@ -573,7 +571,7 @@ Building values
data to a Python Unicode object. If the Unicode buffer pointer is ``NULL``,
``None`` is returned.
- ``u#`` (:class:`str`) [const wchar_t \*, int or :c:type:`Py_ssize_t`]
+ ``u#`` (:class:`str`) [const wchar_t \*, :c:type:`Py_ssize_t`]
Convert a Unicode (UTF-16 or UCS-4) data buffer and its length to a Python
Unicode object. If the Unicode buffer pointer is ``NULL``, the length is ignored
and ``None`` is returned.
@@ -581,7 +579,7 @@ Building values
``U`` (:class:`str` or ``None``) [const char \*]
Same as ``s``.
- ``U#`` (:class:`str` or ``None``) [const char \*, int or :c:type:`Py_ssize_t`]
+ ``U#`` (:class:`str` or ``None``) [const char \*, :c:type:`Py_ssize_t`]
Same as ``s#``.
``i`` (:class:`int`) [int]
diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst
index 566827b..9c1dca1 100644
--- a/Doc/whatsnew/3.10.rst
+++ b/Doc/whatsnew/3.10.rst
@@ -155,6 +155,13 @@ New Features
Porting to Python 3.10
----------------------
+* The ``PY_SSIZE_T_CLEAN`` macro must now be defined to use
+ :c:func:`PyArg_ParseTuple` and :c:func:`Py_BuildValue` formats which use
+ ``#``: ``es#``, ``et#``, ``s#``, ``u#``, ``y#``, ``z#``, ``U#`` and ``Z#``.
+ See :ref:`Parsing arguments and building values
+ <arg-parsing>` and the :pep:`353`.
+ (Contributed by Victor Stinner in :issue:`40943`.)
+
* Since :c:func:`Py_TYPE()` is changed to the inline static function,
``Py_TYPE(obj) = new_type`` must be replaced with ``Py_SET_TYPE(obj, new_type)``:
see :c:func:`Py_SET_TYPE()` (available since Python 3.9). For backward
diff --git a/Misc/NEWS.d/next/C API/2020-06-10-18-37-26.bpo-40943.i4q7rK.rst b/Misc/NEWS.d/next/C API/2020-06-10-18-37-26.bpo-40943.i4q7rK.rst
new file mode 100644
index 0000000..360ddae
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2020-06-10-18-37-26.bpo-40943.i4q7rK.rst
@@ -0,0 +1,5 @@
+The ``PY_SSIZE_T_CLEAN`` macro must now be defined to use
+:c:func:`PyArg_ParseTuple` and :c:func:`Py_BuildValue` formats which use ``#``:
+``es#``, ``et#``, ``s#``, ``u#``, ``y#``, ``z#``, ``U#`` and ``Z#``.
+See :ref:`Parsing arguments and building values <arg-parsing>` and the
+:pep:`353`.
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index 5302641..808483e 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -6868,29 +6868,36 @@ test_buildvalue_issue38913(PyObject *self, PyObject *Py_UNUSED(ignored))
PyObject *res;
const char str[] = "string";
const Py_UNICODE unicode[] = L"unicode";
- PyErr_SetNone(PyExc_ZeroDivisionError);
+ assert(!PyErr_Occurred());
res = Py_BuildValue("(s#O)", str, 1, Py_None);
assert(res == NULL);
- if (!PyErr_ExceptionMatches(PyExc_ZeroDivisionError)) {
+ if (!PyErr_ExceptionMatches(PyExc_SystemError)) {
return NULL;
}
+ PyErr_Clear();
+
res = Py_BuildValue("(z#O)", str, 1, Py_None);
assert(res == NULL);
- if (!PyErr_ExceptionMatches(PyExc_ZeroDivisionError)) {
+ if (!PyErr_ExceptionMatches(PyExc_SystemError)) {
return NULL;
}
+ PyErr_Clear();
+
res = Py_BuildValue("(y#O)", str, 1, Py_None);
assert(res == NULL);
- if (!PyErr_ExceptionMatches(PyExc_ZeroDivisionError)) {
+ if (!PyErr_ExceptionMatches(PyExc_SystemError)) {
return NULL;
}
+ PyErr_Clear();
+
res = Py_BuildValue("(u#O)", unicode, 1, Py_None);
assert(res == NULL);
- if (!PyErr_ExceptionMatches(PyExc_ZeroDivisionError)) {
+ if (!PyErr_ExceptionMatches(PyExc_SystemError)) {
return NULL;
}
-
PyErr_Clear();
+
+
Py_RETURN_NONE;
}
diff --git a/Python/getargs.c b/Python/getargs.c
index cf0cc07..aaf687a 100644
--- a/Python/getargs.c
+++ b/Python/getargs.c
@@ -656,27 +656,12 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
char *msgbuf, size_t bufsize, freelist_t *freelist)
{
/* For # codes */
-#define FETCH_SIZE int *q=NULL;Py_ssize_t *q2=NULL;\
- if (flags & FLAG_SIZE_T) q2=va_arg(*p_va, Py_ssize_t*); \
- else { \
- if (PyErr_WarnEx(PyExc_DeprecationWarning, \
- "PY_SSIZE_T_CLEAN will be required for '#' formats", 1)) { \
- return NULL; \
- } \
- q=va_arg(*p_va, int*); \
- }
-#define STORE_SIZE(s) \
- if (flags & FLAG_SIZE_T) \
- *q2=s; \
- else { \
- if (INT_MAX < s) { \
- PyErr_SetString(PyExc_OverflowError, \
- "size does not fit in an int"); \
- return converterr("", arg, msgbuf, bufsize); \
- } \
- *q = (int)s; \
- }
-#define BUFFER_LEN ((flags & FLAG_SIZE_T) ? *q2:*q)
+#define REQUIRE_PY_SSIZE_T_CLEAN \
+ if (!(flags & FLAG_SIZE_T)) { \
+ PyErr_SetString(PyExc_SystemError, \
+ "PY_SSIZE_T_CLEAN macro must be defined for '#' formats"); \
+ return NULL; \
+ }
#define RETURN_ERR_OCCURRED return msgbuf
const char *format = *p_format;
@@ -931,8 +916,9 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
if (count < 0)
return converterr(buf, arg, msgbuf, bufsize);
if (*format == '#') {
- FETCH_SIZE;
- STORE_SIZE(count);
+ REQUIRE_PY_SSIZE_T_CLEAN;
+ Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*);
+ *psize = count;
format++;
} else {
if (strlen(*p) != (size_t)count) {
@@ -974,11 +960,12 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
} else if (*format == '#') { /* a string or read-only bytes-like object */
/* "s#" or "z#" */
const void **p = (const void **)va_arg(*p_va, const char **);
- FETCH_SIZE;
+ REQUIRE_PY_SSIZE_T_CLEAN;
+ Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*);
if (c == 'z' && arg == Py_None) {
*p = NULL;
- STORE_SIZE(0);
+ *psize = 0;
}
else if (PyUnicode_Check(arg)) {
Py_ssize_t len;
@@ -987,7 +974,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
return converterr(CONV_UNICODE,
arg, msgbuf, bufsize);
*p = sarg;
- STORE_SIZE(len);
+ *psize = len;
}
else { /* read-only bytes-like object */
/* XXX Really? */
@@ -995,7 +982,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
Py_ssize_t count = convertbuffer(arg, p, &buf);
if (count < 0)
return converterr(buf, arg, msgbuf, bufsize);
- STORE_SIZE(count);
+ *psize = count;
}
format++;
} else {
@@ -1034,18 +1021,19 @@ _Py_COMP_DIAG_IGNORE_DEPR_DECLS
if (*format == '#') {
/* "u#" or "Z#" */
- FETCH_SIZE;
+ REQUIRE_PY_SSIZE_T_CLEAN;
+ Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*);
if (c == 'Z' && arg == Py_None) {
*p = NULL;
- STORE_SIZE(0);
+ *psize = 0;
}
else if (PyUnicode_Check(arg)) {
Py_ssize_t len;
*p = PyUnicode_AsUnicodeAndSize(arg, &len);
if (*p == NULL)
RETURN_ERR_OCCURRED;
- STORE_SIZE(len);
+ *psize = len;
}
else
return converterr(c == 'Z' ? "str or None" : "str",
@@ -1160,22 +1148,11 @@ _Py_COMP_DIAG_POP
trailing 0-byte
*/
- int *q = NULL; Py_ssize_t *q2 = NULL;
- if (flags & FLAG_SIZE_T) {
- q2 = va_arg(*p_va, Py_ssize_t*);
- }
- else {
- if (PyErr_WarnEx(PyExc_DeprecationWarning,
- "PY_SSIZE_T_CLEAN will be required for '#' formats", 1))
- {
- Py_DECREF(s);
- return NULL;
- }
- q = va_arg(*p_va, int*);
- }
+ REQUIRE_PY_SSIZE_T_CLEAN;
+ Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*);
format++;
- if (q == NULL && q2 == NULL) {
+ if (psize == NULL) {
Py_DECREF(s);
return converterr(
"(buffer_len is NULL)",
@@ -1195,30 +1172,20 @@ _Py_COMP_DIAG_POP
arg, msgbuf, bufsize);
}
} else {
- if (size + 1 > BUFFER_LEN) {
+ if (size + 1 > *psize) {
Py_DECREF(s);
PyErr_Format(PyExc_ValueError,
"encoded string too long "
"(%zd, maximum length %zd)",
- (Py_ssize_t)size, (Py_ssize_t)(BUFFER_LEN-1));
+ (Py_ssize_t)size, (Py_ssize_t)(*psize - 1));
RETURN_ERR_OCCURRED;
}
}
memcpy(*buffer, ptr, size+1);
- if (flags & FLAG_SIZE_T) {
- *q2 = size;
- }
- else {
- if (INT_MAX < size) {
- Py_DECREF(s);
- PyErr_SetString(PyExc_OverflowError,
- "size does not fit in an int");
- return converterr("", arg, msgbuf, bufsize);
- }
- *q = (int)size;
- }
- } else {
+ *psize = size;
+ }
+ else {
/* Using a 0-terminated buffer:
- the encoded string has to be 0-terminated
@@ -1356,9 +1323,7 @@ _Py_COMP_DIAG_POP
*p_format = format;
return NULL;
-#undef FETCH_SIZE
-#undef STORE_SIZE
-#undef BUFFER_LEN
+#undef REQUIRE_PY_SSIZE_T_CLEAN
#undef RETURN_ERR_OCCURRED
}
diff --git a/Python/modsupport.c b/Python/modsupport.c
index 845bdcb..2637039 100644
--- a/Python/modsupport.c
+++ b/Python/modsupport.c
@@ -283,6 +283,13 @@ do_mktuple(const char **p_format, va_list *p_va, char endchar, Py_ssize_t n, int
static PyObject *
do_mkvalue(const char **p_format, va_list *p_va, int flags)
{
+#define ERROR_NEED_PY_SSIZE_T_CLEAN \
+ { \
+ PyErr_SetString(PyExc_SystemError, \
+ "PY_SSIZE_T_CLEAN macro must be defined for '#' formats"); \
+ return NULL; \
+ }
+
for (;;) {
switch (*(*p_format)++) {
case '(':
@@ -341,14 +348,12 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
Py_ssize_t n;
if (**p_format == '#') {
++*p_format;
- if (flags & FLAG_SIZE_T)
+ if (flags & FLAG_SIZE_T) {
n = va_arg(*p_va, Py_ssize_t);
+ }
else {
n = va_arg(*p_va, int);
- if (PyErr_WarnEx(PyExc_DeprecationWarning,
- "PY_SSIZE_T_CLEAN will be required for '#' formats", 1)) {
- return NULL;
- }
+ ERROR_NEED_PY_SSIZE_T_CLEAN;
}
}
else
@@ -394,14 +399,12 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
Py_ssize_t n;
if (**p_format == '#') {
++*p_format;
- if (flags & FLAG_SIZE_T)
+ if (flags & FLAG_SIZE_T) {
n = va_arg(*p_va, Py_ssize_t);
+ }
else {
n = va_arg(*p_va, int);
- if (PyErr_WarnEx(PyExc_DeprecationWarning,
- "PY_SSIZE_T_CLEAN will be required for '#' formats", 1)) {
- return NULL;
- }
+ ERROR_NEED_PY_SSIZE_T_CLEAN;
}
}
else
@@ -432,14 +435,12 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
Py_ssize_t n;
if (**p_format == '#') {
++*p_format;
- if (flags & FLAG_SIZE_T)
+ if (flags & FLAG_SIZE_T) {
n = va_arg(*p_va, Py_ssize_t);
+ }
else {
n = va_arg(*p_va, int);
- if (PyErr_WarnEx(PyExc_DeprecationWarning,
- "PY_SSIZE_T_CLEAN will be required for '#' formats", 1)) {
- return NULL;
- }
+ ERROR_NEED_PY_SSIZE_T_CLEAN;
}
}
else
@@ -507,6 +508,8 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
}
}
+
+#undef ERROR_NEED_PY_SSIZE_T_CLEAN
}