From 8ed705c083e8e5ff37649d998a8b1524ec921519 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 1 Jun 2023 08:56:35 +0200 Subject: gh-105156: Deprecate the old Py_UNICODE type in C API (#105157) Deprecate the old Py_UNICODE and PY_UNICODE_TYPE types in the C API: use wchar_t instead. Replace Py_UNICODE with wchar_t in multiple C files. Co-authored-by: Inada Naoki --- Doc/c-api/unicode.rst | 2 ++ Doc/whatsnew/3.13.rst | 5 +++++ Include/cpython/unicodeobject.h | 4 ++-- .../next/C API/2023-05-31-18-37-57.gh-issue-105156.R4El5V.rst | 4 ++++ Modules/_io/fileio.c | 2 +- Modules/_testcapi/getargs.c | 8 ++++---- Modules/arraymodule.c | 4 ++-- Objects/unicodeobject.c | 8 ++++---- Python/modsupport.c | 2 +- 9 files changed, 25 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2023-05-31-18-37-57.gh-issue-105156.R4El5V.rst diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 6771f37..e3c6ee4 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -52,6 +52,8 @@ Python: whether you selected a "narrow" or "wide" Unicode version of Python at build time. + .. deprecated-removed:: 3.13 3.15 + .. c:type:: PyASCIIObject PyCompactUnicodeObject diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 441b3ab..e263d0b 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -352,6 +352,11 @@ Porting to Python 3.13 Deprecated ---------- +* Deprecate the old ``Py_UNICODE`` and ``PY_UNICODE_TYPE`` types: use directly + the ``wchar_t`` type instead. Since Python 3.3, ``Py_UNICODE`` and + ``PY_UNICODE_TYPE`` are just aliases to ``wchar_t``. + (Contributed by Victor Stinner in :gh:`105156`.) + Removed ------- diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 3394726..92e7afd 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -6,8 +6,8 @@ Python and represents a single Unicode element in the Unicode type. With PEP 393, Py_UNICODE is deprecated and replaced with a typedef to wchar_t. */ -#define PY_UNICODE_TYPE wchar_t -/* Py_DEPRECATED(3.3) */ typedef wchar_t Py_UNICODE; +Py_DEPRECATED(3.13) typedef wchar_t PY_UNICODE_TYPE; +Py_DEPRECATED(3.13) typedef wchar_t Py_UNICODE; /* --- Internal Unicode Operations ---------------------------------------- */ diff --git a/Misc/NEWS.d/next/C API/2023-05-31-18-37-57.gh-issue-105156.R4El5V.rst b/Misc/NEWS.d/next/C API/2023-05-31-18-37-57.gh-issue-105156.R4El5V.rst new file mode 100644 index 0000000..cbdb837 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2023-05-31-18-37-57.gh-issue-105156.R4El5V.rst @@ -0,0 +1,4 @@ +Deprecate the old ``Py_UNICODE`` and ``PY_UNICODE_TYPE`` types: use directly +the ``wchar_t`` type instead. Since Python 3.3, ``Py_UNICODE`` and +``PY_UNICODE_TYPE`` are just aliases to ``wchar_t``. Patch by Victor +Stinner. diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 30944fc..005c9ff 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -231,7 +231,7 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode, /*[clinic end generated code: output=23413f68e6484bbd input=588aac967e0ba74b]*/ { #ifdef MS_WINDOWS - Py_UNICODE *widename = NULL; + wchar_t *widename = NULL; #else const char *name = NULL; #endif diff --git a/Modules/_testcapi/getargs.c b/Modules/_testcapi/getargs.c index 95ef2d2..a473c41 100644 --- a/Modules/_testcapi/getargs.c +++ b/Modules/_testcapi/getargs.c @@ -594,7 +594,7 @@ getargs_y_hash(PyObject *self, PyObject *args) static PyObject * getargs_u(PyObject *self, PyObject *args) { - Py_UNICODE *str; + wchar_t *str; if (!PyArg_ParseTuple(args, "u", &str)) { return NULL; } @@ -604,7 +604,7 @@ getargs_u(PyObject *self, PyObject *args) static PyObject * getargs_u_hash(PyObject *self, PyObject *args) { - Py_UNICODE *str; + wchar_t *str; Py_ssize_t size; if (!PyArg_ParseTuple(args, "u#", &str, &size)) { return NULL; @@ -615,7 +615,7 @@ getargs_u_hash(PyObject *self, PyObject *args) static PyObject * getargs_Z(PyObject *self, PyObject *args) { - Py_UNICODE *str; + wchar_t *str; if (!PyArg_ParseTuple(args, "Z", &str)) { return NULL; } @@ -628,7 +628,7 @@ getargs_Z(PyObject *self, PyObject *args) static PyObject * getargs_Z_hash(PyObject *self, PyObject *args) { - Py_UNICODE *str; + wchar_t *str; Py_ssize_t size; if (!PyArg_ParseTuple(args, "Z#", &str, &size)) { return NULL; diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index f94bbec..999b848 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -1830,10 +1830,10 @@ typecode_to_mformat_code(char typecode) return UNSIGNED_INT8; case 'u': - if (sizeof(Py_UNICODE) == 2) { + if (sizeof(wchar_t) == 2) { return UTF16_LE + is_big_endian; } - if (sizeof(Py_UNICODE) == 4) { + if (sizeof(wchar_t) == 4) { return UTF32_LE + is_big_endian; } return UNKNOWN_FORMAT; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 6f25f91..ffb4a87 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1800,14 +1800,14 @@ PyUnicode_FromWideChar(const wchar_t *u, Py_ssize_t size) switch (PyUnicode_KIND(unicode)) { case PyUnicode_1BYTE_KIND: - _PyUnicode_CONVERT_BYTES(Py_UNICODE, unsigned char, + _PyUnicode_CONVERT_BYTES(wchar_t, unsigned char, u, u + size, PyUnicode_1BYTE_DATA(unicode)); break; case PyUnicode_2BYTE_KIND: #if Py_UNICODE_SIZE == 2 memcpy(PyUnicode_2BYTE_DATA(unicode), u, size * 2); #else - _PyUnicode_CONVERT_BYTES(Py_UNICODE, Py_UCS2, + _PyUnicode_CONVERT_BYTES(wchar_t, Py_UCS2, u, u + size, PyUnicode_2BYTE_DATA(unicode)); #endif break; @@ -3809,9 +3809,9 @@ PyUnicode_AsUTF8(PyObject *unicode) PyUnicode_GetSize() has been deprecated since Python 3.3 because it returned length of Py_UNICODE. -But this function is part of stable abi, because it don't +But this function is part of stable abi, because it doesn't include Py_UNICODE in signature and it was not excluded from -stable abi in PEP 384. +stable ABI in PEP 384. */ PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(PyObject *unicode) diff --git a/Python/modsupport.c b/Python/modsupport.c index e2092ac..3db95f1 100644 --- a/Python/modsupport.c +++ b/Python/modsupport.c @@ -329,7 +329,7 @@ do_mkvalue(const char **p_format, va_list *p_va) case 'u': { PyObject *v; - Py_UNICODE *u = va_arg(*p_va, Py_UNICODE *); + const wchar_t *u = va_arg(*p_va, wchar_t*); Py_ssize_t n; if (**p_format == '#') { ++*p_format; -- cgit v0.12