From 610a60c601fb4380eee30e15be1cd4dcbdaeec4c Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 18 Jun 2020 17:30:53 +0900 Subject: bpo-36346: Add Py_DEPRECATED to deprecated unicode APIs (GH-20878) Co-authored-by: Kyle Stanley Co-authored-by: Victor Stinner (cherry picked from commit 2c4928d37edc5e4aeec3c0b79fa3460b1ec9b60d) --- Doc/whatsnew/3.9.rst | 11 ++++++ Include/cpython/unicodeobject.h | 45 +++++++++++----------- .../C API/2020-06-17-11-24-00.bpo-36346.fTMr3S.rst | 4 ++ Modules/_testcapimodule.c | 10 +++++ Objects/unicodeobject.c | 23 +++++++++++ Python/getargs.c | 4 ++ 6 files changed, 75 insertions(+), 22 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2020-06-17-11-24-00.bpo-36346.fTMr3S.rst diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst index 66dce83..8412d1f 100644 --- a/Doc/whatsnew/3.9.rst +++ b/Doc/whatsnew/3.9.rst @@ -1097,6 +1097,12 @@ Porting to Python 3.9 internal C API (``pycore_gc.h``). (Contributed by Victor Stinner in :issue:`40241`.) +* The ``Py_UNICODE_COPY``, ``Py_UNICODE_FILL``, ``PyUnicode_WSTR_LENGTH``, + :c:func:`PyUnicode_FromUnicode`, :c:func:`PyUnicode_AsUnicode`, + ``_PyUnicode_AsUnicode``, and :c:func:`PyUnicode_AsUnicodeAndSize` are + marked as deprecated in C. They have been deprecated by :pep:`393` since + Python 3.3. + (Contributed by Inada Naoki in :issue:`36346`.) Removed ------- @@ -1165,3 +1171,8 @@ Removed * Remove ``_PyUnicode_ClearStaticStrings()`` function. (Contributed by Victor Stinner in :issue:`39465`.) + +* Remove ``Py_UNICODE_MATCH``. It has been deprecated by :pep:`393`, and + broken since Python 3.3. The :c:func:`PyUnicode_Tailmatch` function can be + used instead. + (Contributed by Inada Naoki in :issue:`36346`.) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 4fd674f..a9b754a 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -50,13 +50,18 @@ extern "C" { Py_UNICODE_ISDIGIT(ch) || \ Py_UNICODE_ISNUMERIC(ch)) -#define Py_UNICODE_COPY(target, source, length) \ - memcpy((target), (source), (length)*sizeof(Py_UNICODE)) +Py_DEPRECATED(3.3) static inline void +Py_UNICODE_COPY(Py_UNICODE *target, const Py_UNICODE *source, Py_ssize_t length) { + memcpy(target, source, length * sizeof(Py_UNICODE)); +} -#define Py_UNICODE_FILL(target, value, length) \ - do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\ - for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\ - } while (0) +Py_DEPRECATED(3.3) static inline void +Py_UNICODE_FILL(Py_UNICODE *target, Py_UNICODE value, Py_ssize_t length) { + Py_ssize_t i; + for (i = 0; i < length; i++) { + target[i] = value; + } +} /* macros to work with surrogates */ #define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF) @@ -71,14 +76,6 @@ extern "C" { /* low surrogate = bottom 10 bits added to DC00 */ #define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF)) -/* Check if substring matches at given offset. The offset must be - valid, and the substring must not be empty. */ - -#define Py_UNICODE_MATCH(string, offset, substring) \ - ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \ - ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \ - !memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE))) - /* --- Unicode Type ------------------------------------------------------- */ /* ASCII-only strings created through PyUnicode_New use the PyASCIIObject @@ -251,10 +248,6 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency( int check_content); /* Fast access macros */ -#define PyUnicode_WSTR_LENGTH(op) \ - (PyUnicode_IS_COMPACT_ASCII(op) ? \ - ((PyASCIIObject*)op)->length : \ - ((PyCompactUnicodeObject*)op)->wstr_length) /* Returns the deprecated Py_UNICODE representation's size in code units (this includes surrogate pairs as 2 units). @@ -449,6 +442,14 @@ enum PyUnicode_Kind { (0xffffU) : \ (0x10ffffU))))) +Py_DEPRECATED(3.3) +static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) { + return PyUnicode_IS_COMPACT_ASCII(op) ? + ((PyASCIIObject*)op)->length : + ((PyCompactUnicodeObject*)op)->wstr_length; +} +#define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op) + /* === Public API ========================================================= */ /* --- Plain Py_UNICODE --------------------------------------------------- */ @@ -547,7 +548,7 @@ PyAPI_FUNC(void) _PyUnicode_FastFill( only allowed if u was set to NULL. The buffer is copied into the new object. */ -/* Py_DEPRECATED(3.3) */ PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode( +Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode( const Py_UNICODE *u, /* Unicode buffer */ Py_ssize_t size /* size of buffer */ ); @@ -576,13 +577,13 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar ( Py_UNICODE buffer. If the wchar_t/Py_UNICODE representation is not yet available, this function will calculate it. */ -/* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode( +Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode( PyObject *unicode /* Unicode object */ ); /* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string contains null characters. */ -PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode( +Py_DEPRECATED(3.3) PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode( PyObject *unicode /* Unicode object */ ); @@ -591,7 +592,7 @@ PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode( If the wchar_t/Py_UNICODE representation is not yet available, this function will calculate it. */ -/* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize( +Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize( PyObject *unicode, /* Unicode object */ Py_ssize_t *size /* location where to save the length */ ); diff --git a/Misc/NEWS.d/next/C API/2020-06-17-11-24-00.bpo-36346.fTMr3S.rst b/Misc/NEWS.d/next/C API/2020-06-17-11-24-00.bpo-36346.fTMr3S.rst new file mode 100644 index 0000000..902a0e6 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2020-06-17-11-24-00.bpo-36346.fTMr3S.rst @@ -0,0 +1,4 @@ +Mark ``Py_UNICODE_COPY``, ``Py_UNICODE_FILL``, ``PyUnicode_WSTR_LENGTH``, +``PyUnicode_FromUnicode``, ``PyUnicode_AsUnicode``, ``_PyUnicode_AsUnicode``, +and ``PyUnicode_AsUnicodeAndSize`` as deprecated in C. Remove ``Py_UNICODE_MATCH`` +which was deprecated and broken since Python 3.3. diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index e0457ae..5302641 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1668,6 +1668,10 @@ exit: static volatile int x; +/* Ignore use of deprecated APIs */ +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS + /* Test the u and u# codes for PyArg_ParseTuple. May leak memory in case of an error. */ @@ -1844,6 +1848,7 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) Py_RETURN_NONE; } +_Py_COMP_DIAG_POP static PyObject * unicode_aswidechar(PyObject *self, PyObject *args) @@ -2064,6 +2069,10 @@ unicode_transformdecimaltoascii(PyObject *self, PyObject *args) return PyUnicode_TransformDecimalToASCII(unicode, length); } +/* Ignore use of deprecated APIs */ +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS + static PyObject * unicode_legacy_string(PyObject *self, PyObject *args) { @@ -2086,6 +2095,7 @@ unicode_legacy_string(PyObject *self, PyObject *args) return u; } +_Py_COMP_DIAG_POP static PyObject * getargs_w_star(PyObject *self, PyObject *args) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 1082eb4..4c8c880 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -120,6 +120,13 @@ extern "C" { _PyUnicode_UTF8_LENGTH(op)) #define _PyUnicode_WSTR(op) \ (((PyASCIIObject*)(op))->wstr) + +/* Don't use deprecated macro of unicodeobject.h */ +#undef PyUnicode_WSTR_LENGTH +#define PyUnicode_WSTR_LENGTH(op) \ + (PyUnicode_IS_COMPACT_ASCII(op) ? \ + ((PyASCIIObject*)op)->length : \ + ((PyCompactUnicodeObject*)op)->wstr_length) #define _PyUnicode_WSTR_LENGTH(op) \ (((PyCompactUnicodeObject*)(op))->wstr_length) #define _PyUnicode_LENGTH(op) \ @@ -964,11 +971,14 @@ ensure_unicode(PyObject *obj) #include "stringlib/find_max_char.h" #include "stringlib/undef.h" +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS #include "stringlib/unicodedefs.h" #include "stringlib/fastsearch.h" #include "stringlib/count.h" #include "stringlib/find.h" #include "stringlib/undef.h" +_Py_COMP_DIAG_POP /* --- Unicode Object ----------------------------------------------------- */ @@ -4087,6 +4097,11 @@ PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size) return w; } +/* Deprecated APIs */ + +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS + Py_UNICODE * PyUnicode_AsUnicode(PyObject *unicode) { @@ -4125,6 +4140,8 @@ PyUnicode_GetSize(PyObject *unicode) return -1; } +_Py_COMP_DIAG_POP + Py_ssize_t PyUnicode_GetLength(PyObject *unicode) { @@ -12352,6 +12369,8 @@ PyUnicode_IsIdentifier(PyObject *self) return len && i == len; } else { +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS Py_ssize_t i = 0, len = PyUnicode_GET_SIZE(self); if (len == 0) { /* an empty string is not a valid identifier */ @@ -12389,6 +12408,7 @@ PyUnicode_IsIdentifier(PyObject *self) } } return 1; +_Py_COMP_DIAG_POP } } @@ -15944,7 +15964,10 @@ PyUnicode_AsUnicodeCopy(PyObject *unicode) PyErr_BadArgument(); return NULL; } +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS u = PyUnicode_AsUnicodeAndSize(unicode, &len); +_Py_COMP_DIAG_POP if (u == NULL) return NULL; /* Ensure we won't overflow the size. */ diff --git a/Python/getargs.c b/Python/getargs.c index 7742428..d4a531a 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -1070,6 +1070,9 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'u': /* raw unicode buffer (Py_UNICODE *) */ case 'Z': /* raw unicode buffer or None */ { + // TODO: Raise DeprecationWarning +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS Py_UNICODE **p = va_arg(*p_va, Py_UNICODE **); if (*format == '#') { @@ -1109,6 +1112,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, arg, msgbuf, bufsize); } break; +_Py_COMP_DIAG_POP } case 'e': {/* encoded string */ -- cgit v0.12