From f9c9354a7a173eaca2aa19e667b5cf12167b7fed Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 12 May 2022 14:48:38 +0900 Subject: gh-92536: PEP 623: Remove wstr and legacy APIs from Unicode (GH-92537) --- Doc/c-api/arg.rst | 47 +- Doc/c-api/unicode.rst | 177 +--- Doc/data/stable_abi.dat | 1 - Doc/howto/clinic.rst | 8 +- Doc/whatsnew/3.12.rst | 26 +- Include/cpython/unicodeobject.h | 233 +--- Include/internal/pycore_runtime_init.h | 1 - Include/unicodeobject.h | 11 +- Lib/test/clinic.test | 8 +- Lib/test/test_getargs2.py | 10 +- Lib/test/test_sys.py | 4 +- Makefile.pre.in | 3 +- .../2022-05-10-12-35-42.gh-issue-92536.cAoRCZ.rst | 1 + Misc/stable_abi.toml | 1 + Modules/_io/fileio.c | 9 - Modules/_testcapimodule.c | 175 --- Modules/clinic/_winapi.c.h | 22 +- Modules/clinic/overlapped.c.h | 10 +- Modules/clinic/posixmodule.c.h | 20 +- Modules/overlapped.c | 14 +- Modules/posixmodule.c | 48 +- Objects/stringlib/eq.h | 11 +- Objects/stringlib/unicodedefs.h | 32 - Objects/unicodeobject.c | 1116 ++------------------ PC/_msi.c | 6 - PC/clinic/_msi.c.h | 26 +- PC/clinic/winreg.c.h | 74 +- PC/winreg.c | 23 - Python/dynload_win.c | 6 - Python/fileutils.c | 15 +- Python/getargs.c | 54 - Python/traceback.c | 14 +- Tools/clinic/clinic.py | 10 - Tools/gdb/libpython.py | 57 +- Tools/scripts/deepfreeze.py | 12 - 35 files changed, 197 insertions(+), 2088 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2022-05-10-12-35-42.gh-issue-92536.cAoRCZ.rst delete mode 100644 Objects/stringlib/unicodedefs.h diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index 1d93b35..6d0ad39 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -136,48 +136,6 @@ which disallows mutable objects such as :class:`bytearray`. attempting any conversion. Raises :exc:`TypeError` if the object is not a :class:`bytearray` object. The C variable may also be declared as :c:type:`PyObject*`. -``u`` (:class:`str`) [const Py_UNICODE \*] - Convert a Python Unicode object to a C pointer to a NUL-terminated buffer of - Unicode characters. You must pass the address of a :c:type:`Py_UNICODE` - pointer variable, which will be filled with the pointer to an existing - Unicode buffer. Please note that the width of a :c:type:`Py_UNICODE` - character depends on compilation options (it is either 16 or 32 bits). - The Python string must not contain embedded null code points; if it does, - a :exc:`ValueError` exception is raised. - - .. versionchanged:: 3.5 - Previously, :exc:`TypeError` was raised when embedded null code points - were encountered in the Python string. - - .. deprecated-removed:: 3.3 3.12 - Part of the old-style :c:type:`Py_UNICODE` API; please migrate to using - :c:func:`PyUnicode_AsWideCharString`. - -``u#`` (:class:`str`) [const Py_UNICODE \*, :c:type:`Py_ssize_t`] - This variant on ``u`` stores into two C variables, the first one a pointer to a - Unicode data buffer, the second one its length. This variant allows - null code points. - - .. deprecated-removed:: 3.3 3.12 - Part of the old-style :c:type:`Py_UNICODE` API; please migrate to using - :c:func:`PyUnicode_AsWideCharString`. - -``Z`` (:class:`str` or ``None``) [const Py_UNICODE \*] - Like ``u``, but the Python object may also be ``None``, in which case the - :c:type:`Py_UNICODE` pointer is set to ``NULL``. - - .. deprecated-removed:: 3.3 3.12 - Part of the old-style :c:type:`Py_UNICODE` API; please migrate to using - :c:func:`PyUnicode_AsWideCharString`. - -``Z#`` (:class:`str` or ``None``) [const Py_UNICODE \*, :c:type:`Py_ssize_t`] - Like ``u#``, but the Python object may also be ``None``, in which case the - :c:type:`Py_UNICODE` pointer is set to ``NULL``. - - .. deprecated-removed:: 3.3 3.12 - Part of the old-style :c:type:`Py_UNICODE` API; please migrate to using - :c:func:`PyUnicode_AsWideCharString`. - ``U`` (:class:`str`) [PyObject \*] Requires that the Python object is a Unicode object, without attempting any conversion. Raises :exc:`TypeError` if the object is not a Unicode @@ -247,6 +205,11 @@ which disallows mutable objects such as :class:`bytearray`. them. Instead, the implementation assumes that the byte string object uses the encoding passed in as parameter. +.. versionchanged:: 3.12 + ``u``, ``u#``, ``Z``, and ``Z#`` are removed because they used legacy ``Py_UNICODE*`` + representation. + + Numbers ------- diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 792a469..8fab3b7 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -17,26 +17,12 @@ of Unicode characters while staying memory efficient. There are special cases for strings where all code points are below 128, 256, or 65536; otherwise, code points must be below 1114112 (which is the full Unicode range). -:c:type:`Py_UNICODE*` and UTF-8 representations are created on demand and cached -in the Unicode object. The :c:type:`Py_UNICODE*` representation is deprecated -and inefficient. - -Due to the transition between the old APIs and the new APIs, Unicode objects -can internally be in two states depending on how they were created: - -* "canonical" Unicode objects are all objects created by a non-deprecated - Unicode API. They use the most efficient representation allowed by the - implementation. - -* "legacy" Unicode objects have been created through one of the deprecated - APIs (typically :c:func:`PyUnicode_FromUnicode`) and only bear the - :c:type:`Py_UNICODE*` representation; you will have to call - :c:func:`PyUnicode_READY` on them before calling any other API. +UTF-8 representation is created on demand and cached in the Unicode object. .. note:: - The "legacy" Unicode object will be removed in Python 3.12 with deprecated - APIs. All Unicode objects will be "canonical" since then. See :pep:`623` - for more information. + The :c:type:`Py_UNICODE` representation has been removed since Python 3.12 + with deprecated APIs. + See :pep:`623` for more information. Unicode Type @@ -101,18 +87,12 @@ access to internal read-only data of Unicode objects: .. c:function:: int PyUnicode_READY(PyObject *o) - Ensure the string object *o* is in the "canonical" representation. This is - required before using any of the access macros described below. - - .. XXX expand on when it is not required - - Returns ``0`` on success and ``-1`` with an exception set on failure, which in - particular happens if memory allocation fails. + Returns ``0``. This API is kept only for backward compatibility. .. versionadded:: 3.3 - .. deprecated-removed:: 3.10 3.12 - This API will be removed with :c:func:`PyUnicode_FromUnicode`. + .. deprecated:: 3.10 + This API do nothing since Python 3.12. Please remove code using this function. .. c:function:: Py_ssize_t PyUnicode_GET_LENGTH(PyObject *o) @@ -130,14 +110,12 @@ access to internal read-only data of Unicode objects: Return a pointer to the canonical representation cast to UCS1, UCS2 or UCS4 integer types for direct character access. No checks are performed if the canonical representation has the correct character size; use - :c:func:`PyUnicode_KIND` to select the right function. Make sure - :c:func:`PyUnicode_READY` has been called before accessing this. + :c:func:`PyUnicode_KIND` to select the right function. .. versionadded:: 3.3 -.. c:macro:: PyUnicode_WCHAR_KIND - PyUnicode_1BYTE_KIND +.. c:macro:: PyUnicode_1BYTE_KIND PyUnicode_2BYTE_KIND PyUnicode_4BYTE_KIND @@ -145,8 +123,8 @@ access to internal read-only data of Unicode objects: .. versionadded:: 3.3 - .. deprecated-removed:: 3.10 3.12 - ``PyUnicode_WCHAR_KIND`` is deprecated. + .. versionchanged:: 3.12 + ``PyUnicode_WCHAR_KIND`` has been removed. .. c:function:: int PyUnicode_KIND(PyObject *o) @@ -155,8 +133,6 @@ access to internal read-only data of Unicode objects: bytes per character this Unicode object uses to store its data. *o* has to be a Unicode object in the "canonical" representation (not checked). - .. XXX document "0" return value? - .. versionadded:: 3.3 @@ -208,49 +184,6 @@ access to internal read-only data of Unicode objects: .. versionadded:: 3.3 -.. c:function:: Py_ssize_t PyUnicode_GET_SIZE(PyObject *o) - - Return the size of the deprecated :c:type:`Py_UNICODE` representation, in - code units (this includes surrogate pairs as 2 units). *o* has to be a - Unicode object (not checked). - - .. deprecated-removed:: 3.3 3.12 - Part of the old-style Unicode API, please migrate to using - :c:func:`PyUnicode_GET_LENGTH`. - - -.. c:function:: Py_ssize_t PyUnicode_GET_DATA_SIZE(PyObject *o) - - Return the size of the deprecated :c:type:`Py_UNICODE` representation in - bytes. *o* has to be a Unicode object (not checked). - - .. deprecated-removed:: 3.3 3.12 - Part of the old-style Unicode API, please migrate to using - :c:func:`PyUnicode_GET_LENGTH`. - - -.. c:function:: Py_UNICODE* PyUnicode_AS_UNICODE(PyObject *o) - const char* PyUnicode_AS_DATA(PyObject *o) - - Return a pointer to a :c:type:`Py_UNICODE` representation of the object. The - returned buffer is always terminated with an extra null code point. It - may also contain embedded null code points, which would cause the string - to be truncated when used in most C functions. The ``AS_DATA`` form - casts the pointer to :c:type:`const char *`. The *o* argument has to be - a Unicode object (not checked). - - .. versionchanged:: 3.3 - This function is now inefficient -- because in many cases the - :c:type:`Py_UNICODE` representation does not exist and needs to be created - -- and can fail (return ``NULL`` with an exception set). Try to port the - code to use the new :c:func:`PyUnicode_nBYTE_DATA` macros or use - :c:func:`PyUnicode_WRITE` or :c:func:`PyUnicode_READ`. - - .. deprecated-removed:: 3.3 3.12 - Part of the old-style Unicode API, please migrate to using the - :c:func:`PyUnicode_nBYTE_DATA` family of macros. - - .. c:function:: int PyUnicode_IsIdentifier(PyObject *o) Return ``1`` if the string is a valid identifier according to the language @@ -436,12 +369,17 @@ APIs: Create a Unicode object from the char buffer *u*. The bytes will be interpreted as being UTF-8 encoded. The buffer is copied into the new - object. If the buffer is not ``NULL``, the return value might be a shared - object, i.e. modification of the data is not allowed. + object. + The return value might be a shared object, i.e. modification of the data is + not allowed. - If *u* is ``NULL``, this function behaves like :c:func:`PyUnicode_FromUnicode` - with the buffer set to ``NULL``. This usage is deprecated in favor of - :c:func:`PyUnicode_New`, and will be removed in Python 3.12. + This function raises :exc:`SystemError` when: + + * *size* < 0, + * *u* is ``NULL`` and *size* > 0 + + .. versionchanged:: 3.12 + *u* == ``NULL`` with *size* > 0 is not allowed anymore. .. c:function:: PyObject *PyUnicode_FromString(const char *u) @@ -680,79 +618,6 @@ APIs: .. versionadded:: 3.3 -Deprecated Py_UNICODE APIs -"""""""""""""""""""""""""" - -.. deprecated-removed:: 3.3 3.12 - -These API functions are deprecated with the implementation of :pep:`393`. -Extension modules can continue using them, as they will not be removed in Python -3.x, but need to be aware that their use can now cause performance and memory hits. - - -.. c:function:: PyObject* PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size) - - Create a Unicode object from the Py_UNICODE buffer *u* of the given size. *u* - may be ``NULL`` which causes the contents to be undefined. It is the user's - responsibility to fill in the needed data. The buffer is copied into the new - object. - - If the buffer is not ``NULL``, the return value might be a shared object. - Therefore, modification of the resulting Unicode object is only allowed when - *u* is ``NULL``. - - If the buffer is ``NULL``, :c:func:`PyUnicode_READY` must be called once the - string content has been filled before using any of the access macros such as - :c:func:`PyUnicode_KIND`. - - .. deprecated-removed:: 3.3 3.12 - Part of the old-style Unicode API, please migrate to using - :c:func:`PyUnicode_FromKindAndData`, :c:func:`PyUnicode_FromWideChar`, or - :c:func:`PyUnicode_New`. - - -.. c:function:: Py_UNICODE* PyUnicode_AsUnicode(PyObject *unicode) - - Return a read-only pointer to the Unicode object's internal - :c:type:`Py_UNICODE` buffer, or ``NULL`` on error. This will create the - :c:type:`Py_UNICODE*` representation of the object if it is not yet - available. The buffer is always terminated with an extra null code point. - Note that the resulting :c:type:`Py_UNICODE` string may also contain - embedded null code points, which would cause the string to be truncated when - used in most C functions. - - .. deprecated-removed:: 3.3 3.12 - Part of the old-style Unicode API, please migrate to using - :c:func:`PyUnicode_AsUCS4`, :c:func:`PyUnicode_AsWideChar`, - :c:func:`PyUnicode_ReadChar` or similar new APIs. - - -.. c:function:: Py_UNICODE* PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size) - - Like :c:func:`PyUnicode_AsUnicode`, but also saves the :c:func:`Py_UNICODE` - array length (excluding the extra null terminator) in *size*. - Note that the resulting :c:type:`Py_UNICODE*` string - may contain embedded null code points, which would cause the string to be - truncated when used in most C functions. - - .. versionadded:: 3.3 - - .. deprecated-removed:: 3.3 3.12 - Part of the old-style Unicode API, please migrate to using - :c:func:`PyUnicode_AsUCS4`, :c:func:`PyUnicode_AsWideChar`, - :c:func:`PyUnicode_ReadChar` or similar new APIs. - - -.. c:function:: Py_ssize_t PyUnicode_GetSize(PyObject *unicode) - - Return the size of the deprecated :c:type:`Py_UNICODE` representation, in - code units (this includes surrogate pairs as 2 units). - - .. deprecated-removed:: 3.3 3.12 - Part of the old-style Unicode API, please migrate to using - :c:func:`PyUnicode_GET_LENGTH`. - - .. c:function:: PyObject* PyUnicode_FromObject(PyObject *obj) Copy an instance of a Unicode subtype to a new true Unicode object if diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat index 5387d0b..3486f33 100644 --- a/Doc/data/stable_abi.dat +++ b/Doc/data/stable_abi.dat @@ -761,7 +761,6 @@ function,PyUnicode_FromStringAndSize,3.2,, function,PyUnicode_FromWideChar,3.2,, function,PyUnicode_GetDefaultEncoding,3.2,, function,PyUnicode_GetLength,3.7,, -function,PyUnicode_GetSize,3.2,, function,PyUnicode_InternFromString,3.2,, function,PyUnicode_InternImmortal,3.2,, function,PyUnicode_InternInPlace,3.2,, diff --git a/Doc/howto/clinic.rst b/Doc/howto/clinic.rst index 04b1a2c..989527b 100644 --- a/Doc/howto/clinic.rst +++ b/Doc/howto/clinic.rst @@ -848,15 +848,15 @@ on the right is the text you'd replace it with. ``'s#'`` ``str(zeroes=True)`` ``'s*'`` ``Py_buffer(accept={buffer, str})`` ``'U'`` ``unicode`` -``'u'`` ``Py_UNICODE`` -``'u#'`` ``Py_UNICODE(zeroes=True)`` +``'u'`` ``wchar_t`` +``'u#'`` ``wchar_t(zeroes=True)`` ``'w*'`` ``Py_buffer(accept={rwbuffer})`` ``'Y'`` ``PyByteArrayObject`` ``'y'`` ``str(accept={bytes})`` ``'y#'`` ``str(accept={robuffer}, zeroes=True)`` ``'y*'`` ``Py_buffer`` -``'Z'`` ``Py_UNICODE(accept={str, NoneType})`` -``'Z#'`` ``Py_UNICODE(accept={str, NoneType}, zeroes=True)`` +``'Z'`` ``wchar_t(accept={str, NoneType})`` +``'Z#'`` ``wchar_t(accept={str, NoneType}, zeroes=True)`` ``'z'`` ``str(accept={str, NoneType})`` ``'z#'`` ``str(accept={str, NoneType}, zeroes=True)`` ``'z*'`` ``Py_buffer(accept={buffer, str, NoneType})`` diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 58fcb7d..d5017c0 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -66,6 +66,9 @@ Summary -- Release highlights .. PEP-sized items next. +Important deprecations, removals or restrictions: + +* :pep:`623`, Remove wstr from Unicode New Features @@ -91,7 +94,9 @@ Improved Modules Optimizations ============= - +* Removed ``wstr`` and ``wstr_length`` members from Unicode objects. + It reduces object size by 8 or 16 bytes on 64bit platform. (:pep:`623`) + (Contributed by Inada Naoki in :gh:`92536`.) Deprecated @@ -140,6 +145,13 @@ New Features Porting to Python 3.12 ---------------------- +* Legacy Unicode APIs based on ``Py_UNICODE*`` representation has been removed. + Please migrate to APIs based on UTF-8 or ``wchar_t*``. + +* Argument parsing functions like :c:func:`PyArg_ParseTuple` doesn't support + ``Py_UNICODE*`` based format (e.g. ``u``, ``Z``) anymore. Please migrate + to other formats for Unicode like ``s``, ``z``, ``es``, and ``U``. + Deprecated ---------- @@ -150,3 +162,15 @@ Removed API. The ``token.h`` header file was only designed to be used by Python internals. (Contributed by Victor Stinner in :gh:`92651`.) + +* Leagcy Unicode APIs has been removed. See :pep:`623` for detail. + + * :c:macro:`PyUnicode_WCHAR_KIND` + * :c:func:`PyUnicode_AS_UNICODE` + * :c:func:`PyUnicode_AsUnicode` + * :c:func:`PyUnicode_AsUnicodeAndSize` + * :c:func:`PyUnicode_AS_DATA` + * :c:func:`PyUnicode_FromUnicode` + * :c:func:`PyUnicode_GET_SIZE` + * :c:func:`PyUnicode_GetSize` + * :c:func:`PyUnicode_GET_DATA_SIZE` diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 1e3bdad..8c53962 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -11,10 +11,6 @@ /* --- Internal Unicode Operations ---------------------------------------- */ -#ifndef USE_UNICODE_WCHAR_CACHE -# define USE_UNICODE_WCHAR_CACHE 1 -#endif /* USE_UNICODE_WCHAR_CACHE */ - // Static inline functions to work with surrogates static inline int Py_UNICODE_IS_SURROGATE(Py_UCS4 ch) { return (0xD800 <= ch && ch <= 0xDFFF); @@ -51,7 +47,7 @@ static inline Py_UCS4 Py_UNICODE_LOW_SURROGATE(Py_UCS4 ch) { /* ASCII-only strings created through PyUnicode_New use the PyASCIIObject structure. state.ascii and state.compact are set, and the data - immediately follow the structure. utf8_length and wstr_length can be found + immediately follow the structure. utf8_length can be found in the length field; the utf8 pointer is equal to the data pointer. */ typedef struct { /* There are 4 forms of Unicode strings: @@ -63,8 +59,7 @@ typedef struct { * kind = PyUnicode_1BYTE_KIND * compact = 1 * ascii = 1 - * ready = 1 - * (length is the length of the utf8 and wstr strings) + * (length is the length of the utf8) * (data starts just after the structure) * (since ASCII is decoded from UTF-8, the utf8 string are the data) @@ -75,55 +70,27 @@ typedef struct { * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or PyUnicode_4BYTE_KIND * compact = 1 - * ready = 1 * ascii = 0 * utf8 is not shared with data * utf8_length = 0 if utf8 is NULL - * wstr is shared with data and wstr_length=length - if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2 - or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4 - * wstr_length = 0 if wstr is NULL * (data starts just after the structure) - - legacy string, not ready: - - * structure = PyUnicodeObject - * test: kind == PyUnicode_WCHAR_KIND - * length = 0 (use wstr_length) - * hash = -1 - * kind = PyUnicode_WCHAR_KIND - * compact = 0 - * ascii = 0 - * ready = 0 - * interned = SSTATE_NOT_INTERNED - * wstr is not NULL - * data.any is NULL - * utf8 is NULL - * utf8_length = 0 - - - legacy string, ready: + - legacy string: * structure = PyUnicodeObject structure - * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND + * test: !PyUnicode_IS_COMPACT(op) * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or PyUnicode_4BYTE_KIND * compact = 0 - * ready = 1 * data.any is not NULL * utf8 is shared and utf8_length = length with data.any if ascii = 1 * utf8_length = 0 if utf8 is NULL - * wstr is shared with data.any and wstr_length = length - if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2 - or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_4)=4 - * wstr_length = 0 if wstr is NULL Compact strings use only one memory block (structure + characters), whereas legacy strings use one block for the structure and one block for characters. - Legacy strings are created by PyUnicode_FromUnicode() and - PyUnicode_FromStringAndSize(NULL, size) functions. They become ready - when PyUnicode_READY() is called. + Legacy strings are created by subclasses of Unicode. See also _PyUnicode_CheckConsistency(). */ @@ -142,11 +109,6 @@ typedef struct { unsigned int interned:2; /* Character size: - - PyUnicode_WCHAR_KIND (0): - - * character type = wchar_t (16 or 32 bits, depending on the - platform) - - PyUnicode_1BYTE_KIND (1): * character type = Py_UCS1 (8 bits, unsigned) @@ -177,16 +139,10 @@ typedef struct { and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is set, use the PyASCIIObject structure. */ unsigned int ascii:1; - /* The ready flag indicates whether the object layout is initialized - completely. This means that this is either a compact object, or - the data pointer is filled out. The bit is redundant, and helps - to minimize the test in PyUnicode_IS_READY(). */ - unsigned int ready:1; /* Padding to ensure that PyUnicode_DATA() is always aligned to 4 bytes (see issue #19537 on m68k). */ - unsigned int :24; + unsigned int :25; } state; - wchar_t *wstr; /* wchar_t representation (null-terminated) */ } PyASCIIObject; /* Non-ASCII strings allocated through PyUnicode_New use the @@ -197,13 +153,9 @@ typedef struct { Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the * terminating \0. */ char *utf8; /* UTF-8 representation (null-terminated) */ - Py_ssize_t wstr_length; /* Number of code points in wstr, possible - * surrogates count as two code points. */ } PyCompactUnicodeObject; -/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the - PyUnicodeObject structure. The actual string data is initially in the wstr - block, and copied into the data block using _PyUnicode_Ready. */ +/* Object format for Unicode subclasses. */ typedef struct { PyCompactUnicodeObject _base; union { @@ -247,10 +199,9 @@ static inline unsigned int PyUnicode_CHECK_INTERNED(PyObject *op) { # define PyUnicode_CHECK_INTERNED(op) PyUnicode_CHECK_INTERNED(_PyObject_CAST(op)) #endif -/* Fast check to determine whether an object is ready. Equivalent to: - PyUnicode_IS_COMPACT(op) || _PyUnicodeObject_CAST(op)->data.any */ +/* For backward compatibility */ static inline unsigned int PyUnicode_IS_READY(PyObject *op) { - return _PyASCIIObject_CAST(op)->state.ready; + return 1; } #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 # define PyUnicode_IS_READY(op) PyUnicode_IS_READY(_PyObject_CAST(op)) @@ -260,7 +211,6 @@ static inline unsigned int PyUnicode_IS_READY(PyObject *op) { string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be ready. */ static inline unsigned int PyUnicode_IS_ASCII(PyObject *op) { - assert(PyUnicode_IS_READY(op)); return _PyASCIIObject_CAST(op)->state.ascii; } #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 @@ -286,10 +236,6 @@ static inline int PyUnicode_IS_COMPACT_ASCII(PyObject *op) { #endif enum PyUnicode_Kind { -/* String contains only wstr byte characters. This is only possible - when the string was created with a legacy API and _PyUnicode_Ready() - has not been called yet. */ - PyUnicode_WCHAR_KIND = 0, /* Return values of the PyUnicode_KIND() function: */ PyUnicode_1BYTE_KIND = 1, PyUnicode_2BYTE_KIND = 2, @@ -298,8 +244,7 @@ enum PyUnicode_Kind { /* Return one of the PyUnicode_*_KIND values defined above. */ #define PyUnicode_KIND(op) \ - (assert(PyUnicode_IS_READY(op)), \ - _PyASCIIObject_CAST(op)->state.kind) + (_PyASCIIObject_CAST(op)->state.kind) /* Return a void pointer to the raw unicode buffer. */ static inline void* _PyUnicode_COMPACT_DATA(PyObject *op) { @@ -335,11 +280,8 @@ static inline void* PyUnicode_DATA(PyObject *op) { #define PyUnicode_2BYTE_DATA(op) _Py_STATIC_CAST(Py_UCS2*, PyUnicode_DATA(op)) #define PyUnicode_4BYTE_DATA(op) _Py_STATIC_CAST(Py_UCS4*, PyUnicode_DATA(op)) -/* Returns the length of the unicode string. The caller has to make sure that - the string has it's canonical representation set before calling - this function. Call PyUnicode_(FAST_)Ready to ensure that. */ +/* Returns the length of the unicode string. */ static inline Py_ssize_t PyUnicode_GET_LENGTH(PyObject *op) { - assert(PyUnicode_IS_READY(op)); return _PyASCIIObject_CAST(op)->length; } #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 @@ -400,7 +342,6 @@ static inline Py_UCS4 PyUnicode_READ(int kind, cache kind and use PyUnicode_READ instead. */ static inline Py_UCS4 PyUnicode_READ_CHAR(PyObject *unicode, Py_ssize_t index) { - assert(PyUnicode_IS_READY(unicode)); int kind = PyUnicode_KIND(unicode); if (kind == PyUnicode_1BYTE_KIND) { return PyUnicode_1BYTE_DATA(unicode)[index]; @@ -421,7 +362,6 @@ static inline Py_UCS4 PyUnicode_READ_CHAR(PyObject *unicode, Py_ssize_t index) than iterating over the string. */ static inline Py_UCS4 PyUnicode_MAX_CHAR_VALUE(PyObject *op) { - assert(PyUnicode_IS_READY(op)); if (PyUnicode_IS_ASCII(op)) { return 0x7fU; } @@ -453,27 +393,10 @@ PyAPI_FUNC(PyObject*) PyUnicode_New( Py_UCS4 maxchar /* maximum code point value in the string */ ); -/* Initializes the canonical string representation from the deprecated - wstr/Py_UNICODE representation. This function is used to convert Unicode - objects which were created using the old API to the new flexible format - introduced with PEP 393. - - Don't call this function directly, use the public PyUnicode_READY() function - instead. */ -PyAPI_FUNC(int) _PyUnicode_Ready( - PyObject *unicode /* Unicode object */ - ); - -/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best - case. If the canonical representation is not yet set, it will still call - _PyUnicode_Ready(). - Returns 0 on success and -1 on errors. */ +/* For backward compatibility */ static inline int PyUnicode_READY(PyObject *op) { - if (PyUnicode_IS_READY(op)) { - return 0; - } - return _PyUnicode_Ready(op); + return 0; } #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 # define PyUnicode_READY(op) PyUnicode_READY(_PyObject_CAST(op)) @@ -565,133 +488,6 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar ( Py_ssize_t start, Py_ssize_t end); -/* --- Legacy deprecated API ---------------------------------------------- */ - -/* Create a Unicode Object from the Py_UNICODE buffer u of the given - size. - - u may be NULL which causes the contents to be undefined. It is the - user's responsibility to fill in the needed data afterwards. Note - that modifying the Unicode object contents after construction is - only allowed if u was set to NULL. - - The buffer is copied into the new object. */ -Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode( - const Py_UNICODE *u, /* Unicode buffer */ - Py_ssize_t size /* size of buffer */ - ); - -/* Return a read-only pointer to the Unicode object's internal - Py_UNICODE buffer. - If the wchar_t/Py_UNICODE representation is not yet available, this - function will calculate it. */ -Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode( - PyObject *unicode /* Unicode object */ - ); - -/* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string - contains null characters. */ -PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode( - PyObject *unicode /* Unicode object */ - ); - -/* Return a read-only pointer to the Unicode object's internal - Py_UNICODE buffer and save the length at size. - If the wchar_t/Py_UNICODE representation is not yet available, this - function will calculate it. */ - -Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize( - PyObject *unicode, /* Unicode object */ - Py_ssize_t *size /* location where to save the length */ - ); - - -/* Fast access macros */ - -Py_DEPRECATED(3.3) -static inline Py_ssize_t PyUnicode_WSTR_LENGTH(PyObject *op) -{ - if (PyUnicode_IS_COMPACT_ASCII(op)) { - return _PyASCIIObject_CAST(op)->length; - } - else { - return _PyCompactUnicodeObject_CAST(op)->wstr_length; - } -} -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 -# define PyUnicode_WSTR_LENGTH(op) PyUnicode_WSTR_LENGTH(_PyObject_CAST(op)) -#endif - -/* Returns the deprecated Py_UNICODE representation's size in code units - (this includes surrogate pairs as 2 units). - If the Py_UNICODE representation is not available, it will be computed - on request. Use PyUnicode_GET_LENGTH() for the length in code points. */ - -Py_DEPRECATED(3.3) -static inline Py_ssize_t PyUnicode_GET_SIZE(PyObject *op) -{ - _Py_COMP_DIAG_PUSH - _Py_COMP_DIAG_IGNORE_DEPR_DECLS - if (_PyASCIIObject_CAST(op)->wstr == _Py_NULL) { - (void)PyUnicode_AsUnicode(op); - assert(_PyASCIIObject_CAST(op)->wstr != _Py_NULL); - } - return PyUnicode_WSTR_LENGTH(op); - _Py_COMP_DIAG_POP -} -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 -# define PyUnicode_GET_SIZE(op) PyUnicode_GET_SIZE(_PyObject_CAST(op)) -#endif - -Py_DEPRECATED(3.3) -static inline Py_ssize_t PyUnicode_GET_DATA_SIZE(PyObject *op) -{ - _Py_COMP_DIAG_PUSH - _Py_COMP_DIAG_IGNORE_DEPR_DECLS - return PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE; - _Py_COMP_DIAG_POP -} -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 -# define PyUnicode_GET_DATA_SIZE(op) PyUnicode_GET_DATA_SIZE(_PyObject_CAST(op)) -#endif - -/* Alias for PyUnicode_AsUnicode(). This will create a wchar_t/Py_UNICODE - representation on demand. Using this macro is very inefficient now, - try to port your code to use the new PyUnicode_*BYTE_DATA() macros or - use PyUnicode_WRITE() and PyUnicode_READ(). */ - -Py_DEPRECATED(3.3) -static inline Py_UNICODE* PyUnicode_AS_UNICODE(PyObject *op) -{ - wchar_t *wstr = _PyASCIIObject_CAST(op)->wstr; - if (wstr != _Py_NULL) { - return wstr; - } - - _Py_COMP_DIAG_PUSH - _Py_COMP_DIAG_IGNORE_DEPR_DECLS - return PyUnicode_AsUnicode(op); - _Py_COMP_DIAG_POP -} -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 -# define PyUnicode_AS_UNICODE(op) PyUnicode_AS_UNICODE(_PyObject_CAST(op)) -#endif - -Py_DEPRECATED(3.3) -static inline const char* PyUnicode_AS_DATA(PyObject *op) -{ - _Py_COMP_DIAG_PUSH - _Py_COMP_DIAG_IGNORE_DEPR_DECLS - Py_UNICODE *data = PyUnicode_AS_UNICODE(op); - // In C++, casting directly PyUnicode* to const char* is not valid - return _Py_STATIC_CAST(const char*, _Py_STATIC_CAST(const void*, data)); - _Py_COMP_DIAG_POP -} -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 -# define PyUnicode_AS_DATA(op) PyUnicode_AS_DATA(_PyObject_CAST(op)) -#endif - - /* --- _PyUnicodeWriter API ----------------------------------------------- */ typedef struct { @@ -748,8 +544,7 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, Return 0 on success, raise an exception and return -1 on error. */ #define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \ - (assert((KIND) != PyUnicode_WCHAR_KIND), \ - (KIND) <= (WRITER)->kind \ + ((KIND) <= (WRITER)->kind \ ? 0 \ : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND))) diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 57cacb9..737507f 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -102,7 +102,6 @@ extern "C" { .kind = 1, \ .compact = 1, \ .ascii = ASCII, \ - .ready = 1, \ }, \ } #define _PyASCIIObject_INIT(LITERAL) \ diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 1d2f546..f71f379 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -171,13 +171,6 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength( ); #endif -/* Get the number of Py_UNICODE units in the - string representation. */ - -Py_DEPRECATED(3.3) PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize( - PyObject *unicode /* Unicode object */ - ); - #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 /* Read a character from the string. */ @@ -198,9 +191,7 @@ PyAPI_FUNC(int) PyUnicode_WriteChar( ); #endif -/* Resize a Unicode object. The length is the number of characters, except - if the kind of the string is PyUnicode_WCHAR_KIND: in this case, the length - is the number of Py_UNICODE characters. +/* Resize a Unicode object. The length is the number of codepoints. *unicode is modified to point to the new (resized) object and 0 returned on success. diff --git a/Lib/test/clinic.test b/Lib/test/clinic.test index 9ef3610..94322f6 100644 --- a/Lib/test/clinic.test +++ b/Lib/test/clinic.test @@ -1819,17 +1819,11 @@ test_Py_UNICODE_converter(PyObject *module, PyObject *const *args, Py_ssize_t na exit: /* Cleanup for a */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)a); - #endif /* USE_UNICODE_WCHAR_CACHE */ /* Cleanup for b */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)b); - #endif /* USE_UNICODE_WCHAR_CACHE */ /* Cleanup for c */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)c); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -1839,7 +1833,7 @@ test_Py_UNICODE_converter_impl(PyObject *module, const Py_UNICODE *a, const Py_UNICODE *b, const Py_UNICODE *c, const Py_UNICODE *d, Py_ssize_t d_length, const Py_UNICODE *e, Py_ssize_t e_length) -/*[clinic end generated code: output=45e92604de227552 input=064a3b68ad7f04b0]*/ +/*[clinic end generated code: output=4d426808cdbb3ea3 input=064a3b68ad7f04b0]*/ /*[clinic input] diff --git a/Lib/test/test_getargs2.py b/Lib/test/test_getargs2.py index e0db9e4..7c11c6c 100644 --- a/Lib/test/test_getargs2.py +++ b/Lib/test/test_getargs2.py @@ -1162,7 +1162,7 @@ class SkipitemTest(unittest.TestCase): dict_b = {'b':1} keywords = ["a", "b"] - supported = ('s#', 's*', 'z#', 'z*', 'u#', 'Z#', 'y#', 'y*', 'w#', 'w*') + supported = ('s#', 's*', 'z#', 'z*', 'y#', 'y*', 'w#', 'w*') for c in string.ascii_letters: for c2 in '#*': f = c + c2 @@ -1255,14 +1255,6 @@ class Test_testcapi(unittest.TestCase): for name in dir(_testcapi) if name.startswith('test_') and name.endswith('_code')) - @warnings_helper.ignore_warnings(category=DeprecationWarning) - def test_u_code(self): - _testcapi.test_u_code() - - @warnings_helper.ignore_warnings(category=DeprecationWarning) - def test_Z_code(self): - _testcapi.test_Z_code() - if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 8aaf232..9c0f4a6 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1538,8 +1538,8 @@ class SizeofTest(unittest.TestCase): samples = ['1'*100, '\xff'*50, '\u0100'*40, '\uffff'*100, '\U00010000'*30, '\U0010ffff'*100] - asciifields = "nnbP" - compactfields = asciifields + "nPn" + asciifields = "nnb" + compactfields = asciifields + "nP" unicodefields = compactfields + "P" for s in samples: maxchar = ord(max(s)) diff --git a/Makefile.pre.in b/Makefile.pre.in index 869c78e..515c18c 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1405,8 +1405,7 @@ UNICODE_DEPS = \ $(srcdir)/Objects/stringlib/ucs2lib.h \ $(srcdir)/Objects/stringlib/ucs4lib.h \ $(srcdir)/Objects/stringlib/undef.h \ - $(srcdir)/Objects/stringlib/unicode_format.h \ - $(srcdir)/Objects/stringlib/unicodedefs.h + $(srcdir)/Objects/stringlib/unicode_format.h Objects/bytes_methods.o: $(srcdir)/Objects/bytes_methods.c $(BYTESTR_DEPS) Objects/bytesobject.o: $(srcdir)/Objects/bytesobject.c $(BYTESTR_DEPS) diff --git a/Misc/NEWS.d/next/C API/2022-05-10-12-35-42.gh-issue-92536.cAoRCZ.rst b/Misc/NEWS.d/next/C API/2022-05-10-12-35-42.gh-issue-92536.cAoRCZ.rst new file mode 100644 index 0000000..a0b1bc6 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2022-05-10-12-35-42.gh-issue-92536.cAoRCZ.rst @@ -0,0 +1 @@ +Remove legacy Unicode APIs based on ``Py_UNICODE*``. diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index e34bfcd..07cce36 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -1524,6 +1524,7 @@ added = '3.2' [function.PyUnicode_GetSize] added = '3.2' + abi_only = true [function.PyUnicode_IsIdentifier] added = '3.2' [function.PyUnicode_Join] diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 8b1cff5..0085997 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -268,14 +268,7 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode, if (!PyUnicode_FSDecoder(nameobj, &stringobj)) { return -1; } -#if USE_UNICODE_WCHAR_CACHE -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS - widename = PyUnicode_AsUnicode(stringobj); -_Py_COMP_DIAG_POP -#else /* USE_UNICODE_WCHAR_CACHE */ widename = PyUnicode_AsWideCharString(stringobj, NULL); -#endif /* USE_UNICODE_WCHAR_CACHE */ if (widename == NULL) return -1; #else @@ -497,9 +490,7 @@ _Py_COMP_DIAG_POP done: #ifdef MS_WINDOWS -#if !USE_UNICODE_WCHAR_CACHE PyMem_Free(widename); -#endif /* USE_UNICODE_WCHAR_CACHE */ #endif Py_CLEAR(stringobj); return ret; diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index be9ed50..4371bf7 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1991,116 +1991,6 @@ exit: return return_value; } -static volatile int x; - -#if USE_UNICODE_WCHAR_CACHE -/* Ignore use of deprecated APIs */ -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS - -/* Test the u and u# codes for PyArg_ParseTuple. May leak memory in case - of an error. -*/ -static PyObject * -test_u_code(PyObject *self, PyObject *Py_UNUSED(ignored)) -{ - PyObject *tuple, *obj; - Py_UNICODE *value; - Py_ssize_t len; - - /* issue4122: Undefined reference to _Py_ascii_whitespace on Windows */ - /* Just use the macro and check that it compiles */ - x = Py_UNICODE_ISSPACE(25); - - tuple = PyTuple_New(1); - if (tuple == NULL) - return NULL; - - obj = PyUnicode_Decode("test", strlen("test"), - "ascii", NULL); - if (obj == NULL) - return NULL; - - PyTuple_SET_ITEM(tuple, 0, obj); - - value = 0; - if (!PyArg_ParseTuple(tuple, "u:test_u_code", &value)) { - return NULL; - } - if (value != PyUnicode_AS_UNICODE(obj)) - return raiseTestError("test_u_code", - "u code returned wrong value for u'test'"); - value = 0; - if (!PyArg_ParseTuple(tuple, "u#:test_u_code", &value, &len)) { - return NULL; - } - if (value != PyUnicode_AS_UNICODE(obj) || - len != PyUnicode_GET_SIZE(obj)) - return raiseTestError("test_u_code", - "u# code returned wrong values for u'test'"); - - Py_DECREF(tuple); - Py_RETURN_NONE; -} - -/* Test Z and Z# codes for PyArg_ParseTuple */ -static PyObject * -test_Z_code(PyObject *self, PyObject *Py_UNUSED(ignored)) -{ - PyObject *tuple, *obj; - const Py_UNICODE *value1, *value2; - Py_ssize_t len1, len2; - - tuple = PyTuple_New(2); - if (tuple == NULL) - return NULL; - - obj = PyUnicode_FromString("test"); - PyTuple_SET_ITEM(tuple, 0, obj); - Py_INCREF(Py_None); - PyTuple_SET_ITEM(tuple, 1, Py_None); - - /* swap values on purpose */ - value1 = NULL; - value2 = PyUnicode_AS_UNICODE(obj); - - /* Test Z for both values */ - if (!PyArg_ParseTuple(tuple, "ZZ:test_Z_code", &value1, &value2)) { - return NULL; - } - if (value1 != PyUnicode_AS_UNICODE(obj)) - return raiseTestError("test_Z_code", - "Z code returned wrong value for 'test'"); - if (value2 != NULL) - return raiseTestError("test_Z_code", - "Z code returned wrong value for None"); - - value1 = NULL; - value2 = PyUnicode_AS_UNICODE(obj); - len1 = -1; - len2 = -1; - - /* Test Z# for both values */ - if (!PyArg_ParseTuple(tuple, "Z#Z#:test_Z_code", &value1, &len1, - &value2, &len2)) - { - return NULL; - } - if (value1 != PyUnicode_AS_UNICODE(obj) || - len1 != PyUnicode_GET_SIZE(obj)) - return raiseTestError("test_Z_code", - "Z# code returned wrong values for 'test'"); - if (value2 != NULL || - len2 != 0) - return raiseTestError("test_Z_code", - "Z# code returned wrong values for None'"); - - Py_DECREF(tuple); - Py_RETURN_NONE; -} -_Py_COMP_DIAG_POP -#endif /* USE_UNICODE_WCHAR_CACHE */ - static PyObject * test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) { @@ -2151,35 +2041,7 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) else return raiseTestError("test_widechar", "PyUnicode_FromWideChar(L\"\\U00110000\", 1) didn't fail"); - -#if USE_UNICODE_WCHAR_CACHE -/* Ignore use of deprecated APIs */ -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS - wide = PyUnicode_FromUnicode(invalid, 1); - if (wide == NULL) - PyErr_Clear(); - else - return raiseTestError("test_widechar", - "PyUnicode_FromUnicode(L\"\\U00110000\", 1) didn't fail"); - - wide = PyUnicode_FromUnicode(NULL, 1); - if (wide == NULL) - return NULL; - PyUnicode_AS_UNICODE(wide)[0] = invalid[0]; - if (_PyUnicode_Ready(wide) < 0) { - Py_DECREF(wide); - PyErr_Clear(); - } - else { - Py_DECREF(wide); - return raiseTestError("test_widechar", - "PyUnicode_Ready() didn't fail"); - } -_Py_COMP_DIAG_POP -#endif /* USE_UNICODE_WCHAR_CACHE */ #endif - Py_RETURN_NONE; } @@ -2357,36 +2219,6 @@ unicode_copycharacters(PyObject *self, PyObject *args) return Py_BuildValue("(Nn)", to_copy, copied); } -#if USE_UNICODE_WCHAR_CACHE -/* Ignore use of deprecated APIs */ -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS - -static PyObject * -unicode_legacy_string(PyObject *self, PyObject *args) -{ - Py_UNICODE *data; - Py_ssize_t len; - PyObject *u; - - if (!PyArg_ParseTuple(args, "u#", &data, &len)) - return NULL; - - u = PyUnicode_FromUnicode(NULL, len); - if (u == NULL) - return NULL; - - memcpy(PyUnicode_AS_UNICODE(u), data, len * sizeof(Py_UNICODE)); - - if (len > 0) { /* The empty string is always ready. */ - assert(!PyUnicode_IS_READY(u)); - } - - return u; -} -_Py_COMP_DIAG_POP -#endif /* USE_UNICODE_WCHAR_CACHE */ - static PyObject * getargs_w_star(PyObject *self, PyObject *args) { @@ -6092,10 +5924,6 @@ static PyMethodDef TestMethods[] = { {"codec_incrementaldecoder", (PyCFunction)codec_incrementaldecoder, METH_VARARGS}, {"test_s_code", test_s_code, METH_NOARGS}, -#if USE_UNICODE_WCHAR_CACHE - {"test_u_code", test_u_code, METH_NOARGS}, - {"test_Z_code", test_Z_code, METH_NOARGS}, -#endif /* USE_UNICODE_WCHAR_CACHE */ {"test_widechar", test_widechar, METH_NOARGS}, {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS}, {"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS}, @@ -6104,9 +5932,6 @@ static PyMethodDef TestMethods[] = { {"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS}, {"unicode_findchar", unicode_findchar, METH_VARARGS}, {"unicode_copycharacters", unicode_copycharacters, METH_VARARGS}, -#if USE_UNICODE_WCHAR_CACHE - {"unicode_legacy_string", unicode_legacy_string, METH_VARARGS}, -#endif /* USE_UNICODE_WCHAR_CACHE */ {"_test_thread_state", test_thread_state, METH_VARARGS}, {"_pending_threadfunc", pending_threadfunc, METH_VARARGS}, #ifdef HAVE_GETTIMEOFDAY diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h index 9c83d0b..4d89888 100644 --- a/Modules/clinic/_winapi.c.h +++ b/Modules/clinic/_winapi.c.h @@ -210,9 +210,7 @@ _winapi_CreateFileMapping(PyObject *module, PyObject *const *args, Py_ssize_t na exit: /* Cleanup for name */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)name); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -243,11 +241,7 @@ _winapi_CreateJunction(PyObject *module, PyObject *const *args, Py_ssize_t nargs _PyArg_BadArgument("CreateJunction", "argument 1", "str", args[0]); goto exit; } - #if USE_UNICODE_WCHAR_CACHE - src_path = _PyUnicode_AsUnicode(args[0]); - #else /* USE_UNICODE_WCHAR_CACHE */ src_path = PyUnicode_AsWideCharString(args[0], NULL); - #endif /* USE_UNICODE_WCHAR_CACHE */ if (src_path == NULL) { goto exit; } @@ -255,11 +249,7 @@ _winapi_CreateJunction(PyObject *module, PyObject *const *args, Py_ssize_t nargs _PyArg_BadArgument("CreateJunction", "argument 2", "str", args[1]); goto exit; } - #if USE_UNICODE_WCHAR_CACHE - dst_path = _PyUnicode_AsUnicode(args[1]); - #else /* USE_UNICODE_WCHAR_CACHE */ dst_path = PyUnicode_AsWideCharString(args[1], NULL); - #endif /* USE_UNICODE_WCHAR_CACHE */ if (dst_path == NULL) { goto exit; } @@ -267,13 +257,9 @@ _winapi_CreateJunction(PyObject *module, PyObject *const *args, Py_ssize_t nargs exit: /* Cleanup for src_path */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)src_path); - #endif /* USE_UNICODE_WCHAR_CACHE */ /* Cleanup for dst_path */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)dst_path); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -412,13 +398,9 @@ _winapi_CreateProcess(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: /* Cleanup for application_name */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)application_name); - #endif /* USE_UNICODE_WCHAR_CACHE */ /* Cleanup for current_directory */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)current_directory); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -767,9 +749,7 @@ _winapi_OpenFileMapping(PyObject *module, PyObject *const *args, Py_ssize_t narg exit: /* Cleanup for name */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)name); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -1184,4 +1164,4 @@ _winapi__mimetypes_read_windows_registry(PyObject *module, PyObject *const *args exit: return return_value; } -/*[clinic end generated code: output=a4ede01aede352a4 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b007dde2e7f2fff8 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/overlapped.c.h b/Modules/clinic/overlapped.c.h index 2861338..cac44d0 100644 --- a/Modules/clinic/overlapped.c.h +++ b/Modules/clinic/overlapped.c.h @@ -230,9 +230,7 @@ _overlapped_CreateEvent(PyObject *module, PyObject *const *args, Py_ssize_t narg exit: /* Cleanup for Name */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)Name); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -812,11 +810,7 @@ _overlapped_Overlapped_ConnectPipe(OverlappedObject *self, PyObject *arg) _PyArg_BadArgument("ConnectPipe", "argument", "str", arg); goto exit; } - #if USE_UNICODE_WCHAR_CACHE - Address = _PyUnicode_AsUnicode(arg); - #else /* USE_UNICODE_WCHAR_CACHE */ Address = PyUnicode_AsWideCharString(arg, NULL); - #endif /* USE_UNICODE_WCHAR_CACHE */ if (Address == NULL) { goto exit; } @@ -824,9 +818,7 @@ _overlapped_Overlapped_ConnectPipe(OverlappedObject *self, PyObject *arg) exit: /* Cleanup for Address */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)Address); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -968,4 +960,4 @@ exit: return return_value; } -/*[clinic end generated code: output=b0f15f5c09f1147e input=a9049054013a1b77]*/ +/*[clinic end generated code: output=9078d9f9984864a2 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index d62b09e..1ce7d86 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -1760,11 +1760,7 @@ os_system(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *k _PyArg_BadArgument("system", "argument 'command'", "str", args[0]); goto exit; } - #if USE_UNICODE_WCHAR_CACHE - command = _PyUnicode_AsUnicode(args[0]); - #else /* USE_UNICODE_WCHAR_CACHE */ command = PyUnicode_AsWideCharString(args[0], NULL); - #endif /* USE_UNICODE_WCHAR_CACHE */ if (command == NULL) { goto exit; } @@ -1776,9 +1772,7 @@ os_system(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *k exit: /* Cleanup for command */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)command); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -7264,11 +7258,7 @@ os_startfile(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject _PyArg_BadArgument("startfile", "argument 'operation'", "str", args[1]); goto exit; } - #if USE_UNICODE_WCHAR_CACHE - operation = _PyUnicode_AsUnicode(args[1]); - #else /* USE_UNICODE_WCHAR_CACHE */ operation = PyUnicode_AsWideCharString(args[1], NULL); - #endif /* USE_UNICODE_WCHAR_CACHE */ if (operation == NULL) { goto exit; } @@ -7281,11 +7271,7 @@ os_startfile(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject _PyArg_BadArgument("startfile", "argument 'arguments'", "str", args[2]); goto exit; } - #if USE_UNICODE_WCHAR_CACHE - arguments = _PyUnicode_AsUnicode(args[2]); - #else /* USE_UNICODE_WCHAR_CACHE */ arguments = PyUnicode_AsWideCharString(args[2], NULL); - #endif /* USE_UNICODE_WCHAR_CACHE */ if (arguments == NULL) { goto exit; } @@ -7312,13 +7298,9 @@ exit: /* Cleanup for filepath */ path_cleanup(&filepath); /* Cleanup for operation */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)operation); - #endif /* USE_UNICODE_WCHAR_CACHE */ /* Cleanup for arguments */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)arguments); - #endif /* USE_UNICODE_WCHAR_CACHE */ /* Cleanup for cwd */ path_cleanup(&cwd); @@ -9370,4 +9352,4 @@ exit: #ifndef OS_WAITSTATUS_TO_EXITCODE_METHODDEF #define OS_WAITSTATUS_TO_EXITCODE_METHODDEF #endif /* !defined(OS_WAITSTATUS_TO_EXITCODE_METHODDEF) */ -/*[clinic end generated code: output=6150bcc25f5e4bc7 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=bae15f09a1b3d2e7 input=a9049054013a1b77]*/ diff --git a/Modules/overlapped.c b/Modules/overlapped.c index 74fba83..e40a524 100644 --- a/Modules/overlapped.c +++ b/Modules/overlapped.c @@ -1346,7 +1346,7 @@ static int parse_address(PyObject *obj, SOCKADDR *Address, int Length) { PyObject *Host_obj; - Py_UNICODE *Host; + wchar_t *Host; unsigned short Port; unsigned long FlowInfo; unsigned long ScopeId; @@ -1358,11 +1358,7 @@ parse_address(PyObject *obj, SOCKADDR *Address, int Length) if (!PyArg_ParseTuple(obj, "UH", &Host_obj, &Port)) { return -1; } -#if USE_UNICODE_WCHAR_CACHE - Host = (wchar_t *)_PyUnicode_AsUnicode(Host_obj); -#else /* USE_UNICODE_WCHAR_CACHE */ Host = PyUnicode_AsWideCharString(Host_obj, NULL); -#endif /* USE_UNICODE_WCHAR_CACHE */ if (Host == NULL) { return -1; } @@ -1374,9 +1370,7 @@ parse_address(PyObject *obj, SOCKADDR *Address, int Length) else { ((SOCKADDR_IN*)Address)->sin_port = htons(Port); } -#if !USE_UNICODE_WCHAR_CACHE PyMem_Free(Host); -#endif /* USE_UNICODE_WCHAR_CACHE */ return Length; } case 4: { @@ -1386,11 +1380,7 @@ parse_address(PyObject *obj, SOCKADDR *Address, int Length) { return -1; } -#if USE_UNICODE_WCHAR_CACHE - Host = (wchar_t *)_PyUnicode_AsUnicode(Host_obj); -#else /* USE_UNICODE_WCHAR_CACHE */ Host = PyUnicode_AsWideCharString(Host_obj, NULL); -#endif /* USE_UNICODE_WCHAR_CACHE */ if (Host == NULL) { return -1; } @@ -1404,9 +1394,7 @@ parse_address(PyObject *obj, SOCKADDR *Address, int Length) ((SOCKADDR_IN6*)Address)->sin6_flowinfo = FlowInfo; ((SOCKADDR_IN6*)Address)->sin6_scope_id = ScopeId; } -#if !USE_UNICODE_WCHAR_CACHE PyMem_Free(Host); -#endif /* USE_UNICODE_WCHAR_CACHE */ return Length; } default: diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 4015889..0a72aca 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -1098,11 +1098,9 @@ typedef struct { static void path_cleanup(path_t *path) { -#if !USE_UNICODE_WCHAR_CACHE wchar_t *wide = (wchar_t *)path->wide; path->wide = NULL; PyMem_Free(wide); -#endif /* USE_UNICODE_WCHAR_CACHE */ Py_CLEAR(path->object); Py_CLEAR(path->cleanup); } @@ -1190,14 +1188,7 @@ path_converter(PyObject *o, void *p) if (is_unicode) { #ifdef MS_WINDOWS -#if USE_UNICODE_WCHAR_CACHE -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS - wide = PyUnicode_AsUnicodeAndSize(o, &length); -_Py_COMP_DIAG_POP -#else /* USE_UNICODE_WCHAR_CACHE */ wide = PyUnicode_AsWideCharString(o, &length); -#endif /* USE_UNICODE_WCHAR_CACHE */ if (!wide) { goto error_exit; } @@ -1213,9 +1204,7 @@ _Py_COMP_DIAG_POP path->wide = wide; path->narrow = FALSE; path->fd = -1; -#if !USE_UNICODE_WCHAR_CACHE wide = NULL; -#endif /* USE_UNICODE_WCHAR_CACHE */ goto success_exit; #else if (!PyUnicode_FSConverter(o, &bytes)) { @@ -1291,15 +1280,8 @@ _Py_COMP_DIAG_POP goto error_exit; } -#if USE_UNICODE_WCHAR_CACHE -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS - wide = PyUnicode_AsUnicodeAndSize(wo, &length); -_Py_COMP_DIAG_POP -#else /* USE_UNICODE_WCHAR_CACHE */ wide = PyUnicode_AsWideCharString(wo, &length); Py_DECREF(wo); -#endif /* USE_UNICODE_WCHAR_CACHE */ if (!wide) { goto error_exit; } @@ -1314,11 +1296,7 @@ _Py_COMP_DIAG_POP path->wide = wide; path->narrow = TRUE; Py_DECREF(bytes); -#if USE_UNICODE_WCHAR_CACHE - path->cleanup = wo; -#else /* USE_UNICODE_WCHAR_CACHE */ wide = NULL; -#endif /* USE_UNICODE_WCHAR_CACHE */ #else path->wide = NULL; path->narrow = narrow; @@ -1342,11 +1320,7 @@ _Py_COMP_DIAG_POP Py_XDECREF(o); Py_XDECREF(bytes); #ifdef MS_WINDOWS -#if USE_UNICODE_WCHAR_CACHE - Py_XDECREF(wo); -#else /* USE_UNICODE_WCHAR_CACHE */ PyMem_Free(wide); -#endif /* USE_UNICODE_WCHAR_CACHE */ #endif return 0; } @@ -13575,15 +13549,8 @@ DirEntry_fetch_stat(PyObject *module, DirEntry *self, int follow_symlinks) #ifdef MS_WINDOWS if (!PyUnicode_FSDecoder(self->path, &ub)) return NULL; -#if USE_UNICODE_WCHAR_CACHE -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS - const wchar_t *path = PyUnicode_AsUnicode(ub); -_Py_COMP_DIAG_POP -#else /* USE_UNICODE_WCHAR_CACHE */ wchar_t *path = PyUnicode_AsWideCharString(ub, NULL); Py_DECREF(ub); -#endif /* USE_UNICODE_WCHAR_CACHE */ #else /* POSIX */ if (!PyUnicode_FSConverter(self->path, &ub)) return NULL; @@ -13616,11 +13583,11 @@ _Py_COMP_DIAG_POP } Py_END_ALLOW_THREADS } -#if defined(MS_WINDOWS) && !USE_UNICODE_WCHAR_CACHE +#if defined(MS_WINDOWS) PyMem_Free(path); -#else /* USE_UNICODE_WCHAR_CACHE */ +#else Py_DECREF(ub); -#endif /* USE_UNICODE_WCHAR_CACHE */ +#endif if (result != 0) return path_object_error(self->path); @@ -13814,19 +13781,10 @@ os_DirEntry_inode_impl(DirEntry *self) if (!PyUnicode_FSDecoder(self->path, &unicode)) return NULL; -#if USE_UNICODE_WCHAR_CACHE -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS - const wchar_t *path = PyUnicode_AsUnicode(unicode); - result = LSTAT(path, &stat); - Py_DECREF(unicode); -_Py_COMP_DIAG_POP -#else /* USE_UNICODE_WCHAR_CACHE */ wchar_t *path = PyUnicode_AsWideCharString(unicode, NULL); Py_DECREF(unicode); result = LSTAT(path, &stat); PyMem_Free(path); -#endif /* USE_UNICODE_WCHAR_CACHE */ if (result != 0) return path_object_error(self->path); diff --git a/Objects/stringlib/eq.h b/Objects/stringlib/eq.h index 9c1058b..2f24388 100644 --- a/Objects/stringlib/eq.h +++ b/Objects/stringlib/eq.h @@ -4,15 +4,10 @@ * unicode_eq() is called when the hash of two unicode objects is equal. */ Py_LOCAL_INLINE(int) -unicode_eq(PyObject *aa, PyObject *bb) +unicode_eq(PyObject *a, PyObject *b) { - assert(PyUnicode_Check(aa)); - assert(PyUnicode_Check(bb)); - assert(PyUnicode_IS_READY(aa)); - assert(PyUnicode_IS_READY(bb)); - - PyUnicodeObject *a = (PyUnicodeObject *)aa; - PyUnicodeObject *b = (PyUnicodeObject *)bb; + assert(PyUnicode_Check(a)); + assert(PyUnicode_Check(b)); if (PyUnicode_GET_LENGTH(a) != PyUnicode_GET_LENGTH(b)) return 0; diff --git a/Objects/stringlib/unicodedefs.h b/Objects/stringlib/unicodedefs.h deleted file mode 100644 index ba2ce0a..0000000 --- a/Objects/stringlib/unicodedefs.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef STRINGLIB_UNICODEDEFS_H -#define STRINGLIB_UNICODEDEFS_H - -/* this is sort of a hack. there's at least one place (formatting - floats) where some stringlib code takes a different path if it's - compiled as unicode. */ -#define STRINGLIB_IS_UNICODE 1 - -#define FASTSEARCH fastsearch -#define STRINGLIB(F) stringlib_##F -#define STRINGLIB_OBJECT PyUnicodeObject -#define STRINGLIB_SIZEOF_CHAR Py_UNICODE_SIZE -#define STRINGLIB_CHAR Py_UNICODE -#define STRINGLIB_TYPE_NAME "unicode" -#define STRINGLIB_PARSE_CODE "U" -#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE -#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK -#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL -#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL -#define STRINGLIB_STR PyUnicode_AS_UNICODE -#define STRINGLIB_LEN PyUnicode_GET_SIZE -#define STRINGLIB_NEW PyUnicode_FromUnicode -#define STRINGLIB_CHECK PyUnicode_Check -#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact -#define STRINGLIB_MUTABLE 0 - -#define STRINGLIB_TOSTR PyObject_Str -#define STRINGLIB_TOASCII PyObject_ASCII - -#define STRINGLIB_WANT_CONTAINS_OBJ 1 - -#endif /* !STRINGLIB_UNICODEDEFS_H */ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 656c7cc..cc50fcd 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -115,7 +115,6 @@ extern "C" { (_PyCompactUnicodeObject_CAST(op)->utf8) #define PyUnicode_UTF8(op) \ (assert(_PyUnicode_CHECK(op)), \ - assert(PyUnicode_IS_READY(op)), \ PyUnicode_IS_COMPACT_ASCII(op) ? \ ((char*)(_PyASCIIObject_CAST(op) + 1)) : \ _PyUnicode_UTF8(op)) @@ -123,21 +122,10 @@ extern "C" { (_PyCompactUnicodeObject_CAST(op)->utf8_length) #define PyUnicode_UTF8_LENGTH(op) \ (assert(_PyUnicode_CHECK(op)), \ - assert(PyUnicode_IS_READY(op)), \ PyUnicode_IS_COMPACT_ASCII(op) ? \ _PyASCIIObject_CAST(op)->length : \ _PyUnicode_UTF8_LENGTH(op)) -#define _PyUnicode_WSTR(op) \ - (_PyASCIIObject_CAST(op)->wstr) - -/* Don't use deprecated macro of unicodeobject.h */ -#undef PyUnicode_WSTR_LENGTH -#define PyUnicode_WSTR_LENGTH(op) \ - (PyUnicode_IS_COMPACT_ASCII(op) ? \ - _PyASCIIObject_CAST(op)->length : \ - _PyCompactUnicodeObject_CAST(op)->wstr_length) -#define _PyUnicode_WSTR_LENGTH(op) \ - (_PyCompactUnicodeObject_CAST(op)->wstr_length) + #define _PyUnicode_LENGTH(op) \ (_PyASCIIObject_CAST(op)->length) #define _PyUnicode_STATE(op) \ @@ -153,20 +141,10 @@ extern "C" { #define _PyUnicode_DATA_ANY(op) \ (_PyUnicodeObject_CAST(op)->data.any) -#undef PyUnicode_READY -#define PyUnicode_READY(op) \ - (assert(_PyUnicode_CHECK(op)), \ - (PyUnicode_IS_READY(op) ? \ - 0 : \ - _PyUnicode_Ready(op))) - #define _PyUnicode_SHARE_UTF8(op) \ (assert(_PyUnicode_CHECK(op)), \ assert(!PyUnicode_IS_COMPACT_ASCII(op)), \ (_PyUnicode_UTF8(op) == PyUnicode_DATA(op))) -#define _PyUnicode_SHARE_WSTR(op) \ - (assert(_PyUnicode_CHECK(op)), \ - (_PyUnicode_WSTR(unicode) == PyUnicode_DATA(op))) /* true if the Unicode object has an allocated UTF-8 memory block (not shared with other data) */ @@ -175,13 +153,6 @@ extern "C" { && _PyUnicode_UTF8(op) \ && _PyUnicode_UTF8(op) != PyUnicode_DATA(op))) -/* true if the Unicode object has an allocated wstr memory block - (not shared with other data) */ -#define _PyUnicode_HAS_WSTR_MEMORY(op) \ - ((_PyUnicode_WSTR(op) && \ - (!PyUnicode_IS_READY(op) || \ - _PyUnicode_WSTR(op) != PyUnicode_DATA(op)))) - /* Generic helper macro to convert characters of different types. from_type and to_type have to be valid type names, begin and end are pointers to the source characters which should be of type @@ -280,7 +251,6 @@ unicode_fill(enum PyUnicode_Kind kind, void *data, Py_UCS4 value, Py_ssize_t start, Py_ssize_t length) { assert(0 <= start); - assert(kind != PyUnicode_WCHAR_KIND); switch (kind) { case PyUnicode_1BYTE_KIND: { assert(value <= 0xff); @@ -342,7 +312,6 @@ const unsigned char _Py_ascii_whitespace[] = { }; /* forward */ -static PyUnicodeObject *_PyUnicode_New(Py_ssize_t length); static PyObject* get_latin1_char(unsigned char ch); static int unicode_modifiable(PyObject *unicode); @@ -518,7 +487,6 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) if (ascii->state.ascii == 1 && ascii->state.compact == 1) { CHECK(kind == PyUnicode_1BYTE_KIND); - CHECK(ascii->state.ready == 1); } else { PyCompactUnicodeObject *compact = _PyCompactUnicodeObject_CAST(op); @@ -530,62 +498,32 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) || kind == PyUnicode_2BYTE_KIND || kind == PyUnicode_4BYTE_KIND); CHECK(ascii->state.ascii == 0); - CHECK(ascii->state.ready == 1); CHECK(compact->utf8 != data); } else { PyUnicodeObject *unicode = _PyUnicodeObject_CAST(op); data = unicode->data.any; - if (kind == PyUnicode_WCHAR_KIND) { - CHECK(ascii->length == 0); - CHECK(ascii->hash == -1); - CHECK(ascii->state.compact == 0); - CHECK(ascii->state.ascii == 0); - CHECK(ascii->state.ready == 0); - CHECK(ascii->state.interned == SSTATE_NOT_INTERNED); - CHECK(ascii->wstr != NULL); - CHECK(data == NULL); - CHECK(compact->utf8 == NULL); + CHECK(kind == PyUnicode_1BYTE_KIND + || kind == PyUnicode_2BYTE_KIND + || kind == PyUnicode_4BYTE_KIND); + CHECK(ascii->state.compact == 0); + CHECK(data != NULL); + if (ascii->state.ascii) { + CHECK(compact->utf8 == data); + CHECK(compact->utf8_length == ascii->length); } else { - CHECK(kind == PyUnicode_1BYTE_KIND - || kind == PyUnicode_2BYTE_KIND - || kind == PyUnicode_4BYTE_KIND); - CHECK(ascii->state.compact == 0); - CHECK(ascii->state.ready == 1); - CHECK(data != NULL); - if (ascii->state.ascii) { - CHECK(compact->utf8 == data); - CHECK(compact->utf8_length == ascii->length); - } - else - CHECK(compact->utf8 != data); + CHECK(compact->utf8 != data); } } - if (kind != PyUnicode_WCHAR_KIND) { - if ( -#if SIZEOF_WCHAR_T == 2 - kind == PyUnicode_2BYTE_KIND -#else - kind == PyUnicode_4BYTE_KIND -#endif - ) - { - CHECK(ascii->wstr == data); - CHECK(compact->wstr_length == ascii->length); - } else - CHECK(ascii->wstr != data); - } if (compact->utf8 == NULL) CHECK(compact->utf8_length == 0); - if (ascii->wstr == NULL) - CHECK(compact->wstr_length == 0); } /* check that the best kind is used: O(n) operation */ - if (check_content && kind != PyUnicode_WCHAR_KIND) { + if (check_content) { Py_ssize_t i; Py_UCS4 maxchar = 0; const void *data; @@ -621,47 +559,12 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) #undef CHECK } - -static PyObject* -unicode_result_wchar(PyObject *unicode) -{ -#ifndef Py_DEBUG - Py_ssize_t len; - - len = _PyUnicode_WSTR_LENGTH(unicode); - if (len == 0) { - Py_DECREF(unicode); - _Py_RETURN_UNICODE_EMPTY(); - } - - if (len == 1) { - wchar_t ch = _PyUnicode_WSTR(unicode)[0]; - if ((Py_UCS4)ch < 256) { - Py_DECREF(unicode); - return get_latin1_char((unsigned char)ch); - } - } - - if (_PyUnicode_Ready(unicode) < 0) { - Py_DECREF(unicode); - return NULL; - } -#else - assert(Py_REFCNT(unicode) == 1); - - /* don't make the result ready in debug mode to ensure that the caller - makes the string ready before using it */ - assert(_PyUnicode_CheckConsistency(unicode, 1)); -#endif - return unicode; -} - static PyObject* -unicode_result_ready(PyObject *unicode) +unicode_result(PyObject *unicode) { - Py_ssize_t length; + assert(_PyUnicode_CHECK(unicode)); - length = PyUnicode_GET_LENGTH(unicode); + Py_ssize_t length = PyUnicode_GET_LENGTH(unicode); if (length == 0) { PyObject *empty = unicode_get_empty(); if (unicode != empty) { @@ -690,21 +593,9 @@ unicode_result_ready(PyObject *unicode) } static PyObject* -unicode_result(PyObject *unicode) -{ - assert(_PyUnicode_CHECK(unicode)); - if (PyUnicode_IS_READY(unicode)) - return unicode_result_ready(unicode); - else - return unicode_result_wchar(unicode); -} - -static PyObject* unicode_result_unchanged(PyObject *unicode) { if (PyUnicode_CheckExact(unicode)) { - if (PyUnicode_READY(unicode) == -1) - return NULL; Py_INCREF(unicode); return unicode; } @@ -724,7 +615,6 @@ backslashreplace(_PyBytesWriter *writer, char *str, enum PyUnicode_Kind kind; const void *data; - assert(PyUnicode_IS_READY(unicode)); kind = PyUnicode_KIND(unicode); data = PyUnicode_DATA(unicode); @@ -791,7 +681,6 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str, enum PyUnicode_Kind kind; const void *data; - assert(PyUnicode_IS_READY(unicode)); kind = PyUnicode_KIND(unicode); data = PyUnicode_DATA(unicode); @@ -915,7 +804,7 @@ ensure_unicode(PyObject *obj) Py_TYPE(obj)->tp_name); return -1; } - return PyUnicode_READY(obj); + return 0; } /* Compilation of templated routines */ @@ -961,15 +850,6 @@ ensure_unicode(PyObject *obj) #include "stringlib/find_max_char.h" #include "stringlib/undef.h" -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS -#include "stringlib/unicodedefs.h" -#include "stringlib/fastsearch.h" -#include "stringlib/count.h" -#include "stringlib/find.h" -#include "stringlib/undef.h" -_Py_COMP_DIAG_POP - #undef STRINGLIB_GET_EMPTY /* --- Unicode Object ----------------------------------------------------- */ @@ -1029,14 +909,12 @@ resize_compact(PyObject *unicode, Py_ssize_t length) Py_ssize_t char_size; Py_ssize_t struct_size; Py_ssize_t new_size; - int share_wstr; PyObject *new_unicode; #ifdef Py_DEBUG Py_ssize_t old_length = _PyUnicode_LENGTH(unicode); #endif assert(unicode_modifiable(unicode)); - assert(PyUnicode_IS_READY(unicode)); assert(PyUnicode_IS_COMPACT(unicode)); char_size = PyUnicode_KIND(unicode); @@ -1044,7 +922,6 @@ resize_compact(PyObject *unicode, Py_ssize_t length) struct_size = sizeof(PyASCIIObject); else struct_size = sizeof(PyCompactUnicodeObject); - share_wstr = _PyUnicode_SHARE_WSTR(unicode); if (length > ((PY_SSIZE_T_MAX - struct_size) / char_size - 1)) { PyErr_NoMemory(); @@ -1074,17 +951,6 @@ resize_compact(PyObject *unicode, Py_ssize_t length) _Py_NewReference(unicode); _PyUnicode_LENGTH(unicode) = length; - if (share_wstr) { - _PyUnicode_WSTR(unicode) = PyUnicode_DATA(unicode); - if (!PyUnicode_IS_ASCII(unicode)) - _PyUnicode_WSTR_LENGTH(unicode) = length; - } - else if (_PyUnicode_HAS_WSTR_MEMORY(unicode)) { - PyObject_Free(_PyUnicode_WSTR(unicode)); - _PyUnicode_WSTR(unicode) = NULL; - if (!PyUnicode_IS_ASCII(unicode)) - _PyUnicode_WSTR_LENGTH(unicode) = 0; - } #ifdef Py_DEBUG unicode_fill_invalid(unicode, old_length); #endif @@ -1097,78 +963,55 @@ resize_compact(PyObject *unicode, Py_ssize_t length) static int resize_inplace(PyObject *unicode, Py_ssize_t length) { - wchar_t *wstr; - Py_ssize_t new_size; assert(!PyUnicode_IS_COMPACT(unicode)); assert(Py_REFCNT(unicode) == 1); - if (PyUnicode_IS_READY(unicode)) { - Py_ssize_t char_size; - int share_wstr, share_utf8; - void *data; + Py_ssize_t new_size; + Py_ssize_t char_size; + int share_utf8; + void *data; #ifdef Py_DEBUG - Py_ssize_t old_length = _PyUnicode_LENGTH(unicode); + Py_ssize_t old_length = _PyUnicode_LENGTH(unicode); #endif - data = _PyUnicode_DATA_ANY(unicode); - char_size = PyUnicode_KIND(unicode); - share_wstr = _PyUnicode_SHARE_WSTR(unicode); - share_utf8 = _PyUnicode_SHARE_UTF8(unicode); + data = _PyUnicode_DATA_ANY(unicode); + char_size = PyUnicode_KIND(unicode); + share_utf8 = _PyUnicode_SHARE_UTF8(unicode); - if (length > (PY_SSIZE_T_MAX / char_size - 1)) { - PyErr_NoMemory(); - return -1; - } - new_size = (length + 1) * char_size; + if (length > (PY_SSIZE_T_MAX / char_size - 1)) { + PyErr_NoMemory(); + return -1; + } + new_size = (length + 1) * char_size; - if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY(unicode)) - { - PyObject_Free(_PyUnicode_UTF8(unicode)); - _PyUnicode_UTF8(unicode) = NULL; - _PyUnicode_UTF8_LENGTH(unicode) = 0; - } + if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY(unicode)) + { + PyObject_Free(_PyUnicode_UTF8(unicode)); + _PyUnicode_UTF8(unicode) = NULL; + _PyUnicode_UTF8_LENGTH(unicode) = 0; + } - data = (PyObject *)PyObject_Realloc(data, new_size); - if (data == NULL) { - PyErr_NoMemory(); - return -1; - } - _PyUnicode_DATA_ANY(unicode) = data; - if (share_wstr) { - _PyUnicode_WSTR(unicode) = data; - _PyUnicode_WSTR_LENGTH(unicode) = length; - } - if (share_utf8) { - _PyUnicode_UTF8(unicode) = data; - _PyUnicode_UTF8_LENGTH(unicode) = length; - } - _PyUnicode_LENGTH(unicode) = length; - PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0); + data = (PyObject *)PyObject_Realloc(data, new_size); + if (data == NULL) { + PyErr_NoMemory(); + return -1; + } + _PyUnicode_DATA_ANY(unicode) = data; + if (share_utf8) { + _PyUnicode_UTF8(unicode) = data; + _PyUnicode_UTF8_LENGTH(unicode) = length; + } + _PyUnicode_LENGTH(unicode) = length; + PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0); #ifdef Py_DEBUG - unicode_fill_invalid(unicode, old_length); + unicode_fill_invalid(unicode, old_length); #endif - if (share_wstr || _PyUnicode_WSTR(unicode) == NULL) { - assert(_PyUnicode_CheckConsistency(unicode, 0)); - return 0; - } - } - assert(_PyUnicode_WSTR(unicode) != NULL); /* check for integer overflow */ if (length > PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t) - 1) { PyErr_NoMemory(); return -1; } - new_size = sizeof(wchar_t) * (length + 1); - wstr = _PyUnicode_WSTR(unicode); - wstr = PyObject_Realloc(wstr, new_size); - if (!wstr) { - PyErr_NoMemory(); - return -1; - } - _PyUnicode_WSTR(unicode) = wstr; - _PyUnicode_WSTR(unicode)[length] = 0; - _PyUnicode_WSTR_LENGTH(unicode) = length; assert(_PyUnicode_CheckConsistency(unicode, 0)); return 0; } @@ -1177,99 +1020,15 @@ static PyObject* resize_copy(PyObject *unicode, Py_ssize_t length) { Py_ssize_t copy_length; - if (_PyUnicode_KIND(unicode) != PyUnicode_WCHAR_KIND) { - PyObject *copy; - - assert(PyUnicode_IS_READY(unicode)); - - copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode)); - if (copy == NULL) - return NULL; - - copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode)); - _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, copy_length); - return copy; - } - else { - PyObject *w; - - w = (PyObject*)_PyUnicode_New(length); - if (w == NULL) - return NULL; - copy_length = _PyUnicode_WSTR_LENGTH(unicode); - copy_length = Py_MIN(copy_length, length); - memcpy(_PyUnicode_WSTR(w), _PyUnicode_WSTR(unicode), - copy_length * sizeof(wchar_t)); - return w; - } -} - -/* We allocate one more byte to make sure the string is - Ux0000 terminated; some code (e.g. new_identifier) - relies on that. - - XXX This allocator could further be enhanced by assuring that the - free list never reduces its size below 1. - -*/ - -static PyUnicodeObject * -_PyUnicode_New(Py_ssize_t length) -{ - PyUnicodeObject *unicode; - size_t new_size; - - /* Optimization for empty strings */ - if (length == 0) { - return (PyUnicodeObject *)unicode_new_empty(); - } - - /* Ensure we won't overflow the size. */ - if (length > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UNICODE)) - 1)) { - return (PyUnicodeObject *)PyErr_NoMemory(); - } - if (length < 0) { - PyErr_SetString(PyExc_SystemError, - "Negative size passed to _PyUnicode_New"); - return NULL; - } - - unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type); - if (unicode == NULL) - return NULL; - new_size = sizeof(Py_UNICODE) * ((size_t)length + 1); + PyObject *copy; - _PyUnicode_WSTR_LENGTH(unicode) = length; - _PyUnicode_HASH(unicode) = -1; - _PyUnicode_STATE(unicode).interned = 0; - _PyUnicode_STATE(unicode).kind = 0; - _PyUnicode_STATE(unicode).compact = 0; - _PyUnicode_STATE(unicode).ready = 0; - _PyUnicode_STATE(unicode).ascii = 0; - _PyUnicode_DATA_ANY(unicode) = NULL; - _PyUnicode_LENGTH(unicode) = 0; - _PyUnicode_UTF8(unicode) = NULL; - _PyUnicode_UTF8_LENGTH(unicode) = 0; - - _PyUnicode_WSTR(unicode) = (Py_UNICODE*) PyObject_Malloc(new_size); - if (!_PyUnicode_WSTR(unicode)) { - Py_DECREF(unicode); - PyErr_NoMemory(); + copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode)); + if (copy == NULL) return NULL; - } - /* Initialize the first element to guard against cases where - * the caller fails before initializing str -- unicode_resize() - * reads str[0], and the Keep-Alive optimization can keep memory - * allocated for str alive across a call to unicode_dealloc(unicode). - * We don't want unicode_resize to read uninitialized memory in - * that case. - */ - _PyUnicode_WSTR(unicode)[0] = 0; - _PyUnicode_WSTR(unicode)[length] = 0; - - assert(_PyUnicode_CheckConsistency((PyObject *)unicode, 0)); - return unicode; + copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode)); + _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, copy_length); + return copy; } static const char* @@ -1279,8 +1038,6 @@ unicode_kind_name(PyObject *unicode) _PyUnicode_Dump() */ if (!PyUnicode_IS_COMPACT(unicode)) { - if (!PyUnicode_IS_READY(unicode)) - return "wstr"; switch (PyUnicode_KIND(unicode)) { case PyUnicode_1BYTE_KIND: @@ -1296,7 +1053,6 @@ unicode_kind_name(PyObject *unicode) return ""; } } - assert(PyUnicode_IS_READY(unicode)); switch (PyUnicode_KIND(unicode)) { case PyUnicode_1BYTE_KIND: if (PyUnicode_IS_ASCII(unicode)) @@ -1353,15 +1109,7 @@ _PyUnicode_Dump(PyObject *op) data = unicode->data.any; printf("%s: len=%zu, ", unicode_kind_name(op), ascii->length); - if (ascii->wstr == data) - printf("shared "); - printf("wstr=%p", (void *)ascii->wstr); - - if (!(ascii->state.ascii == 1 && ascii->state.compact == 1)) { - printf(" (%zu), ", compact->wstr_length); - if (!ascii->state.compact && compact->utf8 == unicode->data.any) { - printf("shared "); - } + if (!ascii->state.ascii) { printf("utf8=%p (%zu)", (void *)compact->utf8, compact->utf8_length); } printf(", data=%p\n", data); @@ -1381,12 +1129,11 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) PyCompactUnicodeObject *unicode; void *data; enum PyUnicode_Kind kind; - int is_sharing, is_ascii; + int is_ascii; Py_ssize_t char_size; Py_ssize_t struct_size; is_ascii = 0; - is_sharing = 0; struct_size = sizeof(PyCompactUnicodeObject); if (maxchar < 128) { kind = PyUnicode_1BYTE_KIND; @@ -1401,8 +1148,6 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) else if (maxchar < 65536) { kind = PyUnicode_2BYTE_KIND; char_size = 2; - if (sizeof(wchar_t) == 2) - is_sharing = 1; } else { if (maxchar > MAX_UNICODE) { @@ -1412,8 +1157,6 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) } kind = PyUnicode_4BYTE_KIND; char_size = 4; - if (sizeof(wchar_t) == 4) - is_sharing = 1; } /* Ensure we won't overflow the size. */ @@ -1445,16 +1188,12 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) _PyUnicode_STATE(unicode).interned = 0; _PyUnicode_STATE(unicode).kind = kind; _PyUnicode_STATE(unicode).compact = 1; - _PyUnicode_STATE(unicode).ready = 1; _PyUnicode_STATE(unicode).ascii = is_ascii; if (is_ascii) { ((char*)data)[size] = 0; - _PyUnicode_WSTR(unicode) = NULL; } else if (kind == PyUnicode_1BYTE_KIND) { ((char*)data)[size] = 0; - _PyUnicode_WSTR(unicode) = NULL; - _PyUnicode_WSTR_LENGTH(unicode) = 0; unicode->utf8 = NULL; unicode->utf8_length = 0; } @@ -1465,14 +1204,6 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) ((Py_UCS2*)data)[size] = 0; else /* kind == PyUnicode_4BYTE_KIND */ ((Py_UCS4*)data)[size] = 0; - if (is_sharing) { - _PyUnicode_WSTR_LENGTH(unicode) = size; - _PyUnicode_WSTR(unicode) = (wchar_t *)data; - } - else { - _PyUnicode_WSTR_LENGTH(unicode) = 0; - _PyUnicode_WSTR(unicode) = NULL; - } } #ifdef Py_DEBUG unicode_fill_invalid((PyObject*)unicode, 0); @@ -1545,11 +1276,9 @@ _copy_characters(PyObject *to, Py_ssize_t to_start, assert(0 <= from_start); assert(0 <= to_start); assert(PyUnicode_Check(from)); - assert(PyUnicode_IS_READY(from)); assert(from_start + how_many <= PyUnicode_GET_LENGTH(from)); assert(PyUnicode_Check(to)); - assert(PyUnicode_IS_READY(to)); assert(to_start + how_many <= PyUnicode_GET_LENGTH(to)); if (how_many == 0) @@ -1694,11 +1423,6 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start, return -1; } - if (PyUnicode_READY(from) == -1) - return -1; - if (PyUnicode_READY(to) == -1) - return -1; - if ((size_t)from_start > (size_t)PyUnicode_GET_LENGTH(from)) { PyErr_SetString(PyExc_IndexError, "string index out of range"); return -1; @@ -1783,135 +1507,6 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end, return 0; } -int -_PyUnicode_Ready(PyObject *unicode) -{ - wchar_t *end; - Py_UCS4 maxchar = 0; - Py_ssize_t num_surrogates; -#if SIZEOF_WCHAR_T == 2 - Py_ssize_t length_wo_surrogates; -#endif - - /* _PyUnicode_Ready() is only intended for old-style API usage where - strings were created using _PyObject_New() and where no canonical - representation (the str field) has been set yet aka strings - which are not yet ready. */ - assert(_PyUnicode_CHECK(unicode)); - assert(_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND); - assert(_PyUnicode_WSTR(unicode) != NULL); - assert(_PyUnicode_DATA_ANY(unicode) == NULL); - assert(_PyUnicode_UTF8(unicode) == NULL); - /* Actually, it should neither be interned nor be anything else: */ - assert(_PyUnicode_STATE(unicode).interned == SSTATE_NOT_INTERNED); - - end = _PyUnicode_WSTR(unicode) + _PyUnicode_WSTR_LENGTH(unicode); - if (find_maxchar_surrogates(_PyUnicode_WSTR(unicode), end, - &maxchar, &num_surrogates) == -1) - return -1; - - if (maxchar < 256) { - _PyUnicode_DATA_ANY(unicode) = PyObject_Malloc(_PyUnicode_WSTR_LENGTH(unicode) + 1); - if (!_PyUnicode_DATA_ANY(unicode)) { - PyErr_NoMemory(); - return -1; - } - _PyUnicode_CONVERT_BYTES(wchar_t, unsigned char, - _PyUnicode_WSTR(unicode), end, - PyUnicode_1BYTE_DATA(unicode)); - PyUnicode_1BYTE_DATA(unicode)[_PyUnicode_WSTR_LENGTH(unicode)] = '\0'; - _PyUnicode_LENGTH(unicode) = _PyUnicode_WSTR_LENGTH(unicode); - _PyUnicode_STATE(unicode).kind = PyUnicode_1BYTE_KIND; - if (maxchar < 128) { - _PyUnicode_STATE(unicode).ascii = 1; - _PyUnicode_UTF8(unicode) = _PyUnicode_DATA_ANY(unicode); - _PyUnicode_UTF8_LENGTH(unicode) = _PyUnicode_WSTR_LENGTH(unicode); - } - else { - _PyUnicode_STATE(unicode).ascii = 0; - _PyUnicode_UTF8(unicode) = NULL; - _PyUnicode_UTF8_LENGTH(unicode) = 0; - } - PyObject_Free(_PyUnicode_WSTR(unicode)); - _PyUnicode_WSTR(unicode) = NULL; - _PyUnicode_WSTR_LENGTH(unicode) = 0; - } - /* In this case we might have to convert down from 4-byte native - wchar_t to 2-byte unicode. */ - else if (maxchar < 65536) { - assert(num_surrogates == 0 && - "FindMaxCharAndNumSurrogatePairs() messed up"); - -#if SIZEOF_WCHAR_T == 2 - /* We can share representations and are done. */ - _PyUnicode_DATA_ANY(unicode) = _PyUnicode_WSTR(unicode); - PyUnicode_2BYTE_DATA(unicode)[_PyUnicode_WSTR_LENGTH(unicode)] = '\0'; - _PyUnicode_LENGTH(unicode) = _PyUnicode_WSTR_LENGTH(unicode); - _PyUnicode_STATE(unicode).kind = PyUnicode_2BYTE_KIND; - _PyUnicode_UTF8(unicode) = NULL; - _PyUnicode_UTF8_LENGTH(unicode) = 0; -#else - /* sizeof(wchar_t) == 4 */ - _PyUnicode_DATA_ANY(unicode) = PyObject_Malloc( - 2 * (_PyUnicode_WSTR_LENGTH(unicode) + 1)); - if (!_PyUnicode_DATA_ANY(unicode)) { - PyErr_NoMemory(); - return -1; - } - _PyUnicode_CONVERT_BYTES(wchar_t, Py_UCS2, - _PyUnicode_WSTR(unicode), end, - PyUnicode_2BYTE_DATA(unicode)); - PyUnicode_2BYTE_DATA(unicode)[_PyUnicode_WSTR_LENGTH(unicode)] = '\0'; - _PyUnicode_LENGTH(unicode) = _PyUnicode_WSTR_LENGTH(unicode); - _PyUnicode_STATE(unicode).kind = PyUnicode_2BYTE_KIND; - _PyUnicode_UTF8(unicode) = NULL; - _PyUnicode_UTF8_LENGTH(unicode) = 0; - PyObject_Free(_PyUnicode_WSTR(unicode)); - _PyUnicode_WSTR(unicode) = NULL; - _PyUnicode_WSTR_LENGTH(unicode) = 0; -#endif - } - /* maxchar exceeds 16 bit, wee need 4 bytes for unicode characters */ - else { -#if SIZEOF_WCHAR_T == 2 - /* in case the native representation is 2-bytes, we need to allocate a - new normalized 4-byte version. */ - length_wo_surrogates = _PyUnicode_WSTR_LENGTH(unicode) - num_surrogates; - if (length_wo_surrogates > PY_SSIZE_T_MAX / 4 - 1) { - PyErr_NoMemory(); - return -1; - } - _PyUnicode_DATA_ANY(unicode) = PyObject_Malloc(4 * (length_wo_surrogates + 1)); - if (!_PyUnicode_DATA_ANY(unicode)) { - PyErr_NoMemory(); - return -1; - } - _PyUnicode_LENGTH(unicode) = length_wo_surrogates; - _PyUnicode_STATE(unicode).kind = PyUnicode_4BYTE_KIND; - _PyUnicode_UTF8(unicode) = NULL; - _PyUnicode_UTF8_LENGTH(unicode) = 0; - /* unicode_convert_wchar_to_ucs4() requires a ready string */ - _PyUnicode_STATE(unicode).ready = 1; - unicode_convert_wchar_to_ucs4(_PyUnicode_WSTR(unicode), end, unicode); - PyObject_Free(_PyUnicode_WSTR(unicode)); - _PyUnicode_WSTR(unicode) = NULL; - _PyUnicode_WSTR_LENGTH(unicode) = 0; -#else - assert(num_surrogates == 0); - - _PyUnicode_DATA_ANY(unicode) = _PyUnicode_WSTR(unicode); - _PyUnicode_LENGTH(unicode) = _PyUnicode_WSTR_LENGTH(unicode); - _PyUnicode_UTF8(unicode) = NULL; - _PyUnicode_UTF8_LENGTH(unicode) = 0; - _PyUnicode_STATE(unicode).kind = PyUnicode_4BYTE_KIND; -#endif - PyUnicode_4BYTE_DATA(unicode)[_PyUnicode_LENGTH(unicode)] = '\0'; - } - _PyUnicode_STATE(unicode).ready = 1; - assert(_PyUnicode_CheckConsistency(unicode, 1)); - return 0; -} - static void unicode_dealloc(PyObject *unicode) { @@ -1953,9 +1548,6 @@ unicode_dealloc(PyObject *unicode) Py_UNREACHABLE(); } - if (_PyUnicode_HAS_WSTR_MEMORY(unicode)) { - PyObject_Free(_PyUnicode_WSTR(unicode)); - } if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) { PyObject_Free(_PyUnicode_UTF8(unicode)); } @@ -1975,7 +1567,7 @@ unicode_is_singleton(PyObject *unicode) } PyASCIIObject *ascii = _PyASCIIObject_CAST(unicode); - if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1) { + if (ascii->length == 1) { Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0); if (ch < 256 && LATIN1(ch) == unicode) { return 1; @@ -2017,10 +1609,7 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length) assert(PyUnicode_Check(unicode)); assert(0 <= length); - if (_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND) - old_length = PyUnicode_WSTR_LENGTH(unicode); - else - old_length = PyUnicode_GET_LENGTH(unicode); + old_length = PyUnicode_GET_LENGTH(unicode); if (old_length == length) return 0; @@ -2150,28 +1739,6 @@ unicode_char(Py_UCS4 ch) } PyObject * -PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size) -{ - if (u == NULL) { - if (size > 0) { - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "PyUnicode_FromUnicode(NULL, size) is deprecated; " - "use PyUnicode_New() instead", 1) < 0) { - return NULL; - } - } - return (PyObject*)_PyUnicode_New(size); - } - - if (size < 0) { - PyErr_BadInternalCall(); - return NULL; - } - - return PyUnicode_FromWideChar(u, size); -} - -PyObject * PyUnicode_FromWideChar(const wchar_t *u, Py_ssize_t size) { PyObject *unicode; @@ -2264,16 +1831,12 @@ PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) if (u != NULL) { return PyUnicode_DecodeUTF8Stateful(u, size, NULL, NULL); } - else { - if (size > 0) { - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "PyUnicode_FromStringAndSize(NULL, size) is deprecated; " - "use PyUnicode_New() instead", 1) < 0) { - return NULL; - } - } - return (PyObject *)_PyUnicode_New(size); + if (size > 0) { + PyErr_SetString(PyExc_SystemError, + "NULL string with positive size with NULL passed to PyUnicode_FromStringAndSize"); + return NULL; } + return unicode_new_empty(); } PyObject * @@ -2504,7 +2067,6 @@ _PyUnicode_FindMaxChar(PyObject *unicode, Py_ssize_t start, Py_ssize_t end) enum PyUnicode_Kind kind; const void *startptr, *endptr; - assert(PyUnicode_IS_READY(unicode)); assert(0 <= start); assert(end <= PyUnicode_GET_LENGTH(unicode)); assert(start <= end); @@ -2547,7 +2109,6 @@ unicode_adjust_maxchar(PyObject **p_unicode) assert(p_unicode != NULL); unicode = *p_unicode; - assert(PyUnicode_IS_READY(unicode)); if (PyUnicode_IS_ASCII(unicode)) return; @@ -2591,8 +2152,6 @@ _PyUnicode_Copy(PyObject *unicode) PyErr_BadInternalCall(); return NULL; } - if (PyUnicode_READY(unicode) == -1) - return NULL; length = PyUnicode_GET_LENGTH(unicode); copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode)); @@ -2661,8 +2220,6 @@ as_ucs4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize, int kind; const void *data; Py_ssize_t len, targetlen; - if (PyUnicode_READY(string) == -1) - return NULL; kind = PyUnicode_KIND(string); data = PyUnicode_DATA(string); len = PyUnicode_GET_LENGTH(string); @@ -2733,9 +2290,6 @@ unicode_fromformat_write_str(_PyUnicodeWriter *writer, PyObject *str, Py_ssize_t length, fill, arglen; Py_UCS4 maxchar; - if (PyUnicode_READY(str) == -1) - return -1; - length = PyUnicode_GET_LENGTH(str); if ((precision == -1 || precision >= length) && width <= length) @@ -3172,13 +2726,6 @@ unicode_get_widechar_size(PyObject *unicode) assert(unicode != NULL); assert(_PyUnicode_CHECK(unicode)); -#if USE_UNICODE_WCHAR_CACHE - if (_PyUnicode_WSTR(unicode) != NULL) { - return PyUnicode_WSTR_LENGTH(unicode); - } -#endif /* USE_UNICODE_WCHAR_CACHE */ - assert(PyUnicode_IS_READY(unicode)); - res = _PyUnicode_LENGTH(unicode); #if SIZEOF_WCHAR_T == 2 if (PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND) { @@ -3200,19 +2747,10 @@ unicode_copy_as_widechar(PyObject *unicode, wchar_t *w, Py_ssize_t size) assert(unicode != NULL); assert(_PyUnicode_CHECK(unicode)); -#if USE_UNICODE_WCHAR_CACHE - const wchar_t *wstr = _PyUnicode_WSTR(unicode); - if (wstr != NULL) { - memcpy(w, wstr, size * sizeof(wchar_t)); - return; - } -#else /* USE_UNICODE_WCHAR_CACHE */ if (PyUnicode_KIND(unicode) == sizeof(wchar_t)) { memcpy(w, PyUnicode_DATA(unicode), size * sizeof(wchar_t)); return; } -#endif /* USE_UNICODE_WCHAR_CACHE */ - assert(PyUnicode_IS_READY(unicode)); if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) { const Py_UCS1 *s = PyUnicode_1BYTE_DATA(unicode); @@ -3353,26 +2891,16 @@ _PyUnicode_WideCharString_Converter(PyObject *obj, void *ptr) { wchar_t **p = (wchar_t **)ptr; if (obj == NULL) { -#if !USE_UNICODE_WCHAR_CACHE PyMem_Free(*p); -#endif /* USE_UNICODE_WCHAR_CACHE */ *p = NULL; return 1; } if (PyUnicode_Check(obj)) { -#if USE_UNICODE_WCHAR_CACHE - *p = (wchar_t *)_PyUnicode_AsUnicode(obj); - if (*p == NULL) { - return 0; - } - return 1; -#else /* USE_UNICODE_WCHAR_CACHE */ *p = PyUnicode_AsWideCharString(obj, NULL); if (*p == NULL) { return 0; } return Py_CLEANUP_SUPPORTED; -#endif /* USE_UNICODE_WCHAR_CACHE */ } PyErr_Format(PyExc_TypeError, "argument must be str, not %.50s", @@ -3385,9 +2913,7 @@ _PyUnicode_WideCharString_Opt_Converter(PyObject *obj, void *ptr) { wchar_t **p = (wchar_t **)ptr; if (obj == NULL) { -#if !USE_UNICODE_WCHAR_CACHE PyMem_Free(*p); -#endif /* USE_UNICODE_WCHAR_CACHE */ *p = NULL; return 1; } @@ -3396,19 +2922,11 @@ _PyUnicode_WideCharString_Opt_Converter(PyObject *obj, void *ptr) return 1; } if (PyUnicode_Check(obj)) { -#if USE_UNICODE_WCHAR_CACHE - *p = (wchar_t *)_PyUnicode_AsUnicode(obj); - if (*p == NULL) { - return 0; - } - return 1; -#else /* USE_UNICODE_WCHAR_CACHE */ *p = PyUnicode_AsWideCharString(obj, NULL); if (*p == NULL) { return 0; } return Py_CLEANUP_SUPPORTED; -#endif /* USE_UNICODE_WCHAR_CACHE */ } PyErr_Format(PyExc_TypeError, "argument must be str or None, not %.50s", @@ -3434,8 +2952,6 @@ PyUnicode_FromObject(PyObject *obj) /* XXX Perhaps we should make this API an alias of PyObject_Str() instead ?! */ if (PyUnicode_CheckExact(obj)) { - if (PyUnicode_READY(obj) == -1) - return NULL; Py_INCREF(obj); return obj; } @@ -4161,10 +3677,6 @@ PyUnicode_FSDecoder(PyObject* arg, void* addr) Py_DECREF(path); return 0; } - if (PyUnicode_READY(output) == -1) { - Py_DECREF(output); - return 0; - } if (findchar(PyUnicode_DATA(output), PyUnicode_KIND(output), PyUnicode_GET_LENGTH(output), 0, 1) >= 0) { PyErr_SetString(PyExc_ValueError, "embedded null character"); @@ -4185,8 +3697,6 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize) PyErr_BadArgument(); return NULL; } - if (PyUnicode_READY(unicode) == -1) - return NULL; if (PyUnicode_UTF8(unicode) == NULL) { if (unicode_fill_utf8(unicode) == -1) { @@ -4205,85 +3715,22 @@ PyUnicode_AsUTF8(PyObject *unicode) return PyUnicode_AsUTF8AndSize(unicode, NULL); } -Py_UNICODE * -PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size) -{ - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - return NULL; - } - Py_UNICODE *w = _PyUnicode_WSTR(unicode); - if (w == NULL) { - /* Non-ASCII compact unicode object */ - assert(_PyUnicode_KIND(unicode) != PyUnicode_WCHAR_KIND); - assert(PyUnicode_IS_READY(unicode)); - - Py_ssize_t wlen = unicode_get_widechar_size(unicode); - if ((size_t)wlen > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) { - PyErr_NoMemory(); - return NULL; - } - w = (wchar_t *) PyObject_Malloc(sizeof(wchar_t) * (wlen + 1)); - if (w == NULL) { - PyErr_NoMemory(); - return NULL; - } - unicode_copy_as_widechar(unicode, w, wlen + 1); - _PyUnicode_WSTR(unicode) = w; - if (!PyUnicode_IS_COMPACT_ASCII(unicode)) { - _PyUnicode_WSTR_LENGTH(unicode) = wlen; - } - } - if (size != NULL) - *size = PyUnicode_WSTR_LENGTH(unicode); - return w; -} - -/* Deprecated APIs */ - -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS - -Py_UNICODE * -PyUnicode_AsUnicode(PyObject *unicode) -{ - return PyUnicode_AsUnicodeAndSize(unicode, NULL); -} - -const Py_UNICODE * -_PyUnicode_AsUnicode(PyObject *unicode) -{ - Py_ssize_t size; - const Py_UNICODE *wstr; - - wstr = PyUnicode_AsUnicodeAndSize(unicode, &size); - if (wstr && wcslen(wstr) != (size_t)size) { - PyErr_SetString(PyExc_ValueError, "embedded null character"); - return NULL; - } - return wstr; -} - +/* +PyUnicode_GetSize() has been deprecated since Python 3.3 +because it returned length of Py_UNICODE. -Py_ssize_t +But this function is part of stable abi, because it don't +include Py_UNICODE in signature and it was not excluded from +stable abi in PEP 384. +*/ +PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(PyObject *unicode) { - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - goto onError; - } - if (_PyUnicode_WSTR(unicode) == NULL) { - if (PyUnicode_AsUnicode(unicode) == NULL) - goto onError; - } - return PyUnicode_WSTR_LENGTH(unicode); - - onError: + PyErr_SetString(PyExc_RuntimeError, + "PyUnicode_GetSize has been removed."); return -1; } -_Py_COMP_DIAG_POP - Py_ssize_t PyUnicode_GetLength(PyObject *unicode) { @@ -4291,8 +3738,6 @@ PyUnicode_GetLength(PyObject *unicode) PyErr_BadArgument(); return -1; } - if (PyUnicode_READY(unicode) == -1) - return -1; return PyUnicode_GET_LENGTH(unicode); } @@ -4306,9 +3751,6 @@ PyUnicode_ReadChar(PyObject *unicode, Py_ssize_t index) PyErr_BadArgument(); return (Py_UCS4)-1; } - if (PyUnicode_READY(unicode) == -1) { - return (Py_UCS4)-1; - } if (index < 0 || index >= PyUnicode_GET_LENGTH(unicode)) { PyErr_SetString(PyExc_IndexError, "string index out of range"); return (Py_UCS4)-1; @@ -4325,7 +3767,6 @@ PyUnicode_WriteChar(PyObject *unicode, Py_ssize_t index, Py_UCS4 ch) PyErr_BadArgument(); return -1; } - assert(PyUnicode_IS_READY(unicode)); if (index < 0 || index >= PyUnicode_GET_LENGTH(unicode)) { PyErr_SetString(PyExc_IndexError, "string index out of range"); return -1; @@ -4458,19 +3899,10 @@ unicode_decode_call_errorhandler_wchar( goto onError; } -#if USE_UNICODE_WCHAR_CACHE -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS - repwlen = PyUnicode_GetSize(repunicode); - if (repwlen < 0) - goto onError; -_Py_COMP_DIAG_POP -#else /* USE_UNICODE_WCHAR_CACHE */ repwlen = PyUnicode_AsWideChar(repunicode, NULL, 0); if (repwlen < 0) goto onError; repwlen--; -#endif /* USE_UNICODE_WCHAR_CACHE */ /* need more space? (at least enough for what we have+the replacement+the rest of the string (starting at the new input position), so we won't have to check space @@ -4920,8 +4352,6 @@ _PyUnicode_EncodeUTF7(PyObject *str, char * out; const char * start; - if (PyUnicode_READY(str) == -1) - return NULL; kind = PyUnicode_KIND(str); data = PyUnicode_DATA(str); len = PyUnicode_GET_LENGTH(str); @@ -5550,9 +4980,6 @@ unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler, return NULL; } - if (PyUnicode_READY(unicode) == -1) - return NULL; - if (PyUnicode_UTF8(unicode)) return PyBytes_FromStringAndSize(PyUnicode_UTF8(unicode), PyUnicode_UTF8_LENGTH(unicode)); @@ -5833,8 +5260,6 @@ _PyUnicode_EncodeUTF32(PyObject *str, PyErr_BadArgument(); return NULL; } - if (PyUnicode_READY(str) == -1) - return NULL; kind = PyUnicode_KIND(str); data = PyUnicode_DATA(str); len = PyUnicode_GET_LENGTH(str); @@ -5901,8 +5326,6 @@ _PyUnicode_EncodeUTF32(PyObject *str, } else { assert(PyUnicode_Check(rep)); - if (PyUnicode_READY(rep) < 0) - goto error; moreunits = repsize = PyUnicode_GET_LENGTH(rep); if (!PyUnicode_IS_ASCII(rep)) { raise_encode_exception(&exc, encoding, @@ -6155,8 +5578,6 @@ _PyUnicode_EncodeUTF16(PyObject *str, PyErr_BadArgument(); return NULL; } - if (PyUnicode_READY(str) == -1) - return NULL; kind = PyUnicode_KIND(str); data = PyUnicode_DATA(str); len = PyUnicode_GET_LENGTH(str); @@ -6240,8 +5661,6 @@ _PyUnicode_EncodeUTF16(PyObject *str, } else { assert(PyUnicode_Check(rep)); - if (PyUnicode_READY(rep) < 0) - goto error; moreunits = repsize = PyUnicode_GET_LENGTH(rep); if (!PyUnicode_IS_ASCII(rep)) { raise_encode_exception(&exc, encoding, @@ -6619,9 +6038,6 @@ PyUnicode_AsUnicodeEscapeString(PyObject *unicode) PyErr_BadArgument(); return NULL; } - if (PyUnicode_READY(unicode) == -1) { - return NULL; - } len = PyUnicode_GET_LENGTH(unicode); if (len == 0) { @@ -6876,9 +6292,6 @@ PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode) PyErr_BadArgument(); return NULL; } - if (PyUnicode_READY(unicode) == -1) { - return NULL; - } kind = PyUnicode_KIND(unicode); data = PyUnicode_DATA(unicode); len = PyUnicode_GET_LENGTH(unicode); @@ -7015,8 +6428,6 @@ unicode_encode_call_errorhandler(const char *errors, return NULL; } - if (PyUnicode_READY(unicode) == -1) - return NULL; len = PyUnicode_GET_LENGTH(unicode); make_encode_exception(exceptionObject, @@ -7074,8 +6485,6 @@ unicode_encode_ucs1(PyObject *unicode, /* output object */ _PyBytesWriter writer; - if (PyUnicode_READY(unicode) == -1) - return NULL; size = PyUnicode_GET_LENGTH(unicode); kind = PyUnicode_KIND(unicode); data = PyUnicode_DATA(unicode); @@ -7194,9 +6603,6 @@ unicode_encode_ucs1(PyObject *unicode, else { assert(PyUnicode_Check(rep)); - if (PyUnicode_READY(rep) < 0) - goto onError; - if (limit == 256 ? PyUnicode_KIND(rep) != PyUnicode_1BYTE_KIND : !PyUnicode_IS_ASCII(rep)) @@ -7243,8 +6649,6 @@ _PyUnicode_AsLatin1String(PyObject *unicode, const char *errors) PyErr_BadArgument(); return NULL; } - if (PyUnicode_READY(unicode) == -1) - return NULL; /* Fast path: if it is a one-byte string, construct bytes object directly. */ if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) @@ -7369,8 +6773,6 @@ _PyUnicode_AsASCIIString(PyObject *unicode, const char *errors) PyErr_BadArgument(); return NULL; } - if (PyUnicode_READY(unicode) == -1) - return NULL; /* Fast path: if it is an ASCII-only string, construct bytes object directly. Else defer to above function to raise the exception. */ if (PyUnicode_IS_ASCII(unicode)) @@ -7758,22 +7160,11 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes, substring = PyUnicode_Substring(unicode, offset, offset+len); if (substring == NULL) return -1; -#if USE_UNICODE_WCHAR_CACHE -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS - p = PyUnicode_AsUnicodeAndSize(substring, &size); - if (p == NULL) { - Py_DECREF(substring); - return -1; - } -_Py_COMP_DIAG_POP -#else /* USE_UNICODE_WCHAR_CACHE */ p = PyUnicode_AsWideCharString(substring, &size); Py_CLEAR(substring); if (p == NULL) { return -1; } -#endif /* USE_UNICODE_WCHAR_CACHE */ assert(size <= INT_MAX); /* First get the size of the result */ @@ -7824,11 +7215,7 @@ _Py_COMP_DIAG_POP ret = 0; done: -#if USE_UNICODE_WCHAR_CACHE - Py_DECREF(substring); -#else /* USE_UNICODE_WCHAR_CACHE */ PyMem_Free(p); -#endif /* USE_UNICODE_WCHAR_CACHE */ return ret; error: @@ -7981,11 +7368,6 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes, enum PyUnicode_Kind kind; const void *data; - if (PyUnicode_READY(rep) == -1) { - Py_DECREF(rep); - goto error; - } - outsize = PyUnicode_GET_LENGTH(rep); morebytes += outsize; if (morebytes > 0) { @@ -8046,8 +7428,6 @@ encode_code_page(int code_page, return NULL; } - if (PyUnicode_READY(unicode) == -1) - return NULL; len = PyUnicode_GET_LENGTH(unicode); if (code_page < 0) { @@ -8129,9 +7509,6 @@ charmap_decode_string(const char *s, Py_UCS4 x; unsigned char ch; - if (PyUnicode_READY(mapping) == -1) - return -1; - maplen = PyUnicode_GET_LENGTH(mapping); mapdata = PyUnicode_DATA(mapping); mapkind = PyUnicode_KIND(mapping); @@ -8284,8 +7661,6 @@ charmap_decode_mapping(const char *s, goto onError; } else if (PyUnicode_Check(item)) { - if (PyUnicode_READY(item) == -1) - goto onError; if (PyUnicode_GET_LENGTH(item) == 1) { Py_UCS4 value = PyUnicode_READ_CHAR(item, 0); if (value == 0xFFFE) @@ -8699,8 +8074,6 @@ charmap_encoding_error( Py_UCS4 ch; int val; - if (PyUnicode_READY(unicode) == -1) - return -1; size = PyUnicode_GET_LENGTH(unicode); /* find all unencodable characters */ while (collendpos < size) { @@ -8796,10 +8169,6 @@ charmap_encoding_error( break; } /* generate replacement */ - if (PyUnicode_READY(repunicode) == -1) { - Py_DECREF(repunicode); - return -1; - } repsize = PyUnicode_GET_LENGTH(repunicode); data = PyUnicode_DATA(repunicode); kind = PyUnicode_KIND(repunicode); @@ -8840,8 +8209,6 @@ _PyUnicode_EncodeCharmap(PyObject *unicode, const void *data; int kind; - if (PyUnicode_READY(unicode) == -1) - return NULL; size = PyUnicode_GET_LENGTH(unicode); data = PyUnicode_DATA(unicode); kind = PyUnicode_KIND(unicode); @@ -9120,10 +8487,6 @@ unicode_fast_translate_lookup(PyObject *mapping, Py_UCS1 ch, else if (PyUnicode_Check(item)) { Py_UCS4 replace; - if (PyUnicode_READY(item) == -1) { - Py_DECREF(item); - return -1; - } if (PyUnicode_GET_LENGTH(item) != 1) goto exit; @@ -9220,8 +8583,6 @@ _PyUnicode_TranslateCharmap(PyObject *input, return NULL; } - if (PyUnicode_READY(input) == -1) - return NULL; data = PyUnicode_DATA(input); kind = PyUnicode_KIND(input); size = PyUnicode_GET_LENGTH(input); @@ -9237,8 +8598,6 @@ _PyUnicode_TranslateCharmap(PyObject *input, ignore = (errors != NULL && strcmp(errors, "ignore") == 0); - if (PyUnicode_READY(input) == -1) - return NULL; if (PyUnicode_IS_ASCII(input)) { res = unicode_fast_translate(input, mapping, &writer, ignore, &i); if (res < 0) { @@ -9334,8 +8693,6 @@ _PyUnicode_TransformDecimalAndSpaceToASCII(PyObject *unicode) PyErr_BadInternalCall(); return NULL; } - if (PyUnicode_READY(unicode) == -1) - return NULL; if (PyUnicode_IS_ASCII(unicode)) { /* If the string is already ASCII, just return the same string */ Py_INCREF(unicode); @@ -9527,15 +8884,6 @@ _PyUnicode_InsertThousandsGrouping( assert(0 <= n_digits); assert(grouping != NULL); - if (digits != NULL) { - if (PyUnicode_READY(digits) == -1) { - return -1; - } - } - if (PyUnicode_READY(thousands_sep) == -1) { - return -1; - } - Py_ssize_t count = 0; Py_ssize_t n_zeros; int loop_broken = 0; @@ -9716,8 +9064,6 @@ PyUnicode_FindChar(PyObject *str, Py_UCS4 ch, { int kind; Py_ssize_t len, result; - if (PyUnicode_READY(str) == -1) - return -2; len = PyUnicode_GET_LENGTH(str); ADJUST_INDICES(start, end, len); if (end - start < 1) @@ -9746,10 +9092,6 @@ tailmatch(PyObject *self, Py_ssize_t i; Py_ssize_t end_sub; - if (PyUnicode_READY(self) == -1 || - PyUnicode_READY(substring) == -1) - return -1; - ADJUST_INDICES(start, end, PyUnicode_GET_LENGTH(self)); end -= PyUnicode_GET_LENGTH(substring); if (end < start) @@ -10008,8 +9350,6 @@ case_operation(PyObject *self, void *outdata; Py_UCS4 maxchar = 0, *tmp, *tmpend; - assert(PyUnicode_IS_READY(self)); - kind = PyUnicode_KIND(self); data = PyUnicode_DATA(self); length = PyUnicode_GET_LENGTH(self); @@ -10118,8 +9458,6 @@ _PyUnicode_JoinArray(PyObject *separator, PyObject *const *items, Py_ssize_t seq Py_TYPE(separator)->tp_name); goto onError; } - if (PyUnicode_READY(separator)) - goto onError; sep = separator; seplen = PyUnicode_GET_LENGTH(separator); maxchar = PyUnicode_MAX_CHAR_VALUE(separator); @@ -10151,8 +9489,6 @@ _PyUnicode_JoinArray(PyObject *separator, PyObject *const *items, Py_ssize_t seq i, Py_TYPE(item)->tp_name); goto onError; } - if (PyUnicode_READY(item) == -1) - goto onError; add_sz = PyUnicode_GET_LENGTH(item); item_maxchar = PyUnicode_MAX_CHAR_VALUE(item); maxchar = Py_MAX(maxchar, item_maxchar); @@ -10247,7 +9583,6 @@ _PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length, { const enum PyUnicode_Kind kind = PyUnicode_KIND(unicode); void *data = PyUnicode_DATA(unicode); - assert(PyUnicode_IS_READY(unicode)); assert(unicode_modifiable(unicode)); assert(fill_char <= PyUnicode_MAX_CHAR_VALUE(unicode)); assert(start >= 0); @@ -10265,8 +9600,6 @@ PyUnicode_Fill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length, PyErr_BadInternalCall(); return -1; } - if (PyUnicode_READY(unicode) == -1) - return -1; if (unicode_check_modifiable(unicode)) return -1; @@ -10379,9 +9712,6 @@ split(PyObject *self, if (maxcount < 0) maxcount = PY_SSIZE_T_MAX; - if (PyUnicode_READY(self) == -1) - return NULL; - if (substring == NULL) switch (PyUnicode_KIND(self)) { case PyUnicode_1BYTE_KIND: @@ -10409,9 +9739,6 @@ split(PyObject *self, Py_UNREACHABLE(); } - if (PyUnicode_READY(substring) == -1) - return NULL; - kind1 = PyUnicode_KIND(self); kind2 = PyUnicode_KIND(substring); len1 = PyUnicode_GET_LENGTH(self); @@ -10471,9 +9798,6 @@ rsplit(PyObject *self, if (maxcount < 0) maxcount = PY_SSIZE_T_MAX; - if (PyUnicode_READY(self) == -1) - return NULL; - if (substring == NULL) switch (PyUnicode_KIND(self)) { case PyUnicode_1BYTE_KIND: @@ -10501,9 +9825,6 @@ rsplit(PyObject *self, Py_UNREACHABLE(); } - if (PyUnicode_READY(substring) == -1) - return NULL; - kind1 = PyUnicode_KIND(self); kind2 = PyUnicode_KIND(substring); len1 = PyUnicode_GET_LENGTH(self); @@ -10905,8 +10226,6 @@ static PyObject * unicode_title_impl(PyObject *self) /*[clinic end generated code: output=c75ae03809574902 input=fa945d669b26e683]*/ { - if (PyUnicode_READY(self) == -1) - return NULL; return case_operation(self, do_title); } @@ -10923,8 +10242,6 @@ static PyObject * unicode_capitalize_impl(PyObject *self) /*[clinic end generated code: output=e49a4c333cdb7667 input=f4cbf1016938da6d]*/ { - if (PyUnicode_READY(self) == -1) - return NULL; if (PyUnicode_GET_LENGTH(self) == 0) return unicode_result_unchanged(self); return case_operation(self, do_capitalize); @@ -10940,8 +10257,6 @@ static PyObject * unicode_casefold_impl(PyObject *self) /*[clinic end generated code: output=0120daf657ca40af input=384d66cc2ae30daf]*/ { - if (PyUnicode_READY(self) == -1) - return NULL; if (PyUnicode_IS_ASCII(self)) return ascii_upper_or_lower(self, 1); return case_operation(self, do_casefold); @@ -10961,8 +10276,6 @@ convert_uc(PyObject *obj, void *addr) "not %.100s", Py_TYPE(obj)->tp_name); return 0; } - if (PyUnicode_READY(obj) < 0) - return 0; if (PyUnicode_GET_LENGTH(obj) != 1) { PyErr_SetString(PyExc_TypeError, "The fill character must be exactly one character long"); @@ -10990,9 +10303,6 @@ unicode_center_impl(PyObject *self, Py_ssize_t width, Py_UCS4 fillchar) { Py_ssize_t marg, left; - if (PyUnicode_READY(self) == -1) - return NULL; - if (PyUnicode_GET_LENGTH(self) >= width) return unicode_result_unchanged(self); @@ -11149,9 +10459,6 @@ _PyUnicode_Equal(PyObject *str1, PyObject *str2) if (str1 == str2) { return 1; } - if (PyUnicode_READY(str1) || PyUnicode_READY(str2)) { - return -1; - } return unicode_compare_eq(str1, str2); } @@ -11160,10 +10467,6 @@ int PyUnicode_Compare(PyObject *left, PyObject *right) { if (PyUnicode_Check(left) && PyUnicode_Check(right)) { - if (PyUnicode_READY(left) == -1 || - PyUnicode_READY(right) == -1) - return -1; - /* a string is equal to itself */ if (left == right) return 0; @@ -11183,24 +10486,8 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) Py_ssize_t i; int kind; Py_UCS4 chr; - const unsigned char *ustr = (const unsigned char *)str; assert(_PyUnicode_CHECK(uni)); - if (!PyUnicode_IS_READY(uni)) { - const wchar_t *ws = _PyUnicode_WSTR(uni); - /* Compare Unicode string and source character set string */ - for (i = 0; (chr = ws[i]) && ustr[i]; i++) { - if (chr != ustr[i]) - return (chr < ustr[i]) ? -1 : 1; - } - /* This check keeps Python strings that end in '\0' from comparing equal - to C strings identical up to that point. */ - if (_PyUnicode_WSTR_LENGTH(uni) != i || chr) - return 1; /* uni is longer */ - if (ustr[i]) - return -1; /* str is longer */ - return 0; - } kind = PyUnicode_KIND(uni); if (kind == PyUnicode_1BYTE_KIND) { const void *data = PyUnicode_1BYTE_DATA(uni); @@ -11238,24 +10525,6 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) } } -static int -non_ready_unicode_equal_to_ascii_string(PyObject *unicode, const char *str) -{ - size_t i, len; - const wchar_t *p; - len = (size_t)_PyUnicode_WSTR_LENGTH(unicode); - if (strlen(str) != len) - return 0; - p = _PyUnicode_WSTR(unicode); - assert(p); - for (i = 0; i < len; i++) { - unsigned char c = (unsigned char)str[i]; - if (c >= 128 || p[i] != (wchar_t)c) - return 0; - } - return 1; -} - int _PyUnicode_EqualToASCIIString(PyObject *unicode, const char *str) { @@ -11267,11 +10536,6 @@ _PyUnicode_EqualToASCIIString(PyObject *unicode, const char *str) assert((unsigned char)*p < 128); } #endif - if (PyUnicode_READY(unicode) == -1) { - /* Memory error or bad data */ - PyErr_Clear(); - return non_ready_unicode_equal_to_ascii_string(unicode, str); - } if (!PyUnicode_IS_ASCII(unicode)) return 0; len = (size_t)PyUnicode_GET_LENGTH(unicode); @@ -11292,12 +10556,6 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right) } #endif - if (PyUnicode_READY(left) == -1) { - /* memory error or bad data */ - PyErr_Clear(); - return non_ready_unicode_equal_to_ascii_string(left, right->string); - } - if (!PyUnicode_IS_ASCII(left)) return 0; @@ -11333,10 +10591,6 @@ PyUnicode_RichCompare(PyObject *left, PyObject *right, int op) if (!PyUnicode_Check(left) || !PyUnicode_Check(right)) Py_RETURN_NOTIMPLEMENTED; - if (PyUnicode_READY(left) == -1 || - PyUnicode_READY(right) == -1) - return NULL; - if (left == right) { switch (op) { case Py_EQ: @@ -11384,8 +10638,6 @@ PyUnicode_Contains(PyObject *str, PyObject *substr) Py_TYPE(substr)->tp_name); return -1; } - if (PyUnicode_READY(substr) == -1) - return -1; if (ensure_unicode(str) < 0) return -1; @@ -11449,8 +10701,6 @@ PyUnicode_Concat(PyObject *left, PyObject *right) Py_TYPE(right)->tp_name); return NULL; } - if (PyUnicode_READY(right) < 0) - return NULL; /* Shortcuts */ PyObject *empty = unicode_get_empty(); // Borrowed reference @@ -11504,11 +10754,6 @@ PyUnicode_Append(PyObject **p_left, PyObject *right) goto error; } - if (PyUnicode_READY(left) == -1) - goto error; - if (PyUnicode_READY(right) == -1) - goto error; - /* Shortcuts */ PyObject *empty = unicode_get_empty(); // Borrowed reference if (left == empty) { @@ -11575,7 +10820,7 @@ PyUnicode_AppendAndDel(PyObject **pleft, PyObject *right) } /* -Wraps stringlib_parse_args_finds() and additionally ensures that the +Wraps asciilib_parse_args_finds() and additionally ensures that the first argument is a unicode object. */ @@ -11584,8 +10829,7 @@ parse_args_finds_unicode(const char * function_name, PyObject *args, PyObject **substring, Py_ssize_t *start, Py_ssize_t *end) { - if(stringlib_parse_args_finds(function_name, args, substring, - start, end)) { + if (asciilib_parse_args_finds(function_name, args, substring, start, end)) { if (ensure_unicode(*substring) < 0) return 0; return 1; @@ -11708,9 +10952,6 @@ unicode_expandtabs_impl(PyObject *self, int tabsize) int kind; int found; - if (PyUnicode_READY(self) == -1) - return NULL; - /* First pass: determine size of output string */ src_len = PyUnicode_GET_LENGTH(self); i = j = line_pos = 0; @@ -11796,9 +11037,6 @@ unicode_find(PyObject *self, PyObject *args) if (!parse_args_finds_unicode("find", args, &substring, &start, &end)) return NULL; - if (PyUnicode_READY(self) == -1) - return NULL; - result = any_find_slice(self, substring, start, end, 1); if (result == -2) @@ -11818,9 +11056,6 @@ unicode_getitem(PyObject *self, Py_ssize_t index) PyErr_BadArgument(); return NULL; } - if (PyUnicode_READY(self) == -1) { - return NULL; - } if (index < 0 || index >= PyUnicode_GET_LENGTH(self)) { PyErr_SetString(PyExc_IndexError, "string index out of range"); return NULL; @@ -11843,8 +11078,6 @@ unicode_hash(PyObject *self) #endif if (_PyUnicode_HASH(self) != -1) return _PyUnicode_HASH(self); - if (PyUnicode_READY(self) == -1) - return -1; x = _Py_HashBytes(PyUnicode_DATA(self), PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self)); @@ -11873,9 +11106,6 @@ unicode_index(PyObject *self, PyObject *args) if (!parse_args_finds_unicode("index", args, &substring, &start, &end)) return NULL; - if (PyUnicode_READY(self) == -1) - return NULL; - result = any_find_slice(self, substring, start, end, 1); if (result == -2) @@ -11902,9 +11132,6 @@ static PyObject * unicode_isascii_impl(PyObject *self) /*[clinic end generated code: output=c5910d64b5a8003f input=5a43cbc6399621d5]*/ { - if (PyUnicode_READY(self) == -1) { - return NULL; - } return PyBool_FromLong(PyUnicode_IS_ASCII(self)); } @@ -11926,8 +11153,6 @@ unicode_islower_impl(PyObject *self) const void *data; int cased; - if (PyUnicode_READY(self) == -1) - return NULL; length = PyUnicode_GET_LENGTH(self); kind = PyUnicode_KIND(self); data = PyUnicode_DATA(self); @@ -11971,8 +11196,6 @@ unicode_isupper_impl(PyObject *self) const void *data; int cased; - if (PyUnicode_READY(self) == -1) - return NULL; length = PyUnicode_GET_LENGTH(self); kind = PyUnicode_KIND(self); data = PyUnicode_DATA(self); @@ -12016,8 +11239,6 @@ unicode_istitle_impl(PyObject *self) const void *data; int cased, previous_is_cased; - if (PyUnicode_READY(self) == -1) - return NULL; length = PyUnicode_GET_LENGTH(self); kind = PyUnicode_KIND(self); data = PyUnicode_DATA(self); @@ -12073,8 +11294,6 @@ unicode_isspace_impl(PyObject *self) int kind; const void *data; - if (PyUnicode_READY(self) == -1) - return NULL; length = PyUnicode_GET_LENGTH(self); kind = PyUnicode_KIND(self); data = PyUnicode_DATA(self); @@ -12113,8 +11332,6 @@ unicode_isalpha_impl(PyObject *self) int kind; const void *data; - if (PyUnicode_READY(self) == -1) - return NULL; length = PyUnicode_GET_LENGTH(self); kind = PyUnicode_KIND(self); data = PyUnicode_DATA(self); @@ -12152,9 +11369,6 @@ unicode_isalnum_impl(PyObject *self) const void *data; Py_ssize_t len, i; - if (PyUnicode_READY(self) == -1) - return NULL; - kind = PyUnicode_KIND(self); data = PyUnicode_DATA(self); len = PyUnicode_GET_LENGTH(self); @@ -12194,8 +11408,6 @@ unicode_isdecimal_impl(PyObject *self) int kind; const void *data; - if (PyUnicode_READY(self) == -1) - return NULL; length = PyUnicode_GET_LENGTH(self); kind = PyUnicode_KIND(self); data = PyUnicode_DATA(self); @@ -12233,8 +11445,6 @@ unicode_isdigit_impl(PyObject *self) int kind; const void *data; - if (PyUnicode_READY(self) == -1) - return NULL; length = PyUnicode_GET_LENGTH(self); kind = PyUnicode_KIND(self); data = PyUnicode_DATA(self); @@ -12273,8 +11483,6 @@ unicode_isnumeric_impl(PyObject *self) int kind; const void *data; - if (PyUnicode_READY(self) == -1) - return NULL; length = PyUnicode_GET_LENGTH(self); kind = PyUnicode_KIND(self); data = PyUnicode_DATA(self); @@ -12299,9 +11507,6 @@ Py_ssize_t _PyUnicode_ScanIdentifier(PyObject *self) { Py_ssize_t i; - if (PyUnicode_READY(self) == -1) - return -1; - Py_ssize_t len = PyUnicode_GET_LENGTH(self); if (len == 0) { /* an empty string is not a valid identifier */ @@ -12335,54 +11540,10 @@ _PyUnicode_ScanIdentifier(PyObject *self) int PyUnicode_IsIdentifier(PyObject *self) { - if (PyUnicode_IS_READY(self)) { - Py_ssize_t i = _PyUnicode_ScanIdentifier(self); - Py_ssize_t len = PyUnicode_GET_LENGTH(self); - /* an empty string is not a valid identifier */ - return len && i == len; - } - else { -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS - Py_ssize_t i = 0, len = PyUnicode_GET_SIZE(self); - if (len == 0) { - /* an empty string is not a valid identifier */ - return 0; - } - - const wchar_t *wstr = _PyUnicode_WSTR(self); - Py_UCS4 ch = wstr[i++]; -#if SIZEOF_WCHAR_T == 2 - if (Py_UNICODE_IS_HIGH_SURROGATE(ch) - && i < len - && Py_UNICODE_IS_LOW_SURROGATE(wstr[i])) - { - ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]); - i++; - } -#endif - if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) { - return 0; - } - - while (i < len) { - ch = wstr[i++]; -#if SIZEOF_WCHAR_T == 2 - if (Py_UNICODE_IS_HIGH_SURROGATE(ch) - && i < len - && Py_UNICODE_IS_LOW_SURROGATE(wstr[i])) - { - ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]); - i++; - } -#endif - if (!_PyUnicode_IsXidContinue(ch)) { - return 0; - } - } - return 1; -_Py_COMP_DIAG_POP - } + Py_ssize_t i = _PyUnicode_ScanIdentifier(self); + Py_ssize_t len = PyUnicode_GET_LENGTH(self); + /* an empty string is not a valid identifier */ + return len && i == len; } /*[clinic input] @@ -12418,8 +11579,6 @@ unicode_isprintable_impl(PyObject *self) int kind; const void *data; - if (PyUnicode_READY(self) == -1) - return NULL; length = PyUnicode_GET_LENGTH(self); kind = PyUnicode_KIND(self); data = PyUnicode_DATA(self); @@ -12461,8 +11620,6 @@ unicode_join(PyObject *self, PyObject *iterable) static Py_ssize_t unicode_length(PyObject *self) { - if (PyUnicode_READY(self) == -1) - return -1; return PyUnicode_GET_LENGTH(self); } @@ -12482,9 +11639,6 @@ static PyObject * unicode_ljust_impl(PyObject *self, Py_ssize_t width, Py_UCS4 fillchar) /*[clinic end generated code: output=1cce0e0e0a0b84b3 input=3ab599e335e60a32]*/ { - if (PyUnicode_READY(self) == -1) - return NULL; - if (PyUnicode_GET_LENGTH(self) >= width) return unicode_result_unchanged(self); @@ -12501,8 +11655,6 @@ static PyObject * unicode_lower_impl(PyObject *self) /*[clinic end generated code: output=84ef9ed42efad663 input=60a2984b8beff23a]*/ { - if (PyUnicode_READY(self) == -1) - return NULL; if (PyUnicode_IS_ASCII(self)) return ascii_upper_or_lower(self, 1); return case_operation(self, do_lower); @@ -12527,9 +11679,6 @@ _PyUnicode_XStrip(PyObject *self, int striptype, PyObject *sepobj) BLOOM_MASK sepmask; Py_ssize_t seplen; - if (PyUnicode_READY(self) == -1 || PyUnicode_READY(sepobj) == -1) - return NULL; - kind = PyUnicode_KIND(self); data = PyUnicode_DATA(self); len = PyUnicode_GET_LENGTH(self); @@ -12575,9 +11724,6 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end) int kind; Py_ssize_t length; - if (PyUnicode_READY(self) == -1) - return NULL; - length = PyUnicode_GET_LENGTH(self); end = Py_MIN(end, length); @@ -12610,9 +11756,6 @@ do_strip(PyObject *self, int striptype) { Py_ssize_t len, i, j; - if (PyUnicode_READY(self) == -1) - return NULL; - len = PyUnicode_GET_LENGTH(self); if (PyUnicode_IS_ASCII(self)) { @@ -12759,9 +11902,6 @@ unicode_repeat(PyObject *str, Py_ssize_t len) if (len == 1) return unicode_result_unchanged(str); - if (PyUnicode_READY(str) == -1) - return NULL; - if (PyUnicode_GET_LENGTH(str) > PY_SSIZE_T_MAX / len) { PyErr_SetString(PyExc_OverflowError, "repeated string is too long"); @@ -12836,8 +11976,6 @@ unicode_replace_impl(PyObject *self, PyObject *old, PyObject *new, Py_ssize_t count) /*[clinic end generated code: output=b63f1a8b5eebf448 input=147d12206276ebeb]*/ { - if (PyUnicode_READY(self) == -1) - return NULL; return replace(self, old, new, count); } @@ -12907,9 +12045,6 @@ unicode_repr(PyObject *unicode) const void *idata; void *odata; - if (PyUnicode_READY(unicode) == -1) - return NULL; - isize = PyUnicode_GET_LENGTH(unicode); idata = PyUnicode_DATA(unicode); @@ -13082,9 +12217,6 @@ unicode_rfind(PyObject *self, PyObject *args) if (!parse_args_finds_unicode("rfind", args, &substring, &start, &end)) return NULL; - if (PyUnicode_READY(self) == -1) - return NULL; - result = any_find_slice(self, substring, start, end, -1); if (result == -2) @@ -13114,9 +12246,6 @@ unicode_rindex(PyObject *self, PyObject *args) if (!parse_args_finds_unicode("rindex", args, &substring, &start, &end)) return NULL; - if (PyUnicode_READY(self) == -1) - return NULL; - result = any_find_slice(self, substring, start, end, -1); if (result == -2) @@ -13146,9 +12275,6 @@ static PyObject * unicode_rjust_impl(PyObject *self, Py_ssize_t width, Py_UCS4 fillchar) /*[clinic end generated code: output=804a1a57fbe8d5cf input=d05f550b5beb1f72]*/ { - if (PyUnicode_READY(self) == -1) - return NULL; - if (PyUnicode_GET_LENGTH(self) >= width) return unicode_result_unchanged(self); @@ -13412,8 +12538,6 @@ static PyObject * unicode_swapcase_impl(PyObject *self) /*[clinic end generated code: output=5d28966bf6d7b2af input=3f3ef96d5798a7bb]*/ { - if (PyUnicode_READY(self) == -1) - return NULL; return case_operation(self, do_swapcase); } @@ -13579,8 +12703,6 @@ static PyObject * unicode_upper_impl(PyObject *self) /*[clinic end generated code: output=1b7ddd16bbcdc092 input=db3d55682dfe2e6c]*/ { - if (PyUnicode_READY(self) == -1) - return NULL; if (PyUnicode_IS_ASCII(self)) return ascii_upper_or_lower(self, 0); return case_operation(self, do_upper); @@ -13607,9 +12729,6 @@ unicode_zfill_impl(PyObject *self, Py_ssize_t width) const void *data; Py_UCS4 chr; - if (PyUnicode_READY(self) == -1) - return NULL; - if (PyUnicode_GET_LENGTH(self) >= width) return unicode_result_unchanged(self); @@ -13652,7 +12771,7 @@ unicode_startswith(PyObject *self, Py_ssize_t end = PY_SSIZE_T_MAX; int result; - if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end)) + if (!asciilib_parse_args_finds("startswith", args, &subobj, &start, &end)) return NULL; if (PyTuple_Check(subobj)) { Py_ssize_t i; @@ -13706,7 +12825,7 @@ unicode_endswith(PyObject *self, Py_ssize_t end = PY_SSIZE_T_MAX; int result; - if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end)) + if (!asciilib_parse_args_finds("endswith", args, &subobj, &start, &end)) return NULL; if (PyTuple_Check(subobj)) { Py_ssize_t i; @@ -13753,7 +12872,7 @@ _PyUnicodeWriter_Update(_PyUnicodeWriter *writer) else { /* use a value smaller than PyUnicode_1BYTE_KIND() so _PyUnicodeWriter_PrepareKind() will copy the buffer. */ - writer->kind = PyUnicode_WCHAR_KIND; + writer->kind = 0; assert(writer->kind <= PyUnicode_1BYTE_KIND); /* Copy-on-write mode: set buffer size to 0 so @@ -13773,7 +12892,7 @@ _PyUnicodeWriter_Init(_PyUnicodeWriter *writer) /* use a value smaller than PyUnicode_1BYTE_KIND() so _PyUnicodeWriter_PrepareKind() will copy the buffer. */ - writer->kind = PyUnicode_WCHAR_KIND; + writer->kind = 0; assert(writer->kind <= PyUnicode_1BYTE_KIND); } @@ -13908,8 +13027,6 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str) Py_UCS4 maxchar; Py_ssize_t len; - if (PyUnicode_READY(str) == -1) - return -1; len = PyUnicode_GET_LENGTH(str); if (len == 0) return 0; @@ -13940,9 +13057,6 @@ _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str, Py_UCS4 maxchar; Py_ssize_t len; - if (PyUnicode_READY(str) == -1) - return -1; - assert(0 <= start); assert(end <= PyUnicode_GET_LENGTH(str)); assert(start <= end); @@ -14071,7 +13185,7 @@ _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer) } assert(_PyUnicode_CheckConsistency(str, 1)); - return unicode_result_ready(str); + return unicode_result(str); } void @@ -14110,8 +13224,6 @@ unicode___format___impl(PyObject *self, PyObject *format_spec) _PyUnicodeWriter writer; int ret; - if (PyUnicode_READY(self) == -1) - return NULL; _PyUnicodeWriter_Init(&writer); ret = _PyUnicode_FormatAdvancedWriter(&writer, self, format_spec, 0, @@ -14137,11 +13249,13 @@ unicode_sizeof_impl(PyObject *self) /* If it's a compact object, account for base structure + character data. */ - if (PyUnicode_IS_COMPACT_ASCII(self)) + if (PyUnicode_IS_COMPACT_ASCII(self)) { size = sizeof(PyASCIIObject) + PyUnicode_GET_LENGTH(self) + 1; - else if (PyUnicode_IS_COMPACT(self)) + } + else if (PyUnicode_IS_COMPACT(self)) { size = sizeof(PyCompactUnicodeObject) + (PyUnicode_GET_LENGTH(self) + 1) * PyUnicode_KIND(self); + } else { /* If it is a two-block object, account for base object, and for character block if present. */ @@ -14150,10 +13264,6 @@ unicode_sizeof_impl(PyObject *self) size += (PyUnicode_GET_LENGTH(self) + 1) * PyUnicode_KIND(self); } - /* If the wstr pointer is present, account for it unless it is shared - with the data pointer. Check if the data is not shared. */ - if (_PyUnicode_HAS_WSTR_MEMORY(self)) - size += (PyUnicode_WSTR_LENGTH(self) + 1) * sizeof(wchar_t); if (_PyUnicode_HAS_UTF8_MEMORY(self)) size += PyUnicode_UTF8_LENGTH(self) + 1; @@ -14252,9 +13362,6 @@ static PySequenceMethods unicode_as_sequence = { static PyObject* unicode_subscript(PyObject* self, PyObject* item) { - if (PyUnicode_READY(self) == -1) - return NULL; - if (_PyIndex_Check(item)) { Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); if (i == -1 && PyErr_Occurred()) @@ -14478,7 +13585,6 @@ _PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type) return NULL; assert(unicode_modifiable(result)); - assert(PyUnicode_IS_READY(result)); assert(PyUnicode_IS_ASCII(result)); /* To modify the string in-place, there can only be one reference. */ @@ -15014,9 +14120,6 @@ unicode_format_arg_output(struct unicode_formatter_t *ctx, if (arg->sign && arg->flags & F_ZERO) fill = '0'; - if (PyUnicode_READY(str) == -1) - return -1; - len = PyUnicode_GET_LENGTH(str); if ((arg->width == -1 || arg->width <= len) && (arg->prec == -1 || arg->prec >= len) @@ -15318,15 +14421,12 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode) { PyObject *self; Py_ssize_t length, char_size; - int share_wstr, share_utf8; + int share_utf8; unsigned int kind; void *data; assert(PyType_IsSubtype(type, &PyUnicode_Type)); assert(_PyUnicode_CHECK(unicode)); - if (PyUnicode_READY(unicode) == -1) { - return NULL; - } self = type->tp_alloc(type, 0); if (self == NULL) { @@ -15345,15 +14445,11 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode) _PyUnicode_STATE(self).kind = kind; _PyUnicode_STATE(self).compact = 0; _PyUnicode_STATE(self).ascii = _PyUnicode_STATE(unicode).ascii; - _PyUnicode_STATE(self).ready = 1; - _PyUnicode_WSTR(self) = NULL; _PyUnicode_UTF8_LENGTH(self) = 0; _PyUnicode_UTF8(self) = NULL; - _PyUnicode_WSTR_LENGTH(self) = 0; _PyUnicode_DATA_ANY(self) = NULL; share_utf8 = 0; - share_wstr = 0; if (kind == PyUnicode_1BYTE_KIND) { char_size = 1; if (PyUnicode_MAX_CHAR_VALUE(unicode) < 128) @@ -15361,14 +14457,10 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode) } else if (kind == PyUnicode_2BYTE_KIND) { char_size = 2; - if (sizeof(wchar_t) == 2) - share_wstr = 1; } else { assert(kind == PyUnicode_4BYTE_KIND); char_size = 4; - if (sizeof(wchar_t) == 4) - share_wstr = 1; } /* Ensure we won't overflow the length. */ @@ -15387,13 +14479,8 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode) _PyUnicode_UTF8_LENGTH(self) = length; _PyUnicode_UTF8(self) = data; } - if (share_wstr) { - _PyUnicode_WSTR_LENGTH(self) = length; - _PyUnicode_WSTR(self) = (wchar_t *)data; - } - memcpy(data, PyUnicode_DATA(unicode), - kind * (length + 1)); + memcpy(data, PyUnicode_DATA(unicode), kind * (length + 1)); assert(_PyUnicode_CheckConsistency(self, 1)); #ifdef Py_DEBUG _PyUnicode_HASH(self) = _PyUnicode_HASH(unicode); @@ -15563,11 +14650,6 @@ PyUnicode_InternInPlace(PyObject **p) } #ifdef INTERNED_STRINGS - if (PyUnicode_READY(s) == -1) { - PyErr_Clear(); - return; - } - if (interned == NULL) { interned = PyDict_New(); if (interned == NULL) { @@ -15656,8 +14738,6 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) Py_ssize_t pos = 0; PyObject *s, *ignored_value; while (PyDict_Next(interned, &pos, &s, &ignored_value)) { - assert(PyUnicode_IS_READY(s)); - switch (PyUnicode_CHECK_INTERNED(s)) { case SSTATE_INTERNED_IMMORTAL: Py_SET_REFCNT(s, Py_REFCNT(s) + 1); @@ -15779,7 +14859,7 @@ unicodeiter_reduce(unicodeiterobject *it, PyObject *Py_UNUSED(ignored)) return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(iter)), it->it_seq, it->it_index); } else { - PyObject *u = (PyObject *)_PyUnicode_New(0); + PyObject *u = unicode_new_empty(); if (u == NULL) return NULL; return Py_BuildValue("N(N)", _PyEval_GetBuiltin(&_Py_ID(iter)), u); @@ -15871,8 +14951,6 @@ unicode_iter(PyObject *seq) PyErr_BadInternalCall(); return NULL; } - if (PyUnicode_READY(seq) == -1) - return NULL; if (PyUnicode_IS_COMPACT_ASCII(seq)) { it = PyObject_GC_New(unicodeiterobject, &_PyUnicodeASCIIIter_Type); } @@ -16120,20 +15198,8 @@ static void unicode_static_dealloc(PyObject *op) assert(ascii->state.compact); - if (ascii->state.ascii) { - if (ascii->wstr) { - PyObject_Free(ascii->wstr); - ascii->wstr = NULL; - } - } - else { + if (!ascii->state.ascii) { PyCompactUnicodeObject* compact = (PyCompactUnicodeObject*)op; - void* data = (void*)(compact + 1); - if (ascii->wstr && ascii->wstr != data) { - PyObject_Free(ascii->wstr); - ascii->wstr = NULL; - compact->wstr_length = 0; - } if (compact->utf8) { PyObject_Free(compact->utf8); compact->utf8 = NULL; diff --git a/PC/_msi.c b/PC/_msi.c index 01516e8..3f50f9b 100644 --- a/PC/_msi.c +++ b/PC/_msi.c @@ -757,19 +757,13 @@ _msi_SummaryInformation_SetProperty_impl(msiobj *self, int field, int status; if (PyUnicode_Check(data)) { -#if USE_UNICODE_WCHAR_CACHE - const WCHAR *value = _PyUnicode_AsUnicode(data); -#else /* USE_UNICODE_WCHAR_CACHE */ WCHAR *value = PyUnicode_AsWideCharString(data, NULL); -#endif /* USE_UNICODE_WCHAR_CACHE */ if (value == NULL) { return NULL; } status = MsiSummaryInfoSetPropertyW(self->h, field, VT_LPSTR, 0, NULL, value); -#if !USE_UNICODE_WCHAR_CACHE PyMem_Free(value); -#endif /* USE_UNICODE_WCHAR_CACHE */ } else if (PyLong_CheckExact(data)) { long value = PyLong_AsLong(data); if (value == -1 && PyErr_Occurred()) { diff --git a/PC/clinic/_msi.c.h b/PC/clinic/_msi.c.h index fd21142..ca1f8ad 100644 --- a/PC/clinic/_msi.c.h +++ b/PC/clinic/_msi.c.h @@ -208,11 +208,7 @@ _msi_Record_SetString(msiobj *self, PyObject *const *args, Py_ssize_t nargs) _PyArg_BadArgument("SetString", "argument 2", "str", args[1]); goto exit; } - #if USE_UNICODE_WCHAR_CACHE - value = _PyUnicode_AsUnicode(args[1]); - #else /* USE_UNICODE_WCHAR_CACHE */ value = PyUnicode_AsWideCharString(args[1], NULL); - #endif /* USE_UNICODE_WCHAR_CACHE */ if (value == NULL) { goto exit; } @@ -220,9 +216,7 @@ _msi_Record_SetString(msiobj *self, PyObject *const *args, Py_ssize_t nargs) exit: /* Cleanup for value */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)value); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -257,11 +251,7 @@ _msi_Record_SetStream(msiobj *self, PyObject *const *args, Py_ssize_t nargs) _PyArg_BadArgument("SetStream", "argument 2", "str", args[1]); goto exit; } - #if USE_UNICODE_WCHAR_CACHE - value = _PyUnicode_AsUnicode(args[1]); - #else /* USE_UNICODE_WCHAR_CACHE */ value = PyUnicode_AsWideCharString(args[1], NULL); - #endif /* USE_UNICODE_WCHAR_CACHE */ if (value == NULL) { goto exit; } @@ -269,9 +259,7 @@ _msi_Record_SetStream(msiobj *self, PyObject *const *args, Py_ssize_t nargs) exit: /* Cleanup for value */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)value); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -561,11 +549,7 @@ _msi_Database_OpenView(msiobj *self, PyObject *arg) _PyArg_BadArgument("OpenView", "argument", "str", arg); goto exit; } - #if USE_UNICODE_WCHAR_CACHE - sql = _PyUnicode_AsUnicode(arg); - #else /* USE_UNICODE_WCHAR_CACHE */ sql = PyUnicode_AsWideCharString(arg, NULL); - #endif /* USE_UNICODE_WCHAR_CACHE */ if (sql == NULL) { goto exit; } @@ -573,9 +557,7 @@ _msi_Database_OpenView(msiobj *self, PyObject *arg) exit: /* Cleanup for sql */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)sql); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -660,11 +642,7 @@ _msi_OpenDatabase(PyObject *module, PyObject *const *args, Py_ssize_t nargs) _PyArg_BadArgument("OpenDatabase", "argument 1", "str", args[0]); goto exit; } - #if USE_UNICODE_WCHAR_CACHE - path = _PyUnicode_AsUnicode(args[0]); - #else /* USE_UNICODE_WCHAR_CACHE */ path = PyUnicode_AsWideCharString(args[0], NULL); - #endif /* USE_UNICODE_WCHAR_CACHE */ if (path == NULL) { goto exit; } @@ -676,9 +654,7 @@ _msi_OpenDatabase(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: /* Cleanup for path */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)path); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -713,4 +689,4 @@ _msi_CreateRecord(PyObject *module, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=d7eb07e6bfcdc13f input=a9049054013a1b77]*/ +/*[clinic end generated code: output=a592695c4315db22 input=a9049054013a1b77]*/ diff --git a/PC/clinic/winreg.c.h b/PC/clinic/winreg.c.h index 8bcb290..6af24af 100644 --- a/PC/clinic/winreg.c.h +++ b/PC/clinic/winreg.c.h @@ -159,11 +159,7 @@ winreg_ConnectRegistry(PyObject *module, PyObject *const *args, Py_ssize_t nargs computer_name = NULL; } else if (PyUnicode_Check(args[0])) { - #if USE_UNICODE_WCHAR_CACHE - computer_name = _PyUnicode_AsUnicode(args[0]); - #else /* USE_UNICODE_WCHAR_CACHE */ computer_name = PyUnicode_AsWideCharString(args[0], NULL); - #endif /* USE_UNICODE_WCHAR_CACHE */ if (computer_name == NULL) { goto exit; } @@ -183,9 +179,7 @@ winreg_ConnectRegistry(PyObject *module, PyObject *const *args, Py_ssize_t nargs exit: /* Cleanup for computer_name */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)computer_name); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -233,11 +227,7 @@ winreg_CreateKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) sub_key = NULL; } else if (PyUnicode_Check(args[1])) { - #if USE_UNICODE_WCHAR_CACHE - sub_key = _PyUnicode_AsUnicode(args[1]); - #else /* USE_UNICODE_WCHAR_CACHE */ sub_key = PyUnicode_AsWideCharString(args[1], NULL); - #endif /* USE_UNICODE_WCHAR_CACHE */ if (sub_key == NULL) { goto exit; } @@ -254,9 +244,7 @@ winreg_CreateKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: /* Cleanup for sub_key */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)sub_key); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -318,9 +306,7 @@ winreg_CreateKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py exit: /* Cleanup for sub_key */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)sub_key); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -366,11 +352,7 @@ winreg_DeleteKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) _PyArg_BadArgument("DeleteKey", "argument 2", "str", args[1]); goto exit; } - #if USE_UNICODE_WCHAR_CACHE - sub_key = _PyUnicode_AsUnicode(args[1]); - #else /* USE_UNICODE_WCHAR_CACHE */ sub_key = PyUnicode_AsWideCharString(args[1], NULL); - #endif /* USE_UNICODE_WCHAR_CACHE */ if (sub_key == NULL) { goto exit; } @@ -378,9 +360,7 @@ winreg_DeleteKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: /* Cleanup for sub_key */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)sub_key); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -437,9 +417,7 @@ winreg_DeleteKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py exit: /* Cleanup for sub_key */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)sub_key); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -478,11 +456,7 @@ winreg_DeleteValue(PyObject *module, PyObject *const *args, Py_ssize_t nargs) value = NULL; } else if (PyUnicode_Check(args[1])) { - #if USE_UNICODE_WCHAR_CACHE - value = _PyUnicode_AsUnicode(args[1]); - #else /* USE_UNICODE_WCHAR_CACHE */ value = PyUnicode_AsWideCharString(args[1], NULL); - #endif /* USE_UNICODE_WCHAR_CACHE */ if (value == NULL) { goto exit; } @@ -495,9 +469,7 @@ winreg_DeleteValue(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: /* Cleanup for value */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)value); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -622,11 +594,7 @@ winreg_ExpandEnvironmentStrings(PyObject *module, PyObject *arg) _PyArg_BadArgument("ExpandEnvironmentStrings", "argument", "str", arg); goto exit; } - #if USE_UNICODE_WCHAR_CACHE - string = _PyUnicode_AsUnicode(arg); - #else /* USE_UNICODE_WCHAR_CACHE */ string = PyUnicode_AsWideCharString(arg, NULL); - #endif /* USE_UNICODE_WCHAR_CACHE */ if (string == NULL) { goto exit; } @@ -634,9 +602,7 @@ winreg_ExpandEnvironmentStrings(PyObject *module, PyObject *arg) exit: /* Cleanup for string */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)string); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -734,11 +700,7 @@ winreg_LoadKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) _PyArg_BadArgument("LoadKey", "argument 2", "str", args[1]); goto exit; } - #if USE_UNICODE_WCHAR_CACHE - sub_key = _PyUnicode_AsUnicode(args[1]); - #else /* USE_UNICODE_WCHAR_CACHE */ sub_key = PyUnicode_AsWideCharString(args[1], NULL); - #endif /* USE_UNICODE_WCHAR_CACHE */ if (sub_key == NULL) { goto exit; } @@ -746,11 +708,7 @@ winreg_LoadKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) _PyArg_BadArgument("LoadKey", "argument 3", "str", args[2]); goto exit; } - #if USE_UNICODE_WCHAR_CACHE - file_name = _PyUnicode_AsUnicode(args[2]); - #else /* USE_UNICODE_WCHAR_CACHE */ file_name = PyUnicode_AsWideCharString(args[2], NULL); - #endif /* USE_UNICODE_WCHAR_CACHE */ if (file_name == NULL) { goto exit; } @@ -758,13 +716,9 @@ winreg_LoadKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: /* Cleanup for sub_key */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)sub_key); - #endif /* USE_UNICODE_WCHAR_CACHE */ /* Cleanup for file_name */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)file_name); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -819,9 +773,7 @@ winreg_OpenKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje exit: /* Cleanup for sub_key */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)sub_key); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -876,9 +828,7 @@ winreg_OpenKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyOb exit: /* Cleanup for sub_key */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)sub_key); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -962,11 +912,7 @@ winreg_QueryValue(PyObject *module, PyObject *const *args, Py_ssize_t nargs) sub_key = NULL; } else if (PyUnicode_Check(args[1])) { - #if USE_UNICODE_WCHAR_CACHE - sub_key = _PyUnicode_AsUnicode(args[1]); - #else /* USE_UNICODE_WCHAR_CACHE */ sub_key = PyUnicode_AsWideCharString(args[1], NULL); - #endif /* USE_UNICODE_WCHAR_CACHE */ if (sub_key == NULL) { goto exit; } @@ -979,9 +925,7 @@ winreg_QueryValue(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: /* Cleanup for sub_key */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)sub_key); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -1025,11 +969,7 @@ winreg_QueryValueEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs) name = NULL; } else if (PyUnicode_Check(args[1])) { - #if USE_UNICODE_WCHAR_CACHE - name = _PyUnicode_AsUnicode(args[1]); - #else /* USE_UNICODE_WCHAR_CACHE */ name = PyUnicode_AsWideCharString(args[1], NULL); - #endif /* USE_UNICODE_WCHAR_CACHE */ if (name == NULL) { goto exit; } @@ -1042,9 +982,7 @@ winreg_QueryValueEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: /* Cleanup for name */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)name); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -1093,11 +1031,7 @@ winreg_SaveKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) _PyArg_BadArgument("SaveKey", "argument 2", "str", args[1]); goto exit; } - #if USE_UNICODE_WCHAR_CACHE - file_name = _PyUnicode_AsUnicode(args[1]); - #else /* USE_UNICODE_WCHAR_CACHE */ file_name = PyUnicode_AsWideCharString(args[1], NULL); - #endif /* USE_UNICODE_WCHAR_CACHE */ if (file_name == NULL) { goto exit; } @@ -1105,9 +1039,7 @@ winreg_SaveKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: /* Cleanup for file_name */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)file_name); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -1162,9 +1094,7 @@ winreg_SetValue(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: /* Cleanup for sub_key */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)sub_key); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -1238,9 +1168,7 @@ winreg_SetValueEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: /* Cleanup for value_name */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)value_name); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -1346,4 +1274,4 @@ winreg_QueryReflectionKey(PyObject *module, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=c3454803528f6e97 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=9782b1630b59e201 input=a9049054013a1b77]*/ diff --git a/PC/winreg.c b/PC/winreg.c index 2d44c82..92d05f5 100644 --- a/PC/winreg.c +++ b/PC/winreg.c @@ -645,19 +645,9 @@ Py2Reg(PyObject *value, DWORD typ, BYTE **retDataBuf, DWORD *retDataSize) t = PyList_GET_ITEM(value, j); if (!PyUnicode_Check(t)) return FALSE; -#if USE_UNICODE_WCHAR_CACHE -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS - len = PyUnicode_GetSize(t); - if (len < 0) - return FALSE; - len++; -_Py_COMP_DIAG_POP -#else /* USE_UNICODE_WCHAR_CACHE */ len = PyUnicode_AsWideChar(t, NULL, 0); if (len < 0) return FALSE; -#endif /* USE_UNICODE_WCHAR_CACHE */ size += Py_SAFE_DOWNCAST(len * sizeof(wchar_t), size_t, DWORD); } @@ -1709,40 +1699,27 @@ winreg_SetValue_impl(PyObject *module, HKEY key, const Py_UNICODE *sub_key, return NULL; } -#if USE_UNICODE_WCHAR_CACHE -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS - const wchar_t *value = PyUnicode_AsUnicodeAndSize(value_obj, &value_length); -_Py_COMP_DIAG_POP -#else /* USE_UNICODE_WCHAR_CACHE */ wchar_t *value = PyUnicode_AsWideCharString(value_obj, &value_length); -#endif /* USE_UNICODE_WCHAR_CACHE */ if (value == NULL) { return NULL; } if ((Py_ssize_t)(DWORD)value_length != value_length) { PyErr_SetString(PyExc_OverflowError, "value is too long"); -#if !USE_UNICODE_WCHAR_CACHE PyMem_Free(value); -#endif /* USE_UNICODE_WCHAR_CACHE */ return NULL; } if (PySys_Audit("winreg.SetValue", "nunu#", (Py_ssize_t)key, sub_key, (Py_ssize_t)type, value, value_length) < 0) { -#if !USE_UNICODE_WCHAR_CACHE PyMem_Free(value); -#endif /* USE_UNICODE_WCHAR_CACHE */ return NULL; } Py_BEGIN_ALLOW_THREADS rc = RegSetValueW(key, sub_key, REG_SZ, value, (DWORD)(value_length + 1)); Py_END_ALLOW_THREADS -#if !USE_UNICODE_WCHAR_CACHE PyMem_Free(value); -#endif /* USE_UNICODE_WCHAR_CACHE */ if (rc != ERROR_SUCCESS) return PyErr_SetFromWindowsErrWithFunction(rc, "RegSetValue"); Py_RETURN_NONE; diff --git a/Python/dynload_win.c b/Python/dynload_win.c index b43e9fc..c03bc56 100644 --- a/Python/dynload_win.c +++ b/Python/dynload_win.c @@ -225,11 +225,7 @@ dl_funcptr _PyImport_FindSharedFuncptrWindows(const char *prefix, _Py_CheckPython3(); -#if USE_UNICODE_WCHAR_CACHE - const wchar_t *wpathname = _PyUnicode_AsUnicode(pathname); -#else /* USE_UNICODE_WCHAR_CACHE */ wchar_t *wpathname = PyUnicode_AsWideCharString(pathname, NULL); -#endif /* USE_UNICODE_WCHAR_CACHE */ if (wpathname == NULL) return NULL; @@ -251,9 +247,7 @@ dl_funcptr _PyImport_FindSharedFuncptrWindows(const char *prefix, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR); Py_END_ALLOW_THREADS -#if !USE_UNICODE_WCHAR_CACHE PyMem_Free(wpathname); -#endif /* USE_UNICODE_WCHAR_CACHE */ /* restore old error mode settings */ SetErrorMode(old_mode); diff --git a/Python/fileutils.c b/Python/fileutils.c index 4f7f894..a38886a 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -1244,18 +1244,12 @@ _Py_stat(PyObject *path, struct stat *statbuf) #ifdef MS_WINDOWS int err; -#if USE_UNICODE_WCHAR_CACHE - const wchar_t *wpath = _PyUnicode_AsUnicode(path); -#else /* USE_UNICODE_WCHAR_CACHE */ wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL); -#endif /* USE_UNICODE_WCHAR_CACHE */ if (wpath == NULL) return -2; err = _Py_wstat(wpath, statbuf); -#if !USE_UNICODE_WCHAR_CACHE PyMem_Free(wpath); -#endif /* USE_UNICODE_WCHAR_CACHE */ return err; #else int ret; @@ -1663,11 +1657,8 @@ _Py_fopen_obj(PyObject *path, const char *mode) Py_TYPE(path)); return NULL; } -#if USE_UNICODE_WCHAR_CACHE - const wchar_t *wpath = _PyUnicode_AsUnicode(path); -#else /* USE_UNICODE_WCHAR_CACHE */ + wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL); -#endif /* USE_UNICODE_WCHAR_CACHE */ if (wpath == NULL) return NULL; @@ -1675,9 +1666,7 @@ _Py_fopen_obj(PyObject *path, const char *mode) wmode, Py_ARRAY_LENGTH(wmode)); if (usize == 0) { PyErr_SetFromWindowsErr(0); -#if !USE_UNICODE_WCHAR_CACHE PyMem_Free(wpath); -#endif /* USE_UNICODE_WCHAR_CACHE */ return NULL; } @@ -1687,9 +1676,7 @@ _Py_fopen_obj(PyObject *path, const char *mode) Py_END_ALLOW_THREADS } while (f == NULL && errno == EINTR && !(async_err = PyErr_CheckSignals())); -#if !USE_UNICODE_WCHAR_CACHE PyMem_Free(wpath); -#endif /* USE_UNICODE_WCHAR_CACHE */ #else PyObject *bytes; const char *path_bytes; diff --git a/Python/getargs.c b/Python/getargs.c index a2ea4d7..ed3ffda 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -1012,58 +1012,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, break; } - case 'u': /* raw unicode buffer (Py_UNICODE *) */ - case 'Z': /* raw unicode buffer or None */ - { - if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "getargs: The '%c' format is deprecated. Use 'U' instead.", c)) { - return NULL; - } -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS - Py_UNICODE **p = va_arg(*p_va, Py_UNICODE **); - - if (*format == '#') { - /* "u#" or "Z#" */ - REQUIRE_PY_SSIZE_T_CLEAN; - Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*); - - if (c == 'Z' && arg == Py_None) { - *p = NULL; - *psize = 0; - } - else if (PyUnicode_Check(arg)) { - Py_ssize_t len; - *p = PyUnicode_AsUnicodeAndSize(arg, &len); - if (*p == NULL) - RETURN_ERR_OCCURRED; - *psize = len; - } - else - return converterr(c == 'Z' ? "str or None" : "str", - arg, msgbuf, bufsize); - format++; - } else { - /* "u" or "Z" */ - if (c == 'Z' && arg == Py_None) - *p = NULL; - else if (PyUnicode_Check(arg)) { - Py_ssize_t len; - *p = PyUnicode_AsUnicodeAndSize(arg, &len); - if (*p == NULL) - RETURN_ERR_OCCURRED; - if (wcslen(*p) != (size_t)len) { - PyErr_SetString(PyExc_ValueError, "embedded null character"); - RETURN_ERR_OCCURRED; - } - } else - return converterr(c == 'Z' ? "str or None" : "str", - arg, msgbuf, bufsize); - } - break; -_Py_COMP_DIAG_POP - } - case 'e': {/* encoded string */ char **buffer; const char *encoding; @@ -2685,8 +2633,6 @@ skipitem(const char **p_format, va_list *p_va, int flags) case 's': /* string */ case 'z': /* string or None */ case 'y': /* bytes */ - case 'u': /* unicode string */ - case 'Z': /* unicode string or None */ case 'w': /* buffer, read-write */ { if (p_va != NULL) { diff --git a/Python/traceback.c b/Python/traceback.c index 3ec0618..e76c9aa 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -1077,7 +1077,6 @@ _Py_DumpASCII(int fd, PyObject *text) int truncated; int kind; void *data = NULL; - wchar_t *wstr = NULL; Py_UCS4 ch; if (!PyUnicode_Check(text)) @@ -1085,13 +1084,7 @@ _Py_DumpASCII(int fd, PyObject *text) size = ascii->length; kind = ascii->state.kind; - if (kind == PyUnicode_WCHAR_KIND) { - wstr = ascii->wstr; - if (wstr == NULL) - return; - size = _PyCompactUnicodeObject_CAST(text)->wstr_length; - } - else if (ascii->state.compact) { + if (ascii->state.compact) { if (ascii->state.ascii) data = ascii + 1; else @@ -1132,10 +1125,7 @@ _Py_DumpASCII(int fd, PyObject *text) } for (i=0; i < size; i++) { - if (kind != PyUnicode_WCHAR_KIND) - ch = PyUnicode_READ(kind, data, i); - else - ch = wstr[i]; + ch = PyUnicode_READ(kind, data, i); if (' ' <= ch && ch <= 126) { /* printable ASCII character */ char c = (char)ch; diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py index 5ad4f87..53e29df 100755 --- a/Tools/clinic/clinic.py +++ b/Tools/clinic/clinic.py @@ -3526,9 +3526,7 @@ class Py_UNICODE_converter(CConverter): def cleanup(self): if not self.length: return """\ -#if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *){name}); -#endif /* USE_UNICODE_WCHAR_CACHE */ """.format(name=self.name) def parse_arg(self, argname, argnum): @@ -3539,11 +3537,7 @@ PyMem_Free((void *){name}); _PyArg_BadArgument("{{name}}", {argnum}, "str", {argname}); goto exit; }}}} - #if USE_UNICODE_WCHAR_CACHE - {paramname} = _PyUnicode_AsUnicode({argname}); - #else /* USE_UNICODE_WCHAR_CACHE */ {paramname} = PyUnicode_AsWideCharString({argname}, NULL); - #endif /* USE_UNICODE_WCHAR_CACHE */ if ({paramname} == NULL) {{{{ goto exit; }}}} @@ -3554,11 +3548,7 @@ PyMem_Free((void *){name}); {paramname} = NULL; }}}} else if (PyUnicode_Check({argname})) {{{{ - #if USE_UNICODE_WCHAR_CACHE - {paramname} = _PyUnicode_AsUnicode({argname}); - #else /* USE_UNICODE_WCHAR_CACHE */ {paramname} = PyUnicode_AsWideCharString({argname}, NULL); - #endif /* USE_UNICODE_WCHAR_CACHE */ if ({paramname} == NULL) {{{{ goto exit; }}}} diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py index 857e52f..80563ea 100755 --- a/Tools/gdb/libpython.py +++ b/Tools/gdb/libpython.py @@ -1376,57 +1376,28 @@ class PyUnicodeObjectPtr(PyObjectPtr): return _type_Py_UNICODE.sizeof def proxyval(self, visited): - may_have_surrogates = False compact = self.field('_base') ascii = compact['_base'] state = ascii['state'] is_compact_ascii = (int(state['ascii']) and int(state['compact'])) - if not int(state['ready']): - # string is not ready - field_length = int(compact['wstr_length']) - may_have_surrogates = True - field_str = ascii['wstr'] + field_length = int(ascii['length']) + if is_compact_ascii: + field_str = ascii.address + 1 + elif int(state['compact']): + field_str = compact.address + 1 else: - field_length = int(ascii['length']) - if is_compact_ascii: - field_str = ascii.address + 1 - elif int(state['compact']): - field_str = compact.address + 1 - else: - field_str = self.field('data')['any'] - repr_kind = int(state['kind']) - if repr_kind == 1: - field_str = field_str.cast(_type_unsigned_char_ptr()) - elif repr_kind == 2: - field_str = field_str.cast(_type_unsigned_short_ptr()) - elif repr_kind == 4: - field_str = field_str.cast(_type_unsigned_int_ptr()) + field_str = self.field('data')['any'] + repr_kind = int(state['kind']) + if repr_kind == 1: + field_str = field_str.cast(_type_unsigned_char_ptr()) + elif repr_kind == 2: + field_str = field_str.cast(_type_unsigned_short_ptr()) + elif repr_kind == 4: + field_str = field_str.cast(_type_unsigned_int_ptr()) # Gather a list of ints from the Py_UNICODE array; these are either # UCS-1, UCS-2 or UCS-4 code points: - if not may_have_surrogates: - Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)] - else: - # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the - # inferior process: we must join surrogate pairs. - Py_UNICODEs = [] - i = 0 - limit = safety_limit(field_length) - while i < limit: - ucs = int(field_str[i]) - i += 1 - if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length: - Py_UNICODEs.append(ucs) - continue - # This could be a surrogate pair. - ucs2 = int(field_str[i]) - if ucs2 < 0xDC00 or ucs2 > 0xDFFF: - continue - code = (ucs & 0x03FF) << 10 - code |= ucs2 & 0x03FF - code += 0x00010000 - Py_UNICODEs.append(code) - i += 1 + Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)] # Convert the int code points to unicode characters, and generate a # local unicode instance. diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py index 5ee6c2f..ac20767 100644 --- a/Tools/scripts/deepfreeze.py +++ b/Tools/scripts/deepfreeze.py @@ -200,7 +200,6 @@ class Printer: self.write(".kind = 1,") self.write(".compact = 1,") self.write(".ascii = 1,") - self.write(".ready = 1,") self.write(f"._data = {make_string_literal(s.encode('ascii'))},") return f"& {name}._ascii.ob_base" else: @@ -213,21 +212,10 @@ class Printer: self.write(f".kind = {kind},") self.write(".compact = 1,") self.write(".ascii = 0,") - self.write(".ready = 1,") with self.block(f"._data =", ","): for i in range(0, len(s), 16): data = s[i:i+16] self.write(", ".join(map(str, map(ord, data))) + ",") - if kind == PyUnicode_2BYTE_KIND: - self.patchups.append("if (sizeof(wchar_t) == 2) {") - self.patchups.append(f" {name}._compact._base.wstr = (wchar_t *) {name}._data;") - self.patchups.append(f" {name}._compact.wstr_length = {len(s)};") - self.patchups.append("}") - if kind == PyUnicode_4BYTE_KIND: - self.patchups.append("if (sizeof(wchar_t) == 4) {") - self.patchups.append(f" {name}._compact._base.wstr = (wchar_t *) {name}._data;") - self.patchups.append(f" {name}._compact.wstr_length = {len(s)};") - self.patchups.append("}") return f"& {name}._compact._base.ob_base" -- cgit v0.12