summary | refs | log | tree | commit | diff | stats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
author: Inada Naoki <songofacandy@gmail.com> 2022-05-12 05:48:38 (GMT)
committer: GitHub <noreply@github.com> 2022-05-12 05:48:38 (GMT)
commit: f9c9354a7a173eaca2aa19e667b5cf12167b7fed (patch)
tree: eb0fdd3219f53c973f1a7dbbcb9f8b0e0babdf36 /Objects/unicodeobject.c
parent: 68fec31364e96d122aae0571c14683b4ddb0ebd0 (diff)
download: cpython-f9c9354a7a173eaca2aa19e667b5cf12167b7fed.zip
          cpython-f9c9354a7a173eaca2aa19e667b5cf12167b7fed.tar.gz
          cpython-f9c9354a7a173eaca2aa19e667b5cf12167b7fed.tar.bz2
gh-92536: PEP 623: Remove wstr and legacy APIs from Unicode (GH-92537)
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- Objects/unicodeobject.c 1116
1 files changed, 91 insertions, 1025 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 656c7cc..cc50fcd 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -115,7 +115,6 @@ extern "C" {
(_PyCompactUnicodeObject_CAST(op)->utf8)
#define PyUnicode_UTF8(op) \
(assert(_PyUnicode_CHECK(op)), \
- assert(PyUnicode_IS_READY(op)), \
PyUnicode_IS_COMPACT_ASCII(op) ? \
((char*)(_PyASCIIObject_CAST(op) + 1)) : \
_PyUnicode_UTF8(op))
@@ -123,21 +122,10 @@ extern "C" {
(_PyCompactUnicodeObject_CAST(op)->utf8_length)
#define PyUnicode_UTF8_LENGTH(op) \
(assert(_PyUnicode_CHECK(op)), \
- assert(PyUnicode_IS_READY(op)), \
PyUnicode_IS_COMPACT_ASCII(op) ? \
_PyASCIIObject_CAST(op)->length : \
_PyUnicode_UTF8_LENGTH(op))
-#define _PyUnicode_WSTR(op) \
- (_PyASCIIObject_CAST(op)->wstr)
-
-/* Don't use deprecated macro of unicodeobject.h */
-#undef PyUnicode_WSTR_LENGTH
-#define PyUnicode_WSTR_LENGTH(op) \
- (PyUnicode_IS_COMPACT_ASCII(op) ? \
- _PyASCIIObject_CAST(op)->length : \
- _PyCompactUnicodeObject_CAST(op)->wstr_length)
-#define _PyUnicode_WSTR_LENGTH(op) \
- (_PyCompactUnicodeObject_CAST(op)->wstr_length)
+
#define _PyUnicode_LENGTH(op) \
(_PyASCIIObject_CAST(op)->length)
#define _PyUnicode_STATE(op) \
@@ -153,20 +141,10 @@ extern "C" {
#define _PyUnicode_DATA_ANY(op) \
(_PyUnicodeObject_CAST(op)->data.any)
-#undef PyUnicode_READY
-#define PyUnicode_READY(op) \
- (assert(_PyUnicode_CHECK(op)), \
- (PyUnicode_IS_READY(op) ? \
- 0 : \
- _PyUnicode_Ready(op)))
-
#define _PyUnicode_SHARE_UTF8(op) \
(assert(_PyUnicode_CHECK(op)), \
assert(!PyUnicode_IS_COMPACT_ASCII(op)), \
(_PyUnicode_UTF8(op) == PyUnicode_DATA(op)))
-#define _PyUnicode_SHARE_WSTR(op) \
- (assert(_PyUnicode_CHECK(op)), \
- (_PyUnicode_WSTR(unicode) == PyUnicode_DATA(op)))
/* true if the Unicode object has an allocated UTF-8 memory block
(not shared with other data) */
@@ -175,13 +153,6 @@ extern "C" {
&& _PyUnicode_UTF8(op) \
&& _PyUnicode_UTF8(op) != PyUnicode_DATA(op)))
-/* true if the Unicode object has an allocated wstr memory block
- (not shared with other data) */
-#define _PyUnicode_HAS_WSTR_MEMORY(op) \
- ((_PyUnicode_WSTR(op) && \
- (!PyUnicode_IS_READY(op) || \
- _PyUnicode_WSTR(op) != PyUnicode_DATA(op))))
-
/* Generic helper macro to convert characters of different types.
from_type and to_type have to be valid type names, begin and end
are pointers to the source characters which should be of type
@@ -280,7 +251,6 @@ unicode_fill(enum PyUnicode_Kind kind, void *data, Py_UCS4 value,
Py_ssize_t start, Py_ssize_t length)
{
assert(0 <= start);
- assert(kind != PyUnicode_WCHAR_KIND);
switch (kind) {
case PyUnicode_1BYTE_KIND: {
assert(value <= 0xff);
@@ -342,7 +312,6 @@ const unsigned char _Py_ascii_whitespace[] = {
};
/* forward */
-static PyUnicodeObject *_PyUnicode_New(Py_ssize_t length);
static PyObject* get_latin1_char(unsigned char ch);
static int unicode_modifiable(PyObject *unicode);
@@ -518,7 +487,6 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
if (ascii->state.ascii == 1 && ascii->state.compact == 1) {
CHECK(kind == PyUnicode_1BYTE_KIND);
- CHECK(ascii->state.ready == 1);
}
else {
PyCompactUnicodeObject *compact = _PyCompactUnicodeObject_CAST(op);
@@ -530,62 +498,32 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
|| kind == PyUnicode_2BYTE_KIND
|| kind == PyUnicode_4BYTE_KIND);
CHECK(ascii->state.ascii == 0);
- CHECK(ascii->state.ready == 1);
CHECK(compact->utf8 != data);
}
else {
PyUnicodeObject *unicode = _PyUnicodeObject_CAST(op);
data = unicode->data.any;
- if (kind == PyUnicode_WCHAR_KIND) {
- CHECK(ascii->length == 0);
- CHECK(ascii->hash == -1);
- CHECK(ascii->state.compact == 0);
- CHECK(ascii->state.ascii == 0);
- CHECK(ascii->state.ready == 0);
- CHECK(ascii->state.interned == SSTATE_NOT_INTERNED);
- CHECK(ascii->wstr != NULL);
- CHECK(data == NULL);
- CHECK(compact->utf8 == NULL);
+ CHECK(kind == PyUnicode_1BYTE_KIND
+ || kind == PyUnicode_2BYTE_KIND
+ || kind == PyUnicode_4BYTE_KIND);
+ CHECK(ascii->state.compact == 0);
+ CHECK(data != NULL);
+ if (ascii->state.ascii) {
+ CHECK(compact->utf8 == data);
+ CHECK(compact->utf8_length == ascii->length);
}
else {
- CHECK(kind == PyUnicode_1BYTE_KIND
- || kind == PyUnicode_2BYTE_KIND
- || kind == PyUnicode_4BYTE_KIND);
- CHECK(ascii->state.compact == 0);
- CHECK(ascii->state.ready == 1);
- CHECK(data != NULL);
- if (ascii->state.ascii) {
- CHECK(compact->utf8 == data);
- CHECK(compact->utf8_length == ascii->length);
- }
- else
- CHECK(compact->utf8 != data);
+ CHECK(compact->utf8 != data);
}
}
- if (kind != PyUnicode_WCHAR_KIND) {
- if (
-#if SIZEOF_WCHAR_T == 2
- kind == PyUnicode_2BYTE_KIND
-#else
- kind == PyUnicode_4BYTE_KIND
-#endif
- )
- {
- CHECK(ascii->wstr == data);
- CHECK(compact->wstr_length == ascii->length);
- } else
- CHECK(ascii->wstr != data);
- }
if (compact->utf8 == NULL)
CHECK(compact->utf8_length == 0);
- if (ascii->wstr == NULL)
- CHECK(compact->wstr_length == 0);
}
/* check that the best kind is used: O(n) operation */
- if (check_content && kind != PyUnicode_WCHAR_KIND) {
+ if (check_content) {
Py_ssize_t i;
Py_UCS4 maxchar = 0;
const void *data;
@@ -621,47 +559,12 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
#undef CHECK
}
-
-static PyObject*
-unicode_result_wchar(PyObject *unicode)
-{
-#ifndef Py_DEBUG
- Py_ssize_t len;
-
- len = _PyUnicode_WSTR_LENGTH(unicode);
- if (len == 0) {
- Py_DECREF(unicode);
- _Py_RETURN_UNICODE_EMPTY();
- }
-
- if (len == 1) {
- wchar_t ch = _PyUnicode_WSTR(unicode)[0];
- if ((Py_UCS4)ch < 256) {
- Py_DECREF(unicode);
- return get_latin1_char((unsigned char)ch);
- }
- }
-
- if (_PyUnicode_Ready(unicode) < 0) {
- Py_DECREF(unicode);
- return NULL;
- }
-#else
- assert(Py_REFCNT(unicode) == 1);
-
- /* don't make the result ready in debug mode to ensure that the caller
- makes the string ready before using it */
- assert(_PyUnicode_CheckConsistency(unicode, 1));
-#endif
- return unicode;
-}
-
static PyObject*
-unicode_result_ready(PyObject *unicode)
+unicode_result(PyObject *unicode)
{
- Py_ssize_t length;
+ assert(_PyUnicode_CHECK(unicode));
- length = PyUnicode_GET_LENGTH(unicode);
+ Py_ssize_t length = PyUnicode_GET_LENGTH(unicode);
if (length == 0) {
PyObject *empty = unicode_get_empty();
if (unicode != empty) {
@@ -690,21 +593,9 @@ unicode_result_ready(PyObject *unicode)
}
static PyObject*
-unicode_result(PyObject *unicode)
-{
- assert(_PyUnicode_CHECK(unicode));
- if (PyUnicode_IS_READY(unicode))
- return unicode_result_ready(unicode);
- else
- return unicode_result_wchar(unicode);
-}
-
-static PyObject*
unicode_result_unchanged(PyObject *unicode)
{
if (PyUnicode_CheckExact(unicode)) {
- if (PyUnicode_READY(unicode) == -1)
- return NULL;
Py_INCREF(unicode);
return unicode;
}
@@ -724,7 +615,6 @@ backslashreplace(_PyBytesWriter *writer, char *str,
enum PyUnicode_Kind kind;
const void *data;
- assert(PyUnicode_IS_READY(unicode));
kind = PyUnicode_KIND(unicode);
data = PyUnicode_DATA(unicode);
@@ -791,7 +681,6 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str,
enum PyUnicode_Kind kind;
const void *data;
- assert(PyUnicode_IS_READY(unicode));
kind = PyUnicode_KIND(unicode);
data = PyUnicode_DATA(unicode);
@@ -915,7 +804,7 @@ ensure_unicode(PyObject *obj)
Py_TYPE(obj)->tp_name);
return -1;
}
- return PyUnicode_READY(obj);
+ return 0;
}
/* Compilation of templated routines */
@@ -961,15 +850,6 @@ ensure_unicode(PyObject *obj)
#include "stringlib/find_max_char.h"
#include "stringlib/undef.h"
-_Py_COMP_DIAG_PUSH
-_Py_COMP_DIAG_IGNORE_DEPR_DECLS
-#include "stringlib/unicodedefs.h"
-#include "stringlib/fastsearch.h"
-#include "stringlib/count.h"
-#include "stringlib/find.h"
-#include "stringlib/undef.h"
-_Py_COMP_DIAG_POP
-
#undef STRINGLIB_GET_EMPTY
/* --- Unicode Object ----------------------------------------------------- */
@@ -1029,14 +909,12 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
Py_ssize_t char_size;
Py_ssize_t struct_size;
Py_ssize_t new_size;
- int share_wstr;
PyObject *new_unicode;
#ifdef Py_DEBUG
Py_ssize_t old_length = _PyUnicode_LENGTH(unicode);
#endif
assert(unicode_modifiable(unicode));
- assert(PyUnicode_IS_READY(unicode));
assert(PyUnicode_IS_COMPACT(unicode));
char_size = PyUnicode_KIND(unicode);
@@ -1044,7 +922,6 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
struct_size = sizeof(PyASCIIObject);
else
struct_size = sizeof(PyCompactUnicodeObject);
- share_wstr = _PyUnicode_SHARE_WSTR(unicode);
if (length > ((PY_SSIZE_T_MAX - struct_size) / char_size - 1)) {
PyErr_NoMemory();
@@ -1074,17 +951,6 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
_Py_NewReference(unicode);
_PyUnicode_LENGTH(unicode) = length;
- if (share_wstr) {
- _PyUnicode_WSTR(unicode) = PyUnicode_DATA(unicode);
- if (!PyUnicode_IS_ASCII(unicode))
- _PyUnicode_WSTR_LENGTH(unicode) = length;
- }
- else if (_PyUnicode_HAS_WSTR_MEMORY(unicode)) {
- PyObject_Free(_PyUnicode_WSTR(unicode));
- _PyUnicode_WSTR(unicode) = NULL;
- if (!PyUnicode_IS_ASCII(unicode))
- _PyUnicode_WSTR_LENGTH(unicode) = 0;
- }
#ifdef Py_DEBUG
unicode_fill_invalid(unicode, old_length);
#endif
@@ -1097,78 +963,55 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
static int
resize_inplace(PyObject *unicode, Py_ssize_t length)
{
- wchar_t *wstr;
- Py_ssize_t new_size;
assert(!PyUnicode_IS_COMPACT(unicode));
assert(Py_REFCNT(unicode) == 1);
- if (PyUnicode_IS_READY(unicode)) {
- Py_ssize_t char_size;
- int share_wstr, share_utf8;
- void *data;
+ Py_ssize_t new_size;
+ Py_ssize_t char_size;
+ int share_utf8;
+ void *data;
#ifdef Py_DEBUG
- Py_ssize_t old_length = _PyUnicode_LENGTH(unicode);
+ Py_ssize_t old_length = _PyUnicode_LENGTH(unicode);
#endif
- data = _PyUnicode_DATA_ANY(unicode);
- char_size = PyUnicode_KIND(unicode);
- share_wstr = _PyUnicode_SHARE_WSTR(unicode);
- share_utf8 = _PyUnicode_SHARE_UTF8(unicode);
+ data = _PyUnicode_DATA_ANY(unicode);
+ char_size = PyUnicode_KIND(unicode);
+ share_utf8 = _PyUnicode_SHARE_UTF8(unicode);
- if (length > (PY_SSIZE_T_MAX / char_size - 1)) {
- PyErr_NoMemory();
- return -1;
- }
- new_size = (length + 1) * char_size;
+ if (length > (PY_SSIZE_T_MAX / char_size - 1)) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ new_size = (length + 1) * char_size;
- if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY(unicode))
- {
- PyObject_Free(_PyUnicode_UTF8(unicode));
- _PyUnicode_UTF8(unicode) = NULL;
- _PyUnicode_UTF8_LENGTH(unicode) = 0;
- }
+ if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY(unicode))
+ {
+ PyObject_Free(_PyUnicode_UTF8(unicode));
+ _PyUnicode_UTF8(unicode) = NULL;
+ _PyUnicode_UTF8_LENGTH(unicode) = 0;
+ }
- data = (PyObject *)PyObject_Realloc(data, new_size);
- if (data == NULL) {
- PyErr_NoMemory();
- return -1;
- }
- _PyUnicode_DATA_ANY(unicode) = data;
- if (share_wstr) {
- _PyUnicode_WSTR(unicode) = data;
- _PyUnicode_WSTR_LENGTH(unicode) = length;
- }
- if (share_utf8) {
- _PyUnicode_UTF8(unicode) = data;
- _PyUnicode_UTF8_LENGTH(unicode) = length;
- }
- _PyUnicode_LENGTH(unicode) = length;
- PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0);
+ data = (PyObject *)PyObject_Realloc(data, new_size);
+ if (data == NULL) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ _PyUnicode_DATA_ANY(unicode) = data;
+ if (share_utf8) {
+ _PyUnicode_UTF8(unicode) = data;
+ _PyUnicode_UTF8_LENGTH(unicode) = length;
+ }
+ _PyUnicode_LENGTH(unicode) = length;
+ PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0);
#ifdef Py_DEBUG
- unicode_fill_invalid(unicode, old_length);
+ unicode_fill_invalid(unicode, old_length);
#endif
- if (share_wstr || _PyUnicode_WSTR(unicode) == NULL) {
- assert(_PyUnicode_CheckConsistency(unicode, 0));
- return 0;
- }
- }
- assert(_PyUnicode_WSTR(unicode) != NULL);
/* check for integer overflow */
if (length > PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t) - 1) {
PyErr_NoMemory();
return -1;
}
- new_size = sizeof(wchar_t) * (length + 1);
- wstr = _PyUnicode_WSTR(unicode);
- wstr = PyObject_Realloc(wstr, new_size);
- if (!wstr) {
- PyErr_NoMemory();
- return -1;
- }
- _PyUnicode_WSTR(unicode) = wstr;
- _PyUnicode_WSTR(unicode)[length] = 0;
- _PyUnicode_WSTR_LENGTH(unicode) = length;
assert(_PyUnicode_CheckConsistency(unicode, 0));
return 0;
}
@@ -1177,99 +1020,15 @@ static PyObject*
resize_copy(PyObject *unicode, Py_ssize_t length)
{
Py_ssize_t copy_length;
- if (_PyUnicode_KIND(unicode) != PyUnicode_WCHAR_KIND) {
- PyObject *copy;
-
- assert(PyUnicode_IS_READY(unicode));
-
- copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode));
- if (copy == NULL)
- return NULL;
-
- copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode));
- _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, copy_length);
- return copy;
- }
- else {
- PyObject *w;
-
- w = (PyObject*)_PyUnicode_New(length);
- if (w == NULL)
- return NULL;
- copy_length = _PyUnicode_WSTR_LENGTH(unicode);
- copy_length = Py_MIN(copy_length, length);
- memcpy(_PyUnicode_WSTR(w), _PyUnicode_WSTR(unicode),
- copy_length * sizeof(wchar_t));
- return w;
- }
-}
-
-/* We allocate one more byte to make sure the string is
- Ux0000 terminated; some code (e.g. new_identifier)
- relies on that.
-
- XXX This allocator could further be enhanced by assuring that the
- free list never reduces its size below 1.
-
-*/
-
-static PyUnicodeObject *
-_PyUnicode_New(Py_ssize_t length)
-{
- PyUnicodeObject *unicode;
- size_t new_size;
-
- /* Optimization for empty strings */
- if (length == 0) {
- return (PyUnicodeObject *)unicode_new_empty();
- }
-
- /* Ensure we won't overflow the size. */
- if (length > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UNICODE)) - 1)) {
- return (PyUnicodeObject *)PyErr_NoMemory();
- }
- if (length < 0) {
- PyErr_SetString(PyExc_SystemError,
- "Negative size passed to _PyUnicode_New");
- return NULL;
- }
-
- unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type);
- if (unicode == NULL)
- return NULL;
- new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
+ PyObject *copy;
- _PyUnicode_WSTR_LENGTH(unicode) = length;
- _PyUnicode_HASH(unicode) = -1;
- _PyUnicode_STATE(unicode).interned = 0;
- _PyUnicode_STATE(unicode).kind = 0;
- _PyUnicode_STATE(unicode).compact = 0;
- _PyUnicode_STATE(unicode).ready = 0;
- _PyUnicode_STATE(unicode).ascii = 0;
- _PyUnicode_DATA_ANY(unicode) = NULL;
- _PyUnicode_LENGTH(unicode) = 0;
- _PyUnicode_UTF8(unicode) = NULL;
- _PyUnicode_UTF8_LENGTH(unicode) = 0;
-
- _PyUnicode_WSTR(unicode) = (Py_UNICODE*) PyObject_Malloc(new_size);
- if (!_PyUnicode_WSTR(unicode)) {
- Py_DECREF(unicode);
- PyErr_NoMemory();
+ copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode));
+ if (copy == NULL)
return NULL;
- }
- /* Initialize the first element to guard against cases where
- * the caller fails before initializing str -- unicode_resize()
- * reads str[0], and the Keep-Alive optimization can keep memory
- * allocated for str alive across a call to unicode_dealloc(unicode).
- * We don't want unicode_resize to read uninitialized memory in
- * that case.
- */
- _PyUnicode_WSTR(unicode)[0] = 0;
- _PyUnicode_WSTR(unicode)[length] = 0;
-
- assert(_PyUnicode_CheckConsistency((PyObject *)unicode, 0));
- return unicode;
+ copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode));
+ _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, copy_length);
+ return copy;
}
static const char*
@@ -1279,8 +1038,6 @@ unicode_kind_name(PyObject *unicode)
_PyUnicode_Dump() */
if (!PyUnicode_IS_COMPACT(unicode))
{
- if (!PyUnicode_IS_READY(unicode))
- return "wstr";
switch (PyUnicode_KIND(unicode))
{
case PyUnicode_1BYTE_KIND:
@@ -1296,7 +1053,6 @@ unicode_kind_name(PyObject *unicode)
return "<legacy invalid kind>";
}
}
- assert(PyUnicode_IS_READY(unicode));
switch (PyUnicode_KIND(unicode)) {
case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(unicode))
@@ -1353,15 +1109,7 @@ _PyUnicode_Dump(PyObject *op)
data = unicode->data.any;
printf("%s: len=%zu, ", unicode_kind_name(op), ascii->length);
- if (ascii->wstr == data)
- printf("shared ");
- printf("wstr=%p", (void *)ascii->wstr);
-
- if (!(ascii->state.ascii == 1 && ascii->state.compact == 1)) {
- printf(" (%zu), ", compact->wstr_length);
- if (!ascii->state.compact && compact->utf8 == unicode->data.any) {
- printf("shared ");
- }
+ if (!ascii->state.ascii) {
printf("utf8=%p (%zu)", (void *)compact->utf8, compact->utf8_length);
}
printf(", data=%p\n", data);
@@ -1381,12 +1129,11 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
PyCompactUnicodeObject *unicode;
void *data;
enum PyUnicode_Kind kind;
- int is_sharing, is_ascii;
+ int is_ascii;
Py_ssize_t char_size;
Py_ssize_t struct_size;
is_ascii = 0;
- is_sharing = 0;
struct_size = sizeof(PyCompactUnicodeObject);
if (maxchar < 128) {
kind = PyUnicode_1BYTE_KIND;
@@ -1401,8 +1148,6 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
else if (maxchar < 65536) {
kind = PyUnicode_2BYTE_KIND;
char_size = 2;
- if (sizeof(wchar_t) == 2)
- is_sharing = 1;
}
else {
if (maxchar > MAX_UNICODE) {
@@ -1412,8 +1157,6 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
}
kind = PyUnicode_4BYTE_KIND;
char_size = 4;
- if (sizeof(wchar_t) == 4)
- is_sharing = 1;
}
/* Ensure we won't overflow the size. */
@@ -1445,16 +1188,12 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
_PyUnicode_STATE(unicode).interned = 0;
_PyUnicode_STATE(unicode).kind = kind;
_PyUnicode_STATE(unicode).compact = 1;
- _PyUnicode_STATE(unicode).ready = 1;
_PyUnicode_STATE(unicode).ascii = is_ascii;
if (is_ascii) {
((char*)data)[size] = 0;
- _PyUnicode_WSTR(unicode) = NULL;
}
else if (kind == PyUnicode_1BYTE_KIND) {
((char*)data)[size] = 0;
- _PyUnicode_WSTR(unicode) = NULL;
- _PyUnicode_WSTR_LENGTH(unicode) = 0;
unicode->utf8 = NULL;
unicode->utf8_length = 0;
}
@@ -1465,14 +1204,6 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
((Py_UCS2*)data)[size] = 0;
else /* kind == PyUnicode_4BYTE_KIND */
((Py_UCS4*)data)[size] = 0;
- if (is_sharing) {
- _PyUnicode_WSTR_LENGTH(unicode) = size;
- _PyUnicode_WSTR(unicode) = (wchar_t *)data;
- }
- else {
- _PyUnicode_WSTR_LENGTH(unicode) = 0;
- _PyUnicode_WSTR(unicode) = NULL;
- }
}
#ifdef Py_DEBUG
unicode_fill_invalid((PyObject*)unicode, 0);
@@ -1545,11 +1276,9 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
assert(0 <= from_start);
assert(0 <= to_start);
assert(PyUnicode_Check(from));
- assert(PyUnicode_IS_READY(from));
assert(from_start + how_many <= PyUnicode_GET_LENGTH(from));
assert(PyUnicode_Check(to));
- assert(PyUnicode_IS_READY(to));
assert(to_start + how_many <= PyUnicode_GET_LENGTH(to));
if (how_many == 0)
@@ -1694,11 +1423,6 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
return -1;
}
- if (PyUnicode_READY(from) == -1)
- return -1;
- if (PyUnicode_READY(to) == -1)
- return -1;
-
if ((size_t)from_start > (size_t)PyUnicode_GET_LENGTH(from)) {
PyErr_SetString(PyExc_IndexError, "string index out of range");
return -1;
@@ -1783,135 +1507,6 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end,
return 0;
}
-int
-_PyUnicode_Ready(PyObject *unicode)
-{
- wchar_t *end;
- Py_UCS4 maxchar = 0;
- Py_ssize_t num_surrogates;
-#if SIZEOF_WCHAR_T == 2
- Py_ssize_t length_wo_surrogates;
-#endif
-
- /* _PyUnicode_Ready() is only intended for old-style API usage where
- strings were created using _PyObject_New() and where no canonical
- representation (the str field) has been set yet aka strings
- which are not yet ready. */
- assert(_PyUnicode_CHECK(unicode));
- assert(_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND);
- assert(_PyUnicode_WSTR(unicode) != NULL);
- assert(_PyUnicode_DATA_ANY(unicode) == NULL);
- assert(_PyUnicode_UTF8(unicode) == NULL);
- /* Actually, it should neither be interned nor be anything else: */
- assert(_PyUnicode_STATE(unicode).interned == SSTATE_NOT_INTERNED);
-
- end = _PyUnicode_WSTR(unicode) + _PyUnicode_WSTR_LENGTH(unicode);
- if (find_maxchar_surrogates(_PyUnicode_WSTR(unicode), end,
- &maxchar, &num_surrogates) == -1)
- return -1;
-
- if (maxchar < 256) {
- _PyUnicode_DATA_ANY(unicode) = PyObject_Malloc(_PyUnicode_WSTR_LENGTH(unicode) + 1);
- if (!_PyUnicode_DATA_ANY(unicode)) {
- PyErr_NoMemory();
- return -1;
- }
- _PyUnicode_CONVERT_BYTES(wchar_t, unsigned char,
- _PyUnicode_WSTR(unicode), end,
- PyUnicode_1BYTE_DATA(unicode));
- PyUnicode_1BYTE_DATA(unicode)[_PyUnicode_WSTR_LENGTH(unicode)] = '\0';
- _PyUnicode_LENGTH(unicode) = _PyUnicode_WSTR_LENGTH(unicode);
- _PyUnicode_STATE(unicode).kind = PyUnicode_1BYTE_KIND;
- if (maxchar < 128) {
- _PyUnicode_STATE(unicode).ascii = 1;
- _PyUnicode_UTF8(unicode) = _PyUnicode_DATA_ANY(unicode);
- _PyUnicode_UTF8_LENGTH(unicode) = _PyUnicode_WSTR_LENGTH(unicode);
- }
- else {
- _PyUnicode_STATE(unicode).ascii = 0;
- _PyUnicode_UTF8(unicode) = NULL;
- _PyUnicode_UTF8_LENGTH(unicode) = 0;
- }
- PyObject_Free(_PyUnicode_WSTR(unicode));
- _PyUnicode_WSTR(unicode) = NULL;
- _PyUnicode_WSTR_LENGTH(unicode) = 0;
- }
- /* In this case we might have to convert down from 4-byte native
- wchar_t to 2-byte unicode. */
- else if (maxchar < 65536) {
- assert(num_surrogates == 0 &&
- "FindMaxCharAndNumSurrogatePairs() messed up");
-
-#if SIZEOF_WCHAR_T == 2
- /* We can share representations and are done. */
- _PyUnicode_DATA_ANY(unicode) = _PyUnicode_WSTR(unicode);
- PyUnicode_2BYTE_DATA(unicode)[_PyUnicode_WSTR_LENGTH(unicode)] = '\0';
- _PyUnicode_LENGTH(unicode) = _PyUnicode_WSTR_LENGTH(unicode);
- _PyUnicode_STATE(unicode).kind = PyUnicode_2BYTE_KIND;
- _PyUnicode_UTF8(unicode) = NULL;
- _PyUnicode_UTF8_LENGTH(unicode) = 0;
-#else
- /* sizeof(wchar_t) == 4 */
- _PyUnicode_DATA_ANY(unicode) = PyObject_Malloc(
- 2 * (_PyUnicode_WSTR_LENGTH(unicode) + 1));
- if (!_PyUnicode_DATA_ANY(unicode)) {
- PyErr_NoMemory();
- return -1;
- }
- _PyUnicode_CONVERT_BYTES(wchar_t, Py_UCS2,
- _PyUnicode_WSTR(unicode), end,
- PyUnicode_2BYTE_DATA(unicode));
- PyUnicode_2BYTE_DATA(unicode)[_PyUnicode_WSTR_LENGTH(unicode)] = '\0';
- _PyUnicode_LENGTH(unicode) = _PyUnicode_WSTR_LENGTH(unicode);
- _PyUnicode_STATE(unicode).kind = PyUnicode_2BYTE_KIND;
- _PyUnicode_UTF8(unicode) = NULL;
- _PyUnicode_UTF8_LENGTH(unicode) = 0;
- PyObject_Free(_PyUnicode_WSTR(unicode));
- _PyUnicode_WSTR(unicode) = NULL;
- _PyUnicode_WSTR_LENGTH(unicode) = 0;
-#endif
- }
- /* maxchar exceeds 16 bit, wee need 4 bytes for unicode characters */
- else {
-#if SIZEOF_WCHAR_T == 2
- /* in case the native representation is 2-bytes, we need to allocate a
- new normalized 4-byte version. */
- length_wo_surrogates = _PyUnicode_WSTR_LENGTH(unicode) - num_surrogates;
- if (length_wo_surrogates > PY_SSIZE_T_MAX / 4 - 1) {
- PyErr_NoMemory();
- return -1;
- }
- _PyUnicode_DATA_ANY(unicode) = PyObject_Malloc(4 * (length_wo_surrogates + 1));
- if (!_PyUnicode_DATA_ANY(unicode)) {
- PyErr_NoMemory();
- return -1;
- }
- _PyUnicode_LENGTH(unicode) = length_wo_surrogates;
- _PyUnicode_STATE(unicode).kind = PyUnicode_4BYTE_KIND;
- _PyUnicode_UTF8(unicode) = NULL;
- _PyUnicode_UTF8_LENGTH(unicode) = 0;
- /* unicode_convert_wchar_to_ucs4() requires a ready string */
- _PyUnicode_STATE(unicode).ready = 1;
- unicode_convert_wchar_to_ucs4(_PyUnicode_WSTR(unicode), end, unicode);
- PyObject_Free(_PyUnicode_WSTR(unicode));
- _PyUnicode_WSTR(unicode) = NULL;
- _PyUnicode_WSTR_LENGTH(unicode) = 0;
-#else
- assert(num_surrogates == 0);
-
- _PyUnicode_DATA_ANY(unicode) = _PyUnicode_WSTR(unicode);
- _PyUnicode_LENGTH(unicode) = _PyUnicode_WSTR_LENGTH(unicode);
- _PyUnicode_UTF8(unicode) = NULL;
- _PyUnicode_UTF8_LENGTH(unicode) = 0;
- _PyUnicode_STATE(unicode).kind = PyUnicode_4BYTE_KIND;
-#endif
- PyUnicode_4BYTE_DATA(unicode)[_PyUnicode_LENGTH(unicode)] = '\0';
- }
- _PyUnicode_STATE(unicode).ready = 1;
- assert(_PyUnicode_CheckConsistency(unicode, 1));
- return 0;
-}
-
static void
unicode_dealloc(PyObject *unicode)
{
@@ -1953,9 +1548,6 @@ unicode_dealloc(PyObject *unicode)
Py_UNREACHABLE();
}
- if (_PyUnicode_HAS_WSTR_MEMORY(unicode)) {
- PyObject_Free(_PyUnicode_WSTR(unicode));
- }
if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) {
PyObject_Free(_PyUnicode_UTF8(unicode));
}
@@ -1975,7 +1567,7 @@ unicode_is_singleton(PyObject *unicode)
}
PyASCIIObject *ascii = _PyASCIIObject_CAST(unicode);
- if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1) {
+ if (ascii->length == 1) {
Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
if (ch < 256 && LATIN1(ch) == unicode) {
return 1;
@@ -2017,10 +1609,7 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length)
assert(PyUnicode_Check(unicode));
assert(0 <= length);
- if (_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND)
- old_length = PyUnicode_WSTR_LENGTH(unicode);
- else
- old_length = PyUnicode_GET_LENGTH(unicode);
+ old_length = PyUnicode_GET_LENGTH(unicode);
if (old_length == length)
return 0;
@@ -2150,28 +1739,6 @@ unicode_char(Py_UCS4 ch)
}
PyObject *
-PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size)
-{
- if (u == NULL) {
- if (size > 0) {
- if (PyErr_WarnEx(PyExc_DeprecationWarning,
- "PyUnicode_FromUnicode(NULL, size) is deprecated; "
- "use PyUnicode_New() instead", 1) < 0) {
- return NULL;
- }
- }
- return (PyObject*)_PyUnicode_New(size);
- }
-
- if (size < 0) {
- PyErr_BadInternalCall();
- return NULL;
- }
-
- return PyUnicode_FromWideChar(u, size);
-}
-
-PyObject *
PyUnicode_FromWideChar(const wchar_t *u, Py_ssize_t size)
{
PyObject *unicode;
@@ -2264,16 +1831,12 @@ PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
if (u != NULL) {
return PyUnicode_DecodeUTF8Stateful(u, size, NULL, NULL);
}
- else {
- if (size > 0) {
- if (PyErr_WarnEx(PyExc_DeprecationWarning,
- "PyUnicode_FromStringAndSize(NULL, size) is deprecated; "
- "use PyUnicode_New() instead", 1) < 0) {
- return NULL;
- }
- }
- return (PyObject *)_PyUnicode_New(size);
+ if (size > 0) {
+ PyErr_SetString(PyExc_SystemError,
+ "NULL string with positive size with NULL passed to PyUnicode_FromStringAndSize");
+ return NULL;
}
+ return unicode_new_empty();
}
PyObject *
@@ -2504,7 +2067,6 @@ _PyUnicode_FindMaxChar(PyObject *unicode, Py_ssize_t start, Py_ssize_t end)
enum PyUnicode_Kind kind;
const void *startptr, *endptr;
- assert(PyUnicode_IS_READY(unicode));
assert(0 <= start);
assert(end <= PyUnicode_GET_LENGTH(unicode));
assert(start <= end);
@@ -2547,7 +2109,6 @@ unicode_adjust_maxchar(PyObject **p_unicode)
assert(p_unicode != NULL);
unicode = *p_unicode;
- assert(PyUnicode_IS_READY(unicode));
if (PyUnicode_IS_ASCII(unicode))
return;
@@ -2591,8 +2152,6 @@ _PyUnicode_Copy(PyObject *unicode)
PyErr_BadInternalCall();
return NULL;
}
- if (PyUnicode_READY(unicode) == -1)
- return NULL;
length = PyUnicode_GET_LENGTH(unicode);
copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode));
@@ -2661,8 +2220,6 @@ as_ucs4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize,
int kind;
const void *data;
Py_ssize_t len, targetlen;
- if (PyUnicode_READY(string) == -1)
- return NULL;
kind = PyUnicode_KIND(string);
data = PyUnicode_DATA(string);
len = PyUnicode_GET_LENGTH(string);
@@ -2733,9 +2290,6 @@ unicode_fromformat_write_str(_PyUnicodeWriter *writer, PyObject *str,
Py_ssize_t length, fill, arglen;
Py_UCS4 maxchar;
- if (PyUnicode_READY(str) == -1)
- return -1;
-
length = PyUnicode_GET_LENGTH(str);
if ((precision == -1 || precision >= length)
&& width <= length)
@@ -3172,13 +2726,6 @@ unicode_get_widechar_size(PyObject *unicode)
assert(unicode != NULL);
assert(_PyUnicode_CHECK(unicode));
-#if USE_UNICODE_WCHAR_CACHE
- if (_PyUnicode_WSTR(unicode) != NULL) {
- return PyUnicode_WSTR_LENGTH(unicode);
- }
-#endif /* USE_UNICODE_WCHAR_CACHE */
- assert(PyUnicode_IS_READY(unicode));
-
res = _PyUnicode_LENGTH(unicode);
#if SIZEOF_WCHAR_T == 2
if (PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND) {
@@ -3200,19 +2747,10 @@ unicode_copy_as_widechar(PyObject *unicode, wchar_t *w, Py_ssize_t size)
assert(unicode != NULL);
assert(_PyUnicode_CHECK(unicode));
-#if USE_UNICODE_WCHAR_CACHE
- const wchar_t *wstr = _PyUnicode_WSTR(unicode);
- if (wstr != NULL) {
- memcpy(w, wstr, size * sizeof(wchar_t));
- return;
- }
-#else /* USE_UNICODE_WCHAR_CACHE */
if (PyUnicode_KIND(unicode) == sizeof(wchar_t)) {
memcpy(w, PyUnicode_DATA(unicode), size * sizeof(wchar_t));
return;
}
-#endif /* USE_UNICODE_WCHAR_CACHE */
- assert(PyUnicode_IS_READY(unicode));
if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) {
const Py_UCS1 *s = PyUnicode_1BYTE_DATA(unicode);
@@ -3353,26 +2891,16 @@ _PyUnicode_WideCharString_Converter(PyObject *obj, void *ptr)
{
wchar_t **p = (wchar_t **)ptr;
if (obj == NULL) {
-#if !USE_UNICODE_WCHAR_CACHE
PyMem_Free(*p);
-#endif /* USE_UNICODE_WCHAR_CACHE */
*p = NULL;
return 1;
}
if (PyUnicode_Check(obj)) {
-#if USE_UNICODE_WCHAR_CACHE
- *p = (wchar_t *)_PyUnicode_AsUnicode(obj);
- if (*p == NULL) {
- return 0;
- }
- return 1;
-#else /* USE_UNICODE_WCHAR_CACHE */
*p = PyUnicode_AsWideCharString(obj, NULL);
if (*p == NULL) {
return 0;
}
return Py_CLEANUP_SUPPORTED;
-#endif /* USE_UNICODE_WCHAR_CACHE */
}
PyErr_Format(PyExc_TypeError,
"argument must be str, not %.50s",
@@ -3385,9 +2913,7 @@ _PyUnicode_WideCharString_Opt_Converter(PyObject *obj, void *ptr)
{
wchar_t **p = (wchar_t **)ptr;
if (obj == NULL) {
-#if !USE_UNICODE_WCHAR_CACHE
PyMem_Free(*p);
-#endif /* USE_UNICODE_WCHAR_CACHE */
*p = NULL;
return 1;
}
@@ -3396,19 +2922,11 @@ _PyUnicode_WideCharString_Opt_Converter(PyObject *obj, void *ptr)
return 1;
}
if (PyUnicode_Check(obj)) {
-#if USE_UNICODE_WCHAR_CACHE
- *p = (wchar_t *)_PyUnicode_AsUnicode(obj);
- if (*p == NULL) {
- return 0;
- }
- return 1;
-#else /* USE_UNICODE_WCHAR_CACHE */
*p = PyUnicode_AsWideCharString(obj, NULL);
if (*p == NULL) {
return 0;
}
return Py_CLEANUP_SUPPORTED;
-#endif /* USE_UNICODE_WCHAR_CACHE */
}
PyErr_Format(PyExc_TypeError,
"argument must be str or None, not %.50s",
@@ -3434,8 +2952,6 @@ PyUnicode_FromObject(PyObject *obj)
/* XXX Perhaps we should make this API an alias of
PyObject_Str() instead ?! */
if (PyUnicode_CheckExact(obj)) {
- if (PyUnicode_READY(obj) == -1)
- return NULL;
Py_INCREF(obj);
return obj;
}
@@ -4161,10 +3677,6 @@ PyUnicode_FSDecoder(PyObject* arg, void* addr)
Py_DECREF(path);
return 0;
}
- if (PyUnicode_READY(output) == -1) {
- Py_DECREF(output);
- return 0;
- }
if (findchar(PyUnicode_DATA(output), PyUnicode_KIND(output),
PyUnicode_GET_LENGTH(output), 0, 1) >= 0) {
PyErr_SetString(PyExc_ValueError, "embedded null character");
@@ -4185,8 +3697,6 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
PyErr_BadArgument();
return NULL;
}
- if (PyUnicode_READY(unicode) == -1)
- return NULL;
if (PyUnicode_UTF8(unicode) == NULL) {
if (unicode_fill_utf8(unicode) == -1) {
@@ -4205,85 +3715,22 @@ PyUnicode_AsUTF8(PyObject *unicode)
return PyUnicode_AsUTF8AndSize(unicode, NULL);
}
-Py_UNICODE *
-PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size)
-{
- if (!PyUnicode_Check(unicode)) {
- PyErr_BadArgument();
- return NULL;
- }
- Py_UNICODE *w = _PyUnicode_WSTR(unicode);
- if (w == NULL) {
- /* Non-ASCII compact unicode object */
- assert(_PyUnicode_KIND(unicode) != PyUnicode_WCHAR_KIND);
- assert(PyUnicode_IS_READY(unicode));
-
- Py_ssize_t wlen = unicode_get_widechar_size(unicode);
- if ((size_t)wlen > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
- PyErr_NoMemory();
- return NULL;
- }
- w = (wchar_t *) PyObject_Malloc(sizeof(wchar_t) * (wlen + 1));
- if (w == NULL) {
- PyErr_NoMemory();
- return NULL;
- }
- unicode_copy_as_widechar(unicode, w, wlen + 1);
- _PyUnicode_WSTR(unicode) = w;
- if (!PyUnicode_IS_COMPACT_ASCII(unicode)) {
- _PyUnicode_WSTR_LENGTH(unicode) = wlen;
- }
- }
- if (size != NULL)
- *size = PyUnicode_WSTR_LENGTH(unicode);
- return w;
-}
-
-/* Deprecated APIs */
-
-_Py_COMP_DIAG_PUSH
-_Py_COMP_DIAG_IGNORE_DEPR_DECLS
-
-Py_UNICODE *
-PyUnicode_AsUnicode(PyObject *unicode)
-{
- return PyUnicode_AsUnicodeAndSize(unicode, NULL);
-}
-
-const Py_UNICODE *
-_PyUnicode_AsUnicode(PyObject *unicode)
-{
- Py_ssize_t size;
- const Py_UNICODE *wstr;
-
- wstr = PyUnicode_AsUnicodeAndSize(unicode, &size);
- if (wstr && wcslen(wstr) != (size_t)size) {
- PyErr_SetString(PyExc_ValueError, "embedded null character");
- return NULL;
- }
- return wstr;
-}
-
+/*
+PyUnicode_GetSize() has been deprecated since Python 3.3
+because it returned length of Py_UNICODE.
-Py_ssize_t
+But this function is part of stable abi, because it doesn't
+include Py_UNICODE in signature and it was not excluded from
+stable abi in PEP 384.
+*/
+PyAPI_FUNC(Py_ssize_t)
PyUnicode_GetSize(PyObject *unicode)
{
- if (!PyUnicode_Check(unicode)) {
- PyErr_BadArgument();
- goto onError;
- }
- if (_PyUnicode_WSTR(unicode) == NULL) {
- if (PyUnicode_AsUnicode(unicode) == NULL)
- goto onError;
- }
- return PyUnicode_WSTR_LENGTH(unicode);
-
- onError:
+ PyErr_SetString(PyExc_RuntimeError,
+ "PyUnicode_GetSize has been removed.");
return -1;
}
-_Py_COMP_DIAG_POP
-
Py_ssize_t
PyUnicode_GetLength(PyObject *unicode)
{
@@ -4291,8 +3738,6 @@ PyUnicode_GetLength(PyObject *unicode)
PyErr_BadArgument();
return -1;
}
- if (PyUnicode_READY(unicode) == -1)
- return -1;
return PyUnicode_GET_LENGTH(unicode);
}
@@ -4306,9 +3751,6 @@ PyUnicode_ReadChar(PyObject *unicode, Py_ssize_t index)
PyErr_BadArgument();
return (Py_UCS4)-1;
}
- if (PyUnicode_READY(unicode) == -1) {
- return (Py_UCS4)-1;
- }
if (index < 0 || index >= PyUnicode_GET_LENGTH(unicode)) {
PyErr_SetString(PyExc_IndexError, "string index out of range");
return (Py_UCS4)-1;
@@ -4325,7 +3767,6 @@ PyUnicode_WriteChar(PyObject *unicode, Py_ssize_t index, Py_UCS4 ch)
PyErr_BadArgument();
return -1;
}
- assert(PyUnicode_IS_READY(unicode));
if (index < 0 || index >= PyUnicode_GET_LENGTH(unicode)) {
PyErr_SetString(PyExc_IndexError, "string index out of range");
return -1;
@@ -4458,19 +3899,10 @@ unicode_decode_call_errorhandler_wchar(
goto onError;
}
-#if USE_UNICODE_WCHAR_CACHE
-_Py_COMP_DIAG_PUSH
-_Py_COMP_DIAG_IGNORE_DEPR_DECLS
- repwlen = PyUnicode_GetSize(repunicode);
- if (repwlen < 0)
- goto onError;
-_Py_COMP_DIAG_POP
-#else /* USE_UNICODE_WCHAR_CACHE */
repwlen = PyUnicode_AsWideChar(repunicode, NULL, 0);
if (repwlen < 0)
goto onError;
repwlen--;
-#endif /* USE_UNICODE_WCHAR_CACHE */
/* need more space? (at least enough for what we
have+the replacement+the rest of the string (starting
at the new input position), so we won't have to check space
@@ -4920,8 +4352,6 @@ _PyUnicode_EncodeUTF7(PyObject *str,
char * out;
const char * start;
- if (PyUnicode_READY(str) == -1)
- return NULL;
kind = PyUnicode_KIND(str);
data = PyUnicode_DATA(str);
len = PyUnicode_GET_LENGTH(str);
@@ -5550,9 +4980,6 @@ unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
return NULL;
}
- if (PyUnicode_READY(unicode) == -1)
- return NULL;
-
if (PyUnicode_UTF8(unicode))
return PyBytes_FromStringAndSize(PyUnicode_UTF8(unicode),
PyUnicode_UTF8_LENGTH(unicode));
@@ -5833,8 +5260,6 @@ _PyUnicode_EncodeUTF32(PyObject *str,
PyErr_BadArgument();
return NULL;
}
- if (PyUnicode_READY(str) == -1)
- return NULL;
kind = PyUnicode_KIND(str);
data = PyUnicode_DATA(str);
len = PyUnicode_GET_LENGTH(str);
@@ -5901,8 +5326,6 @@ _PyUnicode_EncodeUTF32(PyObject *str,
}
else {
assert(PyUnicode_Check(rep));
- if (PyUnicode_READY(rep) < 0)
- goto error;
moreunits = repsize = PyUnicode_GET_LENGTH(rep);
if (!PyUnicode_IS_ASCII(rep)) {
raise_encode_exception(&exc, encoding,
@@ -6155,8 +5578,6 @@ _PyUnicode_EncodeUTF16(PyObject *str,
PyErr_BadArgument();
return NULL;
}
- if (PyUnicode_READY(str) == -1)
- return NULL;
kind = PyUnicode_KIND(str);
data = PyUnicode_DATA(str);
len = PyUnicode_GET_LENGTH(str);
@@ -6240,8 +5661,6 @@ _PyUnicode_EncodeUTF16(PyObject *str,
}
else {
assert(PyUnicode_Check(rep));
- if (PyUnicode_READY(rep) < 0)
- goto error;
moreunits = repsize = PyUnicode_GET_LENGTH(rep);
if (!PyUnicode_IS_ASCII(rep)) {
raise_encode_exception(&exc, encoding,
@@ -6619,9 +6038,6 @@ PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
PyErr_BadArgument();
return NULL;
}
- if (PyUnicode_READY(unicode) == -1) {
- return NULL;
- }
len = PyUnicode_GET_LENGTH(unicode);
if (len == 0) {
@@ -6876,9 +6292,6 @@ PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
PyErr_BadArgument();
return NULL;
}
- if (PyUnicode_READY(unicode) == -1) {
- return NULL;
- }
kind = PyUnicode_KIND(unicode);
data = PyUnicode_DATA(unicode);
len = PyUnicode_GET_LENGTH(unicode);
@@ -7015,8 +6428,6 @@ unicode_encode_call_errorhandler(const char *errors,
return NULL;
}
- if (PyUnicode_READY(unicode) == -1)
- return NULL;
len = PyUnicode_GET_LENGTH(unicode);
make_encode_exception(exceptionObject,
@@ -7074,8 +6485,6 @@ unicode_encode_ucs1(PyObject *unicode,
/* output object */
_PyBytesWriter writer;
- if (PyUnicode_READY(unicode) == -1)
- return NULL;
size = PyUnicode_GET_LENGTH(unicode);
kind = PyUnicode_KIND(unicode);
data = PyUnicode_DATA(unicode);
@@ -7194,9 +6603,6 @@ unicode_encode_ucs1(PyObject *unicode,
else {
assert(PyUnicode_Check(rep));
- if (PyUnicode_READY(rep) < 0)
- goto onError;
-
if (limit == 256 ?
PyUnicode_KIND(rep) != PyUnicode_1BYTE_KIND :
!PyUnicode_IS_ASCII(rep))
@@ -7243,8 +6649,6 @@ _PyUnicode_AsLatin1String(PyObject *unicode, const char *errors)
PyErr_BadArgument();
return NULL;
}
- if (PyUnicode_READY(unicode) == -1)
- return NULL;
/* Fast path: if it is a one-byte string, construct
bytes object directly. */
if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND)
@@ -7369,8 +6773,6 @@ _PyUnicode_AsASCIIString(PyObject *unicode, const char *errors)
PyErr_BadArgument();
return NULL;
}
- if (PyUnicode_READY(unicode) == -1)
- return NULL;
/* Fast path: if it is an ASCII-only string, construct bytes object
directly. Else defer to above function to raise the exception. */
if (PyUnicode_IS_ASCII(unicode))
@@ -7758,22 +7160,11 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
substring = PyUnicode_Substring(unicode, offset, offset+len);
if (substring == NULL)
return -1;
-#if USE_UNICODE_WCHAR_CACHE
-_Py_COMP_DIAG_PUSH
-_Py_COMP_DIAG_IGNORE_DEPR_DECLS
- p = PyUnicode_AsUnicodeAndSize(substring, &size);
- if (p == NULL) {
- Py_DECREF(substring);
- return -1;
- }
-_Py_COMP_DIAG_POP
-#else /* USE_UNICODE_WCHAR_CACHE */
p = PyUnicode_AsWideCharString(substring, &size);
Py_CLEAR(substring);
if (p == NULL) {
return -1;
}
-#endif /* USE_UNICODE_WCHAR_CACHE */
assert(size <= INT_MAX);
/* First get the size of the result */
@@ -7824,11 +7215,7 @@ _Py_COMP_DIAG_POP
ret = 0;
done:
-#if USE_UNICODE_WCHAR_CACHE
- Py_DECREF(substring);
-#else /* USE_UNICODE_WCHAR_CACHE */
PyMem_Free(p);
-#endif /* USE_UNICODE_WCHAR_CACHE */
return ret;
error:
@@ -7981,11 +7368,6 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
enum PyUnicode_Kind kind;
const void *data;
- if (PyUnicode_READY(rep) == -1) {
- Py_DECREF(rep);
- goto error;
- }
-
outsize = PyUnicode_GET_LENGTH(rep);
morebytes += outsize;
if (morebytes > 0) {
@@ -8046,8 +7428,6 @@ encode_code_page(int code_page,
return NULL;
}
- if (PyUnicode_READY(unicode) == -1)
- return NULL;
len = PyUnicode_GET_LENGTH(unicode);
if (code_page < 0) {
@@ -8129,9 +7509,6 @@ charmap_decode_string(const char *s,
Py_UCS4 x;
unsigned char ch;
- if (PyUnicode_READY(mapping) == -1)
- return -1;
-
maplen = PyUnicode_GET_LENGTH(mapping);
mapdata = PyUnicode_DATA(mapping);
mapkind = PyUnicode_KIND(mapping);
@@ -8284,8 +7661,6 @@ charmap_decode_mapping(const char *s,
goto onError;
}
else if (PyUnicode_Check(item)) {
- if (PyUnicode_READY(item) == -1)
- goto onError;
if (PyUnicode_GET_LENGTH(item) == 1) {
Py_UCS4 value = PyUnicode_READ_CHAR(item, 0);
if (value == 0xFFFE)
@@ -8699,8 +8074,6 @@ charmap_encoding_error(
Py_UCS4 ch;
int val;
- if (PyUnicode_READY(unicode) == -1)
- return -1;
size = PyUnicode_GET_LENGTH(unicode);
/* find all unencodable characters */
while (collendpos < size) {
@@ -8796,10 +8169,6 @@ charmap_encoding_error(
break;
}
/* generate replacement */
- if (PyUnicode_READY(repunicode) == -1) {
- Py_DECREF(repunicode);
- return -1;
- }
repsize = PyUnicode_GET_LENGTH(repunicode);
data = PyUnicode_DATA(repunicode);
kind = PyUnicode_KIND(repunicode);
@@ -8840,8 +8209,6 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
const void *data;
int kind;
- if (PyUnicode_READY(unicode) == -1)
- return NULL;
size = PyUnicode_GET_LENGTH(unicode);
data = PyUnicode_DATA(unicode);
kind = PyUnicode_KIND(unicode);
@@ -9120,10 +8487,6 @@ unicode_fast_translate_lookup(PyObject *mapping, Py_UCS1 ch,
else if (PyUnicode_Check(item)) {
Py_UCS4 replace;
- if (PyUnicode_READY(item) == -1) {
- Py_DECREF(item);
- return -1;
- }
if (PyUnicode_GET_LENGTH(item) != 1)
goto exit;
@@ -9220,8 +8583,6 @@ _PyUnicode_TranslateCharmap(PyObject *input,
return NULL;
}
- if (PyUnicode_READY(input) == -1)
- return NULL;
data = PyUnicode_DATA(input);
kind = PyUnicode_KIND(input);
size = PyUnicode_GET_LENGTH(input);
@@ -9237,8 +8598,6 @@ _PyUnicode_TranslateCharmap(PyObject *input,
ignore = (errors != NULL && strcmp(errors, "ignore") == 0);
- if (PyUnicode_READY(input) == -1)
- return NULL;
if (PyUnicode_IS_ASCII(input)) {
res = unicode_fast_translate(input, mapping, &writer, ignore, &i);
if (res < 0) {
@@ -9334,8 +8693,6 @@ _PyUnicode_TransformDecimalAndSpaceToASCII(PyObject *unicode)
PyErr_BadInternalCall();
return NULL;
}
- if (PyUnicode_READY(unicode) == -1)
- return NULL;
if (PyUnicode_IS_ASCII(unicode)) {
/* If the string is already ASCII, just return the same string */
Py_INCREF(unicode);
@@ -9527,15 +8884,6 @@ _PyUnicode_InsertThousandsGrouping(
assert(0 <= n_digits);
assert(grouping != NULL);
- if (digits != NULL) {
- if (PyUnicode_READY(digits) == -1) {
- return -1;
- }
- }
- if (PyUnicode_READY(thousands_sep) == -1) {
- return -1;
- }
-
Py_ssize_t count = 0;
Py_ssize_t n_zeros;
int loop_broken = 0;
@@ -9716,8 +9064,6 @@ PyUnicode_FindChar(PyObject *str, Py_UCS4 ch,
{
int kind;
Py_ssize_t len, result;
- if (PyUnicode_READY(str) == -1)
- return -2;
len = PyUnicode_GET_LENGTH(str);
ADJUST_INDICES(start, end, len);
if (end - start < 1)
@@ -9746,10 +9092,6 @@ tailmatch(PyObject *self,
Py_ssize_t i;
Py_ssize_t end_sub;
- if (PyUnicode_READY(self) == -1 ||
- PyUnicode_READY(substring) == -1)
- return -1;
-
ADJUST_INDICES(start, end, PyUnicode_GET_LENGTH(self));
end -= PyUnicode_GET_LENGTH(substring);
if (end < start)
@@ -10008,8 +9350,6 @@ case_operation(PyObject *self,
void *outdata;
Py_UCS4 maxchar = 0, *tmp, *tmpend;
- assert(PyUnicode_IS_READY(self));
-
kind = PyUnicode_KIND(self);
data = PyUnicode_DATA(self);
length = PyUnicode_GET_LENGTH(self);
@@ -10118,8 +9458,6 @@ _PyUnicode_JoinArray(PyObject *separator, PyObject *const *items, Py_ssize_t seq
Py_TYPE(separator)->tp_name);
goto onError;
}
- if (PyUnicode_READY(separator))
- goto onError;
sep = separator;
seplen = PyUnicode_GET_LENGTH(separator);
maxchar = PyUnicode_MAX_CHAR_VALUE(separator);
@@ -10151,8 +9489,6 @@ _PyUnicode_JoinArray(PyObject *separator, PyObject *const *items, Py_ssize_t seq
i, Py_TYPE(item)->tp_name);
goto onError;
}
- if (PyUnicode_READY(item) == -1)
- goto onError;
add_sz = PyUnicode_GET_LENGTH(item);
item_maxchar = PyUnicode_MAX_CHAR_VALUE(item);
maxchar = Py_MAX(maxchar, item_maxchar);
@@ -10247,7 +9583,6 @@ _PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
{
const enum PyUnicode_Kind kind = PyUnicode_KIND(unicode);
void *data = PyUnicode_DATA(unicode);
- assert(PyUnicode_IS_READY(unicode));
assert(unicode_modifiable(unicode));
assert(fill_char <= PyUnicode_MAX_CHAR_VALUE(unicode));
assert(start >= 0);
@@ -10265,8 +9600,6 @@ PyUnicode_Fill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
PyErr_BadInternalCall();
return -1;
}
- if (PyUnicode_READY(unicode) == -1)
- return -1;
if (unicode_check_modifiable(unicode))
return -1;
@@ -10379,9 +9712,6 @@ split(PyObject *self,
if (maxcount < 0)
maxcount = PY_SSIZE_T_MAX;
- if (PyUnicode_READY(self) == -1)
- return NULL;
-
if (substring == NULL)
switch (PyUnicode_KIND(self)) {
case PyUnicode_1BYTE_KIND:
@@ -10409,9 +9739,6 @@ split(PyObject *self,
Py_UNREACHABLE();
}
- if (PyUnicode_READY(substring) == -1)
- return NULL;
-
kind1 = PyUnicode_KIND(self);
kind2 = PyUnicode_KIND(substring);
len1 = PyUnicode_GET_LENGTH(self);
@@ -10471,9 +9798,6 @@ rsplit(PyObject *self,
if (maxcount < 0)
maxcount = PY_SSIZE_T_MAX;
- if (PyUnicode_READY(self) == -1)
- return NULL;
-
if (substring == NULL)
switch (PyUnicode_KIND(self)) {
case PyUnicode_1BYTE_KIND:
@@ -10501,9 +9825,6 @@ rsplit(PyObject *self,
Py_UNREACHABLE();
}
- if (PyUnicode_READY(substring) == -1)
- return NULL;
-
kind1 = PyUnicode_KIND(self);
kind2 = PyUnicode_KIND(substring);
len1 = PyUnicode_GET_LENGTH(self);
@@ -10905,8 +10226,6 @@ static PyObject *
unicode_title_impl(PyObject *self)
/*[clinic end generated code: output=c75ae03809574902 input=fa945d669b26e683]*/
{
- if (PyUnicode_READY(self) == -1)
- return NULL;
return case_operation(self, do_title);
}
@@ -10923,8 +10242,6 @@ static PyObject *
unicode_capitalize_impl(PyObject *self)
/*[clinic end generated code: output=e49a4c333cdb7667 input=f4cbf1016938da6d]*/
{
- if (PyUnicode_READY(self) == -1)
- return NULL;
if (PyUnicode_GET_LENGTH(self) == 0)
return unicode_result_unchanged(self);
return case_operation(self, do_capitalize);
@@ -10940,8 +10257,6 @@ static PyObject *
unicode_casefold_impl(PyObject *self)
/*[clinic end generated code: output=0120daf657ca40af input=384d66cc2ae30daf]*/
{
- if (PyUnicode_READY(self) == -1)
- return NULL;
if (PyUnicode_IS_ASCII(self))
return ascii_upper_or_lower(self, 1);
return case_operation(self, do_casefold);
@@ -10961,8 +10276,6 @@ convert_uc(PyObject *obj, void *addr)
"not %.100s", Py_TYPE(obj)->tp_name);
return 0;
}
- if (PyUnicode_READY(obj) < 0)
- return 0;
if (PyUnicode_GET_LENGTH(obj) != 1) {
PyErr_SetString(PyExc_TypeError,
"The fill character must be exactly one character long");
@@ -10990,9 +10303,6 @@ unicode_center_impl(PyObject *self, Py_ssize_t width, Py_UCS4 fillchar)
{
Py_ssize_t marg, left;
- if (PyUnicode_READY(self) == -1)
- return NULL;
-
if (PyUnicode_GET_LENGTH(self) >= width)
return unicode_result_unchanged(self);
@@ -11149,9 +10459,6 @@ _PyUnicode_Equal(PyObject *str1, PyObject *str2)
if (str1 == str2) {
return 1;
}
- if (PyUnicode_READY(str1) || PyUnicode_READY(str2)) {
- return -1;
- }
return unicode_compare_eq(str1, str2);
}
@@ -11160,10 +10467,6 @@ int
PyUnicode_Compare(PyObject *left, PyObject *right)
{
if (PyUnicode_Check(left) && PyUnicode_Check(right)) {
- if (PyUnicode_READY(left) == -1 ||
- PyUnicode_READY(right) == -1)
- return -1;
-
/* a string is equal to itself */
if (left == right)
return 0;
@@ -11183,24 +10486,8 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
Py_ssize_t i;
int kind;
Py_UCS4 chr;
- const unsigned char *ustr = (const unsigned char *)str;
assert(_PyUnicode_CHECK(uni));
- if (!PyUnicode_IS_READY(uni)) {
- const wchar_t *ws = _PyUnicode_WSTR(uni);
- /* Compare Unicode string and source character set string */
- for (i = 0; (chr = ws[i]) && ustr[i]; i++) {
- if (chr != ustr[i])
- return (chr < ustr[i]) ? -1 : 1;
- }
- /* This check keeps Python strings that end in '\0' from comparing equal
- to C strings identical up to that point. */
- if (_PyUnicode_WSTR_LENGTH(uni) != i || chr)
- return 1; /* uni is longer */
- if (ustr[i])
- return -1; /* str is longer */
- return 0;
- }
kind = PyUnicode_KIND(uni);
if (kind == PyUnicode_1BYTE_KIND) {
const void *data = PyUnicode_1BYTE_DATA(uni);
@@ -11238,24 +10525,6 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
}
}
-static int
-non_ready_unicode_equal_to_ascii_string(PyObject *unicode, const char *str)
-{
- size_t i, len;
- const wchar_t *p;
- len = (size_t)_PyUnicode_WSTR_LENGTH(unicode);
- if (strlen(str) != len)
- return 0;
- p = _PyUnicode_WSTR(unicode);
- assert(p);
- for (i = 0; i < len; i++) {
- unsigned char c = (unsigned char)str[i];
- if (c >= 128 || p[i] != (wchar_t)c)
- return 0;
- }
- return 1;
-}
-
int
_PyUnicode_EqualToASCIIString(PyObject *unicode, const char *str)
{
@@ -11267,11 +10536,6 @@ _PyUnicode_EqualToASCIIString(PyObject *unicode, const char *str)
assert((unsigned char)*p < 128);
}
#endif
- if (PyUnicode_READY(unicode) == -1) {
- /* Memory error or bad data */
- PyErr_Clear();
- return non_ready_unicode_equal_to_ascii_string(unicode, str);
- }
if (!PyUnicode_IS_ASCII(unicode))
return 0;
len = (size_t)PyUnicode_GET_LENGTH(unicode);
@@ -11292,12 +10556,6 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
}
#endif
- if (PyUnicode_READY(left) == -1) {
- /* memory error or bad data */
- PyErr_Clear();
- return non_ready_unicode_equal_to_ascii_string(left, right->string);
- }
-
if (!PyUnicode_IS_ASCII(left))
return 0;
@@ -11333,10 +10591,6 @@ PyUnicode_RichCompare(PyObject *left, PyObject *right, int op)
if (!PyUnicode_Check(left) || !PyUnicode_Check(right))
Py_RETURN_NOTIMPLEMENTED;
- if (PyUnicode_READY(left) == -1 ||
- PyUnicode_READY(right) == -1)
- return NULL;
-
if (left == right) {
switch (op) {
case Py_EQ:
@@ -11384,8 +10638,6 @@ PyUnicode_Contains(PyObject *str, PyObject *substr)
Py_TYPE(substr)->tp_name);
return -1;
}
- if (PyUnicode_READY(substr) == -1)
- return -1;
if (ensure_unicode(str) < 0)
return -1;
@@ -11449,8 +10701,6 @@ PyUnicode_Concat(PyObject *left, PyObject *right)
Py_TYPE(right)->tp_name);
return NULL;
}
- if (PyUnicode_READY(right) < 0)
- return NULL;
/* Shortcuts */
PyObject *empty = unicode_get_empty(); // Borrowed reference
@@ -11504,11 +10754,6 @@ PyUnicode_Append(PyObject **p_left, PyObject *right)
goto error;
}
- if (PyUnicode_READY(left) == -1)
- goto error;
- if (PyUnicode_READY(right) == -1)
- goto error;
-
/* Shortcuts */
PyObject *empty = unicode_get_empty(); // Borrowed reference
if (left == empty) {
@@ -11575,7 +10820,7 @@ PyUnicode_AppendAndDel(PyObject **pleft, PyObject *right)
}
/*
-Wraps stringlib_parse_args_finds() and additionally ensures that the
+Wraps asciilib_parse_args_finds() and additionally ensures that the
first argument is a unicode object.
*/
@@ -11584,8 +10829,7 @@ parse_args_finds_unicode(const char * function_name, PyObject *args,
PyObject **substring,
Py_ssize_t *start, Py_ssize_t *end)
{
- if(stringlib_parse_args_finds(function_name, args, substring,
- start, end)) {
+ if (asciilib_parse_args_finds(function_name, args, substring, start, end)) {
if (ensure_unicode(*substring) < 0)
return 0;
return 1;
@@ -11708,9 +10952,6 @@ unicode_expandtabs_impl(PyObject *self, int tabsize)
int kind;
int found;
- if (PyUnicode_READY(self) == -1)
- return NULL;
-
/* First pass: determine size of output string */
src_len = PyUnicode_GET_LENGTH(self);
i = j = line_pos = 0;
@@ -11796,9 +11037,6 @@ unicode_find(PyObject *self, PyObject *args)
if (!parse_args_finds_unicode("find", args, &substring, &start, &end))
return NULL;
- if (PyUnicode_READY(self) == -1)
- return NULL;
-
result = any_find_slice(self, substring, start, end, 1);
if (result == -2)
@@ -11818,9 +11056,6 @@ unicode_getitem(PyObject *self, Py_ssize_t index)
PyErr_BadArgument();
return NULL;
}
- if (PyUnicode_READY(self) == -1) {
- return NULL;
- }
if (index < 0 || index >= PyUnicode_GET_LENGTH(self)) {
PyErr_SetString(PyExc_IndexError, "string index out of range");
return NULL;
@@ -11843,8 +11078,6 @@ unicode_hash(PyObject *self)
#endif
if (_PyUnicode_HASH(self) != -1)
return _PyUnicode_HASH(self);
- if (PyUnicode_READY(self) == -1)
- return -1;
x = _Py_HashBytes(PyUnicode_DATA(self),
PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self));
@@ -11873,9 +11106,6 @@ unicode_index(PyObject *self, PyObject *args)
if (!parse_args_finds_unicode("index", args, &substring, &start, &end))
return NULL;
- if (PyUnicode_READY(self) == -1)
- return NULL;
-
result = any_find_slice(self, substring, start, end, 1);
if (result == -2)
@@ -11902,9 +11132,6 @@ static PyObject *
unicode_isascii_impl(PyObject *self)
/*[clinic end generated code: output=c5910d64b5a8003f input=5a43cbc6399621d5]*/
{
- if (PyUnicode_READY(self) == -1) {
- return NULL;
- }
return PyBool_FromLong(PyUnicode_IS_ASCII(self));
}
@@ -11926,8 +11153,6 @@ unicode_islower_impl(PyObject *self)
const void *data;
int cased;
- if (PyUnicode_READY(self) == -1)
- return NULL;
length = PyUnicode_GET_LENGTH(self);
kind = PyUnicode_KIND(self);
data = PyUnicode_DATA(self);
@@ -11971,8 +11196,6 @@ unicode_isupper_impl(PyObject *self)
const void *data;
int cased;
- if (PyUnicode_READY(self) == -1)
- return NULL;
length = PyUnicode_GET_LENGTH(self);
kind = PyUnicode_KIND(self);
data = PyUnicode_DATA(self);
@@ -12016,8 +11239,6 @@ unicode_istitle_impl(PyObject *self)
const void *data;
int cased, previous_is_cased;
- if (PyUnicode_READY(self) == -1)
- return NULL;
length = PyUnicode_GET_LENGTH(self);
kind = PyUnicode_KIND(self);
data = PyUnicode_DATA(self);
@@ -12073,8 +11294,6 @@ unicode_isspace_impl(PyObject *self)
int kind;
const void *data;
- if (PyUnicode_READY(self) == -1)
- return NULL;
length = PyUnicode_GET_LENGTH(self);
kind = PyUnicode_KIND(self);
data = PyUnicode_DATA(self);
@@ -12113,8 +11332,6 @@ unicode_isalpha_impl(PyObject *self)
int kind;
const void *data;
- if (PyUnicode_READY(self) == -1)
- return NULL;
length = PyUnicode_GET_LENGTH(self);
kind = PyUnicode_KIND(self);
data = PyUnicode_DATA(self);
@@ -12152,9 +11369,6 @@ unicode_isalnum_impl(PyObject *self)
const void *data;
Py_ssize_t len, i;
- if (PyUnicode_READY(self) == -1)
- return NULL;
-
kind = PyUnicode_KIND(self);
data = PyUnicode_DATA(self);
len = PyUnicode_GET_LENGTH(self);
@@ -12194,8 +11408,6 @@ unicode_isdecimal_impl(PyObject *self)
int kind;
const void *data;
- if (PyUnicode_READY(self) == -1)
- return NULL;
length = PyUnicode_GET_LENGTH(self);
kind = PyUnicode_KIND(self);
data = PyUnicode_DATA(self);
@@ -12233,8 +11445,6 @@ unicode_isdigit_impl(PyObject *self)
int kind;
const void *data;
- if (PyUnicode_READY(self) == -1)
- return NULL;
length = PyUnicode_GET_LENGTH(self);
kind = PyUnicode_KIND(self);
data = PyUnicode_DATA(self);
@@ -12273,8 +11483,6 @@ unicode_isnumeric_impl(PyObject *self)
int kind;
const void *data;
- if (PyUnicode_READY(self) == -1)
- return NULL;
length = PyUnicode_GET_LENGTH(self);
kind = PyUnicode_KIND(self);
data = PyUnicode_DATA(self);
@@ -12299,9 +11507,6 @@ Py_ssize_t
_PyUnicode_ScanIdentifier(PyObject *self)
{
Py_ssize_t i;
- if (PyUnicode_READY(self) == -1)
- return -1;
-
Py_ssize_t len = PyUnicode_GET_LENGTH(self);
if (len == 0) {
/* an empty string is not a valid identifier */
@@ -12335,54 +11540,10 @@ _PyUnicode_ScanIdentifier(PyObject *self)
int
PyUnicode_IsIdentifier(PyObject *self)
{
- if (PyUnicode_IS_READY(self)) {
- Py_ssize_t i = _PyUnicode_ScanIdentifier(self);
- Py_ssize_t len = PyUnicode_GET_LENGTH(self);
- /* an empty string is not a valid identifier */
- return len && i == len;
- }
- else {
-_Py_COMP_DIAG_PUSH
-_Py_COMP_DIAG_IGNORE_DEPR_DECLS
- Py_ssize_t i = 0, len = PyUnicode_GET_SIZE(self);
- if (len == 0) {
- /* an empty string is not a valid identifier */
- return 0;
- }
-
- const wchar_t *wstr = _PyUnicode_WSTR(self);
- Py_UCS4 ch = wstr[i++];
-#if SIZEOF_WCHAR_T == 2
- if (Py_UNICODE_IS_HIGH_SURROGATE(ch)
- && i < len
- && Py_UNICODE_IS_LOW_SURROGATE(wstr[i]))
- {
- ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]);
- i++;
- }
-#endif
- if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) {
- return 0;
- }
-
- while (i < len) {
- ch = wstr[i++];
-#if SIZEOF_WCHAR_T == 2
- if (Py_UNICODE_IS_HIGH_SURROGATE(ch)
- && i < len
- && Py_UNICODE_IS_LOW_SURROGATE(wstr[i]))
- {
- ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]);
- i++;
- }
-#endif
- if (!_PyUnicode_IsXidContinue(ch)) {
- return 0;
- }
- }
- return 1;
-_Py_COMP_DIAG_POP
- }
+ Py_ssize_t i = _PyUnicode_ScanIdentifier(self);
+ Py_ssize_t len = PyUnicode_GET_LENGTH(self);
+ /* an empty string is not a valid identifier */
+ return len && i == len;
}
/*[clinic input]
@@ -12418,8 +11579,6 @@ unicode_isprintable_impl(PyObject *self)
int kind;
const void *data;
- if (PyUnicode_READY(self) == -1)
- return NULL;
length = PyUnicode_GET_LENGTH(self);
kind = PyUnicode_KIND(self);
data = PyUnicode_DATA(self);
@@ -12461,8 +11620,6 @@ unicode_join(PyObject *self, PyObject *iterable)
static Py_ssize_t
unicode_length(PyObject *self)
{
- if (PyUnicode_READY(self) == -1)
- return -1;
return PyUnicode_GET_LENGTH(self);
}
@@ -12482,9 +11639,6 @@ static PyObject *
unicode_ljust_impl(PyObject *self, Py_ssize_t width, Py_UCS4 fillchar)
/*[clinic end generated code: output=1cce0e0e0a0b84b3 input=3ab599e335e60a32]*/
{
- if (PyUnicode_READY(self) == -1)
- return NULL;
-
if (PyUnicode_GET_LENGTH(self) >= width)
return unicode_result_unchanged(self);
@@ -12501,8 +11655,6 @@ static PyObject *
unicode_lower_impl(PyObject *self)
/*[clinic end generated code: output=84ef9ed42efad663 input=60a2984b8beff23a]*/
{
- if (PyUnicode_READY(self) == -1)
- return NULL;
if (PyUnicode_IS_ASCII(self))
return ascii_upper_or_lower(self, 1);
return case_operation(self, do_lower);
@@ -12527,9 +11679,6 @@ _PyUnicode_XStrip(PyObject *self, int striptype, PyObject *sepobj)
BLOOM_MASK sepmask;
Py_ssize_t seplen;
- if (PyUnicode_READY(self) == -1 || PyUnicode_READY(sepobj) == -1)
- return NULL;
-
kind = PyUnicode_KIND(self);
data = PyUnicode_DATA(self);
len = PyUnicode_GET_LENGTH(self);
@@ -12575,9 +11724,6 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
int kind;
Py_ssize_t length;
- if (PyUnicode_READY(self) == -1)
- return NULL;
-
length = PyUnicode_GET_LENGTH(self);
end = Py_MIN(end, length);
@@ -12610,9 +11756,6 @@ do_strip(PyObject *self, int striptype)
{
Py_ssize_t len, i, j;
- if (PyUnicode_READY(self) == -1)
- return NULL;
-
len = PyUnicode_GET_LENGTH(self);
if (PyUnicode_IS_ASCII(self)) {
@@ -12759,9 +11902,6 @@ unicode_repeat(PyObject *str, Py_ssize_t len)
if (len == 1)
return unicode_result_unchanged(str);
- if (PyUnicode_READY(str) == -1)
- return NULL;
-
if (PyUnicode_GET_LENGTH(str) > PY_SSIZE_T_MAX / len) {
PyErr_SetString(PyExc_OverflowError,
"repeated string is too long");
@@ -12836,8 +11976,6 @@ unicode_replace_impl(PyObject *self, PyObject *old, PyObject *new,
Py_ssize_t count)
/*[clinic end generated code: output=b63f1a8b5eebf448 input=147d12206276ebeb]*/
{
- if (PyUnicode_READY(self) == -1)
- return NULL;
return replace(self, old, new, count);
}
@@ -12907,9 +12045,6 @@ unicode_repr(PyObject *unicode)
const void *idata;
void *odata;
- if (PyUnicode_READY(unicode) == -1)
- return NULL;
-
isize = PyUnicode_GET_LENGTH(unicode);
idata = PyUnicode_DATA(unicode);
@@ -13082,9 +12217,6 @@ unicode_rfind(PyObject *self, PyObject *args)
if (!parse_args_finds_unicode("rfind", args, &substring, &start, &end))
return NULL;
- if (PyUnicode_READY(self) == -1)
- return NULL;
-
result = any_find_slice(self, substring, start, end, -1);
if (result == -2)
@@ -13114,9 +12246,6 @@ unicode_rindex(PyObject *self, PyObject *args)
if (!parse_args_finds_unicode("rindex", args, &substring, &start, &end))
return NULL;
- if (PyUnicode_READY(self) == -1)
- return NULL;
-
result = any_find_slice(self, substring, start, end, -1);
if (result == -2)
@@ -13146,9 +12275,6 @@ static PyObject *
unicode_rjust_impl(PyObject *self, Py_ssize_t width, Py_UCS4 fillchar)
/*[clinic end generated code: output=804a1a57fbe8d5cf input=d05f550b5beb1f72]*/
{
- if (PyUnicode_READY(self) == -1)
- return NULL;
-
if (PyUnicode_GET_LENGTH(self) >= width)
return unicode_result_unchanged(self);
@@ -13412,8 +12538,6 @@ static PyObject *
unicode_swapcase_impl(PyObject *self)
/*[clinic end generated code: output=5d28966bf6d7b2af input=3f3ef96d5798a7bb]*/
{
- if (PyUnicode_READY(self) == -1)
- return NULL;
return case_operation(self, do_swapcase);
}
@@ -13579,8 +12703,6 @@ static PyObject *
unicode_upper_impl(PyObject *self)
/*[clinic end generated code: output=1b7ddd16bbcdc092 input=db3d55682dfe2e6c]*/
{
- if (PyUnicode_READY(self) == -1)
- return NULL;
if (PyUnicode_IS_ASCII(self))
return ascii_upper_or_lower(self, 0);
return case_operation(self, do_upper);
@@ -13607,9 +12729,6 @@ unicode_zfill_impl(PyObject *self, Py_ssize_t width)
const void *data;
Py_UCS4 chr;
- if (PyUnicode_READY(self) == -1)
- return NULL;
-
if (PyUnicode_GET_LENGTH(self) >= width)
return unicode_result_unchanged(self);
@@ -13652,7 +12771,7 @@ unicode_startswith(PyObject *self,
Py_ssize_t end = PY_SSIZE_T_MAX;
int result;
- if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
+ if (!asciilib_parse_args_finds("startswith", args, &subobj, &start, &end))
return NULL;
if (PyTuple_Check(subobj)) {
Py_ssize_t i;
@@ -13706,7 +12825,7 @@ unicode_endswith(PyObject *self,
Py_ssize_t end = PY_SSIZE_T_MAX;
int result;
- if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
+ if (!asciilib_parse_args_finds("endswith", args, &subobj, &start, &end))
return NULL;
if (PyTuple_Check(subobj)) {
Py_ssize_t i;
@@ -13753,7 +12872,7 @@ _PyUnicodeWriter_Update(_PyUnicodeWriter *writer)
else {
/* use a value smaller than PyUnicode_1BYTE_KIND() so
_PyUnicodeWriter_PrepareKind() will copy the buffer. */
- writer->kind = PyUnicode_WCHAR_KIND;
+ writer->kind = 0;
assert(writer->kind <= PyUnicode_1BYTE_KIND);
/* Copy-on-write mode: set buffer size to 0 so
@@ -13773,7 +12892,7 @@ _PyUnicodeWriter_Init(_PyUnicodeWriter *writer)
/* use a value smaller than PyUnicode_1BYTE_KIND() so
_PyUnicodeWriter_PrepareKind() will copy the buffer. */
- writer->kind = PyUnicode_WCHAR_KIND;
+ writer->kind = 0;
assert(writer->kind <= PyUnicode_1BYTE_KIND);
}
@@ -13908,8 +13027,6 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
Py_UCS4 maxchar;
Py_ssize_t len;
- if (PyUnicode_READY(str) == -1)
- return -1;
len = PyUnicode_GET_LENGTH(str);
if (len == 0)
return 0;
@@ -13940,9 +13057,6 @@ _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
Py_UCS4 maxchar;
Py_ssize_t len;
- if (PyUnicode_READY(str) == -1)
- return -1;
-
assert(0 <= start);
assert(end <= PyUnicode_GET_LENGTH(str));
assert(start <= end);
@@ -14071,7 +13185,7 @@ _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer)
}
assert(_PyUnicode_CheckConsistency(str, 1));
- return unicode_result_ready(str);
+ return unicode_result(str);
}
void
@@ -14110,8 +13224,6 @@ unicode___format___impl(PyObject *self, PyObject *format_spec)
_PyUnicodeWriter writer;
int ret;
- if (PyUnicode_READY(self) == -1)
- return NULL;
_PyUnicodeWriter_Init(&writer);
ret = _PyUnicode_FormatAdvancedWriter(&writer,
self, format_spec, 0,
@@ -14137,11 +13249,13 @@ unicode_sizeof_impl(PyObject *self)
/* If it's a compact object, account for base structure +
character data. */
- if (PyUnicode_IS_COMPACT_ASCII(self))
+ if (PyUnicode_IS_COMPACT_ASCII(self)) {
size = sizeof(PyASCIIObject) + PyUnicode_GET_LENGTH(self) + 1;
- else if (PyUnicode_IS_COMPACT(self))
+ }
+ else if (PyUnicode_IS_COMPACT(self)) {
size = sizeof(PyCompactUnicodeObject) +
(PyUnicode_GET_LENGTH(self) + 1) * PyUnicode_KIND(self);
+ }
else {
/* If it is a two-block object, account for base object, and
for character block if present. */
@@ -14150,10 +13264,6 @@ unicode_sizeof_impl(PyObject *self)
size += (PyUnicode_GET_LENGTH(self) + 1) *
PyUnicode_KIND(self);
}
- /* If the wstr pointer is present, account for it unless it is shared
- with the data pointer. Check if the data is not shared. */
- if (_PyUnicode_HAS_WSTR_MEMORY(self))
- size += (PyUnicode_WSTR_LENGTH(self) + 1) * sizeof(wchar_t);
if (_PyUnicode_HAS_UTF8_MEMORY(self))
size += PyUnicode_UTF8_LENGTH(self) + 1;
@@ -14252,9 +13362,6 @@ static PySequenceMethods unicode_as_sequence = {
static PyObject*
unicode_subscript(PyObject* self, PyObject* item)
{
- if (PyUnicode_READY(self) == -1)
- return NULL;
-
if (_PyIndex_Check(item)) {
Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
if (i == -1 && PyErr_Occurred())
@@ -14478,7 +13585,6 @@ _PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type)
return NULL;
assert(unicode_modifiable(result));
- assert(PyUnicode_IS_READY(result));
assert(PyUnicode_IS_ASCII(result));
/* To modify the string in-place, there can only be one reference. */
@@ -15014,9 +14120,6 @@ unicode_format_arg_output(struct unicode_formatter_t *ctx,
if (arg->sign && arg->flags & F_ZERO)
fill = '0';
- if (PyUnicode_READY(str) == -1)
- return -1;
-
len = PyUnicode_GET_LENGTH(str);
if ((arg->width == -1 || arg->width <= len)
&& (arg->prec == -1 || arg->prec >= len)
@@ -15318,15 +14421,12 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
{
PyObject *self;
Py_ssize_t length, char_size;
- int share_wstr, share_utf8;
+ int share_utf8;
unsigned int kind;
void *data;
assert(PyType_IsSubtype(type, &PyUnicode_Type));
assert(_PyUnicode_CHECK(unicode));
- if (PyUnicode_READY(unicode) == -1) {
- return NULL;
- }
self = type->tp_alloc(type, 0);
if (self == NULL) {
@@ -15345,15 +14445,11 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
_PyUnicode_STATE(self).kind = kind;
_PyUnicode_STATE(self).compact = 0;
_PyUnicode_STATE(self).ascii = _PyUnicode_STATE(unicode).ascii;
- _PyUnicode_STATE(self).ready = 1;
- _PyUnicode_WSTR(self) = NULL;
_PyUnicode_UTF8_LENGTH(self) = 0;
_PyUnicode_UTF8(self) = NULL;
- _PyUnicode_WSTR_LENGTH(self) = 0;
_PyUnicode_DATA_ANY(self) = NULL;
share_utf8 = 0;
- share_wstr = 0;
if (kind == PyUnicode_1BYTE_KIND) {
char_size = 1;
if (PyUnicode_MAX_CHAR_VALUE(unicode) < 128)
@@ -15361,14 +14457,10 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
}
else if (kind == PyUnicode_2BYTE_KIND) {
char_size = 2;
- if (sizeof(wchar_t) == 2)
- share_wstr = 1;
}
else {
assert(kind == PyUnicode_4BYTE_KIND);
char_size = 4;
- if (sizeof(wchar_t) == 4)
- share_wstr = 1;
}
/* Ensure we won't overflow the length. */
@@ -15387,13 +14479,8 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
_PyUnicode_UTF8_LENGTH(self) = length;
_PyUnicode_UTF8(self) = data;
}
- if (share_wstr) {
- _PyUnicode_WSTR_LENGTH(self) = length;
- _PyUnicode_WSTR(self) = (wchar_t *)data;
- }
- memcpy(data, PyUnicode_DATA(unicode),
- kind * (length + 1));
+ memcpy(data, PyUnicode_DATA(unicode), kind * (length + 1));
assert(_PyUnicode_CheckConsistency(self, 1));
#ifdef Py_DEBUG
_PyUnicode_HASH(self) = _PyUnicode_HASH(unicode);
@@ -15563,11 +14650,6 @@ PyUnicode_InternInPlace(PyObject **p)
}
#ifdef INTERNED_STRINGS
- if (PyUnicode_READY(s) == -1) {
- PyErr_Clear();
- return;
- }
-
if (interned == NULL) {
interned = PyDict_New();
if (interned == NULL) {
@@ -15656,8 +14738,6 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
Py_ssize_t pos = 0;
PyObject *s, *ignored_value;
while (PyDict_Next(interned, &pos, &s, &ignored_value)) {
- assert(PyUnicode_IS_READY(s));
-
switch (PyUnicode_CHECK_INTERNED(s)) {
case SSTATE_INTERNED_IMMORTAL:
Py_SET_REFCNT(s, Py_REFCNT(s) + 1);
@@ -15779,7 +14859,7 @@ unicodeiter_reduce(unicodeiterobject *it, PyObject *Py_UNUSED(ignored))
return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(iter)),
it->it_seq, it->it_index);
} else {
- PyObject *u = (PyObject *)_PyUnicode_New(0);
+ PyObject *u = unicode_new_empty();
if (u == NULL)
return NULL;
return Py_BuildValue("N(N)", _PyEval_GetBuiltin(&_Py_ID(iter)), u);
@@ -15871,8 +14951,6 @@ unicode_iter(PyObject *seq)
PyErr_BadInternalCall();
return NULL;
}
- if (PyUnicode_READY(seq) == -1)
- return NULL;
if (PyUnicode_IS_COMPACT_ASCII(seq)) {
it = PyObject_GC_New(unicodeiterobject, &_PyUnicodeASCIIIter_Type);
}
@@ -16120,20 +15198,8 @@ static void unicode_static_dealloc(PyObject *op)
assert(ascii->state.compact);
- if (ascii->state.ascii) {
- if (ascii->wstr) {
- PyObject_Free(ascii->wstr);
- ascii->wstr = NULL;
- }
- }
- else {
+ if (!ascii->state.ascii) {
PyCompactUnicodeObject* compact = (PyCompactUnicodeObject*)op;
- void* data = (void*)(compact + 1);
- if (ascii->wstr && ascii->wstr != data) {
- PyObject_Free(ascii->wstr);
- ascii->wstr = NULL;
- compact->wstr_length = 0;
- }
if (compact->utf8) {
PyObject_Free(compact->utf8);
compact->utf8 = NULL;