summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Include/cpython/unicodeobject.h6
-rw-r--r--Objects/unicodeobject.c95
2 files changed, 46 insertions, 55 deletions
diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
index 0df6479..81a35cd 100644
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@@ -726,12 +726,6 @@ PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
Py_ssize_t start,
Py_ssize_t end);
-/* --- wchar_t support for platforms which support it --------------------- */
-
-#ifdef HAVE_WCHAR_H
-PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
-#endif
-
/* --- Manage the default encoding ---------------------------------------- */
/* Returns a pointer to the default encoding (UTF-8) of the
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 3d99f11..9d51c8a 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2043,9 +2043,9 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
void *data = PyUnicode_DATA(unicode);
const char *end = str + len;
+ assert(index + len <= PyUnicode_GET_LENGTH(unicode));
switch (kind) {
case PyUnicode_1BYTE_KIND: {
- assert(index + len <= PyUnicode_GET_LENGTH(unicode));
#ifdef Py_DEBUG
if (PyUnicode_IS_ASCII(unicode)) {
Py_UCS4 maxchar = ucs1lib_find_max_char(
@@ -2060,7 +2060,6 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
case PyUnicode_2BYTE_KIND: {
Py_UCS2 *start = (Py_UCS2 *)data + index;
Py_UCS2 *ucs2 = start;
- assert(index <= PyUnicode_GET_LENGTH(unicode));
for (; str < end; ++ucs2, ++str)
*ucs2 = (Py_UCS2)*str;
@@ -2068,17 +2067,18 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
assert((ucs2 - start) <= PyUnicode_GET_LENGTH(unicode));
break;
}
- default: {
+ case PyUnicode_4BYTE_KIND: {
Py_UCS4 *start = (Py_UCS4 *)data + index;
Py_UCS4 *ucs4 = start;
- assert(kind == PyUnicode_4BYTE_KIND);
- assert(index <= PyUnicode_GET_LENGTH(unicode));
for (; str < end; ++ucs4, ++str)
*ucs4 = (Py_UCS4)*str;
assert((ucs4 - start) <= PyUnicode_GET_LENGTH(unicode));
+ break;
}
+ default:
+ Py_UNREACHABLE();
}
}
@@ -2458,13 +2458,15 @@ unicode_adjust_maxchar(PyObject **p_unicode)
if (max_char >= 256)
return;
}
- else {
+ else if (kind == PyUnicode_4BYTE_KIND) {
const Py_UCS4 *u = PyUnicode_4BYTE_DATA(unicode);
- assert(kind == PyUnicode_4BYTE_KIND);
max_char = ucs4lib_find_max_char(u, u + len);
if (max_char >= 0x10000)
return;
}
+ else
+ Py_UNREACHABLE();
+
copy = PyUnicode_New(len, max_char);
if (copy != NULL)
_PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, len);
@@ -2501,22 +2503,12 @@ _PyUnicode_Copy(PyObject *unicode)
/* Widen Unicode objects to larger buffers. Don't write terminating null
character. Return NULL on error. */
-void*
-_PyUnicode_AsKind(PyObject *s, unsigned int kind)
+static void*
+unicode_askind(unsigned int skind, void const *data, Py_ssize_t len, unsigned int kind)
{
- Py_ssize_t len;
void *result;
- unsigned int skind;
- if (PyUnicode_READY(s) == -1)
- return NULL;
-
- len = PyUnicode_GET_LENGTH(s);
- skind = PyUnicode_KIND(s);
- if (skind >= kind) {
- PyErr_SetString(PyExc_SystemError, "invalid widening attempt");
- return NULL;
- }
+ assert(skind < kind);
switch (kind) {
case PyUnicode_2BYTE_KIND:
result = PyMem_New(Py_UCS2, len);
@@ -2525,8 +2517,8 @@ _PyUnicode_AsKind(PyObject *s, unsigned int kind)
assert(skind == PyUnicode_1BYTE_KIND);
_PyUnicode_CONVERT_BYTES(
Py_UCS1, Py_UCS2,
- PyUnicode_1BYTE_DATA(s),
- PyUnicode_1BYTE_DATA(s) + len,
+ (const Py_UCS1 *)data,
+ ((const Py_UCS1 *)data) + len,
result);
return result;
case PyUnicode_4BYTE_KIND:
@@ -2536,24 +2528,23 @@ _PyUnicode_AsKind(PyObject *s, unsigned int kind)
if (skind == PyUnicode_2BYTE_KIND) {
_PyUnicode_CONVERT_BYTES(
Py_UCS2, Py_UCS4,
- PyUnicode_2BYTE_DATA(s),
- PyUnicode_2BYTE_DATA(s) + len,
+ (const Py_UCS2 *)data,
+ ((const Py_UCS2 *)data) + len,
result);
}
else {
assert(skind == PyUnicode_1BYTE_KIND);
_PyUnicode_CONVERT_BYTES(
Py_UCS1, Py_UCS4,
- PyUnicode_1BYTE_DATA(s),
- PyUnicode_1BYTE_DATA(s) + len,
+ (const Py_UCS1 *)data,
+ ((const Py_UCS1 *)data) + len,
result);
}
return result;
default:
- break;
+ Py_UNREACHABLE();
+ return NULL;
}
- PyErr_SetString(PyExc_SystemError, "invalid kind");
- return NULL;
}
static Py_UCS4*
@@ -9420,7 +9411,7 @@ any_find_slice(PyObject* s1, PyObject* s2,
}
if (kind2 != kind1) {
- buf2 = _PyUnicode_AsKind(s2, kind1);
+ buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return -2;
}
@@ -9642,7 +9633,7 @@ PyUnicode_Count(PyObject *str,
buf1 = PyUnicode_DATA(str);
buf2 = PyUnicode_DATA(substr);
if (kind2 != kind1) {
- buf2 = _PyUnicode_AsKind(substr, kind1);
+ buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
goto onError;
}
@@ -10415,7 +10406,7 @@ split(PyObject *self,
buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
if (kind2 != kind1) {
- buf2 = _PyUnicode_AsKind(substring, kind1);
+ buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return NULL;
}
@@ -10506,7 +10497,7 @@ rsplit(PyObject *self,
buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
if (kind2 != kind1) {
- buf2 = _PyUnicode_AsKind(substring, kind1);
+ buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return NULL;
}
@@ -10665,7 +10656,7 @@ replace(PyObject *self, PyObject *str1,
if (kind1 < rkind) {
/* widen substring */
- buf1 = _PyUnicode_AsKind(str1, rkind);
+ buf1 = unicode_askind(kind1, buf1, len1, rkind);
if (!buf1) goto error;
release1 = 1;
}
@@ -10674,19 +10665,22 @@ replace(PyObject *self, PyObject *str1,
goto nothing;
if (rkind > kind2) {
/* widen replacement */
- buf2 = _PyUnicode_AsKind(str2, rkind);
+ buf2 = unicode_askind(kind2, buf2, len2, rkind);
if (!buf2) goto error;
release2 = 1;
}
else if (rkind < kind2) {
/* widen self and buf1 */
rkind = kind2;
- if (release1) PyMem_Free(buf1);
- release1 = 0;
- sbuf = _PyUnicode_AsKind(self, rkind);
+ if (release1) {
+ PyMem_Free(buf1);
+ buf1 = PyUnicode_DATA(str1);
+ release1 = 0;
+ }
+ sbuf = unicode_askind(skind, sbuf, slen, rkind);
if (!sbuf) goto error;
srelease = 1;
- buf1 = _PyUnicode_AsKind(str1, rkind);
+ buf1 = unicode_askind(kind1, buf1, len1, rkind);
if (!buf1) goto error;
release1 = 1;
}
@@ -10724,7 +10718,7 @@ replace(PyObject *self, PyObject *str1,
if (kind1 < rkind) {
/* widen substring */
- buf1 = _PyUnicode_AsKind(str1, rkind);
+ buf1 = unicode_askind(kind1, buf1, len1, rkind);
if (!buf1) goto error;
release1 = 1;
}
@@ -10733,19 +10727,22 @@ replace(PyObject *self, PyObject *str1,
goto nothing;
if (kind2 < rkind) {
/* widen replacement */
- buf2 = _PyUnicode_AsKind(str2, rkind);
+ buf2 = unicode_askind(kind2, buf2, len2, rkind);
if (!buf2) goto error;
release2 = 1;
}
else if (kind2 > rkind) {
/* widen self and buf1 */
rkind = kind2;
- sbuf = _PyUnicode_AsKind(self, rkind);
+ sbuf = unicode_askind(skind, sbuf, slen, rkind);
if (!sbuf) goto error;
srelease = 1;
- if (release1) PyMem_Free(buf1);
- release1 = 0;
- buf1 = _PyUnicode_AsKind(str1, rkind);
+ if (release1) {
+ PyMem_Free(buf1);
+ buf1 = PyUnicode_DATA(str1);
+ release1 = 0;
+ }
+ buf1 = unicode_askind(kind1, buf1, len1, rkind);
if (!buf1) goto error;
release1 = 1;
}
@@ -11361,7 +11358,7 @@ PyUnicode_Contains(PyObject *str, PyObject *substr)
return result;
}
if (kind2 != kind1) {
- buf2 = _PyUnicode_AsKind(substr, kind1);
+ buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return -1;
}
@@ -11578,7 +11575,7 @@ unicode_count(PyObject *self, PyObject *args)
buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
if (kind2 != kind1) {
- buf2 = _PyUnicode_AsKind(substring, kind1);
+ buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return NULL;
}
@@ -13081,7 +13078,7 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
buf1 = PyUnicode_DATA(str_obj);
buf2 = PyUnicode_DATA(sep_obj);
if (kind2 != kind1) {
- buf2 = _PyUnicode_AsKind(sep_obj, kind1);
+ buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return NULL;
}
@@ -13138,7 +13135,7 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
buf1 = PyUnicode_DATA(str_obj);
buf2 = PyUnicode_DATA(sep_obj);
if (kind2 != kind1) {
- buf2 = _PyUnicode_AsKind(sep_obj, kind1);
+ buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return NULL;
}