summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2015-03-24 19:55:47 (GMT)
committerSerhiy Storchaka <storchaka@gmail.com>2015-03-24 19:55:47 (GMT)
commitd9d769fcdd573ab12b628798288c02dceba53505 (patch)
treeb61e4767ddb9a89e4402a041f381584662363584
parentf7ef47583e743ebecaae8c93dcb92e7a5792884f (diff)
downloadcpython-d9d769fcdd573ab12b628798288c02dceba53505.zip
cpython-d9d769fcdd573ab12b628798288c02dceba53505.tar.gz
cpython-d9d769fcdd573ab12b628798288c02dceba53505.tar.bz2
Issue #23573: Increased performance of string search operations (str.find,
str.index, str.count, the in operator, str.split, str.partition) with arguments of different kinds (UCS1, UCS2, UCS4).
-rw-r--r--Misc/NEWS4
-rw-r--r--Objects/bytearrayobject.c33
-rw-r--r--Objects/bytesobject.c33
-rw-r--r--Objects/stringlib/fastsearch.h4
-rw-r--r--Objects/stringlib/find.h23
-rw-r--r--Objects/unicodeobject.c344
6 files changed, 248 insertions, 193 deletions
diff --git a/Misc/NEWS b/Misc/NEWS
index 6a40fba..946335d 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,10 @@ Release date: 2015-03-28
Core and Builtins
-----------------
+- Issue #23573: Increased performance of string search operations (str.find,
+ str.index, str.count, the in operator, str.split, str.partition) with
+ arguments of different kinds (UCS1, UCS2, UCS4).
+
- Issue #23753: Python doesn't support anymore platforms without stat() or
fstat(), these functions are always required.
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c
index 8b3267e..195c79f 100644
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@@ -1142,7 +1142,7 @@ bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
char byte;
Py_buffer subbuf;
const char *sub;
- Py_ssize_t sub_len;
+ Py_ssize_t len, sub_len;
Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Py_ssize_t res;
@@ -1161,15 +1161,30 @@ bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
sub = &byte;
sub_len = 1;
}
+ len = PyByteArray_GET_SIZE(self);
- if (dir > 0)
- res = stringlib_find_slice(
- PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
- sub, sub_len, start, end);
- else
- res = stringlib_rfind_slice(
- PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
- sub, sub_len, start, end);
+ ADJUST_INDICES(start, end, len);
+ if (end - start < sub_len)
+ res = -1;
+ else if (sub_len == 1) {
+ unsigned char needle = *sub;
+ int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
+ res = stringlib_fastsearch_memchr_1char(
+ PyByteArray_AS_STRING(self) + start, end - start,
+ needle, needle, mode);
+ if (res >= 0)
+ res += start;
+ }
+ else {
+ if (dir > 0)
+ res = stringlib_find_slice(
+ PyByteArray_AS_STRING(self), len,
+ sub, sub_len, start, end);
+ else
+ res = stringlib_rfind_slice(
+ PyByteArray_AS_STRING(self), len,
+ sub, sub_len, start, end);
+ }
if (subobj)
PyBuffer_Release(&subbuf);
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 5a9dfbd..ae3c289 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -1914,7 +1914,7 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
char byte;
Py_buffer subbuf;
const char *sub;
- Py_ssize_t sub_len;
+ Py_ssize_t len, sub_len;
Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Py_ssize_t res;
@@ -1933,15 +1933,30 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
sub = &byte;
sub_len = 1;
}
+ len = PyBytes_GET_SIZE(self);
- if (dir > 0)
- res = stringlib_find_slice(
- PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
- sub, sub_len, start, end);
- else
- res = stringlib_rfind_slice(
- PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
- sub, sub_len, start, end);
+ ADJUST_INDICES(start, end, len);
+ if (end - start < sub_len)
+ res = -1;
+ else if (sub_len == 1) {
+ unsigned char needle = *sub;
+ int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
+ res = stringlib_fastsearch_memchr_1char(
+ PyBytes_AS_STRING(self) + start, end - start,
+ needle, needle, mode);
+ if (res >= 0)
+ res += start;
+ }
+ else {
+ if (dir > 0)
+ res = stringlib_find_slice(
+ PyBytes_AS_STRING(self), len,
+ sub, sub_len, start, end);
+ else
+ res = stringlib_rfind_slice(
+ PyBytes_AS_STRING(self), len,
+ sub, sub_len, start, end);
+ }
if (subobj)
PyBuffer_Release(&subbuf);
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h
index cd7cac4..cda68e7 100644
--- a/Objects/stringlib/fastsearch.h
+++ b/Objects/stringlib/fastsearch.h
@@ -36,7 +36,7 @@
Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(fastsearch_memchr_1char)(const STRINGLIB_CHAR* s, Py_ssize_t n,
STRINGLIB_CHAR ch, unsigned char needle,
- Py_ssize_t maxcount, int mode)
+ int mode)
{
if (mode == FAST_SEARCH) {
const STRINGLIB_CHAR *ptr = s;
@@ -115,7 +115,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
if (needle != 0)
#endif
return STRINGLIB(fastsearch_memchr_1char)
- (s, n, p[0], needle, maxcount, mode);
+ (s, n, p[0], needle, mode);
}
if (mode == FAST_COUNT) {
for (i = 0; i < n; i++)
diff --git a/Objects/stringlib/find.h b/Objects/stringlib/find.h
index 518e012..14815f6 100644
--- a/Objects/stringlib/find.h
+++ b/Objects/stringlib/find.h
@@ -11,8 +11,7 @@ STRINGLIB(find)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
{
Py_ssize_t pos;
- if (str_len < 0)
- return -1;
+ assert(str_len >= 0);
if (sub_len == 0)
return offset;
@@ -31,8 +30,7 @@ STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
{
Py_ssize_t pos;
- if (str_len < 0)
- return -1;
+ assert(str_len >= 0);
if (sub_len == 0)
return str_len + offset;
@@ -44,27 +42,11 @@ STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
return pos;
}
-/* helper macro to fixup start/end slice values */
-#define ADJUST_INDICES(start, end, len) \
- if (end > len) \
- end = len; \
- else if (end < 0) { \
- end += len; \
- if (end < 0) \
- end = 0; \
- } \
- if (start < 0) { \
- start += len; \
- if (start < 0) \
- start = 0; \
- }
-
Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(find_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t start, Py_ssize_t end)
{
- ADJUST_INDICES(start, end, str_len);
return STRINGLIB(find)(str + start, end - start, sub, sub_len, start);
}
@@ -73,7 +55,6 @@ STRINGLIB(rfind_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t start, Py_ssize_t end)
{
- ADJUST_INDICES(start, end, str_len);
return STRINGLIB(rfind)(str + start, end - start, sub, sub_len, start);
}
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 56d9a02..5343b21 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -641,7 +641,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
static PyObject *
fixup(PyObject *self, Py_UCS4 (*fixfct)(PyObject *s));
-Py_LOCAL_INLINE(Py_ssize_t) findchar(void *s, int kind,
+Py_LOCAL_INLINE(Py_ssize_t) findchar(const void *s, int kind,
Py_ssize_t size, Py_UCS4 ch,
int direction)
{
@@ -8959,35 +8959,61 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
/* --- Helpers ------------------------------------------------------------ */
+/* helper macro to fixup start/end slice values */
+#define ADJUST_INDICES(start, end, len) \
+ if (end > len) \
+ end = len; \
+ else if (end < 0) { \
+ end += len; \
+ if (end < 0) \
+ end = 0; \
+ } \
+ if (start < 0) { \
+ start += len; \
+ if (start < 0) \
+ start = 0; \
+ }
+
static Py_ssize_t
any_find_slice(int direction, PyObject* s1, PyObject* s2,
Py_ssize_t start,
Py_ssize_t end)
{
- int kind1, kind2, kind;
+ int kind1, kind2;
void *buf1, *buf2;
Py_ssize_t len1, len2, result;
kind1 = PyUnicode_KIND(s1);
kind2 = PyUnicode_KIND(s2);
- kind = kind1 > kind2 ? kind1 : kind2;
+ if (kind1 < kind2)
+ return -1;
+
+ len1 = PyUnicode_GET_LENGTH(s1);
+ len2 = PyUnicode_GET_LENGTH(s2);
+ ADJUST_INDICES(start, end, len1);
+ if (end - start < len2)
+ return -1;
+
buf1 = PyUnicode_DATA(s1);
buf2 = PyUnicode_DATA(s2);
- if (kind1 != kind)
- buf1 = _PyUnicode_AsKind(s1, kind);
- if (!buf1)
- return -2;
- if (kind2 != kind)
- buf2 = _PyUnicode_AsKind(s2, kind);
- if (!buf2) {
- if (kind1 != kind) PyMem_Free(buf1);
- return -2;
+ if (len2 == 1) {
+ Py_UCS4 ch = PyUnicode_READ(kind2, buf2, 0);
+ result = findchar((const char *)buf1 + kind1*start,
+ kind1, end - start, ch, direction);
+ if (result == -1)
+ return -1;
+ else
+ return start + result;
+ }
+
+ if (kind2 != kind1) {
+ buf2 = _PyUnicode_AsKind(s2, kind1);
+ if (!buf2)
+ return -2;
}
- len1 = PyUnicode_GET_LENGTH(s1);
- len2 = PyUnicode_GET_LENGTH(s2);
if (direction > 0) {
- switch (kind) {
+ switch (kind1) {
case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(s1) && PyUnicode_IS_ASCII(s2))
result = asciilib_find_slice(buf1, len1, buf2, len2, start, end);
@@ -9005,7 +9031,7 @@ any_find_slice(int direction, PyObject* s1, PyObject* s2,
}
}
else {
- switch (kind) {
+ switch (kind1) {
case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(s1) && PyUnicode_IS_ASCII(s2))
result = asciilib_rfind_slice(buf1, len1, buf2, len2, start, end);
@@ -9023,9 +9049,7 @@ any_find_slice(int direction, PyObject* s1, PyObject* s2,
}
}
- if (kind1 != kind)
- PyMem_Free(buf1);
- if (kind2 != kind)
+ if (kind2 != kind1)
PyMem_Free(buf2);
return result;
@@ -9115,21 +9139,6 @@ _PyUnicode_InsertThousandsGrouping(
}
-/* helper macro to fixup start/end slice values */
-#define ADJUST_INDICES(start, end, len) \
- if (end > len) \
- end = len; \
- else if (end < 0) { \
- end += len; \
- if (end < 0) \
- end = 0; \
- } \
- if (start < 0) { \
- start += len; \
- if (start < 0) \
- start = 0; \
- }
-
Py_ssize_t
PyUnicode_Count(PyObject *str,
PyObject *substr,
@@ -9139,7 +9148,7 @@ PyUnicode_Count(PyObject *str,
Py_ssize_t result;
PyObject* str_obj;
PyObject* sub_obj;
- int kind1, kind2, kind;
+ int kind1, kind2;
void *buf1 = NULL, *buf2 = NULL;
Py_ssize_t len1, len2;
@@ -9159,24 +9168,30 @@ PyUnicode_Count(PyObject *str,
kind1 = PyUnicode_KIND(str_obj);
kind2 = PyUnicode_KIND(sub_obj);
- kind = kind1;
- buf1 = PyUnicode_DATA(str_obj);
- buf2 = PyUnicode_DATA(sub_obj);
- if (kind2 != kind) {
- if (kind2 > kind) {
- Py_DECREF(sub_obj);
- Py_DECREF(str_obj);
- return 0;
- }
- buf2 = _PyUnicode_AsKind(sub_obj, kind);
+ if (kind1 < kind2) {
+ Py_DECREF(sub_obj);
+ Py_DECREF(str_obj);
+ return 0;
}
- if (!buf2)
- goto onError;
+
len1 = PyUnicode_GET_LENGTH(str_obj);
len2 = PyUnicode_GET_LENGTH(sub_obj);
-
ADJUST_INDICES(start, end, len1);
- switch (kind) {
+ if (end - start < len2) {
+ Py_DECREF(sub_obj);
+ Py_DECREF(str_obj);
+ return 0;
+ }
+
+ buf1 = PyUnicode_DATA(str_obj);
+ buf2 = PyUnicode_DATA(sub_obj);
+ if (kind2 != kind1) {
+ buf2 = _PyUnicode_AsKind(sub_obj, kind1);
+ if (!buf2)
+ goto onError;
+ }
+
+ switch (kind1) {
case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sub_obj))
result = asciilib_count(
@@ -9208,14 +9223,14 @@ PyUnicode_Count(PyObject *str,
Py_DECREF(sub_obj);
Py_DECREF(str_obj);
- if (kind2 != kind)
+ if (kind2 != kind1)
PyMem_Free(buf2);
return result;
onError:
Py_DECREF(sub_obj);
Py_DECREF(str_obj);
- if (kind2 != kind && buf2)
+ if (kind2 != kind1 && buf2)
PyMem_Free(buf2);
return -1;
}
@@ -9268,6 +9283,8 @@ PyUnicode_FindChar(PyObject *str, Py_UCS4 ch,
}
if (end > PyUnicode_GET_LENGTH(str))
end = PyUnicode_GET_LENGTH(str);
+ if (start >= end)
+ return -1;
kind = PyUnicode_KIND(str);
result = findchar(PyUnicode_1BYTE_DATA(str) + kind*start,
kind, end-start, ch, direction);
@@ -10014,7 +10031,7 @@ split(PyObject *self,
PyObject *substring,
Py_ssize_t maxcount)
{
- int kind1, kind2, kind;
+ int kind1, kind2;
void *buf1, *buf2;
Py_ssize_t len1, len2;
PyObject* out;
@@ -10058,23 +10075,25 @@ split(PyObject *self,
kind1 = PyUnicode_KIND(self);
kind2 = PyUnicode_KIND(substring);
- kind = kind1 > kind2 ? kind1 : kind2;
+ len1 = PyUnicode_GET_LENGTH(self);
+ len2 = PyUnicode_GET_LENGTH(substring);
+ if (kind1 < kind2 || len1 < len2) {
+ out = PyList_New(1);
+ if (out == NULL)
+ return NULL;
+ Py_INCREF(self);
+ PyList_SET_ITEM(out, 0, self);
+ return out;
+ }
buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
- if (kind1 != kind)
- buf1 = _PyUnicode_AsKind(self, kind);
- if (!buf1)
- return NULL;
- if (kind2 != kind)
- buf2 = _PyUnicode_AsKind(substring, kind);
- if (!buf2) {
- if (kind1 != kind) PyMem_Free(buf1);
- return NULL;
+ if (kind2 != kind1) {
+ buf2 = _PyUnicode_AsKind(substring, kind1);
+ if (!buf2)
+ return NULL;
}
- len1 = PyUnicode_GET_LENGTH(self);
- len2 = PyUnicode_GET_LENGTH(substring);
- switch (kind) {
+ switch (kind1) {
case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(self) && PyUnicode_IS_ASCII(substring))
out = asciilib_split(
@@ -10094,9 +10113,7 @@ split(PyObject *self,
default:
out = NULL;
}
- if (kind1 != kind)
- PyMem_Free(buf1);
- if (kind2 != kind)
+ if (kind2 != kind1)
PyMem_Free(buf2);
return out;
}
@@ -10106,7 +10123,7 @@ rsplit(PyObject *self,
PyObject *substring,
Py_ssize_t maxcount)
{
- int kind1, kind2, kind;
+ int kind1, kind2;
void *buf1, *buf2;
Py_ssize_t len1, len2;
PyObject* out;
@@ -10150,23 +10167,25 @@ rsplit(PyObject *self,
kind1 = PyUnicode_KIND(self);
kind2 = PyUnicode_KIND(substring);
- kind = kind1 > kind2 ? kind1 : kind2;
+ len1 = PyUnicode_GET_LENGTH(self);
+ len2 = PyUnicode_GET_LENGTH(substring);
+ if (kind1 < kind2 || len1 < len2) {
+ out = PyList_New(1);
+ if (out == NULL)
+ return NULL;
+ Py_INCREF(self);
+ PyList_SET_ITEM(out, 0, self);
+ return out;
+ }
buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
- if (kind1 != kind)
- buf1 = _PyUnicode_AsKind(self, kind);
- if (!buf1)
- return NULL;
- if (kind2 != kind)
- buf2 = _PyUnicode_AsKind(substring, kind);
- if (!buf2) {
- if (kind1 != kind) PyMem_Free(buf1);
- return NULL;
+ if (kind2 != kind1) {
+ buf2 = _PyUnicode_AsKind(substring, kind1);
+ if (!buf2)
+ return NULL;
}
- len1 = PyUnicode_GET_LENGTH(self);
- len2 = PyUnicode_GET_LENGTH(substring);
- switch (kind) {
+ switch (kind1) {
case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(self) && PyUnicode_IS_ASCII(substring))
out = asciilib_rsplit(
@@ -10186,9 +10205,7 @@ rsplit(PyObject *self,
default:
out = NULL;
}
- if (kind1 != kind)
- PyMem_Free(buf1);
- if (kind2 != kind)
+ if (kind2 != kind1)
PyMem_Free(buf2);
return out;
}
@@ -10407,7 +10424,7 @@ replace(PyObject *self, PyObject *str1,
}
/* new_size = PyUnicode_GET_LENGTH(self) + n * (PyUnicode_GET_LENGTH(str2) -
PyUnicode_GET_LENGTH(str1))); */
- if (len2 > len1 && len2 - len1 > (PY_SSIZE_T_MAX - slen) / n) {
+ if (len1 < len2 && len2 - len1 > (PY_SSIZE_T_MAX - slen) / n) {
PyErr_SetString(PyExc_OverflowError,
"replace string is too long");
goto error;
@@ -10816,7 +10833,7 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
}
if (len1 > len2)
return 1; /* uni is longer */
- if (len2 > len1)
+ if (len1 < len2)
return -1; /* str is longer */
return 0;
}
@@ -10928,23 +10945,35 @@ PyUnicode_Contains(PyObject *container, PyObject *element)
kind1 = PyUnicode_KIND(str);
kind2 = PyUnicode_KIND(sub);
+ if (kind1 < kind2) {
+ Py_DECREF(sub);
+ Py_DECREF(str);
+ return 0;
+ }
+ len1 = PyUnicode_GET_LENGTH(str);
+ len2 = PyUnicode_GET_LENGTH(sub);
+ if (len1 < len2) {
+ Py_DECREF(sub);
+ Py_DECREF(str);
+ return 0;
+ }
buf1 = PyUnicode_DATA(str);
buf2 = PyUnicode_DATA(sub);
+ if (len2 == 1) {
+ Py_UCS4 ch = PyUnicode_READ(kind2, buf2, 0);
+ result = findchar((const char *)buf1, kind1, len1, ch, 1) != -1;
+ Py_DECREF(sub);
+ Py_DECREF(str);
+ return result;
+ }
if (kind2 != kind1) {
- if (kind2 > kind1) {
+ buf2 = _PyUnicode_AsKind(sub, kind1);
+ if (!buf2) {
Py_DECREF(sub);
Py_DECREF(str);
- return 0;
+ return -1;
}
- buf2 = _PyUnicode_AsKind(sub, kind1);
- }
- if (!buf2) {
- Py_DECREF(sub);
- Py_DECREF(str);
- return -1;
}
- len1 = PyUnicode_GET_LENGTH(str);
- len2 = PyUnicode_GET_LENGTH(sub);
switch (kind1) {
case PyUnicode_1BYTE_KIND:
@@ -11129,7 +11158,7 @@ unicode_count(PyObject *self, PyObject *args)
Py_ssize_t start = 0;
Py_ssize_t end = PY_SSIZE_T_MAX;
PyObject *result;
- int kind1, kind2, kind;
+ int kind1, kind2;
void *buf1, *buf2;
Py_ssize_t len1, len2, iresult;
@@ -11139,24 +11168,27 @@ unicode_count(PyObject *self, PyObject *args)
kind1 = PyUnicode_KIND(self);
kind2 = PyUnicode_KIND(substring);
- if (kind2 > kind1) {
+ if (kind1 < kind2) {
Py_DECREF(substring);
return PyLong_FromLong(0);
}
- kind = kind1;
- buf1 = PyUnicode_DATA(self);
- buf2 = PyUnicode_DATA(substring);
- if (kind2 != kind)
- buf2 = _PyUnicode_AsKind(substring, kind);
- if (!buf2) {
- Py_DECREF(substring);
- return NULL;
- }
len1 = PyUnicode_GET_LENGTH(self);
len2 = PyUnicode_GET_LENGTH(substring);
-
ADJUST_INDICES(start, end, len1);
- switch (kind) {
+ if (end - start < len2) {
+ Py_DECREF(substring);
+ return PyLong_FromLong(0);
+ }
+ buf1 = PyUnicode_DATA(self);
+ buf2 = PyUnicode_DATA(substring);
+ if (kind2 != kind1) {
+ buf2 = _PyUnicode_AsKind(substring, kind1);
+ if (!buf2) {
+ Py_DECREF(substring);
+ return NULL;
+ }
+ }
+ switch (kind1) {
case PyUnicode_1BYTE_KIND:
iresult = ucs1lib_count(
((Py_UCS1*)buf1) + start, end - start,
@@ -11181,7 +11213,7 @@ unicode_count(PyObject *self, PyObject *args)
result = PyLong_FromSsize_t(iresult);
- if (kind2 != kind)
+ if (kind2 != kind1)
PyMem_Free(buf2);
Py_DECREF(substring);
@@ -12632,8 +12664,8 @@ PyUnicode_Partition(PyObject *str_in, PyObject *sep_in)
PyObject* str_obj;
PyObject* sep_obj;
PyObject* out;
- int kind1, kind2, kind;
- void *buf1 = NULL, *buf2 = NULL;
+ int kind1, kind2;
+ void *buf1, *buf2;
Py_ssize_t len1, len2;
str_obj = PyUnicode_FromObject(str_in);
@@ -12652,21 +12684,29 @@ PyUnicode_Partition(PyObject *str_in, PyObject *sep_in)
kind1 = PyUnicode_KIND(str_obj);
kind2 = PyUnicode_KIND(sep_obj);
- kind = Py_MAX(kind1, kind2);
- buf1 = PyUnicode_DATA(str_obj);
- if (kind1 != kind)
- buf1 = _PyUnicode_AsKind(str_obj, kind);
- if (!buf1)
- goto onError;
- buf2 = PyUnicode_DATA(sep_obj);
- if (kind2 != kind)
- buf2 = _PyUnicode_AsKind(sep_obj, kind);
- if (!buf2)
- goto onError;
len1 = PyUnicode_GET_LENGTH(str_obj);
len2 = PyUnicode_GET_LENGTH(sep_obj);
+ if (kind1 < kind2 || len1 < len2) {
+ _Py_INCREF_UNICODE_EMPTY();
+ if (!unicode_empty)
+ out = NULL;
+ else {
+ out = PyTuple_Pack(3, str_obj, unicode_empty, unicode_empty);
+ Py_DECREF(unicode_empty);
+ }
+ Py_DECREF(sep_obj);
+ Py_DECREF(str_obj);
+ return out;
+ }
+ buf1 = PyUnicode_DATA(str_obj);
+ buf2 = PyUnicode_DATA(sep_obj);
+ if (kind2 != kind1) {
+ buf2 = _PyUnicode_AsKind(sep_obj, kind1);
+ if (!buf2)
+ goto onError;
+ }
- switch (PyUnicode_KIND(str_obj)) {
+ switch (kind1) {
case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sep_obj))
out = asciilib_partition(str_obj, buf1, len1, sep_obj, buf2, len2);
@@ -12686,18 +12726,14 @@ PyUnicode_Partition(PyObject *str_in, PyObject *sep_in)
Py_DECREF(sep_obj);
Py_DECREF(str_obj);
- if (kind1 != kind)
- PyMem_Free(buf1);
- if (kind2 != kind)
+ if (kind2 != kind1)
PyMem_Free(buf2);
return out;
onError:
Py_DECREF(sep_obj);
Py_DECREF(str_obj);
- if (kind1 != kind && buf1)
- PyMem_Free(buf1);
- if (kind2 != kind && buf2)
+ if (kind2 != kind1 && buf2)
PyMem_Free(buf2);
return NULL;
}
@@ -12709,8 +12745,8 @@ PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in)
PyObject* str_obj;
PyObject* sep_obj;
PyObject* out;
- int kind1, kind2, kind;
- void *buf1 = NULL, *buf2 = NULL;
+ int kind1, kind2;
+ void *buf1, *buf2;
Py_ssize_t len1, len2;
str_obj = PyUnicode_FromObject(str_in);
@@ -12722,23 +12758,31 @@ PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in)
return NULL;
}
- kind1 = PyUnicode_KIND(str_in);
+ kind1 = PyUnicode_KIND(str_obj);
kind2 = PyUnicode_KIND(sep_obj);
- kind = Py_MAX(kind1, kind2);
- buf1 = PyUnicode_DATA(str_in);
- if (kind1 != kind)
- buf1 = _PyUnicode_AsKind(str_in, kind);
- if (!buf1)
- goto onError;
- buf2 = PyUnicode_DATA(sep_obj);
- if (kind2 != kind)
- buf2 = _PyUnicode_AsKind(sep_obj, kind);
- if (!buf2)
- goto onError;
len1 = PyUnicode_GET_LENGTH(str_obj);
len2 = PyUnicode_GET_LENGTH(sep_obj);
+ if (kind1 < kind2 || len1 < len2) {
+ _Py_INCREF_UNICODE_EMPTY();
+ if (!unicode_empty)
+ out = NULL;
+ else {
+ out = PyTuple_Pack(3, unicode_empty, unicode_empty, str_obj);
+ Py_DECREF(unicode_empty);
+ }
+ Py_DECREF(sep_obj);
+ Py_DECREF(str_obj);
+ return out;
+ }
+ buf1 = PyUnicode_DATA(str_obj);
+ buf2 = PyUnicode_DATA(sep_obj);
+ if (kind2 != kind1) {
+ buf2 = _PyUnicode_AsKind(sep_obj, kind1);
+ if (!buf2)
+ goto onError;
+ }
- switch (PyUnicode_KIND(str_in)) {
+ switch (kind1) {
case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sep_obj))
out = asciilib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2);
@@ -12758,18 +12802,14 @@ PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in)
Py_DECREF(sep_obj);
Py_DECREF(str_obj);
- if (kind1 != kind)
- PyMem_Free(buf1);
- if (kind2 != kind)
+ if (kind2 != kind1)
PyMem_Free(buf2);
return out;
onError:
Py_DECREF(sep_obj);
Py_DECREF(str_obj);
- if (kind1 != kind && buf1)
- PyMem_Free(buf1);
- if (kind2 != kind && buf2)
+ if (kind2 != kind1 && buf2)
PyMem_Free(buf2);
return NULL;
}