summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst 2
-rw-r--r-- Objects/unicodeobject.c 31
2 files changed, 23 insertions, 10 deletions
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst
index 52568db..9172ca2 100644
--- a/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst
@@ -1,2 +1,2 @@
Reduce allocation size of :class:`list` from :meth:`str.split`
-and :meth:`str.rsplit`. Patch by Dong-hee Na.
+and :meth:`str.rsplit`. Patch by Dong-hee Na and Inada Naoki.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 355d74f..7ff7995 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -9698,11 +9698,11 @@ split(PyObject *self,
PyObject* out;
len1 = PyUnicode_GET_LENGTH(self);
kind1 = PyUnicode_KIND(self);
- if (maxcount < 0) {
- maxcount = len1;
- }
- if (substring == NULL)
+ if (substring == NULL) {
+ if (maxcount < 0) {
+ maxcount = (len1 - 1) / 2 + 1;
+ }
switch (kind1) {
case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(self))
@@ -9728,9 +9728,16 @@ split(PyObject *self,
default:
Py_UNREACHABLE();
}
+ }
kind2 = PyUnicode_KIND(substring);
len2 = PyUnicode_GET_LENGTH(substring);
+ if (maxcount < 0) {
+ // if len2 == 0, it will raise ValueError.
+ maxcount = len2 == 0 ? 0 : (len1 / len2) + 1;
+ // handle expected overflow case: (Py_SSIZE_T_MAX / 1) + 1
+ maxcount = maxcount < 0 ? len1 : maxcount;
+ }
if (kind1 < kind2 || len1 < len2) {
out = PyList_New(1);
if (out == NULL)
@@ -9785,11 +9792,11 @@ rsplit(PyObject *self,
len1 = PyUnicode_GET_LENGTH(self);
kind1 = PyUnicode_KIND(self);
- if (maxcount < 0) {
- maxcount = len1;
- }
- if (substring == NULL)
+ if (substring == NULL) {
+ if (maxcount < 0) {
+ maxcount = (len1 - 1) / 2 + 1;
+ }
switch (kind1) {
case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(self))
@@ -9815,9 +9822,15 @@ rsplit(PyObject *self,
default:
Py_UNREACHABLE();
}
-
+ }
kind2 = PyUnicode_KIND(substring);
len2 = PyUnicode_GET_LENGTH(substring);
+ if (maxcount < 0) {
+ // if len2 == 0, it will raise ValueError.
+ maxcount = len2 == 0 ? 0 : (len1 / len2) + 1;
+ // handle expected overflow case: (Py_SSIZE_T_MAX / 1) + 1
+ maxcount = maxcount < 0 ? len1 : maxcount;
+ }
if (kind1 < kind2 || len1 < len2) {
out = PyList_New(1);
if (out == NULL)