diff options
author | Dong-hee Na <donghee.na@python.org> | 2022-08-01 13:15:07 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-08-01 13:15:07 (GMT) |
commit | fb75d015f487e50079e8d2ea7859750684b124e4 (patch) | |
tree | 6d79d990973ef91e4f00bca1e90562862590030e /Objects/unicodeobject.c | |
parent | 347c783673c22c3f37e21c485437404b67405512 (diff) | |
download | cpython-fb75d015f487e50079e8d2ea7859750684b124e4.zip cpython-fb75d015f487e50079e8d2ea7859750684b124e4.tar.gz cpython-fb75d015f487e50079e8d2ea7859750684b124e4.tar.bz2 |
gh-91146: Further reduce the allocation size of the list from str.split/rsplit (gh-95493)
Co-authored-by: Inada Naoki <songofacandy@gmail.com>
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 31 |
1 files changed, 22 insertions, 9 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 355d74f..7ff7995 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9698,11 +9698,11 @@ split(PyObject *self, PyObject* out; len1 = PyUnicode_GET_LENGTH(self); kind1 = PyUnicode_KIND(self); - if (maxcount < 0) { - maxcount = len1; - } - if (substring == NULL) + if (substring == NULL) { + if (maxcount < 0) { + maxcount = (len1 - 1) / 2 + 1; + } switch (kind1) { case PyUnicode_1BYTE_KIND: if (PyUnicode_IS_ASCII(self)) @@ -9728,9 +9728,16 @@ split(PyObject *self, default: Py_UNREACHABLE(); } + } kind2 = PyUnicode_KIND(substring); len2 = PyUnicode_GET_LENGTH(substring); + if (maxcount < 0) { + // if len2 == 0, it will raise ValueError. + maxcount = len2 == 0 ? 0 : (len1 / len2) + 1; + // handle expected overflow case: (Py_SSIZE_T_MAX / 1) + 1 + maxcount = maxcount < 0 ? len1 : maxcount; + } if (kind1 < kind2 || len1 < len2) { out = PyList_New(1); if (out == NULL) @@ -9785,11 +9792,11 @@ rsplit(PyObject *self, len1 = PyUnicode_GET_LENGTH(self); kind1 = PyUnicode_KIND(self); - if (maxcount < 0) { - maxcount = len1; - } - if (substring == NULL) + if (substring == NULL) { + if (maxcount < 0) { + maxcount = (len1 - 1) / 2 + 1; + } switch (kind1) { case PyUnicode_1BYTE_KIND: if (PyUnicode_IS_ASCII(self)) @@ -9815,9 +9822,15 @@ rsplit(PyObject *self, default: Py_UNREACHABLE(); } - + } kind2 = PyUnicode_KIND(substring); len2 = PyUnicode_GET_LENGTH(substring); + if (maxcount < 0) { + // if len2 == 0, it will raise ValueError. + maxcount = len2 == 0 ? 0 : (len1 / len2) + 1; + // handle expected overflow case: (Py_SSIZE_T_MAX / 1) + 1 + maxcount = maxcount < 0 ? len1 : maxcount; + } if (kind1 < kind2 || len1 < len2) { out = PyList_New(1); if (out == NULL) |