summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Dong-hee Na <donghee.na@python.org> 2022-07-31 03:14:53 (GMT)
committer: GitHub <noreply@github.com> 2022-07-31 03:14:53 (GMT)
commit: 50b2261bdac98303087287b24eef96abd45a82f9 (patch)
tree: be63b7a12e8d286cac66b9840cfdc6e35b73219e
parent: 53357b3ee53d767c2da5cd77f9c47117903e9a44 (diff)
download: cpython-50b2261bdac98303087287b24eef96abd45a82f9.zip
cpython-50b2261bdac98303087287b24eef96abd45a82f9.tar.gz
cpython-50b2261bdac98303087287b24eef96abd45a82f9.tar.bz2
gh-91146: Reduce allocation size of list from str.split()/rsplit() (gh-95473)
-rw-r--r-- Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst | 2
-rw-r--r-- Objects/unicodeobject.c | 39
2 files changed, 22 insertions, 19 deletions
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst
new file mode 100644
index 0000000..52568db
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst
@@ -0,0 +1,2 @@
+Reduce allocation size of :class:`list` from :meth:`str.split`
+and :meth:`str.rsplit`. Patch by Dong-hee Na.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index ad16ada..355d74f 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -9696,40 +9696,40 @@ split(PyObject *self,
const void *buf1, *buf2;
Py_ssize_t len1, len2;
PyObject* out;
-
- if (maxcount < 0)
- maxcount = PY_SSIZE_T_MAX;
+ len1 = PyUnicode_GET_LENGTH(self);
+ kind1 = PyUnicode_KIND(self);
+ if (maxcount < 0) {
+ maxcount = len1;
+ }
if (substring == NULL)
- switch (PyUnicode_KIND(self)) {
+ switch (kind1) {
case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(self))
return asciilib_split_whitespace(
self, PyUnicode_1BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
);
else
return ucs1lib_split_whitespace(
self, PyUnicode_1BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
);
case PyUnicode_2BYTE_KIND:
return ucs2lib_split_whitespace(
self, PyUnicode_2BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
);
case PyUnicode_4BYTE_KIND:
return ucs4lib_split_whitespace(
self, PyUnicode_4BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
);
default:
Py_UNREACHABLE();
}
- kind1 = PyUnicode_KIND(self);
kind2 = PyUnicode_KIND(substring);
- len1 = PyUnicode_GET_LENGTH(self);
len2 = PyUnicode_GET_LENGTH(substring);
if (kind1 < kind2 || len1 < len2) {
out = PyList_New(1);
@@ -9783,39 +9783,40 @@ rsplit(PyObject *self,
Py_ssize_t len1, len2;
PyObject* out;
- if (maxcount < 0)
- maxcount = PY_SSIZE_T_MAX;
+ len1 = PyUnicode_GET_LENGTH(self);
+ kind1 = PyUnicode_KIND(self);
+ if (maxcount < 0) {
+ maxcount = len1;
+ }
if (substring == NULL)
- switch (PyUnicode_KIND(self)) {
+ switch (kind1) {
case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(self))
return asciilib_rsplit_whitespace(
self, PyUnicode_1BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
);
else
return ucs1lib_rsplit_whitespace(
self, PyUnicode_1BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
);
case PyUnicode_2BYTE_KIND:
return ucs2lib_rsplit_whitespace(
self, PyUnicode_2BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
);
case PyUnicode_4BYTE_KIND:
return ucs4lib_rsplit_whitespace(
self, PyUnicode_4BYTE_DATA(self),
- PyUnicode_GET_LENGTH(self), maxcount
+ len1, maxcount
);
default:
Py_UNREACHABLE();
}
- kind1 = PyUnicode_KIND(self);
kind2 = PyUnicode_KIND(substring);
- len1 = PyUnicode_GET_LENGTH(self);
len2 = PyUnicode_GET_LENGTH(substring);
if (kind1 < kind2 || len1 < len2) {
out = PyList_New(1);