diff options
author | Dong-hee Na <donghee.na@python.org> | 2022-07-31 03:14:53 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-07-31 03:14:53 (GMT) |
commit | 50b2261bdac98303087287b24eef96abd45a82f9 (patch) | |
tree | be63b7a12e8d286cac66b9840cfdc6e35b73219e | |
parent | 53357b3ee53d767c2da5cd77f9c47117903e9a44 (diff) | |
download | cpython-50b2261bdac98303087287b24eef96abd45a82f9.zip cpython-50b2261bdac98303087287b24eef96abd45a82f9.tar.gz cpython-50b2261bdac98303087287b24eef96abd45a82f9.tar.bz2 |
gh-91146: Reduce allocation size of list from str.split()/rsplit() (gh-95473)
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst | 2 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 39 |
2 files changed, 22 insertions, 19 deletions
```diff
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst
new file mode 100644
index 0000000..52568db
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst
@@ -0,0 +1,2 @@
+Reduce allocation size of :class:`list` from :meth:`str.split`
+and :meth:`str.rsplit`. Patch by Dong-hee Na.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index ad16ada..355d74f 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -9696,40 +9696,40 @@ split(PyObject *self,
     const void *buf1, *buf2;
     Py_ssize_t len1, len2;
     PyObject* out;
-
-    if (maxcount < 0)
-        maxcount = PY_SSIZE_T_MAX;
+    len1 = PyUnicode_GET_LENGTH(self);
+    kind1 = PyUnicode_KIND(self);
+    if (maxcount < 0) {
+        maxcount = len1;
+    }
 
     if (substring == NULL)
-        switch (PyUnicode_KIND(self)) {
+        switch (kind1) {
         case PyUnicode_1BYTE_KIND:
             if (PyUnicode_IS_ASCII(self))
                 return asciilib_split_whitespace(
                     self, PyUnicode_1BYTE_DATA(self),
-                    PyUnicode_GET_LENGTH(self), maxcount
+                    len1, maxcount
                     );
             else
                 return ucs1lib_split_whitespace(
                     self, PyUnicode_1BYTE_DATA(self),
-                    PyUnicode_GET_LENGTH(self), maxcount
+                    len1, maxcount
                     );
         case PyUnicode_2BYTE_KIND:
             return ucs2lib_split_whitespace(
                 self, PyUnicode_2BYTE_DATA(self),
-                PyUnicode_GET_LENGTH(self), maxcount
+                len1, maxcount
                 );
         case PyUnicode_4BYTE_KIND:
             return ucs4lib_split_whitespace(
                 self, PyUnicode_4BYTE_DATA(self),
-                PyUnicode_GET_LENGTH(self), maxcount
+                len1, maxcount
                 );
         default:
             Py_UNREACHABLE();
         }
 
-    kind1 = PyUnicode_KIND(self);
     kind2 = PyUnicode_KIND(substring);
-    len1 = PyUnicode_GET_LENGTH(self);
     len2 = PyUnicode_GET_LENGTH(substring);
     if (kind1 < kind2 || len1 < len2) {
         out = PyList_New(1);
@@ -9783,39 +9783,40 @@ rsplit(PyObject *self,
     Py_ssize_t len1, len2;
     PyObject* out;
 
-    if (maxcount < 0)
-        maxcount = PY_SSIZE_T_MAX;
+    len1 = PyUnicode_GET_LENGTH(self);
+    kind1 = PyUnicode_KIND(self);
+    if (maxcount < 0) {
+        maxcount = len1;
+    }
 
     if (substring == NULL)
-        switch (PyUnicode_KIND(self)) {
+        switch (kind1) {
         case PyUnicode_1BYTE_KIND:
             if (PyUnicode_IS_ASCII(self))
                 return asciilib_rsplit_whitespace(
                     self, PyUnicode_1BYTE_DATA(self),
-                    PyUnicode_GET_LENGTH(self), maxcount
+                    len1, maxcount
                     );
             else
                 return ucs1lib_rsplit_whitespace(
                     self, PyUnicode_1BYTE_DATA(self),
-                    PyUnicode_GET_LENGTH(self), maxcount
+                    len1, maxcount
                     );
         case PyUnicode_2BYTE_KIND:
             return ucs2lib_rsplit_whitespace(
                 self, PyUnicode_2BYTE_DATA(self),
-                PyUnicode_GET_LENGTH(self), maxcount
+                len1, maxcount
                 );
         case PyUnicode_4BYTE_KIND:
             return ucs4lib_rsplit_whitespace(
                 self, PyUnicode_4BYTE_DATA(self),
-                PyUnicode_GET_LENGTH(self), maxcount
+                len1, maxcount
                 );
         default:
             Py_UNREACHABLE();
         }
 
-    kind1 = PyUnicode_KIND(self);
     kind2 = PyUnicode_KIND(substring);
-    len1 = PyUnicode_GET_LENGTH(self);
     len2 = PyUnicode_GET_LENGTH(substring);
     if (kind1 < kind2 || len1 < len2) {
         out = PyList_New(1);
```