From fac395681fb758401d17974f258b17d285336c57 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 21 Mar 2016 10:38:58 +0100 Subject: Optimize bytes.replace(b'', b'.') Issue #26574: Optimize bytes.replace(b'', b'.') and bytearray.replace(b'', b'.'): up to 80% faster. Patch written by Josh Snider. --- Doc/whatsnew/3.6.rst | 3 +++ Misc/ACKS | 1 + Misc/NEWS | 3 +++ Objects/bytearrayobject.c | 28 +++++++++++++++++++--------- Objects/bytesobject.c | 28 +++++++++++++++++++--------- 5 files changed, 45 insertions(+), 18 deletions(-) diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst index 9046058..986c145 100644 --- a/Doc/whatsnew/3.6.rst +++ b/Doc/whatsnew/3.6.rst @@ -339,6 +339,9 @@ Optimizations * Optimize :meth:`bytes.fromhex` and :meth:`bytearray.fromhex`: they are now between 2x and 3.5x faster. (Contributed by Victor Stinner in :issue:`25401`). +* Optimize ``bytes.replace(b'', b'.')`` and ``bytearray.replace(b'', b'.')``: + up to 80% faster. (Contributed by Josh Snider in :issue:`26574`). + Build and C API Changes ======================= diff --git a/Misc/ACKS b/Misc/ACKS index e67f6d1..52eae69 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1376,6 +1376,7 @@ Mark Smith Roy Smith Ryan Smith-Roberts Rafal Smotrzyk +Josh Snider Eric Snow Dirk Soede Nir Soffer diff --git a/Misc/NEWS b/Misc/NEWS index 6f5c7ab..2fa82f3 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,9 @@ Release date: tba Core and Builtins ----------------- +- Issue #26574: Optimize ``bytes.replace(b'', b'.')`` and + ``bytearray.replace(b'', b'.')``. Patch written by Josh Snider. + - Issue #26581: If coding cookie is specified multiple times on a line in Python source code file, only the first one is taken to account. 
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 9e8ba39..209a641 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -1705,17 +1705,27 @@ replace_interleave(PyByteArrayObject *self, self_s = PyByteArray_AS_STRING(self); result_s = PyByteArray_AS_STRING(result); - /* TODO: special case single character, which doesn't need memcpy */ - - /* Lay the first one down (guaranteed this will occur) */ - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - count -= 1; - - for (i=0; i<count; i++) { - *result_s++ = *self_s++; + if (to_len > 1) { + /* Lay the first one down (guaranteed this will occur) */ Py_MEMCPY(result_s, to_s, to_len); result_s += to_len; + count -= 1; + + for (i = 0; i < count; i++) { + *result_s++ = *self_s++; + Py_MEMCPY(result_s, to_s, to_len); + result_s += to_len; + } + } + else { + result_s[0] = to_s[0]; + result_s += to_len; + count -= 1; + for (i = 0; i < count; i++) { + *result_s++ = *self_s++; + result_s[0] = to_s[0]; + result_s += to_len; + } } /* Copy the rest of the original string */ diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 602dea6..5b9006e 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2464,17 +2464,27 @@ replace_interleave(PyBytesObject *self, self_s = PyBytes_AS_STRING(self); result_s = PyBytes_AS_STRING(result); - /* TODO: special case single character, which doesn't need memcpy */ - - /* Lay the first one down (guaranteed this will occur) */ - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - count -= 1; - - for (i=0; i<count; i++) { - *result_s++ = *self_s++; + if (to_len > 1) { + /* Lay the first one down (guaranteed this will occur) */ Py_MEMCPY(result_s, to_s, to_len); result_s += to_len; + count -= 1; + + for (i = 0; i < count; i++) { + *result_s++ = *self_s++; + Py_MEMCPY(result_s, to_s, to_len); + result_s += to_len; + } + } + else { + result_s[0] = to_s[0]; + result_s += to_len; + count -= 1; + for (i = 0; i < count; i++) { + *result_s++ = *self_s++; + result_s[0] = to_s[0]; + result_s += to_len; + } } /* Copy the rest of the original 
string */ -- cgit v0.12