diff options
author | Ruben Vorderman <r.h.p.vorderman@lumc.nl> | 2024-06-13 14:28:59 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-13 14:28:59 (GMT) |
commit | 2078eb45ca0db495972a20fcaf96df8fcf48451d (patch) | |
tree | 74b08778b5908f61c203d3055aa724b20001105b | |
parent | 6ae254aaa0a5a3985a52d1ab387a2b68c001bd96 (diff) | |
download | cpython-2078eb45ca0db495972a20fcaf96df8fcf48451d.zip cpython-2078eb45ca0db495972a20fcaf96df8fcf48451d.tar.gz cpython-2078eb45ca0db495972a20fcaf96df8fcf48451d.tar.bz2 |
gh-120397: Optimize str.count() for single characters (#120398)
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2024-06-12-13-47-25.gh-issue-120397.n-I_cc.rst | 2 | ||||
-rw-r--r-- | Objects/stringlib/fastsearch.h | 19 |
2 files changed, 21 insertions, 0 deletions
diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-12-13-47-25.gh-issue-120397.n-I_cc.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-12-13-47-25.gh-issue-120397.n-I_cc.rst new file mode 100644 index 0000000..05c55e8 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-12-13-47-25.gh-issue-120397.n-I_cc.rst @@ -0,0 +1,2 @@ +Improve the throughput by up to two times for the :meth:`str.count`, :meth:`bytes.count` and :meth:`bytearray.count` +methods for counting single characters. diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h index 309ed15..05e700b 100644 --- a/Objects/stringlib/fastsearch.h +++ b/Objects/stringlib/fastsearch.h @@ -753,6 +753,22 @@ STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n, } +static inline Py_ssize_t +STRINGLIB(count_char_no_maxcount)(const STRINGLIB_CHAR *s, Py_ssize_t n, + const STRINGLIB_CHAR p0) +/* A specialized function of count_char that does not cut off at a maximum. + As a result, the compiler is able to vectorize the loop. */ +{ + Py_ssize_t count = 0; + for (Py_ssize_t i = 0; i < n; i++) { + if (s[i] == p0) { + count++; + } + } + return count; +} + + Py_LOCAL_INLINE(Py_ssize_t) FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, const STRINGLIB_CHAR* p, Py_ssize_t m, @@ -773,6 +789,9 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, else if (mode == FAST_RSEARCH) return STRINGLIB(rfind_char)(s, n, p[0]); else { + if (maxcount == PY_SSIZE_T_MAX) { + return STRINGLIB(count_char_no_maxcount)(s, n, p[0]); + } return STRINGLIB(count_char)(s, n, p[0], maxcount); } } |