summaryrefslogtreecommitdiffstats
path: root/Objects/stringlib
diff options
context:
space:
mode:
authorgoldsteinn <35538541+goldsteinn@users.noreply.github.com>2022-05-24 01:45:31 (GMT)
committerGitHub <noreply@github.com>2022-05-24 01:45:31 (GMT)
commit7108bdf27c7a460cf83c4a01dea54ae4591d8aea (patch)
tree0ebf080594979a194459f6d5f4f30cb6fafffdac /Objects/stringlib
parentf7fabae75c7b8ecd0c5673b5d62a15db24a05953 (diff)
downloadcpython-7108bdf27c7a460cf83c4a01dea54ae4591d8aea.zip
cpython-7108bdf27c7a460cf83c4a01dea54ae4591d8aea.tar.gz
cpython-7108bdf27c7a460cf83c4a01dea54ae4591d8aea.tar.bz2
gh-93033: Use wmemchr in stringlib (GH-93034)
Generally comparable perf for the "good" case where memchr doesn't return any collisions (false matches on lower byte) but clearly faster with collisions.
Diffstat (limited to 'Objects/stringlib')
-rw-r--r--Objects/stringlib/asciilib.h1
-rw-r--r--Objects/stringlib/fastsearch.h34
-rw-r--r--Objects/stringlib/replace.h4
-rw-r--r--Objects/stringlib/stringdefs.h1
-rw-r--r--Objects/stringlib/ucs1lib.h1
-rw-r--r--Objects/stringlib/ucs2lib.h4
-rw-r--r--Objects/stringlib/ucs4lib.h4
-rw-r--r--Objects/stringlib/undef.h1
8 files changed, 36 insertions, 14 deletions
diff --git a/Objects/stringlib/asciilib.h b/Objects/stringlib/asciilib.h
index eebe888..b3016bf 100644
--- a/Objects/stringlib/asciilib.h
+++ b/Objects/stringlib/asciilib.h
@@ -21,6 +21,7 @@
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
#define STRINGLIB_MUTABLE 0
+#define STRINGLIB_FAST_MEMCHR memchr
#define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_TOASCII PyObject_ASCII
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h
index b91082b..a838c66 100644
--- a/Objects/stringlib/fastsearch.h
+++ b/Objects/stringlib/fastsearch.h
@@ -39,7 +39,7 @@
#define STRINGLIB_BLOOM(mask, ch) \
((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
-#if STRINGLIB_SIZEOF_CHAR == 1
+#ifdef STRINGLIB_FAST_MEMCHR
# define MEMCHR_CUT_OFF 15
#else
# define MEMCHR_CUT_OFF 40
@@ -53,8 +53,8 @@ STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
p = s;
e = s + n;
if (n > MEMCHR_CUT_OFF) {
-#if STRINGLIB_SIZEOF_CHAR == 1
- p = memchr(s, ch, n);
+#ifdef STRINGLIB_FAST_MEMCHR
+ p = STRINGLIB_FAST_MEMCHR(s, ch, n);
if (p != NULL)
return (p - s);
return -1;
@@ -102,16 +102,26 @@ STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
return -1;
}
+#undef MEMCHR_CUT_OFF
+
+#if STRINGLIB_SIZEOF_CHAR == 1
+# define MEMRCHR_CUT_OFF 15
+#else
+# define MEMRCHR_CUT_OFF 40
+#endif
+
+
Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
{
const STRINGLIB_CHAR *p;
#ifdef HAVE_MEMRCHR
- /* memrchr() is a GNU extension, available since glibc 2.1.91.
- it doesn't seem as optimized as memchr(), but is still quite
- faster than our hand-written loop below */
+ /* memrchr() is a GNU extension, available since glibc 2.1.91. it
+ doesn't seem as optimized as memchr(), but is still quite
+ faster than our hand-written loop below. There is no wmemrchr
+ for 4-byte chars. */
- if (n > MEMCHR_CUT_OFF) {
+ if (n > MEMRCHR_CUT_OFF) {
#if STRINGLIB_SIZEOF_CHAR == 1
p = memrchr(s, ch, n);
if (p != NULL)
@@ -139,11 +149,11 @@ STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
if (*p == ch)
return n;
/* False positive */
- if (n1 - n > MEMCHR_CUT_OFF)
+ if (n1 - n > MEMRCHR_CUT_OFF)
continue;
- if (n <= MEMCHR_CUT_OFF)
+ if (n <= MEMRCHR_CUT_OFF)
break;
- s1 = p - MEMCHR_CUT_OFF;
+ s1 = p - MEMRCHR_CUT_OFF;
while (p > s1) {
p--;
if (*p == ch)
@@ -151,7 +161,7 @@ STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
}
n = p - s;
}
- while (n > MEMCHR_CUT_OFF);
+ while (n > MEMRCHR_CUT_OFF);
}
#endif
}
@@ -165,7 +175,7 @@ STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
return -1;
}
-#undef MEMCHR_CUT_OFF
+#undef MEMRCHR_CUT_OFF
/* Change to a 1 to see logging comments walk through the algorithm. */
#if 0 && STRINGLIB_SIZEOF_CHAR == 1
diff --git a/Objects/stringlib/replace.h b/Objects/stringlib/replace.h
index ef318ed..123c9f8 100644
--- a/Objects/stringlib/replace.h
+++ b/Objects/stringlib/replace.h
@@ -29,9 +29,9 @@ STRINGLIB(replace_1char_inplace)(STRINGLIB_CHAR* s, STRINGLIB_CHAR* end,
if (!--attempts) {
/* if u1 was not found for attempts iterations,
use FASTSEARCH() or memchr() */
-#if STRINGLIB_SIZEOF_CHAR == 1
+#ifdef STRINGLIB_FAST_MEMCHR
s++;
- s = memchr(s, u1, end - s);
+ s = STRINGLIB_FAST_MEMCHR(s, u1, end - s);
if (s == NULL)
return;
#else
diff --git a/Objects/stringlib/stringdefs.h b/Objects/stringlib/stringdefs.h
index 88641b2..484b98b 100644
--- a/Objects/stringlib/stringdefs.h
+++ b/Objects/stringlib/stringdefs.h
@@ -24,4 +24,5 @@
#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
#define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_TOASCII PyObject_Repr
+#define STRINGLIB_FAST_MEMCHR memchr
#endif /* !STRINGLIB_STRINGDEFS_H */
diff --git a/Objects/stringlib/ucs1lib.h b/Objects/stringlib/ucs1lib.h
index 026ab11..1b9b65e 100644
--- a/Objects/stringlib/ucs1lib.h
+++ b/Objects/stringlib/ucs1lib.h
@@ -20,6 +20,7 @@
#define STRINGLIB_NEW _PyUnicode_FromUCS1
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
+#define STRINGLIB_FAST_MEMCHR memchr
#define STRINGLIB_MUTABLE 0
#define STRINGLIB_TOSTR PyObject_Str
diff --git a/Objects/stringlib/ucs2lib.h b/Objects/stringlib/ucs2lib.h
index 75f11bc..4b49bbb 100644
--- a/Objects/stringlib/ucs2lib.h
+++ b/Objects/stringlib/ucs2lib.h
@@ -21,6 +21,10 @@
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
#define STRINGLIB_MUTABLE 0
+#if SIZEOF_WCHAR_T == 2
+#define STRINGLIB_FAST_MEMCHR(s, c, n) \
+ (Py_UCS2 *)wmemchr((const wchar_t *)(s), c, n)
+#endif
#define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_TOASCII PyObject_ASCII
diff --git a/Objects/stringlib/ucs4lib.h b/Objects/stringlib/ucs4lib.h
index 57344f2..def4ca5 100644
--- a/Objects/stringlib/ucs4lib.h
+++ b/Objects/stringlib/ucs4lib.h
@@ -21,6 +21,10 @@
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
#define STRINGLIB_MUTABLE 0
+#if SIZEOF_WCHAR_T == 4
+#define STRINGLIB_FAST_MEMCHR(s, c, n) \
+ (Py_UCS4 *)wmemchr((const wchar_t *)(s), c, n)
+#endif
#define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_TOASCII PyObject_ASCII
diff --git a/Objects/stringlib/undef.h b/Objects/stringlib/undef.h
index bf32298..cc873a2 100644
--- a/Objects/stringlib/undef.h
+++ b/Objects/stringlib/undef.h
@@ -8,3 +8,4 @@
#undef STRINGLIB_NEW
#undef STRINGLIB_IS_UNICODE
#undef STRINGLIB_MUTABLE
+#undef STRINGLIB_FAST_MEMCHR