From 2c3b2302adb1bb00b6050afc30eacbc023379b93 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 11 Oct 2011 20:29:21 +0200 Subject: Issue #13134: optimize finding single-character strings using memchr --- Lib/test/test_unicode.py | 17 ++++++++++ Objects/stringlib/fastsearch.h | 73 ++++++++++++++++++++++++++++++++++++++++++ configure.in | 3 +- pyconfig.h.in | 3 ++ 4 files changed, 95 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 9a5862d..f79b2f0 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -171,6 +171,15 @@ class UnicodeTest(string_tests.CommonTest, def test_find(self): string_tests.CommonTest.test_find(self) + # test implementation details of the memchr fast path + self.checkequal(100, 'a' * 100 + '\u0102', 'find', '\u0102') + self.checkequal(-1, 'a' * 100 + '\u0102', 'find', '\u0201') + self.checkequal(-1, 'a' * 100 + '\u0102', 'find', '\u0120') + self.checkequal(-1, 'a' * 100 + '\u0102', 'find', '\u0220') + self.checkequal(100, 'a' * 100 + '\U00100304', 'find', '\U00100304') + self.checkequal(-1, 'a' * 100 + '\U00100304', 'find', '\U00100204') + self.checkequal(-1, 'a' * 100 + '\U00100304', 'find', '\U00102004') + # check mixed argument types self.checkequalnofix(0, 'abcdefghiabc', 'find', 'abc') self.checkequalnofix(9, 'abcdefghiabc', 'find', 'abc', 1) self.checkequalnofix(-1, 'abcdefghiabc', 'find', 'def', 4) @@ -180,6 +189,14 @@ class UnicodeTest(string_tests.CommonTest, def test_rfind(self): string_tests.CommonTest.test_rfind(self) + # test implementation details of the memrchr fast path + self.checkequal(0, '\u0102' + 'a' * 100 , 'rfind', '\u0102') + self.checkequal(-1, '\u0102' + 'a' * 100 , 'rfind', '\u0201') + self.checkequal(-1, '\u0102' + 'a' * 100 , 'rfind', '\u0120') + self.checkequal(-1, '\u0102' + 'a' * 100 , 'rfind', '\u0220') + self.checkequal(0, '\U00100304' + 'a' * 100, 'rfind', '\U00100304') + self.checkequal(-1, '\U00100304' + 'a' * 100, 'rfind', 
'\U00100204') + self.checkequal(-1, '\U00100304' + 'a' * 100, 'rfind', '\U00102004') # check mixed argument types self.checkequalnofix(9, 'abcdefghiabc', 'rfind', 'abc') self.checkequalnofix(12, 'abcdefghiabc', 'rfind', '') diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h index d35cba3..33ab6ff 100644 --- a/Objects/stringlib/fastsearch.h +++ b/Objects/stringlib/fastsearch.h @@ -32,6 +32,60 @@ #define STRINGLIB_BLOOM(mask, ch) \ ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) +/* Find the single character ch in s[0..n-1] by scanning the raw bytes for `needle` with memchr() (FAST_SEARCH) or memrchr() (FAST_RSEARCH) and confirming each aligned candidate against ch. Returns the index relative to s, or -1 when ch is absent; maxcount is not used. */ +Py_LOCAL_INLINE(Py_ssize_t) +STRINGLIB(fastsearch_memchr_1char)(const STRINGLIB_CHAR* s, Py_ssize_t n, + STRINGLIB_CHAR ch, unsigned char needle, + Py_ssize_t maxcount, int mode) +{ + void *candidate; + const STRINGLIB_CHAR *found; + +#define DO_MEMCHR(memchr, s, needle, nchars) do { \ + candidate = memchr((const void *) (s), (needle), (nchars) * sizeof(STRINGLIB_CHAR)); \ + found = (const STRINGLIB_CHAR *) \ + ((Py_ssize_t) candidate & (~ ((Py_ssize_t) sizeof(STRINGLIB_CHAR) - 1))); \ + } while (0) + + if (mode == FAST_SEARCH) { + const STRINGLIB_CHAR *_s = s; + const STRINGLIB_CHAR *e = s + n; + while (_s < e) { + DO_MEMCHR(memchr, _s, needle, e - _s); + if (found == NULL) + return -1; + if (sizeof(STRINGLIB_CHAR) == 1 || *found == ch) + return (found - s); /* index is relative to s; _s may have advanced past false positives */ + /* False positive: the needle byte matched but the whole character did not; resume just after it */ + _s = found + 1; + } + return -1; + } +#ifdef HAVE_MEMRCHR + /* memrchr() is a GNU extension, available since glibc 2.1.91.
+ It doesn't seem as optimized as memchr(), but is still quite a bit + faster than our hand-written loop in FASTSEARCH below */ + else if (mode == FAST_RSEARCH) { + while (n > 0) { + DO_MEMCHR(memrchr, s, needle, n); + if (found == NULL) + return -1; + n = found - s; + if (sizeof(STRINGLIB_CHAR) == 1 || *found == ch) + return n; + /* False positive: n now stops before found, so the next memrchr() looks further left */ + } + return -1; + } +#endif + else { + assert(0); /* Should never get here */ + return 0; + } + +#undef DO_MEMCHR +} + Py_LOCAL_INLINE(Py_ssize_t) FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, const STRINGLIB_CHAR* p, Py_ssize_t m, @@ -51,6 +105,25 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, if (m <= 0) return -1; /* use special case for 1-character strings */ + if (n > 10 && (mode == FAST_SEARCH +#ifdef HAVE_MEMRCHR + || mode == FAST_RSEARCH +#endif + )) { + /* use memchr if we can choose a needle without too many likely + false positives */ + unsigned char needle; + int use_needle = 1; + needle = p[0] & 0xff; + if (needle == 0 && sizeof(STRINGLIB_CHAR) > 1) { + needle = (p[0] >> 8) & 0xff; + if (needle >= 32) + use_needle = 0; + } + if (use_needle) + return STRINGLIB(fastsearch_memchr_1char) + (s, n, p[0], needle, maxcount, mode); + } if (mode == FAST_COUNT) { for (i = 0; i < n; i++) if (s[i] == p[0]) { diff --git a/configure.in b/configure.in index e3d026a..340fe31 100644 --- a/configure.in +++ b/configure.in @@ -2566,7 +2566,8 @@ AC_CHECK_FUNCS(alarm accept4 setitimer getitimer bind_textdomain_codeset chown \ getgrouplist getgroups getlogin getloadavg getpeername getpgid getpid \ getpriority getresuid getresgid getpwent getspnam getspent getsid getwd \ if_nameindex \ - initgroups kill killpg lchmod lchown lockf linkat lstat lutimes mbrtowc mkdirat mkfifo \ + initgroups kill killpg lchmod lchown lockf linkat lstat lutimes memrchr \ + mbrtowc mkdirat mkfifo \ mkfifoat mknod mknodat mktime mremap nice openat pathconf pause pipe2 plock poll \ posix_fallocate posix_fadvise pread \ pthread_init pthread_kill
putenv pwrite readlink readlinkat readv realpath renameat \ diff --git a/pyconfig.h.in b/pyconfig.h.in index 3a38398..dbaa5c4 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -515,6 +515,9 @@ /* Define to 1 if you have the <memory.h> header file. */ #undef HAVE_MEMORY_H +/* Define to 1 if you have the `memrchr' function. */ +#undef HAVE_MEMRCHR + /* Define to 1 if you have the `mkdirat' function. */ #undef HAVE_MKDIRAT -- cgit v0.12