From a0c603cb9d4dbb9909979313a88bcd1f5fde4f62 Mon Sep 17 00:00:00 2001 From: Ma Lin Date: Sun, 18 Oct 2020 22:48:38 +0800 Subject: bpo-38252: Use 8-byte step to detect ASCII sequence in 64bit Windows build (GH-16334) --- .../2020-10-18-18-43-45.bpo-38252.7Nlepg.rst | 1 + Objects/bytes_methods.c | 20 ++++++------- Objects/stringlib/codecs.h | 30 +++++++++---------- Objects/stringlib/find_max_char.h | 20 ++++++------- Objects/unicodeobject.c | 34 +++++++++++----------- 5 files changed, 53 insertions(+), 52 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2020-10-18-18-43-45.bpo-38252.7Nlepg.rst diff --git a/Misc/NEWS.d/next/Windows/2020-10-18-18-43-45.bpo-38252.7Nlepg.rst b/Misc/NEWS.d/next/Windows/2020-10-18-18-43-45.bpo-38252.7Nlepg.rst new file mode 100644 index 0000000..c103e6c --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2020-10-18-18-43-45.bpo-38252.7Nlepg.rst @@ -0,0 +1 @@ +Use 8-byte step to detect ASCII sequence in 64-bit Windows build. diff --git a/Objects/bytes_methods.c b/Objects/bytes_methods.c index 72daa1f..1512086 100644 --- a/Objects/bytes_methods.c +++ b/Objects/bytes_methods.c @@ -100,14 +100,14 @@ Return True if B is empty or all characters in B are ASCII,\n\ False otherwise."); // Optimization is copied from ascii_decode in unicodeobject.c -/* Mask to quickly check whether a C 'long' contains a +/* Mask to quickly check whether a C 'size_t' contains a non-ASCII, UTF8-encoded char. */ -#if (SIZEOF_LONG == 8) -# define ASCII_CHAR_MASK 0x8080808080808080UL -#elif (SIZEOF_LONG == 4) -# define ASCII_CHAR_MASK 0x80808080UL +#if (SIZEOF_SIZE_T == 8) +# define ASCII_CHAR_MASK 0x8080808080808080ULL +#elif (SIZEOF_SIZE_T == 4) +# define ASCII_CHAR_MASK 0x80808080U #else -# error C 'long' size should be either 4 or 8! +# error C 'size_t' size should be either 4 or 8! #endif PyObject* @@ -115,20 +115,20 @@ _Py_bytes_isascii(const char *cptr, Py_ssize_t len) { const char *p = cptr; const char *end = p + len; - const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG); + const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T); while (p < end) { /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h for an explanation. */ - if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) { + if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) { /* Help allocation */ const char *_p = p; while (_p < aligned_end) { - unsigned long value = *(const unsigned long *) _p; + size_t value = *(const size_t *) _p; if (value & ASCII_CHAR_MASK) { Py_RETURN_FALSE; } - _p += SIZEOF_LONG; + _p += SIZEOF_SIZE_T; } p = _p; if (_p == end) diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h index 197605b..b6ca404 100644 --- a/Objects/stringlib/codecs.h +++ b/Objects/stringlib/codecs.h @@ -6,14 +6,14 @@ #include "pycore_bitutils.h" // _Py_bswap32() -/* Mask to quickly check whether a C 'long' contains a +/* Mask to quickly check whether a C 'size_t' contains a non-ASCII, UTF8-encoded char. */ -#if (SIZEOF_LONG == 8) -# define ASCII_CHAR_MASK 0x8080808080808080UL -#elif (SIZEOF_LONG == 4) -# define ASCII_CHAR_MASK 0x80808080UL +#if (SIZEOF_SIZE_T == 8) +# define ASCII_CHAR_MASK 0x8080808080808080ULL +#elif (SIZEOF_SIZE_T == 4) +# define ASCII_CHAR_MASK 0x80808080U #else -# error C 'long' size should be either 4 or 8! +# error C 'size_t' size should be either 4 or 8! #endif /* 10xxxxxx */ @@ -26,7 +26,7 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end, { Py_UCS4 ch; const char *s = *inptr; - const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG); + const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T); STRINGLIB_CHAR *p = dest + *outpos; while (s < end) { @@ -36,19 +36,19 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end, /* Fast path for runs of ASCII characters. Given that common UTF-8 input will consist of an overwhelming majority of ASCII characters, we try to optimize for this case by checking - as many characters as a C 'long' can contain. + as many characters as a C 'size_t' can contain. First, check if we can do an aligned read, as most CPUs have a penalty for unaligned reads. */ - if (_Py_IS_ALIGNED(s, SIZEOF_LONG)) { + if (_Py_IS_ALIGNED(s, SIZEOF_SIZE_T)) { /* Help register allocation */ const char *_s = s; STRINGLIB_CHAR *_p = p; while (_s < aligned_end) { - /* Read a whole long at a time (either 4 or 8 bytes), + /* Read a whole size_t at a time (either 4 or 8 bytes), and do a fast unrolled copy if it only contains ASCII characters. */ - unsigned long value = *(const unsigned long *) _s; + size_t value = *(const size_t *) _s; if (value & ASCII_CHAR_MASK) break; #if PY_LITTLE_ENDIAN @@ -56,14 +56,14 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end, _p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu); _p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu); _p[3] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu); -# if SIZEOF_LONG == 8 +# if SIZEOF_SIZE_T == 8 _p[4] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu); _p[5] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu); _p[6] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu); _p[7] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu); # endif #else -# if SIZEOF_LONG == 8 +# if SIZEOF_SIZE_T == 8 _p[0] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu); _p[1] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu); _p[2] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu); @@ -79,8 +79,8 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end, _p[3] = (STRINGLIB_CHAR)(value & 0xFFu); # endif #endif - _s += SIZEOF_LONG; - _p += SIZEOF_LONG; + _s += SIZEOF_SIZE_T; + _p += SIZEOF_SIZE_T; } s = _s; p = _p; diff --git a/Objects/stringlib/find_max_char.h b/Objects/stringlib/find_max_char.h index f4e0a77..3319a46 100644 --- a/Objects/stringlib/find_max_char.h +++ b/Objects/stringlib/find_max_char.h @@ -4,14 +4,14 @@ # error "find_max_char.h is specific to Unicode" #endif -/* Mask to quickly check whether a C 'long' contains a +/* Mask to quickly check whether a C 'size_t' contains a non-ASCII, UTF8-encoded char. */ -#if (SIZEOF_LONG == 8) -# define UCS1_ASCII_CHAR_MASK 0x8080808080808080UL -#elif (SIZEOF_LONG == 4) -# define UCS1_ASCII_CHAR_MASK 0x80808080UL +#if (SIZEOF_SIZE_T == 8) +# define UCS1_ASCII_CHAR_MASK 0x8080808080808080ULL +#elif (SIZEOF_SIZE_T == 4) +# define UCS1_ASCII_CHAR_MASK 0x80808080U #else -# error C 'long' size should be either 4 or 8! +# error C 'size_t' size should be either 4 or 8! #endif #if STRINGLIB_SIZEOF_CHAR == 1 @@ -21,17 +21,17 @@ STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end) { const unsigned char *p = (const unsigned char *) begin; const unsigned char *aligned_end = - (const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG); + (const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T); while (p < end) { - if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) { + if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) { /* Help register allocation */ const unsigned char *_p = p; while (_p < aligned_end) { - unsigned long value = *(const unsigned long *) _p; + size_t value = *(const size_t *) _p; if (value & UCS1_ASCII_CHAR_MASK) return 255; - _p += SIZEOF_LONG; + _p += SIZEOF_SIZE_T; } p = _p; if (p == end) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index c4e73eb..f963deb 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5025,21 +5025,21 @@ PyUnicode_DecodeUTF8(const char *s, #include "stringlib/codecs.h" #include "stringlib/undef.h" -/* Mask to quickly check whether a C 'long' contains a +/* Mask to quickly check whether a C 'size_t' contains a non-ASCII, UTF8-encoded char. */ -#if (SIZEOF_LONG == 8) -# define ASCII_CHAR_MASK 0x8080808080808080UL -#elif (SIZEOF_LONG == 4) -# define ASCII_CHAR_MASK 0x80808080UL +#if (SIZEOF_SIZE_T == 8) +# define ASCII_CHAR_MASK 0x8080808080808080ULL +#elif (SIZEOF_SIZE_T == 4) +# define ASCII_CHAR_MASK 0x80808080U #else -# error C 'long' size should be either 4 or 8! +# error C 'size_t' size should be either 4 or 8! #endif static Py_ssize_t ascii_decode(const char *start, const char *end, Py_UCS1 *dest) { const char *p = start; - const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG); + const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T); /* * Issue #17237: m68k is a bit different from most architectures in @@ -5049,21 +5049,21 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest) * version" will even speed up m68k. */ #if !defined(__m68k__) -#if SIZEOF_LONG <= SIZEOF_VOID_P - assert(_Py_IS_ALIGNED(dest, SIZEOF_LONG)); - if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) { +#if SIZEOF_SIZE_T <= SIZEOF_VOID_P + assert(_Py_IS_ALIGNED(dest, SIZEOF_SIZE_T)); + if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) { /* Fast path, see in STRINGLIB(utf8_decode) for an explanation. */ /* Help allocation */ const char *_p = p; Py_UCS1 * q = dest; while (_p < aligned_end) { - unsigned long value = *(const unsigned long *) _p; + size_t value = *(const size_t *) _p; if (value & ASCII_CHAR_MASK) break; - *((unsigned long *)q) = value; - _p += SIZEOF_LONG; - q += SIZEOF_LONG; + *((size_t *)q) = value; + _p += SIZEOF_SIZE_T; + q += SIZEOF_SIZE_T; } p = _p; while (p < end) { @@ -5078,14 +5078,14 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest) while (p < end) { /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h for an explanation. */ - if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) { + if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) { /* Help allocation */ const char *_p = p; while (_p < aligned_end) { - unsigned long value = *(const unsigned long *) _p; + size_t value = *(const size_t *) _p; if (value & ASCII_CHAR_MASK) break; - _p += SIZEOF_LONG; + _p += SIZEOF_SIZE_T; } p = _p; if (_p == end) -- cgit v0.12