diff options
author | Ma Lin <animalize@users.noreply.github.com> | 2020-10-18 14:48:38 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-18 14:48:38 (GMT) |
commit | a0c603cb9d4dbb9909979313a88bcd1f5fde4f62 (patch) | |
tree | 4e25844a35bc4dd3436cec0087450e73720bac42 /Objects | |
parent | 3635388f52b42e5280229104747962117104c453 (diff) | |
download | cpython-a0c603cb9d4dbb9909979313a88bcd1f5fde4f62.zip cpython-a0c603cb9d4dbb9909979313a88bcd1f5fde4f62.tar.gz cpython-a0c603cb9d4dbb9909979313a88bcd1f5fde4f62.tar.bz2 |
bpo-38252: Use 8-byte step to detect ASCII sequence in 64bit Windows build (GH-16334)
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/bytes_methods.c | 20 | ||||
-rw-r--r-- | Objects/stringlib/codecs.h | 30 | ||||
-rw-r--r-- | Objects/stringlib/find_max_char.h | 20 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 34 |
4 files changed, 52 insertions, 52 deletions
```diff
diff --git a/Objects/bytes_methods.c b/Objects/bytes_methods.c
index 72daa1f..1512086 100644
--- a/Objects/bytes_methods.c
+++ b/Objects/bytes_methods.c
@@ -100,14 +100,14 @@ Return True if B is empty or all characters in B are ASCII,\n\
 False otherwise.");
 
 // Optimization is copied from ascii_decode in unicodeobject.c
-/* Mask to quickly check whether a C 'long' contains a
+/* Mask to quickly check whether a C 'size_t' contains a
    non-ASCII, UTF8-encoded char. */
-#if (SIZEOF_LONG == 8)
-# define ASCII_CHAR_MASK 0x8080808080808080UL
-#elif (SIZEOF_LONG == 4)
-# define ASCII_CHAR_MASK 0x80808080UL
+#if (SIZEOF_SIZE_T == 8)
+# define ASCII_CHAR_MASK 0x8080808080808080ULL
+#elif (SIZEOF_SIZE_T == 4)
+# define ASCII_CHAR_MASK 0x80808080U
 #else
-# error C 'long' size should be either 4 or 8!
+# error C 'size_t' size should be either 4 or 8!
 #endif
 
 PyObject*
@@ -115,20 +115,20 @@ _Py_bytes_isascii(const char *cptr, Py_ssize_t len)
 {
     const char *p = cptr;
     const char *end = p + len;
-    const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
+    const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
 
     while (p < end) {
         /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
            for an explanation. */
-        if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
+        if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
             /* Help allocation */
             const char *_p = p;
             while (_p < aligned_end) {
-                unsigned long value = *(const unsigned long *) _p;
+                size_t value = *(const size_t *) _p;
                 if (value & ASCII_CHAR_MASK) {
                     Py_RETURN_FALSE;
                 }
-                _p += SIZEOF_LONG;
+                _p += SIZEOF_SIZE_T;
             }
             p = _p;
             if (_p == end)
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h
index 197605b..b6ca404 100644
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -6,14 +6,14 @@
 
 #include "pycore_bitutils.h" // _Py_bswap32()
 
-/* Mask to quickly check whether a C 'long' contains a
+/* Mask to quickly check whether a C 'size_t' contains a
    non-ASCII, UTF8-encoded char. */
-#if (SIZEOF_LONG == 8)
-# define ASCII_CHAR_MASK 0x8080808080808080UL
-#elif (SIZEOF_LONG == 4)
-# define ASCII_CHAR_MASK 0x80808080UL
+#if (SIZEOF_SIZE_T == 8)
+# define ASCII_CHAR_MASK 0x8080808080808080ULL
+#elif (SIZEOF_SIZE_T == 4)
+# define ASCII_CHAR_MASK 0x80808080U
 #else
-# error C 'long' size should be either 4 or 8!
+# error C 'size_t' size should be either 4 or 8!
 #endif
 
 /* 10xxxxxx */
@@ -26,7 +26,7 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
 {
     Py_UCS4 ch;
     const char *s = *inptr;
-    const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
+    const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
     STRINGLIB_CHAR *p = dest + *outpos;
 
     while (s < end) {
@@ -36,19 +36,19 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
             /* Fast path for runs of ASCII characters. Given that common UTF-8
                input will consist of an overwhelming majority of ASCII
                characters, we try to optimize for this case by checking
-               as many characters as a C 'long' can contain.
+               as many characters as a C 'size_t' can contain.
                First, check if we can do an aligned read, as most CPUs have
                a penalty for unaligned reads.
             */
-            if (_Py_IS_ALIGNED(s, SIZEOF_LONG)) {
+            if (_Py_IS_ALIGNED(s, SIZEOF_SIZE_T)) {
                 /* Help register allocation */
                 const char *_s = s;
                 STRINGLIB_CHAR *_p = p;
                 while (_s < aligned_end) {
-                    /* Read a whole long at a time (either 4 or 8 bytes),
+                    /* Read a whole size_t at a time (either 4 or 8 bytes),
                        and do a fast unrolled copy if it only contains ASCII
                        characters. */
-                    unsigned long value = *(const unsigned long *) _s;
+                    size_t value = *(const size_t *) _s;
                     if (value & ASCII_CHAR_MASK)
                         break;
 #if PY_LITTLE_ENDIAN
@@ -56,14 +56,14 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
                     _p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);
                     _p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);
                     _p[3] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);
-# if SIZEOF_LONG == 8
+# if SIZEOF_SIZE_T == 8
                     _p[4] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);
                     _p[5] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);
                     _p[6] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);
                     _p[7] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);
 # endif
 #else
-# if SIZEOF_LONG == 8
+# if SIZEOF_SIZE_T == 8
                     _p[0] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);
                     _p[1] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);
                     _p[2] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);
@@ -79,8 +79,8 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
                     _p[3] = (STRINGLIB_CHAR)(value & 0xFFu);
 # endif
 #endif
-                    _s += SIZEOF_LONG;
-                    _p += SIZEOF_LONG;
+                    _s += SIZEOF_SIZE_T;
+                    _p += SIZEOF_SIZE_T;
                 }
                 s = _s;
                 p = _p;
diff --git a/Objects/stringlib/find_max_char.h b/Objects/stringlib/find_max_char.h
index f4e0a77..3319a46 100644
--- a/Objects/stringlib/find_max_char.h
+++ b/Objects/stringlib/find_max_char.h
@@ -4,14 +4,14 @@
 # error "find_max_char.h is specific to Unicode"
 #endif
 
-/* Mask to quickly check whether a C 'long' contains a
+/* Mask to quickly check whether a C 'size_t' contains a
    non-ASCII, UTF8-encoded char. */
-#if (SIZEOF_LONG == 8)
-# define UCS1_ASCII_CHAR_MASK 0x8080808080808080UL
-#elif (SIZEOF_LONG == 4)
-# define UCS1_ASCII_CHAR_MASK 0x80808080UL
+#if (SIZEOF_SIZE_T == 8)
+# define UCS1_ASCII_CHAR_MASK 0x8080808080808080ULL
+#elif (SIZEOF_SIZE_T == 4)
+# define UCS1_ASCII_CHAR_MASK 0x80808080U
 #else
-# error C 'long' size should be either 4 or 8!
+# error C 'size_t' size should be either 4 or 8!
 #endif
 
 #if STRINGLIB_SIZEOF_CHAR == 1
@@ -21,17 +21,17 @@ STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
 {
     const unsigned char *p = (const unsigned char *) begin;
     const unsigned char *aligned_end =
-            (const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
+            (const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
 
     while (p < end) {
-        if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
+        if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
             /* Help register allocation */
             const unsigned char *_p = p;
             while (_p < aligned_end) {
-                unsigned long value = *(const unsigned long *) _p;
+                size_t value = *(const size_t *) _p;
                 if (value & UCS1_ASCII_CHAR_MASK)
                     return 255;
-                _p += SIZEOF_LONG;
+                _p += SIZEOF_SIZE_T;
             }
             p = _p;
             if (p == end)
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index c4e73eb..f963deb 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5025,21 +5025,21 @@ PyUnicode_DecodeUTF8(const char *s,
 #include "stringlib/codecs.h"
 #include "stringlib/undef.h"
 
-/* Mask to quickly check whether a C 'long' contains a
+/* Mask to quickly check whether a C 'size_t' contains a
    non-ASCII, UTF8-encoded char. */
-#if (SIZEOF_LONG == 8)
-# define ASCII_CHAR_MASK 0x8080808080808080UL
-#elif (SIZEOF_LONG == 4)
-# define ASCII_CHAR_MASK 0x80808080UL
+#if (SIZEOF_SIZE_T == 8)
+# define ASCII_CHAR_MASK 0x8080808080808080ULL
+#elif (SIZEOF_SIZE_T == 4)
+# define ASCII_CHAR_MASK 0x80808080U
 #else
-# error C 'long' size should be either 4 or 8!
+# error C 'size_t' size should be either 4 or 8!
 #endif
 
 static Py_ssize_t
 ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
 {
     const char *p = start;
-    const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
+    const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
 
     /*
      * Issue #17237: m68k is a bit different from most architectures in
@@ -5049,21 +5049,21 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
      * version" will even speed up m68k.
      */
 #if !defined(__m68k__)
-#if SIZEOF_LONG <= SIZEOF_VOID_P
-    assert(_Py_IS_ALIGNED(dest, SIZEOF_LONG));
-    if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
+#if SIZEOF_SIZE_T <= SIZEOF_VOID_P
+    assert(_Py_IS_ALIGNED(dest, SIZEOF_SIZE_T));
+    if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
         /* Fast path, see in STRINGLIB(utf8_decode) for
            an explanation. */
         /* Help allocation */
         const char *_p = p;
         Py_UCS1 * q = dest;
         while (_p < aligned_end) {
-            unsigned long value = *(const unsigned long *) _p;
+            size_t value = *(const size_t *) _p;
             if (value & ASCII_CHAR_MASK)
                 break;
-            *((unsigned long *)q) = value;
-            _p += SIZEOF_LONG;
-            q += SIZEOF_LONG;
+            *((size_t *)q) = value;
+            _p += SIZEOF_SIZE_T;
+            q += SIZEOF_SIZE_T;
         }
         p = _p;
         while (p < end) {
@@ -5078,14 +5078,14 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
     while (p < end) {
         /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
            for an explanation. */
-        if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
+        if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
             /* Help allocation */
             const char *_p = p;
             while (_p < aligned_end) {
-                unsigned long value = *(const unsigned long *) _p;
+                size_t value = *(const size_t *) _p;
                 if (value & ASCII_CHAR_MASK)
                     break;
-                _p += SIZEOF_LONG;
+                _p += SIZEOF_SIZE_T;
             }
             p = _p;
             if (_p == end)
```