summary | refs | log | tree | commit | diff | stats
path: root/Objects
diff options
context:
space:
mode:
author: Ma Lin <animalize@users.noreply.github.com> 2020-10-18 14:48:38 (GMT)
committer: GitHub <noreply@github.com> 2020-10-18 14:48:38 (GMT)
commit: a0c603cb9d4dbb9909979313a88bcd1f5fde4f62 (patch)
tree: 4e25844a35bc4dd3436cec0087450e73720bac42 /Objects
parent: 3635388f52b42e5280229104747962117104c453 (diff)
download: cpython-a0c603cb9d4dbb9909979313a88bcd1f5fde4f62.zip
cpython-a0c603cb9d4dbb9909979313a88bcd1f5fde4f62.tar.gz
cpython-a0c603cb9d4dbb9909979313a88bcd1f5fde4f62.tar.bz2
bpo-38252: Use 8-byte step to detect ASCII sequence in 64bit Windows build (GH-16334)
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/bytes_methods.c 20
-rw-r--r-- Objects/stringlib/codecs.h 30
-rw-r--r-- Objects/stringlib/find_max_char.h 20
-rw-r--r-- Objects/unicodeobject.c 34
4 files changed, 52 insertions, 52 deletions
diff --git a/Objects/bytes_methods.c b/Objects/bytes_methods.c
index 72daa1f..1512086 100644
--- a/Objects/bytes_methods.c
+++ b/Objects/bytes_methods.c
@@ -100,14 +100,14 @@ Return True if B is empty or all characters in B are ASCII,\n\
False otherwise.");
// Optimization is copied from ascii_decode in unicodeobject.c
-/* Mask to quickly check whether a C 'long' contains a
+/* Mask to quickly check whether a C 'size_t' contains a
non-ASCII, UTF8-encoded char. */
-#if (SIZEOF_LONG == 8)
-# define ASCII_CHAR_MASK 0x8080808080808080UL
-#elif (SIZEOF_LONG == 4)
-# define ASCII_CHAR_MASK 0x80808080UL
+#if (SIZEOF_SIZE_T == 8)
+# define ASCII_CHAR_MASK 0x8080808080808080ULL
+#elif (SIZEOF_SIZE_T == 4)
+# define ASCII_CHAR_MASK 0x80808080U
#else
-# error C 'long' size should be either 4 or 8!
+# error C 'size_t' size should be either 4 or 8!
#endif
PyObject*
@@ -115,20 +115,20 @@ _Py_bytes_isascii(const char *cptr, Py_ssize_t len)
{
const char *p = cptr;
const char *end = p + len;
- const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
+ const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
while (p < end) {
/* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
for an explanation. */
- if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
+ if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
/* Help allocation */
const char *_p = p;
while (_p < aligned_end) {
- unsigned long value = *(const unsigned long *) _p;
+ size_t value = *(const size_t *) _p;
if (value & ASCII_CHAR_MASK) {
Py_RETURN_FALSE;
}
- _p += SIZEOF_LONG;
+ _p += SIZEOF_SIZE_T;
}
p = _p;
if (_p == end)
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h
index 197605b..b6ca404 100644
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -6,14 +6,14 @@
#include "pycore_bitutils.h" // _Py_bswap32()
-/* Mask to quickly check whether a C 'long' contains a
+/* Mask to quickly check whether a C 'size_t' contains a
non-ASCII, UTF8-encoded char. */
-#if (SIZEOF_LONG == 8)
-# define ASCII_CHAR_MASK 0x8080808080808080UL
-#elif (SIZEOF_LONG == 4)
-# define ASCII_CHAR_MASK 0x80808080UL
+#if (SIZEOF_SIZE_T == 8)
+# define ASCII_CHAR_MASK 0x8080808080808080ULL
+#elif (SIZEOF_SIZE_T == 4)
+# define ASCII_CHAR_MASK 0x80808080U
#else
-# error C 'long' size should be either 4 or 8!
+# error C 'size_t' size should be either 4 or 8!
#endif
/* 10xxxxxx */
@@ -26,7 +26,7 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
{
Py_UCS4 ch;
const char *s = *inptr;
- const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
+ const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
STRINGLIB_CHAR *p = dest + *outpos;
while (s < end) {
@@ -36,19 +36,19 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
/* Fast path for runs of ASCII characters. Given that common UTF-8
input will consist of an overwhelming majority of ASCII
characters, we try to optimize for this case by checking
- as many characters as a C 'long' can contain.
+ as many characters as a C 'size_t' can contain.
First, check if we can do an aligned read, as most CPUs have
a penalty for unaligned reads.
*/
- if (_Py_IS_ALIGNED(s, SIZEOF_LONG)) {
+ if (_Py_IS_ALIGNED(s, SIZEOF_SIZE_T)) {
/* Help register allocation */
const char *_s = s;
STRINGLIB_CHAR *_p = p;
while (_s < aligned_end) {
- /* Read a whole long at a time (either 4 or 8 bytes),
+ /* Read a whole size_t at a time (either 4 or 8 bytes),
and do a fast unrolled copy if it only contains ASCII
characters. */
- unsigned long value = *(const unsigned long *) _s;
+ size_t value = *(const size_t *) _s;
if (value & ASCII_CHAR_MASK)
break;
#if PY_LITTLE_ENDIAN
@@ -56,14 +56,14 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
_p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);
_p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);
_p[3] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);
-# if SIZEOF_LONG == 8
+# if SIZEOF_SIZE_T == 8
_p[4] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);
_p[5] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);
_p[6] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);
_p[7] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);
# endif
#else
-# if SIZEOF_LONG == 8
+# if SIZEOF_SIZE_T == 8
_p[0] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);
_p[1] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);
_p[2] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);
@@ -79,8 +79,8 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
_p[3] = (STRINGLIB_CHAR)(value & 0xFFu);
# endif
#endif
- _s += SIZEOF_LONG;
- _p += SIZEOF_LONG;
+ _s += SIZEOF_SIZE_T;
+ _p += SIZEOF_SIZE_T;
}
s = _s;
p = _p;
diff --git a/Objects/stringlib/find_max_char.h b/Objects/stringlib/find_max_char.h
index f4e0a77..3319a46 100644
--- a/Objects/stringlib/find_max_char.h
+++ b/Objects/stringlib/find_max_char.h
@@ -4,14 +4,14 @@
# error "find_max_char.h is specific to Unicode"
#endif
-/* Mask to quickly check whether a C 'long' contains a
+/* Mask to quickly check whether a C 'size_t' contains a
non-ASCII, UTF8-encoded char. */
-#if (SIZEOF_LONG == 8)
-# define UCS1_ASCII_CHAR_MASK 0x8080808080808080UL
-#elif (SIZEOF_LONG == 4)
-# define UCS1_ASCII_CHAR_MASK 0x80808080UL
+#if (SIZEOF_SIZE_T == 8)
+# define UCS1_ASCII_CHAR_MASK 0x8080808080808080ULL
+#elif (SIZEOF_SIZE_T == 4)
+# define UCS1_ASCII_CHAR_MASK 0x80808080U
#else
-# error C 'long' size should be either 4 or 8!
+# error C 'size_t' size should be either 4 or 8!
#endif
#if STRINGLIB_SIZEOF_CHAR == 1
@@ -21,17 +21,17 @@ STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
{
const unsigned char *p = (const unsigned char *) begin;
const unsigned char *aligned_end =
- (const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
+ (const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
while (p < end) {
- if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
+ if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
/* Help register allocation */
const unsigned char *_p = p;
while (_p < aligned_end) {
- unsigned long value = *(const unsigned long *) _p;
+ size_t value = *(const size_t *) _p;
if (value & UCS1_ASCII_CHAR_MASK)
return 255;
- _p += SIZEOF_LONG;
+ _p += SIZEOF_SIZE_T;
}
p = _p;
if (p == end)
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index c4e73eb..f963deb 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5025,21 +5025,21 @@ PyUnicode_DecodeUTF8(const char *s,
#include "stringlib/codecs.h"
#include "stringlib/undef.h"
-/* Mask to quickly check whether a C 'long' contains a
+/* Mask to quickly check whether a C 'size_t' contains a
non-ASCII, UTF8-encoded char. */
-#if (SIZEOF_LONG == 8)
-# define ASCII_CHAR_MASK 0x8080808080808080UL
-#elif (SIZEOF_LONG == 4)
-# define ASCII_CHAR_MASK 0x80808080UL
+#if (SIZEOF_SIZE_T == 8)
+# define ASCII_CHAR_MASK 0x8080808080808080ULL
+#elif (SIZEOF_SIZE_T == 4)
+# define ASCII_CHAR_MASK 0x80808080U
#else
-# error C 'long' size should be either 4 or 8!
+# error C 'size_t' size should be either 4 or 8!
#endif
static Py_ssize_t
ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
{
const char *p = start;
- const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
+ const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
/*
* Issue #17237: m68k is a bit different from most architectures in
@@ -5049,21 +5049,21 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
* version" will even speed up m68k.
*/
#if !defined(__m68k__)
-#if SIZEOF_LONG <= SIZEOF_VOID_P
- assert(_Py_IS_ALIGNED(dest, SIZEOF_LONG));
- if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
+#if SIZEOF_SIZE_T <= SIZEOF_VOID_P
+ assert(_Py_IS_ALIGNED(dest, SIZEOF_SIZE_T));
+ if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
/* Fast path, see in STRINGLIB(utf8_decode) for
an explanation. */
/* Help allocation */
const char *_p = p;
Py_UCS1 * q = dest;
while (_p < aligned_end) {
- unsigned long value = *(const unsigned long *) _p;
+ size_t value = *(const size_t *) _p;
if (value & ASCII_CHAR_MASK)
break;
- *((unsigned long *)q) = value;
- _p += SIZEOF_LONG;
- q += SIZEOF_LONG;
+ *((size_t *)q) = value;
+ _p += SIZEOF_SIZE_T;
+ q += SIZEOF_SIZE_T;
}
p = _p;
while (p < end) {
@@ -5078,14 +5078,14 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
while (p < end) {
/* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
for an explanation. */
- if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
+ if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
/* Help allocation */
const char *_p = p;
while (_p < aligned_end) {
- unsigned long value = *(const unsigned long *) _p;
+ size_t value = *(const size_t *) _p;
if (value & ASCII_CHAR_MASK)
break;
- _p += SIZEOF_LONG;
+ _p += SIZEOF_SIZE_T;
}
p = _p;
if (_p == end)