summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Yury Selivanov <yury@magic.io>, 2018-01-24 03:17:04 (GMT)
committer: GitHub <noreply@github.com>, 2018-01-24 03:17:04 (GMT)
commit: b7a80d543e1e94475ab9c8214f7a9eab4e63c9ab (patch)
tree: 3a76dbdbf6d24363150a589ef8025de05e7e1dc6
parent: 6b273f7f4056f8276f61a97c789d6bb4425e653c (diff)
download: cpython-b7a80d543e1e94475ab9c8214f7a9eab4e63c9ab.zip
cpython-b7a80d543e1e94475ab9c8214f7a9eab4e63c9ab.tar.gz
cpython-b7a80d543e1e94475ab9c8214f7a9eab4e63c9ab.tar.bz2
bpo-32436: Don't use native popcount() (also fixes bpo-32641) (#5292)
-rw-r--r-- Python/context.c 2
-rw-r--r-- Python/hamt.c 26
2 files changed, 13 insertions, 15 deletions
diff --git a/Python/context.c b/Python/context.c
index 2f1d0f5..5439531 100644
--- a/Python/context.c
+++ b/Python/context.c
@@ -1171,7 +1171,7 @@ get_token_missing(void)
int
PyContext_ClearFreeList(void)
{
- int size = ctx_freelist_len;
+ Py_ssize_t size = ctx_freelist_len;
while (ctx_freelist_len) {
PyContext *ctx = ctx_freelist;
ctx_freelist = (PyContext *)ctx->ctx_weakreflist;
diff --git a/Python/hamt.c b/Python/hamt.c
index af3dfce..81629e9 100644
--- a/Python/hamt.c
+++ b/Python/hamt.c
@@ -4,11 +4,6 @@
#include "internal/pystate.h"
#include "internal/hamt.h"
-/* popcnt support in Visual Studio */
-#ifdef _MSC_VER
-#include <intrin.h>
-#endif
-
/*
This file provides an implementation of an immutable mapping using the
Hash Array Mapped Trie (or HAMT) datastructure.
@@ -440,18 +435,21 @@ hamt_bitpos(int32_t hash, uint32_t shift)
static inline uint32_t
hamt_bitcount(uint32_t i)
{
-#if defined(__GNUC__) && (__GNUC__ > 4)
- return (uint32_t)__builtin_popcountl(i);
-#elif defined(__clang__) && (__clang_major__ > 3)
- return (uint32_t)__builtin_popcountl(i);
-#elif defined(_MSC_VER)
- return (uint32_t)__popcnt(i);
-#else
- /* https://graphics.stanford.edu/~seander/bithacks.html */
+ /* We could use a native popcount instruction, but that would
+ require us to either add configure flags to enable SSE4.2
+ support or detect it dynamically. Otherwise, we would risk
+ CPython not working properly on older hardware.
+
+ In practice, there's no observable difference in
+ performance between using a popcount instruction or the
+ following fallback code.
+
+ The algorithm is copied from:
+ https://graphics.stanford.edu/~seander/bithacks.html
+ */
i = i - ((i >> 1) & 0x55555555);
i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
return ((i + (i >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
-#endif
}
static inline uint32_t