diff options
author | Yury Selivanov <yury@magic.io> | 2018-01-24 03:17:04 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-01-24 03:17:04 (GMT) |
commit | b7a80d543e1e94475ab9c8214f7a9eab4e63c9ab (patch) | |
tree | 3a76dbdbf6d24363150a589ef8025de05e7e1dc6 | |
parent | 6b273f7f4056f8276f61a97c789d6bb4425e653c (diff) | |
download | cpython-b7a80d543e1e94475ab9c8214f7a9eab4e63c9ab.zip cpython-b7a80d543e1e94475ab9c8214f7a9eab4e63c9ab.tar.gz cpython-b7a80d543e1e94475ab9c8214f7a9eab4e63c9ab.tar.bz2 |
bpo-32436: Don't use native popcount() (also fixes bpo-32641) (#5292)
-rw-r--r-- | Python/context.c | 2 | ||||
-rw-r--r-- | Python/hamt.c | 26 |
2 files changed, 13 insertions, 15 deletions
diff --git a/Python/context.c b/Python/context.c
index 2f1d0f5..5439531 100644
--- a/Python/context.c
+++ b/Python/context.c
@@ -1171,7 +1171,7 @@ get_token_missing(void)
 int
 PyContext_ClearFreeList(void)
 {
-    int size = ctx_freelist_len;
+    Py_ssize_t size = ctx_freelist_len;
     while (ctx_freelist_len) {
         PyContext *ctx = ctx_freelist;
         ctx_freelist = (PyContext *)ctx->ctx_weakreflist;
diff --git a/Python/hamt.c b/Python/hamt.c
index af3dfce..81629e9 100644
--- a/Python/hamt.c
+++ b/Python/hamt.c
@@ -4,11 +4,6 @@
 #include "internal/pystate.h"
 #include "internal/hamt.h"
 
-/* popcnt support in Visual Studio */
-#ifdef _MSC_VER
-#include <intrin.h>
-#endif
-
 /*
 This file provides an implemention of an immutable mapping using the
 Hash Array Mapped Trie (or HAMT) datastructure.
@@ -440,18 +435,21 @@ hamt_bitpos(int32_t hash, uint32_t shift)
 static inline uint32_t
 hamt_bitcount(uint32_t i)
 {
-#if defined(__GNUC__) && (__GNUC__ > 4)
-    return (uint32_t)__builtin_popcountl(i);
-#elif defined(__clang__) && (__clang_major__ > 3)
-    return (uint32_t)__builtin_popcountl(i);
-#elif defined(_MSC_VER)
-    return (uint32_t)__popcnt(i);
-#else
-    /* https://graphics.stanford.edu/~seander/bithacks.html */
+    /* We could use native popcount instruction but that would
+       require to either add configure flags to enable SSE4.2
+       support or to detect it dynamically. Otherwise, we have
+       a risk of CPython not working properly on older hardware.
+
+       In practice, there's no observable difference in
+       performance between using a popcount instruction or the
+       following fallback code.
+
+       The algorithm is copied from:
+       https://graphics.stanford.edu/~seander/bithacks.html
+    */
     i = i - ((i >> 1) & 0x55555555);
     i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
     return ((i + (i >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
-#endif
 }
 
 static inline uint32_t