diff options
author | Victor Stinner <vstinner@python.org> | 2020-06-08 14:30:33 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-06-08 14:30:33 (GMT) |
commit | c6b292cdeee689f0bfac6c1e2c2d4e4e01fa8d9e (patch) | |
tree | 4a7686465f3ed4a171382d4717ad558d82413951 /Include | |
parent | 301f0d4ff9b6bd60599eea0612904f65a92e6dd9 (diff) | |
download | cpython-c6b292cdeee689f0bfac6c1e2c2d4e4e01fa8d9e.zip cpython-c6b292cdeee689f0bfac6c1e2c2d4e4e01fa8d9e.tar.gz cpython-c6b292cdeee689f0bfac6c1e2c2d4e4e01fa8d9e.tar.bz2 |
bpo-29882: Add _Py_popcount32() function (GH-20518)
* Rename pycore_byteswap.h to pycore_bitutils.h.
* Move popcount_digit() to pycore_bitutils.h as _Py_popcount32().
* _Py_popcount32() uses GCC and clang builtin function if available.
* Add unit tests to _Py_popcount32().
Diffstat (limited to 'Include')
-rw-r--r-- | Include/internal/pycore_bitutils.h (renamed from Include/internal/pycore_byteswap.h) | 51 |
1 files changed, 50 insertions, 1 deletions
diff --git a/Include/internal/pycore_byteswap.h b/Include/internal/pycore_bitutils.h index 5e64704..36ffe23 100644 --- a/Include/internal/pycore_byteswap.h +++ b/Include/internal/pycore_bitutils.h @@ -1,4 +1,6 @@ -/* Bytes swap functions, reverse order of bytes: +/* Bit and bytes utilities. + + Bytes swap functions, reverse order of bytes: - _Py_bswap16(uint16_t) - _Py_bswap32(uint32_t) @@ -82,6 +84,53 @@ _Py_bswap64(uint64_t word) } +// Population count: count the number of 1's in 'x' +// (number of bits set to 1), also known as the hamming weight. +// +// Implementation note. CPUID is not used, to test if x86 POPCNT instruction +// can be used, to keep the implementation simple. For example, Visual Studio +// __popcnt() is not used this reason. The clang and GCC builtin function can +// use the x86 POPCNT instruction if the target architecture has SSE4a or +// newer. +static inline int +_Py_popcount32(uint32_t x) +{ +#if (defined(__clang__) || defined(__GNUC__)) + +#if SIZEOF_INT >= 4 + Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned int)); + return __builtin_popcount(x); +#else + // The C standard guarantees that unsigned long will always be big enough + // to hold a uint32_t value without losing information. + Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned long)); + return __builtin_popcountl(x); +#endif + +#else + // 32-bit SWAR (SIMD Within A Register) popcount + + // Binary: 0 1 0 1 ... + const uint32_t M1 = 0x55555555; + // Binary: 00 11 00 11. .. + const uint32_t M2 = 0x33333333; + // Binary: 0000 1111 0000 1111 ... + const uint32_t M4 = 0x0F0F0F0F; + // 256**4 + 256**3 + 256**2 + 256**1 + const uint32_t SUM = 0x01010101; + + // Put count of each 2 bits into those 2 bits + x = x - ((x >> 1) & M1); + // Put count of each 4 bits into those 4 bits + x = (x & M2) + ((x >> 2) & M2); + // Put count of each 8 bits into those 8 bits + x = (x + (x >> 4)) & M4; + // Sum of the 4 byte counts + return (uint32_t)((uint64_t)x * (uint64_t)SUM) >> 24; +#endif +} + + #ifdef __cplusplus } #endif |