summaryrefslogtreecommitdiffstats
path: root/Include
diff options
context:
space:
mode:
authorVictor Stinner <vstinner@python.org>2020-06-08 14:30:33 (GMT)
committerGitHub <noreply@github.com>2020-06-08 14:30:33 (GMT)
commitc6b292cdeee689f0bfac6c1e2c2d4e4e01fa8d9e (patch)
tree4a7686465f3ed4a171382d4717ad558d82413951 /Include
parent301f0d4ff9b6bd60599eea0612904f65a92e6dd9 (diff)
downloadcpython-c6b292cdeee689f0bfac6c1e2c2d4e4e01fa8d9e.zip
cpython-c6b292cdeee689f0bfac6c1e2c2d4e4e01fa8d9e.tar.gz
cpython-c6b292cdeee689f0bfac6c1e2c2d4e4e01fa8d9e.tar.bz2
bpo-29882: Add _Py_popcount32() function (GH-20518)
* Rename pycore_byteswap.h to pycore_bitutils.h. * Move popcount_digit() to pycore_bitutils.h as _Py_popcount32(). * _Py_popcount32() uses GCC and clang builtin function if available. * Add unit tests to _Py_popcount32().
Diffstat (limited to 'Include')
-rw-r--r--Include/internal/pycore_bitutils.h (renamed from Include/internal/pycore_byteswap.h)51
1 files changed, 50 insertions, 1 deletions
diff --git a/Include/internal/pycore_byteswap.h b/Include/internal/pycore_bitutils.h
index 5e64704..36ffe23 100644
--- a/Include/internal/pycore_byteswap.h
+++ b/Include/internal/pycore_bitutils.h
@@ -1,4 +1,6 @@
-/* Bytes swap functions, reverse order of bytes:
+/* Bit and bytes utilities.
+
+ Bytes swap functions, reverse order of bytes:
- _Py_bswap16(uint16_t)
- _Py_bswap32(uint32_t)
@@ -82,6 +84,53 @@ _Py_bswap64(uint64_t word)
}
+// Population count: count the number of 1's in 'x'
+// (number of bits set to 1), also known as the hamming weight.
+//
+// Implementation note. CPUID is not used, to test if x86 POPCNT instruction
+// can be used, to keep the implementation simple. For example, Visual Studio
+// __popcnt() is not used this reason. The clang and GCC builtin function can
+// use the x86 POPCNT instruction if the target architecture has SSE4a or
+// newer.
+static inline int
+_Py_popcount32(uint32_t x)
+{
+#if (defined(__clang__) || defined(__GNUC__))
+
+#if SIZEOF_INT >= 4
+ Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned int));
+ return __builtin_popcount(x);
+#else
+ // The C standard guarantees that unsigned long will always be big enough
+ // to hold a uint32_t value without losing information.
+ Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned long));
+ return __builtin_popcountl(x);
+#endif
+
+#else
+ // 32-bit SWAR (SIMD Within A Register) popcount
+
+ // Binary: 0 1 0 1 ...
+ const uint32_t M1 = 0x55555555;
+ // Binary: 00 11 00 11. ..
+ const uint32_t M2 = 0x33333333;
+ // Binary: 0000 1111 0000 1111 ...
+ const uint32_t M4 = 0x0F0F0F0F;
+ // 256**4 + 256**3 + 256**2 + 256**1
+ const uint32_t SUM = 0x01010101;
+
+ // Put count of each 2 bits into those 2 bits
+ x = x - ((x >> 1) & M1);
+ // Put count of each 4 bits into those 4 bits
+ x = (x & M2) + ((x >> 2) & M2);
+ // Put count of each 8 bits into those 8 bits
+ x = (x + (x >> 4)) & M4;
+ // Sum of the 4 byte counts
+ return (uint32_t)((uint64_t)x * (uint64_t)SUM) >> 24;
+#endif
+}
+
+
#ifdef __cplusplus
}
#endif