diff options
Diffstat (limited to 'generic/tclUtil.c')
| -rw-r--r-- | generic/tclUtil.c | 180 |
1 files changed, 180 insertions, 0 deletions
diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 4beb25d..7940b66 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -19,6 +19,11 @@ #include "tclTomMath.h" #include <math.h> +#if defined(_MSC_VER) && defined(_WIN64) +# include <intrin.h> +# pragma intrinsic(_BitScanReverse64) +#endif + /* * The absolute pathname of the executable in which this Tcl library is * running. @@ -4611,6 +4616,181 @@ TclReToGlob( } /* + *---------------------------------------------------------------------- + * + * TclMSB -- + * + * Given a unsigned long long non-zero value n, return the index of + * the most significant bit in n that is set. This is equivalent to + * returning trunc(log2(n)). It's also equivalent to the largest + * integer k such that 2^k <= n. + * + * This routine is adapted from Andrej Brodnik, "Computation of the + * Least Significant Set Bit", pp 7-10, Proceedings of the 2nd + * Electrotechnical and Computer Science Conference, Portoroz, + * Slovenia, 1993. The adaptations permit the computation to take + * place within unsigned long long values without the need for double + * length buffers for calculation. They also fill in a number of + * details the paper omits or leaves unclear. + * + * Results: + * The index of the most significant set bit in n, a value between + * 0 and 63, inclusive. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +TclMSB( + unsigned long long n) +{ + /* assert ( 64 == CHAR_BIT * sizeof(unsigned long long) ); */ + /* assert ( n != 0 ); */ + + /* + * Many platforms offer access to this functionality through + * compiler specific incantations that exploit processor + * instructions. Add more as appropriate. + */ + +#if defined(_MSC_VER) && defined(_WIN64) + /* + * This candidate implementation for Microsoft compilers is + * untested. (Remove this comment when someone tests it and + * either finds it working, or fixes any brokenness.) + */ + unsigned long result; + + (void) _BitScanReverse64(&result, (unsigned __int64)n); + return (int)result; + +#elif defined(__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) + + /* + * The GNU Compiler Collection offers this builtin routine + * starting with version 3.4, released 2004. + * clzll() = Count of Leading Zeroes in a Long Long + * NOTE: we rely on input constraint (n != 0). + */ + + return 63 - __builtin_clzll(n); + +#else + + /* + * For a byte, consider two masks, C1 = 10000000 selecting just + * the high bit, and C2 = 01111111 selecting all other bits. + * Then for any byte value n, the computation + * LEAD(n) = C1 & (n | (C2 + (n & C2))) + * will leave all bits but the high bit unset, and will have the + * high bit set iff n!=0. The whole thing is an 8-bit test + * for being non-zero. For an 8-byte n, each byte can have + * the test applied all at once, with combined masks. + */ + const unsigned long long C1 = 0x8080808080808080; + const unsigned long long C2 = 0x7F7F7F7F7F7F7F7F; +#define LEAD(n) (C1 & (n | (C2 + (n & C2)))) + + /* + * To shift a bit to a new place, multiplication by 2^k will do. + * To shift the top 7 bits produced by the LEAD test to the high + * 7 bits of the entire long long, multiply by the right sum of + * powers of 2. In this case + * Q = 1 + 2^7 + 2^14 + 2^21 + 2^28 + 2^35 + 2^42 + * Then shift those 7 bits down to the low 7 bits of the long long. + * The key to making this work is that none of the shifted bits + * collide with each other in the top 7-bit destination. + * Note that we lose the bit that indicates whether the low byte + * is non-zero. That doesn't matter because we require the original + * value n to be non-zero, so if all other bytes signal to be zero, + * we know the low byte is non-zero, and if one of the other bytes + * signals non-zero, we just don't care what the low byte is. + */ + const unsigned long long Q = 0x0000040810204081; + + /* + * To place a copy of a 7-bit value in each of 7 bytes in + * a long long, just multply by the right value. In this case + * P = 0x00 01 01 01 01 01 01 01 + * We don't put a copy in the high byte since analysis of the + * remaining steps in the algorithm indicates we do not need it. + */ + const unsigned long long P = 0x0001010101010101; + + /* + * With 7 copies of the LEAD value, we can now apply 7 masks + * to it in a single step by an & against the right value. + * B = 00000000 01111111 01111110 01111100 + * 01111000 01110000 01100000 01000000 + * The higher the MSB of the copied value is, the more of the + * B-masked bytes stored in t will be non-zero. + */ + const unsigned long long B = 0x007F7E7C78706040; + unsigned long long t = B & P * (LEAD(n) * Q >> 57); + + /* + * We want to get a count of the non-zero bytes stored in t. + * First use LEAD(t) to create a set of high bits signaling + * non-zero values as before. Call this value + * X = x6*2^55 +x5*2^47 +x4*2^39 +x3*2^31 +x2*2^23 +x1*2^15 +x0*2^7 + * Then notice what multiplication by + * P = 2^48 + 2^40 + 2^32 + 2^24 + 2^16 + 2^8 + 1 + * produces: + * P*X = x0*2^7 + (x0 + x1)*2^15 + ... + * ... + (x0 + x1 + x2 + x3 + x4 + x5 + x6) * 2^55 + ... + * ... + (x5 + x6)*2^95 + x6*2^103 + * The high terms of this product are going to overflow the long long + * and get lost, but we don't care about them. What we care is that + * the 2^55 term is exactly the sum we seek. We shift the product + * down by 55 bits and then mask away all but the bottom 3 bits + * (Max sum can be 7) we get exactly the count of non-zero B-masked + * bytes. By design of the mask, this count is the index of the + * MSB of the LEAD value. It indicates which byte of the original + * value contains the MSB of the original value. + */ +#define SUM(t) (0x7 & (int)(LEAD(t) * P >> 55)); + + /* + * Multiply by 8 to get the number of bits to shift to place + * that MSB-containing byte in the low byte. + */ + int k = 8 * SUM(t); + + /* + * Shift the MSB byte to the low byte. Then shift one more bit. + * Since we know the MSB byte is non-zero we only need to compute + * the MSB of the top 7 bits. If all top 7 bits are zero, we know + * the bottom bit is the 1 and the correct index is 0. Compute the + * MSB of that value by the same steps we did before. + */ + t = B & P * (n >> k >> 1); + + /* + * Add the index of the MSB of the byte to the index of the low + * bit of that byte computed before to get the final answer. + */ + return k + SUM(t); + + /* Total operations: 33 + * 10 bit-ands, 6 multiplies, 4 adds, 5 rightshifts, + * 3 assignments, 3 bit-ors, 2 typecasts. + * + * The whole task is one direct computation. + * No branches. No loops. + * + * 33 operations cannot beat one instruction, so assembly + * wins and should be used wherever possible, but this isn't bad. + */ + +#undef SUM +#undef LEAD +#endif +} + +/* * Local Variables: * mode: c * c-basic-offset: 4 |
