From 6736cf8d20b67b74e8e959622132963285156242 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Mon, 20 Apr 2009 21:13:33 +0000 Subject: Issue #3166: Make long -> float (and int -> float) conversions correctly rounded, using round-half-to-even. This ensures that the value of float(n) doesn't depend on whether we're using 15-bit digits or 30-bit digits for Python longs. --- Lib/test/test_int.py | 34 ++++++++++ Lib/test/test_long.py | 59 ++++++++++++++++++ Misc/NEWS | 3 + Objects/intobject.c | 80 ++++++++++++++++++++---- Objects/longobject.c | 167 +++++++++++++++++++++++++++++++++++++++++++++----- 5 files changed, 316 insertions(+), 27 deletions(-) diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index ce18ad2..514a98e 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -275,6 +275,40 @@ class IntTestCases(unittest.TestCase): self.assertEqual((a+1).bit_length(), i+1) self.assertEqual((-a-1).bit_length(), i+1) + @unittest.skipUnless(float.__getformat__("double").startswith("IEEE"), + "test requires IEEE 754 doubles") + def test_float_conversion(self): + # values exactly representable as floats + exact_values = [-2, -1, 0, 1, 2, 2**52, 2**53-1, 2**53, 2**53+2, + 2**53+4, 2**54-4, 2**54-2, 2**63, -2**63, 2**64, + -2**64, 10**20, 10**21, 10**22] + for value in exact_values: + self.assertEqual(int(float(int(value))), value) + + # test round-half-to-even + self.assertEqual(int(float(2**53+1)), 2**53) + self.assertEqual(int(float(2**53+2)), 2**53+2) + self.assertEqual(int(float(2**53+3)), 2**53+4) + self.assertEqual(int(float(2**53+5)), 2**53+4) + self.assertEqual(int(float(2**53+6)), 2**53+6) + self.assertEqual(int(float(2**53+7)), 2**53+8) + + self.assertEqual(int(float(-2**53-1)), -2**53) + self.assertEqual(int(float(-2**53-2)), -2**53-2) + self.assertEqual(int(float(-2**53-3)), -2**53-4) + self.assertEqual(int(float(-2**53-5)), -2**53-4) + self.assertEqual(int(float(-2**53-6)), -2**53-6) + self.assertEqual(int(float(-2**53-7)), -2**53-8) + + self.assertEqual(int(float(2**54-2)), 2**54-2) + self.assertEqual(int(float(2**54-1)), 2**54) + self.assertEqual(int(float(2**54+2)), 2**54) + self.assertEqual(int(float(2**54+3)), 2**54+4) + self.assertEqual(int(float(2**54+5)), 2**54+4) + self.assertEqual(int(float(2**54+6)), 2**54+8) + self.assertEqual(int(float(2**54+10)), 2**54+8) + self.assertEqual(int(float(2**54+11)), 2**54+12) + def test_intconversion(self): # Test __int__() class ClassicMissingMethods: diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py index 1a0e33d..b41d270 100644 --- a/Lib/test/test_long.py +++ b/Lib/test/test_long.py @@ -645,6 +645,65 @@ class LongTest(unittest.TestCase): else: self.assertRaises(TypeError, pow,longx, longy, long(z)) + @unittest.skipUnless(float.__getformat__("double").startswith("IEEE"), + "test requires IEEE 754 doubles") + def test_float_conversion(self): + import sys + DBL_MAX = sys.float_info.max + DBL_MAX_EXP = sys.float_info.max_exp + DBL_MANT_DIG = sys.float_info.mant_dig + + exact_values = [0L, 1L, 2L, + long(2**53-3), + long(2**53-2), + long(2**53-1), + long(2**53), + long(2**53+2), + long(2**54-4), + long(2**54-2), + long(2**54), + long(2**54+4)] + for x in exact_values: + self.assertEqual(long(float(x)), x) + self.assertEqual(long(float(-x)), -x) + + # test round-half-even + for x, y in [(1, 0), (2, 2), (3, 4), (4, 4), (5, 4), (6, 6), (7, 8)]: + for p in xrange(15): + self.assertEqual(long(float(2L**p*(2**53+x))), 2L**p*(2**53+y)) + + for x, y in [(0, 0), (1, 0), (2, 0), (3, 4), (4, 4), (5, 4), (6, 8), + (7, 8), (8, 8), (9, 8), (10, 8), (11, 12), (12, 12), + (13, 12), (14, 16), (15, 16)]: + for p in xrange(15): + self.assertEqual(long(float(2L**p*(2**54+x))), 2L**p*(2**54+y)) + + # behaviour near extremes of floating-point range + long_dbl_max = long(DBL_MAX) + top_power = 2**DBL_MAX_EXP + halfway = (long_dbl_max + top_power)/2 + self.assertEqual(float(long_dbl_max), DBL_MAX) + self.assertEqual(float(long_dbl_max+1), DBL_MAX) + self.assertEqual(float(halfway-1), DBL_MAX) + self.assertRaises(OverflowError, float, halfway) + self.assertEqual(float(1-halfway), -DBL_MAX) + self.assertRaises(OverflowError, float, -halfway) + self.assertRaises(OverflowError, float, top_power-1) + self.assertRaises(OverflowError, float, top_power) + self.assertRaises(OverflowError, float, top_power+1) + self.assertRaises(OverflowError, float, 2*top_power-1) + self.assertRaises(OverflowError, float, 2*top_power) + self.assertRaises(OverflowError, float, top_power*top_power) + + for p in xrange(100): + x = long(2**p * (2**53 + 1) + 1) + y = long(2**p * (2**53+ 2)) + self.assertEqual(long(float(x)), y) + + x = long(2**p * (2**53 + 1)) + y = long(2**p * 2**53) + self.assertEqual(long(float(x)), y) + def test_float_overflow(self): import math diff --git a/Misc/NEWS b/Misc/NEWS index 9a5387d..d4dc2dc 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,9 @@ What's New in Python 2.7 alpha 1 Core and Builtins ----------------- +- Issue #3166: Make long -> float (and int -> float) conversions + correctly rounded. + - Issue #5787: object.__getattribute__(some_type, "__bases__") segfaulted on some builtin types. diff --git a/Objects/intobject.c b/Objects/intobject.c index d4532f4..1b64fe7 100644 --- a/Objects/intobject.c +++ b/Objects/intobject.c @@ -3,6 +3,7 @@ #include "Python.h" #include +#include static PyObject *int_int(PyIntObject *v); @@ -928,12 +929,78 @@ int_long(PyIntObject *v) return PyLong_FromLong((v -> ob_ival)); } +static const unsigned char BitLengthTable[32] = { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 +}; + +static int +bits_in_ulong(unsigned long d) +{ + int d_bits = 0; + while (d >= 32) { + d_bits += 6; + d >>= 6; + } + d_bits += (int)BitLengthTable[d]; + return d_bits; +} + +#if 8*SIZEOF_LONG-1 <= DBL_MANT_DIG +/* Every Python int can be exactly represented as a float. */ + static PyObject * int_float(PyIntObject *v) { return PyFloat_FromDouble((double)(v -> ob_ival)); } +#else +/* Here not all Python ints are exactly representable as floats, so we may + have to round. We do this manually, since the C standards don't specify + whether converting an integer to a float rounds up or down */ + +static PyObject * +int_float(PyIntObject *v) +{ + unsigned long abs_ival, lsb; + int round_up; + + if (v->ob_ival < 0) + abs_ival = 0U-(unsigned long)v->ob_ival; + else + abs_ival = (unsigned long)v->ob_ival; + if (abs_ival < (1L << DBL_MANT_DIG)) + /* small integer; no need to round */ + return PyFloat_FromDouble((double)v->ob_ival); + + /* Round abs_ival to MANT_DIG significant bits, using the + round-half-to-even rule. abs_ival & lsb picks out the 'rounding' + bit: the first bit after the most significant MANT_DIG bits of + abs_ival. We round up if this bit is set, provided that either: + + (1) abs_ival isn't exactly halfway between two floats, in which + case at least one of the bits following the rounding bit must be + set; i.e., abs_ival & lsb-1 != 0, or: + + (2) the resulting rounded value has least significant bit 0; or + in other words the bit above the rounding bit is set (this is the + 'to-even' bit of round-half-to-even); i.e., abs_ival & 2*lsb != 0 + + The condition "(1) or (2)" equates to abs_ival & 3*lsb-1 != 0. */ + + lsb = 1L << (bits_in_ulong(abs_ival)-DBL_MANT_DIG-1); + round_up = (abs_ival & lsb) && (abs_ival & (3*lsb-1)); + abs_ival &= -2*lsb; + if (round_up) + abs_ival += 2*lsb; + return PyFloat_FromDouble(v->ob_ival < 0 ? + -(double)abs_ival : + (double)abs_ival); +} + +#endif + static PyObject * int_oct(PyIntObject *v) { @@ -1139,16 +1206,10 @@ int__format__(PyObject *self, PyObject *args) return NULL; } -static const unsigned char BitLengthTable[32] = { - 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 -}; - static PyObject * int_bit_length(PyIntObject *v) { unsigned long n; - long r = 0; if (v->ob_ival < 0) /* avoid undefined behaviour when v->ob_ival == -LONG_MAX-1 */ @@ -1156,12 +1217,7 @@ int_bit_length(PyIntObject *v) else n = (unsigned long)v->ob_ival; - while (n >= 32) { - r += 6; - n >>= 6; - } - r += (long)(BitLengthTable[n]); - return PyInt_FromLong(r); + return PyInt_FromLong(bits_in_ulong(n)); } PyDoc_STRVAR(int_bit_length_doc, diff --git a/Objects/longobject.c b/Objects/longobject.c index dd22ce0..4b6502d 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -8,6 +8,7 @@ #include "longintrepr.h" #include "structseq.h" +#include #include #include @@ -38,6 +39,9 @@ if (PyErr_CheckSignals()) PyTryBlock \ } +/* forward declaration */ +static int bits_in_digit(digit d); + /* Normalize (remove leading zeros from) a long int object. Doesn't attempt to free the storage--in most cases, due to the nature of the algorithms used, this could save at most be one word anyway. */ @@ -729,33 +733,166 @@ _PyLong_AsScaledDouble(PyObject *vv, int *exponent) #undef NBITS_WANTED } -/* Get a C double from a long int object. */ +/* Get a C double from a long int object. Rounds to the nearest double, + using the round-half-to-even rule in the case of a tie. */ double PyLong_AsDouble(PyObject *vv) { - int e = -1; + PyLongObject *v = (PyLongObject *)vv; + Py_ssize_t rnd_digit, rnd_bit, m, n; + digit lsb, *d; + int round_up = 0; double x; if (vv == NULL || !PyLong_Check(vv)) { PyErr_BadInternalCall(); - return -1; - } - x = _PyLong_AsScaledDouble(vv, &e); - if (x == -1.0 && PyErr_Occurred()) return -1.0; - /* 'e' initialized to -1 to silence gcc-4.0.x, but it should be - set correctly after a successful _PyLong_AsScaledDouble() call */ - assert(e >= 0); - if (e > INT_MAX / PyLong_SHIFT) + } + + /* Notes on the method: for simplicity, assume v is positive and >= + 2**DBL_MANT_DIG. (For negative v we just ignore the sign until the + end; for small v no rounding is necessary.) Write n for the number + of bits in v, so that 2**(n-1) <= v < 2**n, and n > DBL_MANT_DIG. + + Some terminology: the *rounding bit* of v is the 1st bit of v that + will be rounded away (bit n - DBL_MANT_DIG - 1); the *parity bit* + is the bit immediately above. The round-half-to-even rule says + that we round up if the rounding bit is set, unless v is exactly + halfway between two floats and the parity bit is zero. + + Write d[0] ... d[m] for the digits of v, least to most significant. + Let rnd_bit be the index of the rounding bit, and rnd_digit the + index of the PyLong digit containing the rounding bit. Then the + bits of the digit d[rnd_digit] look something like: + + rounding bit + | + v + msb -> sssssrttttttttt <- lsb + ^ + | + parity bit + + where 's' represents a 'significant bit' that will be included in + the mantissa of the result, 'r' is the rounding bit, and 't' + represents a 'trailing bit' following the rounding bit. Note that + if the rounding bit is at the top of d[rnd_digit] then the parity + bit will be the lsb of d[rnd_digit+1]. If we set + + lsb = 1 << (rnd_bit % PyLong_SHIFT) + + then d[rnd_digit] & (PyLong_BASE - 2*lsb) selects just the + significant bits of d[rnd_digit], d[rnd_digit] & (lsb-1) gets the + trailing bits, and d[rnd_digit] & lsb gives the rounding bit. + + We initialize the double x to the integer given by digits + d[rnd_digit:m-1], but with the rounding bit and trailing bits of + d[rnd_digit] masked out. So the value of x comes from the top + DBL_MANT_DIG bits of v, multiplied by 2*lsb. Note that in the loop + that produces x, all floating-point operations are exact (assuming + that FLT_RADIX==2). Now if we're rounding down, the value we want + to return is simply + + x * 2**(PyLong_SHIFT * rnd_digit). + + and if we're rounding up, it's + + (x + 2*lsb) * 2**(PyLong_SHIFT * rnd_digit). + + Under the round-half-to-even rule, we round up if, and only + if, the rounding bit is set *and* at least one of the + following three conditions is satisfied: + + (1) the parity bit is set, or + (2) at least one of the trailing bits of d[rnd_digit] is set, or + (3) at least one of the digits d[i], 0 <= i < rnd_digit + is nonzero. + + Finally, we have to worry about overflow. If v >= 2**DBL_MAX_EXP, + or equivalently n > DBL_MAX_EXP, then overflow occurs. If v < + 2**DBL_MAX_EXP then we're usually safe, but there's a corner case + to consider: if v is very close to 2**DBL_MAX_EXP then it's + possible that v is rounded up to exactly 2**DBL_MAX_EXP, and then + again overflow occurs. + */ + + if (Py_SIZE(v) == 0) + return 0.0; + m = ABS(Py_SIZE(v)) - 1; + d = v->ob_digit; + assert(d[m]); /* v should be normalized */ + + /* fast path for case where 0 < abs(v) < 2**DBL_MANT_DIG */ + if (m < DBL_MANT_DIG / PyLong_SHIFT || + (m == DBL_MANT_DIG / PyLong_SHIFT && + d[m] < (digit)1 << DBL_MANT_DIG%PyLong_SHIFT)) { + x = d[m]; + while (--m >= 0) + x = x*PyLong_BASE + d[m]; + return Py_SIZE(v) < 0 ? -x : x; + } + + /* if m is huge then overflow immediately; otherwise, compute the + number of bits n in v. The condition below implies n (= #bits) >= + m * PyLong_SHIFT + 1 > DBL_MAX_EXP, hence v >= 2**DBL_MAX_EXP. */ + if (m > (DBL_MAX_EXP-1)/PyLong_SHIFT) goto overflow; - errno = 0; - x = ldexp(x, e * PyLong_SHIFT); - if (Py_OVERFLOWED(x)) + n = m * PyLong_SHIFT + bits_in_digit(d[m]); + if (n > DBL_MAX_EXP) goto overflow; - return x; -overflow: + /* find location of rounding bit */ + assert(n > DBL_MANT_DIG); /* dealt with |v| < 2**DBL_MANT_DIG above */ + rnd_bit = n - DBL_MANT_DIG - 1; + rnd_digit = rnd_bit/PyLong_SHIFT; + lsb = (digit)1 << (rnd_bit%PyLong_SHIFT); + + /* Get top DBL_MANT_DIG bits of v. Assumes PyLong_SHIFT < + DBL_MANT_DIG, so we'll need bits from at least 2 digits of v. */ + x = d[m]; + assert(m > rnd_digit); + while (--m > rnd_digit) + x = x*PyLong_BASE + d[m]; + x = x*PyLong_BASE + (d[m] & (PyLong_BASE-2*lsb)); + + /* decide whether to round up, using round-half-to-even */ + assert(m == rnd_digit); + if (d[m] & lsb) { /* if (rounding bit is set) */ + digit parity_bit; + if (lsb == PyLong_BASE/2) + parity_bit = d[m+1] & 1; + else + parity_bit = d[m] & 2*lsb; + if (parity_bit) + round_up = 1; + else if (d[m] & (lsb-1)) + round_up = 1; + else { + while (--m >= 0) { + if (d[m]) { + round_up = 1; + break; + } + } + } + } + + /* and round up if necessary */ + if (round_up) { + x += 2*lsb; + if (n == DBL_MAX_EXP && + x == ldexp((double)(2*lsb), DBL_MANT_DIG)) { + /* overflow corner case */ + goto overflow; + } + } + + /* shift, adjust for sign, and return */ + x = ldexp(x, rnd_digit*PyLong_SHIFT); + return Py_SIZE(v) < 0 ? -x : x; + + overflow: PyErr_SetString(PyExc_OverflowError, "long int too large to convert to float"); return -1.0; -- cgit v0.12