summaryrefslogtreecommitdiffstats
path: root/Objects
diff options
context:
space:
mode:
authorMark Dickinson <dickinsm@gmail.com>2009-12-27 15:09:50 (GMT)
committerMark Dickinson <dickinsm@gmail.com>2009-12-27 15:09:50 (GMT)
commitcbb62745acc34e7f6ac27a6eb3fb6a5858494d5f (patch)
tree4ecc5eb3a6073089f3668da6654dd2629b4379ac /Objects
parent99b2c8f811a2afcd129ac82625e876c7e3731c04 (diff)
downloadcpython-cbb62745acc34e7f6ac27a6eb3fb6a5858494d5f.zip
cpython-cbb62745acc34e7f6ac27a6eb3fb6a5858494d5f.tar.gz
cpython-cbb62745acc34e7f6ac27a6eb3fb6a5858494d5f.tar.bz2
Merged revisions 77062 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk ........ r77062 | mark.dickinson | 2009-12-27 14:55:57 +0000 (Sun, 27 Dec 2009) | 2 lines Issue #1811: Improve accuracy and consistency of true division for integers. ........
Diffstat (limited to 'Objects')
-rw-r--r--Objects/longobject.c280
1 files changed, 250 insertions, 30 deletions
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 8e4093c..429b635 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -3213,47 +3213,267 @@ long_div(PyObject *a, PyObject *b)
return (PyObject *)div;
}
+/* PyLong/PyLong -> float, with correctly rounded result. */
+
+#define MANT_DIG_DIGITS (DBL_MANT_DIG / PyLong_SHIFT)
+#define MANT_DIG_BITS (DBL_MANT_DIG % PyLong_SHIFT)
+
static PyObject *
-long_true_divide(PyObject *a, PyObject *b)
+long_true_divide(PyObject *v, PyObject *w)
{
- double ad, bd;
- int failed, aexp = -1, bexp = -1;
+ PyLongObject *a, *b, *x;
+ Py_ssize_t a_size, b_size, shift, extra_bits, diff, x_size, x_bits;
+ digit mask, low;
+ int inexact, negate, a_is_small, b_is_small;
+ double dx, result;
- CHECK_BINOP(a, b);
- ad = _PyLong_AsScaledDouble((PyObject *)a, &aexp);
- bd = _PyLong_AsScaledDouble((PyObject *)b, &bexp);
- failed = (ad == -1.0 || bd == -1.0) && PyErr_Occurred();
- if (failed)
- return NULL;
- /* 'aexp' and 'bexp' were initialized to -1 to silence gcc-4.0.x,
- but should really be set correctly after sucessful calls to
- _PyLong_AsScaledDouble() */
- assert(aexp >= 0 && bexp >= 0);
+ CHECK_BINOP(v, w);
+ a = (PyLongObject *)v;
+ b = (PyLongObject *)w;
+
+ /*
+ Method in a nutshell:
+
+ 0. reduce to case a, b > 0; filter out obvious underflow/overflow
+ 1. choose a suitable integer 'shift'
+ 2. use integer arithmetic to compute x = floor(2**-shift*a/b)
+ 3. adjust x for correct rounding
+ 4. convert x to a double dx with the same value
+ 5. return ldexp(dx, shift).
+
+ In more detail:
+
+ 0. For any a, a/0 raises ZeroDivisionError; for nonzero b, 0/b
+ returns either 0.0 or -0.0, depending on the sign of b. For a and
+ b both nonzero, ignore signs of a and b, and add the sign back in
+ at the end. Now write a_bits and b_bits for the bit lengths of a
+ and b respectively (that is, a_bits = 1 + floor(log_2(a)); likewise
+ for b). Then
+
+ 2**(a_bits - b_bits - 1) < a/b < 2**(a_bits - b_bits + 1).
+
+ So if a_bits - b_bits > DBL_MAX_EXP then a/b > 2**DBL_MAX_EXP and
+ so overflows. Similarly, if a_bits - b_bits < DBL_MIN_EXP -
+ DBL_MANT_DIG - 1 then a/b underflows to 0. With these cases out of
+ the way, we can assume that
+
+ DBL_MIN_EXP - DBL_MANT_DIG - 1 <= a_bits - b_bits <= DBL_MAX_EXP.
+
+ 1. The integer 'shift' is chosen so that x has the right number of
+ bits for a double, plus two or three extra bits that will be used
+ in the rounding decisions. Writing a_bits and b_bits for the
+ number of significant bits in a and b respectively, a
+ straightforward formula for shift is:
+
+ shift = a_bits - b_bits - DBL_MANT_DIG - 2
+
+ This is fine in the usual case, but if a/b is smaller than the
+ smallest normal float then it can lead to double rounding on an
+ IEEE 754 platform, giving incorrectly rounded results. So we
+ adjust the formula slightly. The actual formula used is:
+
+ shift = MAX(a_bits - b_bits, DBL_MIN_EXP) - DBL_MANT_DIG - 2
+
+ 2. The quantity x is computed by first shifting a (left -shift bits
+ if shift <= 0, right shift bits if shift > 0) and then dividing by
+ b. For both the shift and the division, we keep track of whether
+ the result is inexact, in a flag 'inexact'; this information is
+ needed at the rounding stage.
+
+ With the choice of shift above, together with our assumption that
+ a_bits - b_bits >= DBL_MIN_EXP - DBL_MANT_DIG - 1, it follows
+ that x >= 1.
+
+ 3. Now x * 2**shift <= a/b < (x+1) * 2**shift. We want to replace
+ this with an exactly representable float of the form
+
+ round(x/2**extra_bits) * 2**(extra_bits+shift).
+
+ For float representability, we need x/2**extra_bits <
+ 2**DBL_MANT_DIG and extra_bits + shift >= DBL_MIN_EXP -
+ DBL_MANT_DIG. This translates to the condition:
+
+ extra_bits >= MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG
+
+ To round, we just modify the bottom digit of x in-place; this can
+ end up giving a digit with value > PyLONG_MASK, but that's not a
+ problem since digits can hold values up to 2*PyLONG_MASK+1.
+
+ With the original choices for shift above, extra_bits will always
+ be 2 or 3. Then rounding under the round-half-to-even rule, we
+ round up iff the most significant of the extra bits is 1, and
+ either: (a) the computation of x in step 2 had an inexact result,
+ or (b) at least one other of the extra bits is 1, or (c) the least
+ significant bit of x (above those to be rounded) is 1.
+
+ 4. Conversion to a double is straightforward; all floating-point
+ operations involved in the conversion are exact, so there's no
+ danger of rounding errors.
+
+ 5. Use ldexp(x, shift) to compute x*2**shift, the final result.
+ The result will always be exactly representable as a double, except
+ in the case that it overflows. To avoid dependence on the exact
+ behaviour of ldexp on overflow, we check for overflow before
+ applying ldexp. The result of ldexp is adjusted for sign before
+ returning.
+ */
- if (bd == 0.0) {
+ /* Reduce to case where a and b are both positive. */
+ a_size = ABS(Py_SIZE(a));
+ b_size = ABS(Py_SIZE(b));
+ negate = (Py_SIZE(a) < 0) ^ (Py_SIZE(b) < 0);
+ if (b_size == 0) {
PyErr_SetString(PyExc_ZeroDivisionError,
- "integer division or modulo by zero");
- return NULL;
+ "division by zero");
+ goto error;
}
-
- /* True value is very close to ad/bd * 2**(PyLong_SHIFT*(aexp-bexp)) */
- ad /= bd; /* overflow/underflow impossible here */
- aexp -= bexp;
- if (aexp > INT_MAX / PyLong_SHIFT)
+ if (a_size == 0)
+ goto underflow_or_zero;
+
+ /* Fast path for a and b small (exactly representable in a double).
+ Relies on floating-point division being correctly rounded; results
+ may be subject to double rounding on x86 machines that operate with
+ the x87 FPU set to 64-bit precision. */
+ a_is_small = a_size <= MANT_DIG_DIGITS ||
+ (a_size == MANT_DIG_DIGITS+1 &&
+ a->ob_digit[MANT_DIG_DIGITS] >> MANT_DIG_BITS == 0);
+ b_is_small = b_size <= MANT_DIG_DIGITS ||
+ (b_size == MANT_DIG_DIGITS+1 &&
+ b->ob_digit[MANT_DIG_DIGITS] >> MANT_DIG_BITS == 0);
+ if (a_is_small && b_is_small) {
+ double da, db;
+ da = a->ob_digit[--a_size];
+ while (a_size > 0)
+ da = da * PyLong_BASE + a->ob_digit[--a_size];
+ db = b->ob_digit[--b_size];
+ while (b_size > 0)
+ db = db * PyLong_BASE + b->ob_digit[--b_size];
+ result = da / db;
+ goto success;
+ }
+
+ /* Catch obvious cases of underflow and overflow */
+ diff = a_size - b_size;
+ if (diff > PY_SSIZE_T_MAX/PyLong_SHIFT - 1)
+ /* Extreme overflow */
goto overflow;
- else if (aexp < -(INT_MAX / PyLong_SHIFT))
- return PyFloat_FromDouble(0.0); /* underflow to 0 */
- errno = 0;
- ad = ldexp(ad, aexp * PyLong_SHIFT);
- if (Py_OVERFLOWED(ad)) /* ignore underflow to 0.0 */
+ else if (diff < 1 - PY_SSIZE_T_MAX/PyLong_SHIFT)
+ /* Extreme underflow */
+ goto underflow_or_zero;
+ /* Next line is now safe from overflowing a Py_ssize_t */
+ diff = diff * PyLong_SHIFT + bits_in_digit(a->ob_digit[a_size - 1]) -
+ bits_in_digit(b->ob_digit[b_size - 1]);
+ /* Now diff = a_bits - b_bits. */
+ if (diff > DBL_MAX_EXP)
goto overflow;
- return PyFloat_FromDouble(ad);
+ else if (diff < DBL_MIN_EXP - DBL_MANT_DIG - 1)
+ goto underflow_or_zero;
+
+ /* Choose value for shift; see comments for step 1 above. */
+ shift = MAX(diff, DBL_MIN_EXP) - DBL_MANT_DIG - 2;
+
+ inexact = 0;
+
+ /* x = abs(a * 2**-shift) */
+ if (shift <= 0) {
+ Py_ssize_t i, shift_digits = -shift / PyLong_SHIFT;
+ digit rem;
+ /* x = a << -shift */
+ if (a_size >= PY_SSIZE_T_MAX - 1 - shift_digits) {
+ /* In practice, it's probably impossible to end up
+ here. Both a and b would have to be enormous,
+ using close to SIZE_T_MAX bytes of memory each. */
+ PyErr_SetString(PyExc_OverflowError,
+ "intermediate overflow during division");
+ goto error;
+ }
+ x = _PyLong_New(a_size + shift_digits + 1);
+ if (x == NULL)
+ goto error;
+ for (i = 0; i < shift_digits; i++)
+ x->ob_digit[i] = 0;
+ rem = v_lshift(x->ob_digit + shift_digits, a->ob_digit,
+ a_size, -shift % PyLong_SHIFT);
+ x->ob_digit[a_size + shift_digits] = rem;
+ }
+ else {
+ Py_ssize_t shift_digits = shift / PyLong_SHIFT;
+ digit rem;
+ /* x = a >> shift */
+ assert(a_size >= shift_digits);
+ x = _PyLong_New(a_size - shift_digits);
+ if (x == NULL)
+ goto error;
+ rem = v_rshift(x->ob_digit, a->ob_digit + shift_digits,
+ a_size - shift_digits, shift % PyLong_SHIFT);
+ /* set inexact if any of the bits shifted out is nonzero */
+ if (rem)
+ inexact = 1;
+ while (!inexact && shift_digits > 0)
+ if (a->ob_digit[--shift_digits])
+ inexact = 1;
+ }
+ long_normalize(x);
+ x_size = Py_SIZE(x);
+
+ /* x //= b. If the remainder is nonzero, set inexact. We own the only
+ reference to x, so it's safe to modify it in-place. */
+ if (b_size == 1) {
+ digit rem = inplace_divrem1(x->ob_digit, x->ob_digit, x_size,
+ b->ob_digit[0]);
+ long_normalize(x);
+ if (rem)
+ inexact = 1;
+ }
+ else {
+ PyLongObject *div, *rem;
+ div = x_divrem(x, b, &rem);
+ Py_DECREF(x);
+ x = div;
+ if (x == NULL)
+ goto error;
+ if (Py_SIZE(rem))
+ inexact = 1;
+ Py_DECREF(rem);
+ }
+ x_size = ABS(Py_SIZE(x));
+ assert(x_size > 0); /* result of division is never zero */
+ x_bits = (x_size-1)*PyLong_SHIFT+bits_in_digit(x->ob_digit[x_size-1]);
+
+ /* The number of extra bits that have to be rounded away. */
+ extra_bits = MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG;
+ assert(extra_bits == 2 || extra_bits == 3);
+
+ /* Round by directly modifying the low digit of x. */
+ mask = (digit)1 << (extra_bits - 1);
+ low = x->ob_digit[0] | inexact;
+ if (low & mask && low & (3*mask-1))
+ low += mask;
+ x->ob_digit[0] = low & ~(mask-1U);
+
+ /* Convert x to a double dx; the conversion is exact. */
+ dx = x->ob_digit[--x_size];
+ while (x_size > 0)
+ dx = dx * PyLong_BASE + x->ob_digit[--x_size];
+ Py_DECREF(x);
-overflow:
+ /* Check whether ldexp result will overflow a double. */
+ if (shift + x_bits >= DBL_MAX_EXP &&
+ (shift + x_bits > DBL_MAX_EXP || dx == ldexp(1.0, x_bits)))
+ goto overflow;
+ result = ldexp(dx, shift);
+
+ success:
+ return PyFloat_FromDouble(negate ? -result : result);
+
+ underflow_or_zero:
+ return PyFloat_FromDouble(negate ? -0.0 : 0.0);
+
+ overflow:
PyErr_SetString(PyExc_OverflowError,
- "int/int too large for a float");
+ "integer division result too large for a float");
+ error:
return NULL;
-
}
static PyObject *