-rw-r--r-- | Modules/mathmodule.c | 50 |
1 file changed, 31 insertions, 19 deletions
diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c
index dbe49bd..5520ca9 100644
--- a/Modules/mathmodule.c
+++ b/Modules/mathmodule.c
@@ -322,10 +322,16 @@ FUNC1(tanh, tanh, 0,
    sum([1e+308, 1e+308, 1e-308]) raises an OverflowError due to the
    overflow of the first partial sum.
 
-   Note 3: Aggressively optimizing compilers can potentially eliminate the
-   residual values needed for accurate summation. For instance, the statements
-   "hi = x + y; lo = y - (hi - x);" could be mis-transformed to
-   "hi = x + y; lo = 0.0;" which defeats the computation of residuals.
+   Note 3: The intermediate values lo, yr, and hi are declared volatile so
+   aggressive compilers won't algebraically reduce lo to always be exactly 0.0.
+   Also, the volatile declaration forces the values to be stored in memory as
+   regular doubles instead of extended long precision (80-bit) values.  This
+   prevents double rounding because any addition or subtraction of two doubles
+   can be resolved exactly into double-sized hi and lo values.  As long as the
+   hi value gets forced into a double before yr and lo are computed, the extra
+   bits in downstream extended precision operations (x87 for example) will be
+   exactly zero and therefore can be losslessly stored back into a double,
+   thereby preventing double rounding.
 
    Note 4: A similar implementation is in Modules/cmathmodule.c. Be sure to
    update both when making changes.
@@ -402,7 +408,8 @@ math_sum(PyObject *self, PyObject *seq)
 {
     PyObject *item, *iter, *sum = NULL;
     Py_ssize_t i, j, n = 0, m = NUM_PARTIALS;
-    double x, y, hi, lo=0.0, ps[NUM_PARTIALS], *p = ps;
+    double x, y, t, ps[NUM_PARTIALS], *p = ps;
+    volatile double hi, yr, lo;
 
     iter = PyObject_GetIter(seq);
     if (iter == NULL)
@@ -428,10 +435,12 @@ math_sum(PyObject *self, PyObject *seq)
 
         for (i = j = 0; j < n; j++) {       /* for y in partials */
             y = p[j];
+            if (fabs(x) < fabs(y)) {
+                t = x; x = y; y = t;
+            }
             hi = x + y;
-            lo = fabs(x) < fabs(y)
-                 ? x - (hi - y)
-                 : y - (hi - x);
+            yr = hi - x;
+            lo = y - yr;
             if (lo != 0.0)
                 p[i++] = lo;
             x = hi;
@@ -451,38 +460,41 @@ math_sum(PyObject *self, PyObject *seq)
         }
     }
+    hi = 0.0;
     if (n > 0) {
         hi = p[--n];
         if (Py_IS_FINITE(hi)) {
             /* sum_exact(ps, hi) from the top, stop when the sum becomes
                inexact. */
             while (n > 0) {
-                x = p[--n];
-                y = hi;
+                x = hi;
+                y = p[--n];
+                assert(fabs(y) < fabs(x));
                 hi = x + y;
-                assert(fabs(x) < fabs(y));
-                lo = x - (hi - y);
+                yr = hi - x;
+                lo = y - yr;
                 if (lo != 0.0)
                     break;
             }
-            /* Little dance to allow half-even rounding across multiple partials.
-               Needed so that sum([1e-16, 1, 1e16]) will round-up to two instead
-               of down to zero (the 1e-16 makes the 1 slightly closer to two). */
+            /* Make half-even rounding work across multiple partials.  Needed
+               so that sum([1e-16, 1, 1e16]) will round up the last digit to
+               two instead of down to zero (the 1e-16 makes the 1 slightly
+               closer to two).  With a potential 1 ULP rounding error fixed up,
+               math.sum() can guarantee commutativity. */
             if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) ||
                           (lo > 0.0 && p[n-1] > 0.0))) {
                 y = lo * 2.0;
                 x = hi + y;
-                if (y == (x - hi))
+                yr = x - hi;
+                if (y == yr)
                     hi = x;
             }
         }
-        else { /* raise corresponding error */
+        else { /* raise exception corresponding to a special value */
             errno = Py_IS_NAN(hi) ? EDOM : ERANGE;
             if (is_error(hi))
                 goto _sum_error;
         }
     }
-    else    /* default */
-        hi = 0.0;
     sum = PyFloat_FromDouble(hi);
 
 _sum_error:
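
The core of the change is the exact-addition step in the inner loop: once the operands are ordered so that |x| >= |y|, the sequence hi = x + y; yr = hi - x; lo = y - yr recovers the rounding error of the addition exactly. The stand-alone sketch below is my own illustration, not code from the patch; the helper name two_sum and the demo values are invented. It shows the step in isolation, including the volatile qualifiers that Note 3 motivates.

#include <math.h>
#include <stdio.h>

/* Illustrative helper (not part of the patch): split x + y into a rounded
   sum hi and a residual lo so that the pair (hi, lo) represents the real
   sum x + y with no loss.  The volatile qualifiers mirror Note 3 in the
   patch: they stop aggressive compilers from simplifying lo to 0.0 and
   force hi to be stored as a 64-bit double before yr and lo are computed,
   avoiding double rounding on x87-style extended-precision hardware. */
static void
two_sum(double x, double y, double *hi_out, double *lo_out)
{
    volatile double hi, yr, lo;
    double t;

    if (fabs(x) < fabs(y)) {            /* ensure |x| >= |y| */
        t = x; x = y; y = t;
    }
    hi = x + y;                         /* rounded sum */
    yr = hi - x;                        /* portion of y that reached hi */
    lo = y - yr;                        /* portion rounded away */
    *hi_out = hi;
    *lo_out = lo;
}

int
main(void)
{
    double hi, lo;

    /* 1e16 + 3 is not representable; it rounds to 10000000000000004, and
       lo captures the rounding error (-1) exactly, so the pair (hi, lo)
       still represents 1e16 + 3 without loss. */
    two_sum(1e16, 3.0, &hi, &lo);
    printf("hi = %.17g, lo = %.17g\n", hi, lo);
    return 0;
}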
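To see how the reordered inner loop and the half-even fix-up at the end work together, here is a self-contained sketch of the same partials algorithm over a plain C array. It is only an illustration under simplifying assumptions: the name msum and the fixed-size partials array are mine, and the special-value handling, array growth, and Python object traffic of the real math_sum() are omitted.

#include <math.h>
#include <stdio.h>

#define NUM_PARTIALS 32     /* fixed size for this sketch; the real code
                               starts at 32 and grows the array on demand */

/* Illustrative re-implementation (not the patch itself) of the partials
   algorithm for a plain array of doubles. */
static double
msum(const double *v, size_t len)
{
    double x, y, t, ps[NUM_PARTIALS], *p = ps;
    volatile double hi, yr, lo;
    size_t i, j, k, n = 0;

    for (k = 0; k < len; k++) {
        x = v[k];
        for (i = j = 0; j < n; j++) {       /* for y in partials */
            y = p[j];
            if (fabs(x) < fabs(y)) {
                t = x; x = y; y = t;
            }
            hi = x + y;
            yr = hi - x;
            lo = y - yr;                    /* exact residual of x + y */
            if (lo != 0.0)
                p[i++] = lo;
            x = hi;
        }
        p[i] = x;                           /* assumes i < NUM_PARTIALS */
        n = i + 1;
    }

    hi = 0.0;
    lo = 0.0;
    if (n > 0) {
        hi = p[--n];
        /* Sum the partials from the top, stopping once the running sum
           goes inexact (lo != 0.0). */
        while (n > 0) {
            x = hi;
            y = p[--n];
            hi = x + y;
            yr = hi - x;
            lo = y - yr;
            if (lo != 0.0)
                break;
        }
        /* Half-even fix-up across the remaining partials: if the residual
           and the next partial pull in the same direction, nudge hi by
           at most one ulp, but only when that nudge is itself exact. */
        if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) ||
                      (lo > 0.0 && p[n-1] > 0.0))) {
            y = lo * 2.0;
            x = hi + y;
            yr = x - hi;
            if (y == yr)
                hi = x;
        }
    }
    return hi;
}

int
main(void)
{
    double v[] = {1e-16, 1.0, 1e16};

    /* The example from the patch comment: the 1e-16 pushes the 1 past the
       halfway point, so the result rounds up to 10000000000000002 instead
       of down to 10000000000000000. */
    printf("%.17g\n", msum(v, 3));
    return 0;
}

With round-to-nearest doubles this prints 10000000000000002, matching the behaviour the patch comment describes for sum([1e-16, 1, 1e16]).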