diff options
Diffstat (limited to 'Modules/mathmodule.c')
| -rw-r--r-- | Modules/mathmodule.c | 50 | 
1 files changed, 31 insertions, 19 deletions
| diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index dbe49bd..5520ca9 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -322,10 +322,16 @@ FUNC1(tanh, tanh, 0,     sum([1e+308, 1e+308, 1e-308]) raises an OverflowError due to the     overflow of the first partial sum. -   Note 3: Aggressively optimizing compilers can potentially eliminate the -   residual values needed for accurate summation. For instance, the statements -   "hi = x + y; lo = y - (hi - x);" could be mis-transformed to -   "hi = x + y; lo = 0.0;" which defeats the computation of residuals. +   Note 3: The intermediate values lo, yr, and hi are declared volatile so +   aggressive compilers won't algebraically reduce lo to always be exactly 0.0. +   Also, the volatile declaration forces the values to be stored in memory as +   regular doubles instead of extended long precision (80-bit) values.  This +   prevents double rounding because any addition or subtraction of two doubles +   can be resolved exactly into double-sized hi and lo values.  As long as the  +   hi value gets forced into a double before yr and lo are computed, the extra +   bits in downstream extended precision operations (x87 for example) will be +   exactly zero and therefore can be losslessly stored back into a double, +   thereby preventing double rounding.     Note 4: A similar implementation is in Modules/cmathmodule.c.     Be sure to update both when making changes. 
@@ -402,7 +408,8 @@ math_sum(PyObject *self, PyObject *seq)  {  	PyObject *item, *iter, *sum = NULL;  	Py_ssize_t i, j, n = 0, m = NUM_PARTIALS; -	double x, y, hi, lo=0.0, ps[NUM_PARTIALS], *p = ps; +	double x, y, t, ps[NUM_PARTIALS], *p = ps; +	volatile double hi, yr, lo;  	iter = PyObject_GetIter(seq);  	if (iter == NULL) @@ -428,10 +435,12 @@ math_sum(PyObject *self, PyObject *seq)  		for (i = j = 0; j < n; j++) {       /* for y in partials */  			y = p[j]; +			if (fabs(x) < fabs(y)) { +					t = x; x = y; y = t; +			}  			hi = x + y; -			lo = fabs(x) < fabs(y) -			   ? x - (hi - y) -			   : y - (hi - x); +			yr = hi - x; +			lo = y - yr;  			if (lo != 0.0)  				p[i++] = lo;  			x = hi; @@ -451,38 +460,41 @@ math_sum(PyObject *self, PyObject *seq)  		}  	} +	hi = 0.0;  	if (n > 0) {  		hi = p[--n];  		if (Py_IS_FINITE(hi)) {  			/* sum_exact(ps, hi) from the top, stop when the sum becomes inexact. */  			while (n > 0) { -				x = p[--n]; -				y = hi; +				x = hi; +				y = p[--n]; +				assert(fabs(y) < fabs(x));  				hi = x + y; -				assert(fabs(x) < fabs(y)); -				lo = x - (hi - y); +				yr = hi - x; +				lo = y - yr;  				if (lo != 0.0)  					break;  			} -			/* Little dance to allow half-even rounding across multiple partials. -                           Needed so that sum([1e-16, 1, 1e16]) will round-up to two instead -                           of down to zero (the 1e-16 makes the 1 slightly closer to two). */ +			/* Make half-even rounding work across multiple partials.  Needed  +			   so that sum([1e-16, 1, 1e16]) will round up the last digit to  +			   two instead of down to zero (the 1e-16 makes the 1 slightly  +			   closer to two).  With a potential 1 ULP rounding error fixed up, +			   math.sum() can guarantee commutativity. 
*/  			if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) ||  			              (lo > 0.0 && p[n-1] > 0.0))) {  				y = lo * 2.0;  				x = hi + y; -				if (y == (x - hi)) +				yr = x - hi; +				if (y == yr)  					hi = x;  			}  		} -		else {  /* raise corresponding error */ +		else {  /* raise exception corresponding to a special value */  			errno = Py_IS_NAN(hi) ? EDOM : ERANGE;  			if (is_error(hi))  				goto _sum_error;  		}  	} -	else  /* default */ -		hi = 0.0;  	sum = PyFloat_FromDouble(hi);  _sum_error: | 
