5 files changed, 201 insertions, 211 deletions
diff --git a/Doc/library/math.rst b/Doc/library/math.rst
index c8090b5..df4ec1b 100644
--- a/Doc/library/math.rst
+++ b/Doc/library/math.rst
@@ -76,6 +76,42 @@ Number-theoretic and representation functions:
    apart" the internal representation of a float in a portable way.
 
 
+.. function:: fsum(iterable)
+
+   Return an accurate floating point sum of values in the iterable.  Avoids
+   loss of precision by tracking multiple intermediate partial sums.  The
+   algorithm's accuracy depends on IEEE-754 arithmetic guarantees and on the
+   rounding mode being half-even, which is the typical case.
+
+   .. note::
+
+      On platforms where arithmetic results are not correctly rounded,
+      :func:`fsum` may occasionally produce incorrect results; these
+      results should be no less accurate than those from the builtin
+      :func:`sum` function, but nevertheless may have arbitrarily
+      large relative error.
+
+      In particular, this affects some older Intel hardware (for
+      example Pentium and earlier x86 processors) that makes use of
+      'extended precision' floating-point registers with 64 bits of
+      precision instead of the 53 bits of precision provided by a C
+      double.  Arithmetic operations using these registers may be
+      doubly rounded (rounded first to 64 bits, and then rerounded to
+      53 bits), leading to incorrectly rounded results.  To test
+      whether your machine is one of those affected, try the following
+      at a Python prompt::
+
+         >>> 1e16 + 2.9999
+         10000000000000002.0
+
+      Machines subject to the double-rounding problem described above
+      are likely to print ``10000000000000004.0`` instead of
+      ``10000000000000002.0``.
+
+
+   .. versionadded:: 2.6
+
+
 .. function:: isinf(x)
 
    Checks if the float *x* is positive or negative infinite.
@@ -100,12 +136,6 @@ Number-theoretic and representation functions:
    Return the fractional and integer parts of *x*.  Both results carry the sign of
    *x*, and both are floats.
 
-.. function:: sum(iterable)
-
-   Return an accurate floating point sum of values in the iterable.  Avoids
-   loss of precision by tracking multiple intermediate partial sums.  The
-   algorithm's accuracy depends on IEEE-754 arithmetic guarantees and the
-   typical case where the rounding mode is half-even.
 
 .. function:: trunc(x)
 
diff --git a/Doc/whatsnew/2.6.rst b/Doc/whatsnew/2.6.rst
index a8d89cb..9959ecd 100644
--- a/Doc/whatsnew/2.6.rst
+++ b/Doc/whatsnew/2.6.rst
@@ -1537,7 +1537,7 @@ Here are all of the changes that Python 2.6 makes to the core Python language.
   * :func:`~math.factorial` computes the factorial of a number.
     (Contributed by Raymond Hettinger; :issue:`2138`.)
 
-  * :func:`~math.sum` adds up the stream of numbers from an iterable,
+  * :func:`~math.fsum` adds up the stream of numbers from an iterable,
     and is careful to avoid loss of precision by calculating partial sums.
     (Contributed by Jean Brouwers, Raymond Hettinger, and Mark Dickinson;
     :issue:`2819`.)
diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py
index 1eafeba..9e646ac 100644
--- a/Lib/test/test_math.py
+++ b/Lib/test/test_math.py
@@ -359,6 +359,102 @@ class MathTests(unittest.TestCase):
         self.assertEquals(math.frexp(NINF)[0], NINF)
         self.assert_(math.isnan(math.frexp(NAN)[0]))
 
+    def testFsum(self):
+        # math.fsum relies on exact rounding for correct operation.
+        # There's a known problem with IA32 floating-point that causes
+        # inexact rounding in some situations, and will cause the
+        # math.fsum tests below to fail; see issue #2937.  On non-IEEE
+        # 754 platforms, and on IEEE 754 platforms that exhibit the
+        # problem described in issue #2937, we simply skip the whole
+        # test.
+
+        if not float.__getformat__("double").startswith("IEEE"):
+            return
+
+        # on IEEE 754 compliant machines, both of the expressions
+        # below should round to 10000000000000002.0.
+        if 1e16+2.0 != 1e16+2.9999:
+            return
+
+        # Python version of math.fsum, for comparison.  Uses a
+        # different algorithm based on frexp, ldexp and integer
+        # arithmetic.
+        from sys import float_info
+        mant_dig = float_info.mant_dig
+        etiny = float_info.min_exp - mant_dig
+
+        def msum(iterable):
+            """Full precision summation.  Compute sum(iterable) without any
+            intermediate accumulation of error.  Based on the 'lsum' function
+            at http://code.activestate.com/recipes/393090/
+
+            """
+            tmant, texp = 0, 0
+            for x in iterable:
+                mant, exp = math.frexp(x)
+                mant, exp = int(math.ldexp(mant, mant_dig)), exp - mant_dig
+                if texp > exp:
+                    tmant <<= texp-exp
+                    texp = exp
+                else:
+                    mant <<= exp-texp
+                tmant += mant
+            # Round tmant * 2**texp to a float.  The original recipe
+            # used float(str(tmant)) * 2.0**texp for this, but that's
+            # a little unsafe because str -> float conversion can't be
+            # relied upon to do correct rounding on all platforms.
+            tail = max(len(bin(abs(tmant)))-2 - mant_dig, etiny - texp)
+            if tail > 0:
+                h = 1 << (tail-1)
+                tmant = tmant // (2*h) + bool(tmant & h and tmant & 3*h-1)
+                texp += tail
+            return math.ldexp(tmant, texp)
+
+        test_values = [
+            ([], 0.0),
+            ([0.0], 0.0),
+            ([1e100, 1.0, -1e100, 1e-100, 1e50, -1.0, -1e50], 1e-100),
+            ([2.0**53, -0.5, -2.0**-54], 2.0**53-1.0),
+            ([2.0**53, 1.0, 2.0**-100], 2.0**53+2.0),
+            ([2.0**53+10.0, 1.0, 2.0**-100], 2.0**53+12.0),
+            ([2.0**53-4.0, 0.5, 2.0**-54], 2.0**53-3.0),
+            ([1./n for n in range(1, 1001)],
+             float.fromhex('0x1.df11f45f4e61ap+2')),
+            ([(-1.)**n/n for n in range(1, 1001)],
+             float.fromhex('-0x1.62a2af1bd3624p-1')),
+            ([1.7**(i+1)-1.7**i for i in range(1000)] + [-1.7**1000], -1.0),
+            ([1e16, 1., 1e-16], 10000000000000002.0),
+            ([1e16-2., 1.-2.**-53, -(1e16-2.), -(1.-2.**-53)], 0.0),
+            # exercise code for resizing partials array
+            ([2.**n - 2.**(n+50) + 2.**(n+52) for n in range(-1074, 972, 2)] +
+             [-2.**1022],
+             float.fromhex('0x1.5555555555555p+970')),
+            ]
+
+        for i, (vals, expected) in enumerate(test_values):
+            try:
+                actual = math.fsum(vals)
+            except OverflowError:
+                self.fail("test %d failed: got OverflowError, expected %r "
+                          "for math.fsum(%.100r)" % (i, expected, vals))
+            except ValueError:
+                self.fail("test %d failed: got ValueError, expected %r "
+                          "for math.fsum(%.100r)" % (i, expected, vals))
+            self.assertEqual(actual, expected)
+
+        from random import random, gauss, shuffle
+        for j in range(1000):
+            vals = [7, 1e100, -7, -1e100, -9e-20, 8e-20] * 10
+            s = 0
+            for i in range(200):
+                v = gauss(0, random()) ** 7 - s
+                s += v
+                vals.append(v)
+            shuffle(vals)
+
+            s = msum(vals)
+            self.assertEqual(msum(vals), math.fsum(vals))
+
     def testHypot(self):
         self.assertRaises(TypeError, math.hypot)
         self.ftest('hypot(0,0)', math.hypot(0,0), 0)
@@ -641,158 +737,6 @@ class MathTests(unittest.TestCase):
         self.assertRaises(ValueError, math.sqrt, NINF)
         self.assert_(math.isnan(math.sqrt(NAN)))
 
-    def testSum(self):
-        # math.sum relies on exact rounding for correct operation.
-        # There's a known problem with IA32 floating-point that causes
-        # inexact rounding in some situations, and will cause the
-        # math.sum tests below to fail; see issue #2937.  On non IEEE
-        # 754 platforms, and on IEEE 754 platforms that exhibit the
-        # problem described in issue #2937, we simply skip the whole
-        # test.
-
-        if not float.__getformat__("double").startswith("IEEE"):
-            return
-
-        # on IEEE 754 compliant machines, both of the expressions
-        # below should round to 10000000000000002.0.
-        if 1e16+2.999 != 1e16+2.9999:
-            return
-
-        # Python version of math.sum algorithm, for comparison
-        def msum(iterable):
-            """Full precision sum of values in iterable.  Returns the value of
-            the sum, rounded to the nearest representable floating-point number
-            using the round-half-to-even rule.
-
-            """
-            # Stage 1: accumulate partials
-            partials = []
-            for x in iterable:
-                i = 0
-                for y in partials:
-                    if abs(x) < abs(y):
-                        x, y = y, x
-                    hi = x + y
-                    lo = y - (hi - x)
-                    if lo:
-                        partials[i] = lo
-                        i += 1
-                    x = hi
-                partials[i:] = [x] if x else []
-
-            # Stage 2: sum partials
-            if not partials:
-                return 0.0
-
-            # sum from the top, stopping as soon as the sum is inexact.
-            total = partials.pop()
-            while partials:
-                x = partials.pop()
-                old_total, total = total, total + x
-                error = x - (total - old_total)
-                if error != 0.0:
-                    # adjust for correct rounding if necessary
-                    if partials and (partials[-1] > 0.0) == (error > 0.0) and \
-                            total + 2*error - total == 2*error:
-                        total += 2*error
-                    break
-            return total
-
-        from sys import float_info
-        maxfloat = float_info.max
-        twopow = 2.**(float_info.max_exp - 1)
-
-        test_values = [
-            ([], 0.0),
-            ([0.0], 0.0),
-            ([1e100, 1.0, -1e100, 1e-100, 1e50, -1.0, -1e50], 1e-100),
-            ([1e308, 1e308, -1e308], OverflowError),
-            ([-1e308, 1e308, 1e308], 1e308),
-            ([1e308, -1e308, 1e308], 1e308),
-            ([2.0**1023, 2.0**1023, -2.0**1000], OverflowError),
-            ([twopow, twopow, twopow, twopow, -twopow, -twopow, -twopow],
-             OverflowError),
-            ([2.0**53, -0.5, -2.0**-54], 2.0**53-1.0),
-            ([2.0**53, 1.0, 2.0**-100], 2.0**53+2.0),
-            ([2.0**53+10.0, 1.0, 2.0**-100], 2.0**53+12.0),
-
-            ([2.0**53-4.0, 0.5, 2.0**-54], 2.0**53-3.0),
-            ([2.0**1023-2.0**970, -1.0, 2.0**1023], OverflowError),
-            ([maxfloat, maxfloat*2.**-54], maxfloat),
-            ([maxfloat, maxfloat*2.**-53], OverflowError),
-            ([1./n for n in range(1, 1001)], 7.4854708605503451),
-            ([(-1.)**n/n for n in range(1, 1001)], -0.69264743055982025),
-            ([1.7**(i+1)-1.7**i for i in range(1000)] + [-1.7**1000], -1.0),
-            ([INF, -INF, NAN], ValueError),
-            ([NAN, INF, -INF], ValueError),
-            ([INF, NAN, INF], ValueError),
-
-            ([INF, INF], OverflowError),
-            ([INF, -INF], ValueError),
-            ([-INF, 1e308, 1e308, -INF], OverflowError),
-            ([2.0**1023-2.0**970, 0.0, 2.0**1023], OverflowError),
-            ([2.0**1023-2.0**970, 1.0, 2.0**1023], OverflowError),
-            ([2.0**1023, 2.0**1023], OverflowError),
-            ([2.0**1023, 2.0**1023, -1.0], OverflowError),
-            ([twopow, twopow, twopow, twopow, -twopow, -twopow],
-             OverflowError),
-            ([twopow, twopow, twopow, twopow, -twopow, twopow], OverflowError),
-            ([-twopow, -twopow, -twopow, -twopow], OverflowError),
-
-            ([2.**1023, 2.**1023, -2.**971], OverflowError),
-            ([2.**1023, 2.**1023, -2.**970], OverflowError),
-            ([-2.**970,  2.**1023,  2.**1023, -2.**-1074], OverflowError),
-            ([ 2.**1023, 2.**1023, -2.**970,   2.**-1074], OverflowError),
-            ([-2.**1023,  2.**971, -2.**1023], -maxfloat),
-            ([-2.**1023, -2.**1023, 2.**970],   OverflowError),
-            ([-2.**1023,  -2.**1023,  2.**970,  2.**-1074], OverflowError),
-            ([-2.**-1074, -2.**1023, -2.**1023, 2.**970], OverflowError),
-            ([2.**930, -2.**980, 2.**1023, 2.**1023, twopow, -twopow],
-             OverflowError),
-            ([2.**1023, 2.**1023, -1e307], OverflowError),
-            ([1e16, 1., 1e-16], 10000000000000002.0),
-            ([1e16-2., 1.-2.**-53, -(1e16-2.), -(1.-2.**-53)], 0.0),
-        ]
-
-        for i, (vals, s) in enumerate(test_values):
-            if isinstance(s, type) and issubclass(s, Exception):
-                try:
-                    m = math.sum(vals)
-                except s:
-                    pass
-                else:
-                    self.fail("test %d failed: got %r, expected %r "
-                              "for math.sum(%.100r)" %
-                              (i, m, s.__name__, vals))
-            else:
-                try:
-                    self.assertEqual(math.sum(vals), s)
-                except OverflowError:
-                    self.fail("test %d failed: got OverflowError, expected %r "
-                              "for math.sum(%.100r)" % (i, s, vals))
-                except ValueError:
-                    self.fail("test %d failed: got ValueError, expected %r "
-                              "for math.sum(%.100r)" % (i, s, vals))
-
-                # compare with output of msum above, but only when
-                # result isn't an IEEE special or an exception
-                if not math.isinf(s) and not math.isnan(s):
-                    self.assertEqual(msum(vals), s)
-
-        from random import random, gauss, shuffle
-        for j in range(1000):
-            vals = [7, 1e100, -7, -1e100, -9e-20, 8e-20] * 10
-            s = 0
-            for i in range(200):
-                v = gauss(0, random()) ** 7 - s
-                s += v
-                vals.append(v)
-            shuffle(vals)
-
-            s = msum(vals)
-            self.assertEqual(msum(vals), math.sum(vals))
-
-
     def testTan(self):
         self.assertRaises(TypeError, math.tan)
         self.ftest('tan(0)', math.tan(0), 0)
diff --git a/Lib/test/test_random.py b/Lib/test/test_random.py
index da62a4f..14e9fca 100644
--- a/Lib/test/test_random.py
+++ b/Lib/test/test_random.py
@@ -5,7 +5,7 @@ import random
 import time
 import pickle
 import warnings
-from math import log, exp, sqrt, pi, sum as msum
+from math import log, exp, sqrt, pi, fsum as msum
 from test import support
 
 class TestBasicOps(unittest.TestCase):
diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c
index a196834..d23d2ff 100644
--- a/Modules/mathmodule.c
+++ b/Modules/mathmodule.c
@@ -396,7 +396,7 @@ FUNC1(tanh, tanh, 0,
    Note 4: A similar implementation is in Modules/cmathmodule.c.
    Be sure to update both when making changes.
 
-   Note 5: The signature of math.sum() differs from __builtin__.sum()
+   Note 5: The signature of math.fsum() differs from __builtin__.sum()
    because the start argument doesn't make sense in the context of
    accurate summation.  Since the partials table is collapsed before
    returning a result, sum(seq2, start=sum(seq1)) may not equal the
@@ -407,7 +407,7 @@ FUNC1(tanh, tanh, 0,
 
 /* Extend the partials array p[] by doubling its size. */
 static int                          /* non-zero on error */
-_sum_realloc(double **p_ptr, Py_ssize_t  n,
+_fsum_realloc(double **p_ptr, Py_ssize_t  n,
              double  *ps,    Py_ssize_t *m_ptr)
 {
 	void *v = NULL;
@@ -425,7 +425,7 @@ _sum_realloc(double **p_ptr, Py_ssize_t  n,
 			v = PyMem_Realloc(p, sizeof(double) * m);
 	}
 	if (v == NULL) {        /* size overflow or no memory */
-		PyErr_SetString(PyExc_MemoryError, "math sum partials");
+		PyErr_SetString(PyExc_MemoryError, "math.fsum partials");
 		return 1;
 	}
 	*p_ptr = (double*) v;
@@ -464,18 +464,19 @@ _sum_realloc(double **p_ptr, Py_ssize_t  n,
 */
 
 static PyObject*
-math_sum(PyObject *self, PyObject *seq)
+math_fsum(PyObject *self, PyObject *seq)
 {
 	PyObject *item, *iter, *sum = NULL;
 	Py_ssize_t i, j, n = 0, m = NUM_PARTIALS;
 	double x, y, t, ps[NUM_PARTIALS], *p = ps;
+	double xsave, special_sum = 0.0, inf_sum = 0.0;
 	volatile double hi, yr, lo;
 
 	iter = PyObject_GetIter(seq);
 	if (iter == NULL)
 		return NULL;
 
-	PyFPE_START_PROTECT("sum", Py_DECREF(iter); return NULL)
+	PyFPE_START_PROTECT("fsum", Py_DECREF(iter); return NULL)
 
 	for(;;) {           /* for x in iterable */
 		assert(0 <= n && n <= m);
@@ -485,18 +486,19 @@ math_sum(PyObject *self, PyObject *seq)
 		item = PyIter_Next(iter);
 		if (item == NULL) {
 			if (PyErr_Occurred())
-				goto _sum_error;
+				goto _fsum_error;
 			break;
 		}
 		x = PyFloat_AsDouble(item);
 		Py_DECREF(item);
 		if (PyErr_Occurred())
-			goto _sum_error;
+			goto _fsum_error;
 
+		xsave = x;
 		for (i = j = 0; j < n; j++) {       /* for y in partials */
 			y = p[j];
 			if (fabs(x) < fabs(y)) {
-					t = x; x = y; y = t;
+				t = x; x = y; y = t;
 			}
 			hi = x + y;
 			yr = hi - x;
@@ -505,59 +507,73 @@ math_sum(PyObject *self, PyObject *seq)
 				p[i++] = lo;
 			x = hi;
 		}
-		
-		n = i;                              /* ps[i:] = [x] */                   
+
+		n = i;                              /* ps[i:] = [x] */
 		if (x != 0.0) {
-			/* If non-finite, reset partials, effectively
-			   adding subsequent items without roundoff
-			   and yielding correct non-finite results,
-			   provided IEEE 754 rules are observed */
-			if (! Py_IS_FINITE(x))
+			if (! Py_IS_FINITE(x)) {
+				/* a nonfinite x could arise either as
+				   a result of intermediate overflow, or
+				   as a result of a nan or inf in the
+				   summands */
+				if (Py_IS_FINITE(xsave)) {
+					PyErr_SetString(PyExc_OverflowError,
+					      "intermediate overflow in fsum");
+					goto _fsum_error;
+				}
+				if (Py_IS_INFINITY(xsave))
+					inf_sum += xsave;
+				special_sum += xsave;
+				/* reset partials */
 				n = 0;
-			else if (n >= m && _sum_realloc(&p, n, ps, &m))
-				goto _sum_error;
-			p[n++] = x;
+			}
+			else if (n >= m && _fsum_realloc(&p, n, ps, &m))
+				goto _fsum_error;
+			else
+				p[n++] = x;
 		}
 	}
 
+	if (special_sum != 0.0) {
+		if (Py_IS_NAN(inf_sum))
+			PyErr_SetString(PyExc_ValueError,
+					"-inf + inf in fsum");
+		else
+			sum = PyFloat_FromDouble(special_sum);
+		goto _fsum_error;
+	}
+
 	hi = 0.0;
 	if (n > 0) {
 		hi = p[--n];
-		if (Py_IS_FINITE(hi)) {
-			/* sum_exact(ps, hi) from the top, stop when the sum becomes inexact. */
-			while (n > 0) {
-				x = hi;
-				y = p[--n];
-				assert(fabs(y) < fabs(x));
-				hi = x + y;
-				yr = hi - x;
-				lo = y - yr;
-				if (lo != 0.0)
-					break;
-			}
-			/* Make half-even rounding work across multiple partials.  Needed 
-			   so that sum([1e-16, 1, 1e16]) will round-up the last digit to 
-			   two instead of down to zero (the 1e-16 makes the 1 slightly 
-			   closer to two).  With a potential 1 ULP rounding error fixed-up,
-			   math.sum() can guarantee commutativity. */
-			if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) ||
-			              (lo > 0.0 && p[n-1] > 0.0))) {
-				y = lo * 2.0;
-				x = hi + y;
-				yr = x - hi;
-				if (y == yr)
-					hi = x;
-			}
+		/* sum_exact(ps, hi) from the top, stop when the sum becomes
+		   inexact. */
+		while (n > 0) {
+			x = hi;
+			y = p[--n];
+			assert(fabs(y) < fabs(x));
+			hi = x + y;
+			yr = hi - x;
+			lo = y - yr;
+			if (lo != 0.0)
+				break;
 		}
-		else {  /* raise exception corresponding to a special value */
-			errno = Py_IS_NAN(hi) ? EDOM : ERANGE;
-			if (is_error(hi))
-				goto _sum_error;
+		/* Make half-even rounding work across multiple partials.
+		   Needed so that fsum([1e-16, 1, 1e16]) will round up the last
+		   digit to two instead of down to zero (the 1e-16 makes the 1
+		   slightly closer to two).  With a potential 1 ULP rounding
+		   error fixed-up, math.fsum() can guarantee commutativity. */
+		if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) ||
+			      (lo > 0.0 && p[n-1] > 0.0))) {
+			y = lo * 2.0;
+			x = hi + y;
+			yr = x - hi;
+			if (y == yr)
+				hi = x;
 		}
 	}
 	sum = PyFloat_FromDouble(hi);
 
-_sum_error:
+_fsum_error:
 	PyFPE_END_PROTECT(hi)
 	Py_DECREF(iter);
 	if (p != ps)
@@ -567,7 +583,7 @@ _sum_error:
 
 #undef NUM_PARTIALS
 
-PyDoc_STRVAR(math_sum_doc,
+PyDoc_STRVAR(math_fsum_doc,
 "sum(iterable)\n\n\
 Return an accurate floating point sum of values in the iterable.\n\
 Assumes IEEE-754 floating point arithmetic.");
@@ -1078,6 +1094,7 @@ static PyMethodDef math_methods[] = {
 	{"floor",	math_floor,	METH_O,		math_floor_doc},
 	{"fmod",	math_fmod,	METH_VARARGS,	math_fmod_doc},
 	{"frexp",	math_frexp,	METH_O,		math_frexp_doc},
+	{"fsum",	math_fsum,	METH_O,		math_fsum_doc},
 	{"hypot",	math_hypot,	METH_VARARGS,	math_hypot_doc},
 	{"isinf",	math_isinf,	METH_O,		math_isinf_doc},
 	{"isnan",	math_isnan,	METH_O,		math_isnan_doc},
@@ -1091,10 +1108,9 @@ static PyMethodDef math_methods[] = {
 	{"sin",		math_sin,	METH_O,		math_sin_doc},
 	{"sinh",	math_sinh,	METH_O,		math_sinh_doc},
 	{"sqrt",	math_sqrt,	METH_O,		math_sqrt_doc},
-	{"sum",		math_sum,	METH_O,		math_sum_doc},
 	{"tan",		math_tan,	METH_O,		math_tan_doc},
 	{"tanh",	math_tanh,	METH_O,		math_tanh_doc},
- 	{"trunc",	math_trunc,	METH_O,		math_trunc_doc},
+	{"trunc",	math_trunc,	METH_O,		math_trunc_doc},
 	{NULL,		NULL}		/* sentinel */
 };