From 2b7411df5ca0b6ef714377730fd4d94693f26abd Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Mon, 26 May 2008 17:36:47 +0000 Subject: Merged revisions 63542-63544,63546,63553,63563-63564,63567,63569,63576 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r63542 | mark.dickinson | 2008-05-22 20:35:30 -0500 (Thu, 22 May 2008) | 5 lines Issue #2819: Add math.sum, a function that sums a sequence of floats efficiently but with no intermediate loss of precision. Based on Raymond Hettinger's ASPN recipe. Thanks Jean Brouwers for the patch. ........ r63543 | mark.dickinson | 2008-05-22 21:36:48 -0500 (Thu, 22 May 2008) | 2 lines Add tests for math.sum (Issue #2819) ........ r63544 | mark.dickinson | 2008-05-22 22:30:01 -0500 (Thu, 22 May 2008) | 2 lines Better error reporting in test_math.py ........ r63546 | raymond.hettinger | 2008-05-22 23:32:43 -0500 (Thu, 22 May 2008) | 1 line Tweak the comments and formatting. ........ r63553 | mark.dickinson | 2008-05-23 07:07:36 -0500 (Fri, 23 May 2008) | 3 lines Skip math.sum tests on non IEEE 754 platforms, and on IEEE 754 platforms that exhibit the problem described in issue #2937. ........ r63563 | martin.v.loewis | 2008-05-23 10:18:28 -0500 (Fri, 23 May 2008) | 3 lines Issue #1390: Raise ValueError in toxml when an invalid comment would otherwise be produced. ........ r63564 | raymond.hettinger | 2008-05-23 12:21:44 -0500 (Fri, 23 May 2008) | 1 line Issue 2909: show how to name unpacked fields. ........ r63567 | raymond.hettinger | 2008-05-23 12:34:34 -0500 (Fri, 23 May 2008) | 1 line Fix typo ........ r63569 | martin.v.loewis | 2008-05-23 14:33:13 -0500 (Fri, 23 May 2008) | 3 lines Mention that the leaking of variables from list comprehensions is fixed in 3.0. ........ r63576 | martin.v.loewis | 2008-05-24 04:36:45 -0500 (Sat, 24 May 2008) | 3 lines Don't try to get the window size if it was never set before. Fixes the test failure on Solaris. ........ --- Doc/library/collections.rst | 2 +- Doc/library/struct.rst | 10 +++ Lib/test/test_ioctl.py | 3 - Lib/test/test_math.py | 156 +++++++++++++++++++++++++++++++++++ Lib/test/test_minidom.py | 5 ++ Lib/xml/dom/minidom.py | 2 + Modules/mathmodule.c | 194 ++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 368 insertions(+), 4 deletions(-) diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index a5cffdd..5035ac9 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -113,7 +113,7 @@ Notes on using :class:`Set` and :class:`MutableSet` as a mixin: Since some set operations create new sets, the default mixin methods need a way to create new instances from an iterable. The class constructor is assumed to have a signature in the form ``ClassName(iterable)``. - That assumption is factored-out to a single internal classmethod called + That assumption is factored-out to an internal classmethod called :meth:`_from_iterable` which calls ``cls(iterable)`` to produce a new set. If the :class:`Set` mixin is being used in a class with a different constructor signature, you will need to override :meth:`from_iterable` diff --git a/Doc/library/struct.rst b/Doc/library/struct.rst index a1832a1..282483d 100644 --- a/Doc/library/struct.rst +++ b/Doc/library/struct.rst @@ -216,6 +216,16 @@ end, assuming longs are aligned on 4-byte boundaries. This only works when native size and alignment are in effect; standard size and alignment does not enforce any alignment. +Unpacked fields can be named by assigning them to variables or by wrapping +the result in a named tuple:: + + >>> record = 'raymond \x32\x12\x08\x01\x08' + >>> name, serialnum, school, gradelevel = unpack('<10sHHb', record) + + >>> from collections import namedtuple + >>> Student = namedtuple('Student', 'name serialnum school gradelevel') + >>> Student._make(unpack('<10sHHb', s)) + Student(name='raymond ', serialnum=4658, school=264, gradelevel=8) .. seealso:: diff --git a/Lib/test/test_ioctl.py b/Lib/test/test_ioctl.py index e9c1d0f..537579a 100644 --- a/Lib/test/test_ioctl.py +++ b/Lib/test/test_ioctl.py @@ -52,13 +52,10 @@ class IoctlTests(unittest.TestCase): set_winsz_opcode_maybe_neg, = struct.unpack("i", struct.pack("I", termios.TIOCSWINSZ)) - # We're just testing that these calls do not raise exceptions. - saved_winsz = fcntl.ioctl(mfd, termios.TIOCGWINSZ, "\0"*8) our_winsz = struct.pack("HHHH",80,25,0,0) # test both with a positive and potentially negative ioctl code new_winsz = fcntl.ioctl(mfd, set_winsz_opcode_pos, our_winsz) new_winsz = fcntl.ioctl(mfd, set_winsz_opcode_maybe_neg, our_winsz) - fcntl.ioctl(mfd, set_winsz_opcode_maybe_neg, saved_winsz) finally: os.close(mfd) os.close(sfd) diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index f24bdb3..ae29cda 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -626,6 +626,158 @@ class MathTests(unittest.TestCase): self.assertRaises(ValueError, math.sqrt, NINF) self.assert_(math.isnan(math.sqrt(NAN))) + def testSum(self): + # math.sum relies on exact rounding for correct operation. + # There's a known problem with IA32 floating-point that causes + # inexact rounding in some situations, and will cause the + # math.sum tests below to fail; see issue #2937. On non IEEE + # 754 platforms, and on IEEE 754 platforms that exhibit the + # problem described in issue #2937, we simply skip the whole + # test. + + if not float.__getformat__("double").startswith("IEEE"): + return + + # on IEEE 754 compliant machines, both of the expressions + # below should round to 10000000000000002.0. + if 1e16+2.999 != 1e16+2.9999: + return + + # Python version of math.sum algorithm, for comparison + def msum(iterable): + """Full precision sum of values in iterable. Returns the value of + the sum, rounded to the nearest representable floating-point number + using the round-half-to-even rule. + + """ + # Stage 1: accumulate partials + partials = [] + for x in iterable: + i = 0 + for y in partials: + if abs(x) < abs(y): + x, y = y, x + hi = x + y + lo = y - (hi - x) + if lo: + partials[i] = lo + i += 1 + x = hi + partials[i:] = [x] if x else [] + + # Stage 2: sum partials + if not partials: + return 0.0 + + # sum from the top, stopping as soon as the sum is inexact. + total = partials.pop() + while partials: + x = partials.pop() + old_total, total = total, total + x + error = x - (total - old_total) + if error != 0.0: + # adjust for correct rounding if necessary + if partials and (partials[-1] > 0.0) == (error > 0.0) and \ + total + 2*error - total == 2*error: + total += 2*error + break + return total + + from sys import float_info + maxfloat = float_info.max + twopow = 2.**(float_info.max_exp - 1) + + test_values = [ + ([], 0.0), + ([0.0], 0.0), + ([1e100, 1.0, -1e100, 1e-100, 1e50, -1.0, -1e50], 1e-100), + ([1e308, 1e308, -1e308], OverflowError), + ([-1e308, 1e308, 1e308], 1e308), + ([1e308, -1e308, 1e308], 1e308), + ([2.0**1023, 2.0**1023, -2.0**1000], OverflowError), + ([twopow, twopow, twopow, twopow, -twopow, -twopow, -twopow], + OverflowError), + ([2.0**53, -0.5, -2.0**-54], 2.0**53-1.0), + ([2.0**53, 1.0, 2.0**-100], 2.0**53+2.0), + ([2.0**53+10.0, 1.0, 2.0**-100], 2.0**53+12.0), + + ([2.0**53-4.0, 0.5, 2.0**-54], 2.0**53-3.0), + ([2.0**1023-2.0**970, -1.0, 2.0**1023], OverflowError), + ([maxfloat, maxfloat*2.**-54], maxfloat), + ([maxfloat, maxfloat*2.**-53], OverflowError), + ([1./n for n in range(1, 1001)], 7.4854708605503451), + ([(-1.)**n/n for n in range(1, 1001)], -0.69264743055982025), + ([1.7**(i+1)-1.7**i for i in range(1000)] + [-1.7**1000], -1.0), + ([INF, -INF, NAN], ValueError), + ([NAN, INF, -INF], ValueError), + ([INF, NAN, INF], ValueError), + + ([INF, INF], OverflowError), + ([INF, -INF], ValueError), + ([-INF, 1e308, 1e308, -INF], OverflowError), + ([2.0**1023-2.0**970, 0.0, 2.0**1023], OverflowError), + ([2.0**1023-2.0**970, 1.0, 2.0**1023], OverflowError), + ([2.0**1023, 2.0**1023], OverflowError), + ([2.0**1023, 2.0**1023, -1.0], OverflowError), + ([twopow, twopow, twopow, twopow, -twopow, -twopow], + OverflowError), + ([twopow, twopow, twopow, twopow, -twopow, twopow], OverflowError), + ([-twopow, -twopow, -twopow, -twopow], OverflowError), + + ([2.**1023, 2.**1023, -2.**971], OverflowError), + ([2.**1023, 2.**1023, -2.**970], OverflowError), + ([-2.**970, 2.**1023, 2.**1023, -2.**-1074], OverflowError), + ([ 2.**1023, 2.**1023, -2.**970, 2.**-1074], OverflowError), + ([-2.**1023, 2.**971, -2.**1023], -maxfloat), + ([-2.**1023, -2.**1023, 2.**970], OverflowError), + ([-2.**1023, -2.**1023, 2.**970, 2.**-1074], OverflowError), + ([-2.**-1074, -2.**1023, -2.**1023, 2.**970], OverflowError), + ([2.**930, -2.**980, 2.**1023, 2.**1023, twopow, -twopow], + OverflowError), + ([2.**1023, 2.**1023, -1e307], OverflowError), + ([1e16, 1., 1e-16], 10000000000000002.0), + ([1e16-2., 1.-2.**53, -(1e16-2.), -(1.-2.**53)], 0.0), + ] + + for i, (vals, s) in enumerate(test_values): + if isinstance(s, type) and issubclass(s, Exception): + try: + m = math.sum(vals) + except s: + pass + else: + self.fail("test %d failed: got %r, expected %r " + "for math.sum(%.100r)" % + (i, m, s.__name__, vals)) + else: + try: + self.assertEqual(math.sum(vals), s) + except OverflowError: + self.fail("test %d failed: got OverflowError, expected %r " + "for math.sum(%.100r)" % (i, s, vals)) + except ValueError: + self.fail("test %d failed: got ValueError, expected %r " + "for math.sum(%.100r)" % (i, s, vals)) + + # compare with output of msum above, but only when + # result isn't an IEEE special or an exception + if not math.isinf(s) and not math.isnan(s): + self.assertEqual(msum(vals), s) + + from random import random, gauss, shuffle + for j in range(1000): + vals = [7, 1e100, -7, -1e100, -9e-20, 8e-20] * 10 + s = 0 + for i in range(200): + v = gauss(0, random()) ** 7 - s + s += v + vals.append(v) + shuffle(vals) + + s = msum(vals) + self.assertEqual(msum(vals), math.sum(vals)) + + def testTan(self): self.assertRaises(TypeError, math.tan) self.ftest('tan(0)', math.tan(0), 0) @@ -763,6 +915,10 @@ class MathTests(unittest.TestCase): message = (("Unexpected ValueError: %s\n " + "in test %s:%s(%r)\n") % (exc.args[0], id, fn, ar)) self.fail(message) + except OverflowError: + message = ("Unexpected OverflowError in " + + "test %s:%s(%r)\n" % (id, fn, ar)) + self.fail(message) self.ftest("%s:%s(%r)" % (id, fn, ar), result, er) def test_main(): diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py index 20130c3..ca1f836 100644 --- a/Lib/test/test_minidom.py +++ b/Lib/test/test_minidom.py @@ -1314,6 +1314,11 @@ class MinidomTest(unittest.TestCase): for i in range(len(n1.childNodes)): stack.append((n1.childNodes[i], n2.childNodes[i])) + def testSerializeCommentNodeWithDoubleHyphen(self): + doc = create_doc_without_doctype() + doc.appendChild(doc.createComment("foo--bar")) + self.assertRaises(ValueError, doc.toxml) + def test_main(): run_unittest(MinidomTest) diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index b6f0a862..f229369 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -1132,6 +1132,8 @@ class Comment(CharacterData): self.data = self.nodeValue = data def writexml(self, writer, indent="", addindent="", newl=""): + if "--" in self.data: + raise ValueError("'--' is not allowed in a comment node") writer.write("%s%s" % (indent, self.data, newl)) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 45d842f..5c5def2 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -362,6 +362,199 @@ FUNC1(tan, tan, 0, FUNC1(tanh, tanh, 0, "tanh(x)\n\nReturn the hyperbolic tangent of x.") +/* Precision summation function as msum() by Raymond Hettinger in + , + enhanced with the exact partials sum and roundoff from Mark + Dickinson's post at . + See those links for more details, proofs and other references. + + Note 1: IEEE 754R floating point semantics are assumed, + but the current implementation does not re-establish special + value semantics across iterations (i.e. handling -Inf + Inf). + + Note 2: No provision is made for intermediate overflow handling; + therefore, sum([1e+308, 1e-308, 1e+308]) returns result 1e+308 while + sum([1e+308, 1e+308, 1e-308]) raises an OverflowError due to the + overflow of the first partial sum. + + Note 3: Aggressively optimizing compilers can potentially eliminate the + residual values needed for accurate summation. For instance, the statements + "hi = x + y; lo = y - (hi - x);" could be mis-transformed to + "hi = x + y; lo = 0.0;" which defeats the computation of residuals. + + Note 4: A similar implementation is in Modules/cmathmodule.c. + Be sure to update both when making changes. + + Note 5: The signature of math.sum() differs from __builtin__.sum() + because the start argument doesn't make sense in the context of + accurate summation. Since the partials table is collapsed before + returning a result, sum(seq2, start=sum(seq1)) may not equal the + accurate result returned by sum(itertools.chain(seq1, seq2)). +*/ + +#define NUM_PARTIALS 32 /* initial partials array size, on stack */ + +/* Extend the partials array p[] by doubling its size. */ +static int /* non-zero on error */ +_sum_realloc(double **p_ptr, Py_ssize_t n, + double *ps, Py_ssize_t *m_ptr) +{ + void *v = NULL; + Py_ssize_t m = *m_ptr; + + m += m; /* double */ + if (n < m && m < (PY_SSIZE_T_MAX / sizeof(double))) { + double *p = *p_ptr; + if (p == ps) { + v = PyMem_Malloc(sizeof(double) * m); + if (v != NULL) + memcpy(v, ps, sizeof(double) * n); + } + else + v = PyMem_Realloc(p, sizeof(double) * m); + } + if (v == NULL) { /* size overflow or no memory */ + PyErr_SetString(PyExc_MemoryError, "math sum partials"); + return 1; + } + *p_ptr = (double*) v; + *m_ptr = m; + return 0; +} + +/* Full precision summation of a sequence of floats. + + def msum(iterable): + partials = [] # sorted, non-overlapping partial sums + for x in iterable: + i = 0 + for y in partials: + if abs(x) < abs(y): + x, y = y, x + hi = x + y + lo = y - (hi - x) + if lo: + partials[i] = lo + i += 1 + x = hi + partials[i:] = [x] + return sum_exact(partials) + + Rounded x+y stored in hi with the roundoff stored in lo. Together hi+lo + are exactly equal to x+y. The inner loop applies hi/lo summation to each + partial so that the list of partial sums remains exact. + + Sum_exact() adds the partial sums exactly and correctly rounds the final + result (using the round-half-to-even rule). The items in partials remain + non-zero, non-special, non-overlapping and strictly increasing in + magnitude, but possibly not all having the same sign. + + Depends on IEEE 754 arithmetic guarantees and half-even rounding. +*/ + +static PyObject* +math_sum(PyObject *self, PyObject *seq) +{ + PyObject *item, *iter, *sum = NULL; + Py_ssize_t i, j, n = 0, m = NUM_PARTIALS; + double x, y, hi, lo=0.0, ps[NUM_PARTIALS], *p = ps; + + iter = PyObject_GetIter(seq); + if (iter == NULL) + return NULL; + + PyFPE_START_PROTECT("sum", Py_DECREF(iter); return NULL) + + for(;;) { /* for x in iterable */ + assert(0 <= n && n <= m); + assert((m == NUM_PARTIALS && p == ps) || + (m > NUM_PARTIALS && p != NULL)); + + item = PyIter_Next(iter); + if (item == NULL) { + if (PyErr_Occurred()) + goto _sum_error; + break; + } + x = PyFloat_AsDouble(item); + Py_DECREF(item); + if (PyErr_Occurred()) + goto _sum_error; + + for (i = j = 0; j < n; j++) { /* for y in partials */ + y = p[j]; + hi = x + y; + lo = fabs(x) < fabs(y) + ? x - (hi - y) + : y - (hi - x); + if (lo != 0.0) + p[i++] = lo; + x = hi; + } + + n = i; /* ps[i:] = [x] */ + if (x != 0.0) { + /* If non-finite, reset partials, effectively + adding subsequent items without roundoff + and yielding correct non-finite results, + provided IEEE 754 rules are observed */ + if (! Py_IS_FINITE(x)) + n = 0; + else if (n >= m && _sum_realloc(&p, n, ps, &m)) + goto _sum_error; + p[n++] = x; + } + } + + if (n > 0) { + hi = p[--n]; + if (Py_IS_FINITE(hi)) { + /* sum_exact(ps, hi) from the top, stop when the sum becomes inexact. */ + while (n > 0) { + x = p[--n]; + y = hi; + hi = x + y; + assert(fabs(x) < fabs(y)); + lo = x - (hi - y); + if (lo != 0.0) + break; + } + /* Little dance to allow half-even rounding across multiple partials. + Needed so that sum([1e-16, 1, 1e16]) will round-up to two instead + of down to zero (the 1e16 makes the 1 slightly closer to two). */ + if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) || + (lo > 0.0 && p[n-1] > 0.0))) { + y = lo * 2.0; + x = hi + y; + if (y == (x - hi)) + hi = x; + } + } + else { /* raise corresponding error */ + errno = Py_IS_NAN(hi) ? EDOM : ERANGE; + if (is_error(hi)) + goto _sum_error; + } + } + else /* default */ + hi = 0.0; + sum = PyFloat_FromDouble(hi); + +_sum_error: + PyFPE_END_PROTECT(hi) + Py_DECREF(iter); + if (p != ps) + PyMem_Free(p); + return sum; +} + +#undef NUM_PARTIALS + +PyDoc_STRVAR(math_sum_doc, +"sum(iterable)\n\n\ +Return an accurate floating point sum of values in the iterable.\n\ +Assumes IEEE-754 floating point arithmetic."); + static PyObject * math_trunc(PyObject *self, PyObject *number) { @@ -833,6 +1026,7 @@ static PyMethodDef math_methods[] = { {"sin", math_sin, METH_O, math_sin_doc}, {"sinh", math_sinh, METH_O, math_sinh_doc}, {"sqrt", math_sqrt, METH_O, math_sqrt_doc}, + {"sum", math_sum, METH_O, math_sum_doc}, {"tan", math_tan, METH_O, math_tan_doc}, {"tanh", math_tanh, METH_O, math_tanh_doc}, {"trunc", math_trunc, METH_O, math_trunc_doc}, -- cgit v0.12