From fff3c28052e6b0750d6218e00acacd2fded4991a Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 15 Aug 2020 19:38:19 -0700 Subject: bpo-41513: Improve speed and accuracy of math.hypot() (GH-21803) --- Lib/test/test_math.py | 7 ++-- .../2020-08-09-18-16-05.bpo-41513.e6K6EK.rst | 2 ++ Modules/mathmodule.c | 41 ++++++++++++++++++++-- 3 files changed, 44 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-08-09-18-16-05.bpo-41513.e6K6EK.rst diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index e06b1e6..4d62eb1 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -795,7 +795,8 @@ class MathTests(unittest.TestCase): # Verify scaling for extremely large values fourthmax = FLOAT_MAX / 4.0 for n in range(32): - self.assertEqual(hypot(*([fourthmax]*n)), fourthmax * math.sqrt(n)) + self.assertTrue(math.isclose(hypot(*([fourthmax]*n)), + fourthmax * math.sqrt(n))) # Verify scaling for extremely small values for exp in range(32): @@ -904,8 +905,8 @@ class MathTests(unittest.TestCase): for n in range(32): p = (fourthmax,) * n q = (0.0,) * n - self.assertEqual(dist(p, q), fourthmax * math.sqrt(n)) - self.assertEqual(dist(q, p), fourthmax * math.sqrt(n)) + self.assertTrue(math.isclose(dist(p, q), fourthmax * math.sqrt(n))) + self.assertTrue(math.isclose(dist(q, p), fourthmax * math.sqrt(n))) # Verify scaling for extremely small values for exp in range(32): diff --git a/Misc/NEWS.d/next/Library/2020-08-09-18-16-05.bpo-41513.e6K6EK.rst b/Misc/NEWS.d/next/Library/2020-08-09-18-16-05.bpo-41513.e6K6EK.rst new file mode 100644 index 0000000..cfb9f98 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-08-09-18-16-05.bpo-41513.e6K6EK.rst @@ -0,0 +1,2 @@ +Minor algorithmic improvement to math.hypot() and math.dist() giving small +gains in speed and accuracy. diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 411c6eb..489802c 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2406,6 +2406,13 @@ math_fmod_impl(PyObject *module, double x, double y) /* Given an *n* length *vec* of values and a value *max*, compute: + sqrt(sum((x * scale) ** 2 for x in vec)) / scale + + where scale is the first power of two + greater than max. + +or compute: + max * sqrt(sum((x / max) ** 2 for x in vec)) The value of the *max* variable must be non-negative and @@ -2425,19 +2432,25 @@ The *csum* variable tracks the cumulative sum and *frac* tracks the cumulative fractional errors at each step. Since this variant assumes that |csum| >= |x| at each step, we establish the precondition by starting the accumulation from 1.0 which -represents the largest possible value of (x/max)**2. +represents the largest possible value of (x*scale)**2 or (x/max)**2. After the loop is finished, the initial 1.0 is subtracted out for a net zero effect on the final sum. Since *csum* will be greater than 1.0, the subtraction of 1.0 will not cause fractional digits to be dropped from *csum*. +To get the full benefit from compensated summation, the +largest addend should be in the range: 0.5 <= x <= 1.0. +Accordingly, scaling or division by *max* should not be skipped +even if not otherwise needed to prevent overflow or loss of precision. + */ static inline double vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) { - double x, csum = 1.0, oldcsum, frac = 0.0; + double x, csum = 1.0, oldcsum, frac = 0.0, scale; + int max_e; Py_ssize_t i; if (Py_IS_INFINITY(max)) { @@ -2449,14 +2462,36 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) if (max == 0.0 || n <= 1) { return max; } + frexp(max, &max_e); + if (max_e >= -1023) { + scale = ldexp(1.0, -max_e); + assert(max * scale >= 0.5); + assert(max * scale < 1.0); + for (i=0 ; i < n ; i++) { + x = vec[i]; + assert(Py_IS_FINITE(x) && fabs(x) <= max); + x *= scale; + x = x*x; + assert(x <= 1.0); + assert(csum >= x); + oldcsum = csum; + csum += x; + frac += (oldcsum - csum) + x; + } + return sqrt(csum - 1.0 + frac) / scale; + } + /* When max_e < -1023, ldexp(1.0, -max_e) overflows. + So instead of multiplying by a scale, we just divide by *max*. + */ for (i=0 ; i < n ; i++) { x = vec[i]; assert(Py_IS_FINITE(x) && fabs(x) <= max); x /= max; x = x*x; + assert(x <= 1.0); + assert(csum >= x); oldcsum = csum; csum += x; - assert(csum >= x); frac += (oldcsum - csum) + x; } return max * sqrt(csum - 1.0 + frac); -- cgit v0.12