summaryrefslogtreecommitdiffstats
path: root/Modules
diff options
context:
space:
mode:
authorRaymond Hettinger <rhettinger@users.noreply.github.com>2020-08-16 02:38:19 (GMT)
committerGitHub <noreply@github.com>2020-08-16 02:38:19 (GMT)
commitfff3c28052e6b0750d6218e00acacd2fded4991a (patch)
treece64915e0eca743b0f0c448b714f5e316adca18a /Modules
parent39dab24621122338d01c1219bb0acc46ba9c9956 (diff)
downloadcpython-fff3c28052e6b0750d6218e00acacd2fded4991a.zip
cpython-fff3c28052e6b0750d6218e00acacd2fded4991a.tar.gz
cpython-fff3c28052e6b0750d6218e00acacd2fded4991a.tar.bz2
bpo-41513: Improve speed and accuracy of math.hypot() (GH-21803)
Diffstat (limited to 'Modules')
-rw-r--r--Modules/mathmodule.c41
1 files changed, 38 insertions, 3 deletions
diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c
index 411c6eb..489802c 100644
--- a/Modules/mathmodule.c
+++ b/Modules/mathmodule.c
@@ -2406,6 +2406,13 @@ math_fmod_impl(PyObject *module, double x, double y)
/*
Given an *n* length *vec* of values and a value *max*, compute:
+ sqrt(sum((x * scale) ** 2 for x in vec)) / scale
+
+ where scale is the first power of two
+ greater than max.
+
+or compute:
+
max * sqrt(sum((x / max) ** 2 for x in vec))
The value of the *max* variable must be non-negative and
@@ -2425,19 +2432,25 @@ The *csum* variable tracks the cumulative sum and *frac* tracks
the cumulative fractional errors at each step. Since this
variant assumes that |csum| >= |x| at each step, we establish
the precondition by starting the accumulation from 1.0 which
-represents the largest possible value of (x/max)**2.
+represents the largest possible value of (x*scale)**2 or (x/max)**2.
After the loop is finished, the initial 1.0 is subtracted out
for a net zero effect on the final sum. Since *csum* will be
greater than 1.0, the subtraction of 1.0 will not cause
fractional digits to be dropped from *csum*.
+To get the full benefit from compensated summation, the
+largest addend should be in the range: 0.5 <= x <= 1.0.
+Accordingly, scaling or division by *max* should not be skipped
+even if not otherwise needed to prevent overflow or loss of precision.
+
*/
static inline double
vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
{
- double x, csum = 1.0, oldcsum, frac = 0.0;
+ double x, csum = 1.0, oldcsum, frac = 0.0, scale;
+ int max_e;
Py_ssize_t i;
if (Py_IS_INFINITY(max)) {
@@ -2449,14 +2462,36 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
if (max == 0.0 || n <= 1) {
return max;
}
+ frexp(max, &max_e);
+ if (max_e >= -1023) {
+ scale = ldexp(1.0, -max_e);
+ assert(max * scale >= 0.5);
+ assert(max * scale < 1.0);
+ for (i=0 ; i < n ; i++) {
+ x = vec[i];
+ assert(Py_IS_FINITE(x) && fabs(x) <= max);
+ x *= scale;
+ x = x*x;
+ assert(x <= 1.0);
+ assert(csum >= x);
+ oldcsum = csum;
+ csum += x;
+ frac += (oldcsum - csum) + x;
+ }
+ return sqrt(csum - 1.0 + frac) / scale;
+ }
+ /* When max_e < -1023, ldexp(1.0, -max_e) overflows.
+ So instead of multiplying by a scale, we just divide by *max*.
+ */
for (i=0 ; i < n ; i++) {
x = vec[i];
assert(Py_IS_FINITE(x) && fabs(x) <= max);
x /= max;
x = x*x;
+ assert(x <= 1.0);
+ assert(csum >= x);
oldcsum = csum;
csum += x;
- assert(csum >= x);
frac += (oldcsum - csum) + x;
}
return max * sqrt(csum - 1.0 + frac);