summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Raymond Hettinger <rhettinger@users.noreply.github.com> 2023-03-17 19:06:52 (GMT)
committer GitHub <noreply@github.com> 2023-03-17 19:06:52 (GMT)
commit 72186aa637bc88cd5f5e234803af64acab25994c (patch)
tree d5fc84040009ef2fb390d285d792fd81fd2ffd4b
parent 174c4bfd0fee4622657a604af7a2e7d20a3f0dbc (diff)
download cpython-72186aa637bc88cd5f5e234803af64acab25994c.zip
download cpython-72186aa637bc88cd5f5e234803af64acab25994c.tar.gz
download cpython-72186aa637bc88cd5f5e234803af64acab25994c.tar.bz2
Simplify and improve accuracy for subnormals in hypot() (GH-102785)
-rw-r--r--  Modules/mathmodule.c  63
1 file changed, 28 insertions, 35 deletions
diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c
index ae9e321..48cd9a6 100644
--- a/Modules/mathmodule.c
+++ b/Modules/mathmodule.c
@@ -2498,7 +2498,7 @@ References:
static inline double
vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
{
- double x, h, scale, oldcsum, csum = 1.0, frac1 = 0.0, frac2 = 0.0;
+ double x, h, scale, csum = 1.0, frac1 = 0.0, frac2 = 0.0;
DoubleLength pr, sm;
int max_e;
Py_ssize_t i;
@@ -2513,49 +2513,42 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
return max;
}
frexp(max, &max_e);
- if (max_e >= -1023) {
- scale = ldexp(1.0, -max_e);
- assert(max * scale >= 0.5);
- assert(max * scale < 1.0);
+ if (max_e < -1023) {
+ /* When max_e < -1023, ldexp(1.0, -max_e) would overflow.
+ So we first perform lossless scaling from subnormals back to normals,
+ then recurse back to vector_norm(), and then finally undo the scaling.
+ */
for (i=0 ; i < n ; i++) {
- x = vec[i];
- assert(Py_IS_FINITE(x) && fabs(x) <= max);
+ vec[i] /= DBL_MIN;
+ }
+ return DBL_MIN * vector_norm(n, vec, max / DBL_MIN, found_nan);
+ }
+ scale = ldexp(1.0, -max_e);
+ assert(max * scale >= 0.5);
+ assert(max * scale < 1.0);
+ for (i=0 ; i < n ; i++) {
+ x = vec[i];
+ assert(Py_IS_FINITE(x) && fabs(x) <= max);
- x *= scale;
- assert(fabs(x) < 1.0);
+ x *= scale;
+ assert(fabs(x) < 1.0);
- pr = dl_mul(x, x);
- assert(pr.hi <= 1.0);
+ pr = dl_mul(x, x);
+ assert(pr.hi <= 1.0);
- sm = dl_fast_sum(csum, pr.hi);
- csum = sm.hi;
- frac1 += pr.lo;
- frac2 += sm.lo;
- }
- h = sqrt(csum - 1.0 + (frac1 + frac2));
- pr = dl_mul(-h, h);
sm = dl_fast_sum(csum, pr.hi);
csum = sm.hi;
frac1 += pr.lo;
frac2 += sm.lo;
- x = csum - 1.0 + (frac1 + frac2);
- return (h + x / (2.0 * h)) / scale;
}
- /* When max_e < -1023, ldexp(1.0, -max_e) overflows.
- So instead of multiplying by a scale, we just divide by *max*.
- */
- for (i=0 ; i < n ; i++) {
- x = vec[i];
- assert(Py_IS_FINITE(x) && fabs(x) <= max);
- x /= max;
- x = x*x;
- assert(x <= 1.0);
- assert(fabs(csum) >= fabs(x));
- oldcsum = csum;
- csum += x;
- frac1 += (oldcsum - csum) + x;
- }
- return max * sqrt(csum - 1.0 + frac1);
+ h = sqrt(csum - 1.0 + (frac1 + frac2));
+ pr = dl_mul(-h, h);
+ sm = dl_fast_sum(csum, pr.hi);
+ csum = sm.hi;
+ frac1 += pr.lo;
+ frac2 += sm.lo;
+ x = csum - 1.0 + (frac1 + frac2);
+ return (h + x / (2.0 * h)) / scale;
}
#define NUM_STACK_ELEMS 16