| author | Raymond Hettinger <rhettinger@users.noreply.github.com> | 2023-03-17 19:06:52 (GMT) |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-03-17 19:06:52 (GMT) |
| commit | 72186aa637bc88cd5f5e234803af64acab25994c | |
| tree | d5fc84040009ef2fb390d285d792fd81fd2ffd4b | |
| parent | 174c4bfd0fee4622657a604af7a2e7d20a3f0dbc | |
Simplify and improve accuracy for subnormals in hypot() (GH-102785)
-rw-r--r-- | Modules/mathmodule.c | 63 |
1 file changed, 28 insertions(+), 35 deletions(-)
```diff
diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c
index ae9e321..48cd9a6 100644
--- a/Modules/mathmodule.c
+++ b/Modules/mathmodule.c
@@ -2498,7 +2498,7 @@ References:
 static inline double
 vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
 {
-    double x, h, scale, oldcsum, csum = 1.0, frac1 = 0.0, frac2 = 0.0;
+    double x, h, scale, csum = 1.0, frac1 = 0.0, frac2 = 0.0;
     DoubleLength pr, sm;
     int max_e;
     Py_ssize_t i;
@@ -2513,49 +2513,42 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
         return max;
     }
     frexp(max, &max_e);
-    if (max_e >= -1023) {
-        scale = ldexp(1.0, -max_e);
-        assert(max * scale >= 0.5);
-        assert(max * scale < 1.0);
+    if (max_e < -1023) {
+        /* When max_e < -1023, ldexp(1.0, -max_e) would overflow.
+           So we first perform lossless scaling from subnormals back to normals,
+           then recurse back to vector_norm(), and then finally undo the scaling.
+        */
         for (i=0 ; i < n ; i++) {
-            x = vec[i];
-            assert(Py_IS_FINITE(x) && fabs(x) <= max);
+            vec[i] /= DBL_MIN;
+        }
+        return DBL_MIN * vector_norm(n, vec, max / DBL_MIN, found_nan);
+    }
+    scale = ldexp(1.0, -max_e);
+    assert(max * scale >= 0.5);
+    assert(max * scale < 1.0);
+    for (i=0 ; i < n ; i++) {
+        x = vec[i];
+        assert(Py_IS_FINITE(x) && fabs(x) <= max);
 
-            x *= scale;
-            assert(fabs(x) < 1.0);
+        x *= scale;
+        assert(fabs(x) < 1.0);
 
-            pr = dl_mul(x, x);
-            assert(pr.hi <= 1.0);
+        pr = dl_mul(x, x);
+        assert(pr.hi <= 1.0);
 
-            sm = dl_fast_sum(csum, pr.hi);
-            csum = sm.hi;
-            frac1 += pr.lo;
-            frac2 += sm.lo;
-        }
-        h = sqrt(csum - 1.0 + (frac1 + frac2));
-        pr = dl_mul(-h, h);
         sm = dl_fast_sum(csum, pr.hi);
         csum = sm.hi;
         frac1 += pr.lo;
         frac2 += sm.lo;
-        x = csum - 1.0 + (frac1 + frac2);
-        return (h + x / (2.0 * h)) / scale;
     }
-    /* When max_e < -1023, ldexp(1.0, -max_e) overflows.
-       So instead of multiplying by a scale, we just divide by *max*.
-    */
-    for (i=0 ; i < n ; i++) {
-        x = vec[i];
-        assert(Py_IS_FINITE(x) && fabs(x) <= max);
-        x /= max;
-        x = x*x;
-        assert(x <= 1.0);
-        assert(fabs(csum) >= fabs(x));
-        oldcsum = csum;
-        csum += x;
-        frac1 += (oldcsum - csum) + x;
-    }
-    return max * sqrt(csum - 1.0 + frac1);
+    h = sqrt(csum - 1.0 + (frac1 + frac2));
+    pr = dl_mul(-h, h);
+    sm = dl_fast_sum(csum, pr.hi);
+    csum = sm.hi;
+    frac1 += pr.lo;
+    frac2 += sm.lo;
+    x = csum - 1.0 + (frac1 + frac2);
+    return (h + x / (2.0 * h)) / scale;
 }
 
 #define NUM_STACK_ELEMS 16
```
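For illustration only, the following is a minimal standalone C sketch of the idea behind this change, not the CPython implementation: when the largest magnitude is so small that `ldexp(1.0, -max_e)` would overflow, the inputs are first divided by `DBL_MIN` (exact, because `DBL_MIN` is a power of two and the inputs are subnormal), the result is computed on the now-normal values, and then scaled back by `DBL_MIN`. The helper name `subnormal_aware_hypot` and the two-argument form are assumptions made for brevity; the real code recurses into `vector_norm()` over the whole vector.

```c
#include <float.h>
#include <math.h>
#include <stdio.h>

/* Hypothetical two-argument sketch of the subnormal-handling idea. */
static double
subnormal_aware_hypot(double a, double b)
{
    double max = fmax(fabs(a), fabs(b));
    int max_e;

    if (!isfinite(a) || !isfinite(b) || max == 0.0) {
        return hypot(a, b);     /* let libm handle zeros, infinities, NaNs */
    }
    frexp(max, &max_e);         /* max == m * 2**max_e with 0.5 <= m < 1 */
    if (max_e < -1023) {
        /* ldexp(1.0, -max_e) would overflow here, so rescale first.
           Dividing a subnormal by DBL_MIN (a power of two) is exact,
           so the round trip loses no accuracy. */
        return DBL_MIN * subnormal_aware_hypot(a / DBL_MIN, b / DBL_MIN);
    }
    return hypot(a, b);         /* normal range: ordinary computation */
}

int
main(void)
{
    /* Both inputs are subnormal; the expected result is close to 5e-310. */
    printf("%.17g\n", subnormal_aware_hypot(3e-310, 4e-310));
    return 0;
}
```

Running this prints a value very close to 5e-310, whereas a naive `sqrt(a*a + b*b)` on the same inputs returns 0.0 because both squares underflow to zero.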