summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRaymond Hettinger <rhettinger@users.noreply.github.com>2023-02-04 23:54:44 (GMT)
committerGitHub <noreply@github.com>2023-02-04 23:54:44 (GMT)
commit5a2b984568f72f0d7ff7c7b4ee8ce31af9fd1b7e (patch)
tree85f0b9aa3fcb80a41332f7adc94f57c41bd70285
parenta89e6713c4de99d4be5a1304b134e57a24ab10ac (diff)
downloadcpython-5a2b984568f72f0d7ff7c7b4ee8ce31af9fd1b7e.zip
cpython-5a2b984568f72f0d7ff7c7b4ee8ce31af9fd1b7e.tar.gz
cpython-5a2b984568f72f0d7ff7c7b4ee8ce31af9fd1b7e.tar.bz2
GH-100485: Create an alternative code path when an accurate fma() implementation is not available (#101567)
-rw-r--r--Lib/test/test_math.py5
-rw-r--r--Modules/mathmodule.c43
2 files changed, 48 insertions, 0 deletions
diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py
index 2c84e55..8d84904 100644
--- a/Lib/test/test_math.py
+++ b/Lib/test/test_math.py
@@ -1450,6 +1450,11 @@ class MathTests(unittest.TestCase):
n = 20 # Length of vectors
c = 1e30 # Target condition number
+ # If the following test fails, it means that the C math library
+ # implementation of fma() is not compliant with the C99 standard
+ # and is inaccurate. To solve this problem, make a new build
+ # with the symbol UNRELIABLE_FMA defined. That will enable a
+ # slower but accurate code path that avoids the fma() call.
relative_err = median(Trial(math.sumprod, c, n) for i in range(times))
self.assertLess(relative_err, 1e-16)
diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c
index e6cdb3b..654336d 100644
--- a/Modules/mathmodule.c
+++ b/Modules/mathmodule.c
@@ -2851,6 +2851,8 @@ dl_sum(double a, double b)
return (DoubleLength) {x, y};
}
+#ifndef UNRELIABLE_FMA
+
static DoubleLength
dl_mul(double x, double y)
{
@@ -2860,6 +2862,47 @@ dl_mul(double x, double y)
return (DoubleLength) {z, zz};
}
+#else
+
+/*
+ The default implementation of dl_mul() depends on the C math library
+ having an accurate fma() function as required by ยง 7.12.13.1 of the
+ C99 standard.
+
+ The UNRELIABLE_FMA option is provided as a slower but accurate
+ alternative for builds where the fma() function is found wanting.
+ The speed penalty may be modest (17% slower on an Apple M1 Max),
+ so don't hesitate to enable this build option.
+
+ The algorithms are from the T. J. Dekker paper:
+ A Floating-Point Technique for Extending the Available Precision
+ https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
+*/
+
+static DoubleLength
+dl_split(double x) {
+ // Dekker (5.5) and (5.6).
+ double t = x * 134217729.0; // Veltkamp constant = 2.0 ** 27 + 1
+ double hi = t - (t - x);
+ double lo = x - hi;
+ return (DoubleLength) {hi, lo};
+}
+
+static DoubleLength
+dl_mul(double x, double y)
+{
+ // Dekker (5.12) and mul12()
+ DoubleLength xx = dl_split(x);
+ DoubleLength yy = dl_split(y);
+ double p = xx.hi * yy.hi;
+ double q = xx.hi * yy.lo + xx.lo * yy.hi;
+ double z = p + q;
+ double zz = p - z + q + xx.lo * yy.lo;
+ return (DoubleLength) {z, zz};
+}
+
+#endif
+
typedef struct { double hi; double lo; double tiny; } TripleLength;
static const TripleLength tl_zero = {0.0, 0.0, 0.0};