diff options
Diffstat (limited to 'Modules/mathmodule.c')
-rw-r--r-- | Modules/mathmodule.c | 43 |
1 files changed, 43 insertions, 0 deletions
diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index e6cdb3b..654336d 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2851,6 +2851,8 @@ dl_sum(double a, double b) return (DoubleLength) {x, y}; } +#ifndef UNRELIABLE_FMA + static DoubleLength dl_mul(double x, double y) { @@ -2860,6 +2862,47 @@ dl_mul(double x, double y) return (DoubleLength) {z, zz}; } +#else + +/* + The default implementation of dl_mul() depends on the C math library + having an accurate fma() function as required by ยง 7.12.13.1 of the + C99 standard. + + The UNRELIABLE_FMA option is provided as a slower but accurate + alternative for builds where the fma() function is found wanting. + The speed penalty may be modest (17% slower on an Apple M1 Max), + so don't hesitate to enable this build option. + + The algorithms are from the T. J. Dekker paper: + A Floating-Point Technique for Extending the Available Precision + https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf +*/ + +static DoubleLength +dl_split(double x) { + // Dekker (5.5) and (5.6). + double t = x * 134217729.0; // Veltkamp constant = 2.0 ** 27 + 1 + double hi = t - (t - x); + double lo = x - hi; + return (DoubleLength) {hi, lo}; +} + +static DoubleLength +dl_mul(double x, double y) +{ + // Dekker (5.12) and mul12() + DoubleLength xx = dl_split(x); + DoubleLength yy = dl_split(y); + double p = xx.hi * yy.hi; + double q = xx.hi * yy.lo + xx.lo * yy.hi; + double z = p + q; + double zz = p - z + q + xx.lo * yy.lo; + return (DoubleLength) {z, zz}; +} + +#endif + typedef struct { double hi; double lo; double tiny; } TripleLength; static const TripleLength tl_zero = {0.0, 0.0, 0.0}; |