1 files changed, 17 insertions, 36 deletions
diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c
index 2da50bb..481e299 100644
--- a/Modules/mathmodule.c
+++ b/Modules/mathmodule.c
@@ -2832,12 +2832,7 @@ long_add_would_overflow(long a, long b)
 }
 
 /*
-Double and triple length extended precision floating point arithmetic
-based on:
-
-  A Floating-Point Technique for Extending the Available Precision
-  by T. J. Dekker
-  https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
+Double and triple length extended precision algorithms from:
 
   Accurate Sum and Dot Product
   by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi
@@ -2848,36 +2843,22 @@ based on:
 
 typedef struct{ double hi; double lo; } DoubleLength;
 
-static inline DoubleLength
-twosum(double a, double b)
+static DoubleLength
+dl_sum(double a, double b)
 {
-    // Rump Algorithm 3.1 Error-free transformation of the sum
+    /* Algorithm 3.1 Error-free transformation of the sum */
     double x = a + b;
     double z = x - a;
     double y = (a - (x - z)) + (b - z);
     return (DoubleLength) {x, y};
 }
 
-static inline DoubleLength
-dl_split(double x) {
-    // Rump Algorithm 3.2 Error-free splitting of a floating point number
-    // Dekker (5.5) and (5.6).
-    double t = x * 134217729.0;  // Veltkamp constant = 2.0 ** 27 + 1
-    double hi = t - (t - x);
-    double lo = x - hi;
-    return (DoubleLength) {hi, lo};
-}
-
-static inline DoubleLength
+static DoubleLength
 dl_mul(double x, double y)
 {
-    // Dekker (5.12) and mul12()
-    DoubleLength xx = dl_split(x);
-    DoubleLength yy = dl_split(y);
-    double p = xx.hi * yy.hi;
-    double q = xx.hi * yy.lo + xx.lo * yy.hi;
-    double z = p + q;
-    double zz = p - z + q + xx.lo * yy.lo;
+    /* Algorithm 3.5. Error-free transformation of a product */
+    double z = x * y;
+    double zz = fma(x, y, -z);
     return (DoubleLength) {z, zz};
 }
 
@@ -2885,21 +2866,21 @@ typedef struct { double hi; double lo; double tiny; } TripleLength;
 
 static const TripleLength tl_zero = {0.0, 0.0, 0.0};
 
-static inline TripleLength
-tl_fma(TripleLength total, double x, double y)
+static TripleLength
+tl_fma(double x, double y, TripleLength total)
 {
-    // Rump Algorithm 5.10 with K=3 and using SumKVert
+    /* Algorithm 5.10 with SumKVert for K=3 */
     DoubleLength pr = dl_mul(x, y);
-    DoubleLength sm = twosum(total.hi, pr.hi);
-    DoubleLength r1 = twosum(total.lo, pr.lo);
-    DoubleLength r2 = twosum(r1.hi, sm.lo);
+    DoubleLength sm = dl_sum(total.hi, pr.hi);
+    DoubleLength r1 = dl_sum(total.lo, pr.lo);
+    DoubleLength r2 = dl_sum(r1.hi, sm.lo);
     return (TripleLength) {sm.hi, r2.hi, total.tiny + r1.lo + r2.lo};
 }
 
-static inline double
+static double
 tl_to_d(TripleLength total)
 {
-    DoubleLength last = twosum(total.lo, total.hi);
+    DoubleLength last = dl_sum(total.lo, total.hi);
     return total.tiny + last.lo + last.hi;
 }
 
@@ -3066,7 +3047,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
                 } else {
                     goto finalize_flt_path;
                 }
-                TripleLength new_flt_total = tl_fma(flt_total, flt_p, flt_q);
+                TripleLength new_flt_total = tl_fma(flt_p, flt_q, flt_total);
                 if (isfinite(new_flt_total.hi)) {
                     flt_total = new_flt_total;
                     flt_total_in_use = true;