summaryrefslogtreecommitdiffstats
path: root/Modules
diff options
context:
space:
mode:
authorRaymond Hettinger <rhettinger@users.noreply.github.com>2023-01-08 03:37:08 (GMT)
committerGitHub <noreply@github.com>2023-01-08 03:37:08 (GMT)
commitdf3851fe4ab8c9013aeb5dfd75d6e8edf2ece9a8 (patch)
treee5b68fa27d9f57bdff12549c357777df50fa7a31 /Modules
parent951303fd855838d47765dcd05471e14311dc9fdd (diff)
downloadcpython-df3851fe4ab8c9013aeb5dfd75d6e8edf2ece9a8.zip
cpython-df3851fe4ab8c9013aeb5dfd75d6e8edf2ece9a8.tar.gz
cpython-df3851fe4ab8c9013aeb5dfd75d6e8edf2ece9a8.tar.bz2
GH-100485: Convert from Fast2Sum to 2Sum (GH-100836)
Diffstat (limited to 'Modules')
-rw-r--r--Modules/mathmodule.c29
1 files changed, 16 insertions, 13 deletions
diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c
index 0bcb336..9545ad2 100644
--- a/Modules/mathmodule.c
+++ b/Modules/mathmodule.c
@@ -2847,7 +2847,7 @@ based on ideas from three sources:
The double length routines allow for quite a bit of instruction
level parallelism. On a 3.22 Ghz Apple M1 Max, the incremental
-cost of increasing the input vector size by one is 6.25 nsec.
+cost of increasing the input vector size by one is 6.0 nsec.
dl_zero() returns an extended precision zero
dl_split() exactly splits a double into two half precision components.
@@ -2860,22 +2860,25 @@ dl_to_d() converts from extended precision to double precision.
typedef struct{ double hi; double lo; } DoubleLength;
+static const DoubleLength dl_zero = {0.0, 0.0};
+
static inline DoubleLength
-dl_zero()
+twosum(double a, double b)
{
- return (DoubleLength) {0.0, 0.0};
+ double s = a + b;
+ double ap = s - b;
+ double bp = s - a;
+ double da = a - ap;
+ double db = b - bp;
+ double t = da + db;
+ return (DoubleLength) {s, t};
}
+
static inline DoubleLength
dl_add(DoubleLength total, double x)
{
- double s = total.hi + x;
- double c = total.lo;
- if (fabs(total.hi) >= fabs(x)) {
- c += (total.hi - s) + x;
- } else {
- c += (x - s) + total.hi;
- }
- return (DoubleLength) {s, c};
+ DoubleLength s = twosum(total.hi, x);
+ return (DoubleLength) {s.hi, total.lo + s.lo};
}
static inline DoubleLength
@@ -2941,7 +2944,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
bool int_path_enabled = true, int_total_in_use = false;
bool flt_path_enabled = true, flt_total_in_use = false;
long int_total = 0;
- DoubleLength flt_total = dl_zero();
+ DoubleLength flt_total = dl_zero;
p_it = PyObject_GetIter(p);
if (p_it == NULL) {
@@ -3101,7 +3104,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
Py_SETREF(total, new_total);
new_total = NULL;
Py_CLEAR(term_i);
- flt_total = dl_zero();
+ flt_total = dl_zero;
flt_total_in_use = false;
}
}