From 745c0f3980c56e88db6a57311fb20ea11005565b Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 31 Aug 2018 11:22:13 -0700 Subject: Simplify vector_norm() by eliminating special cases in the main loop (GH-9006) The *max* value is no longer treated as a special case in the main loop. Besides making the main loop simpler and branchless, this also lets us relax the input restriction of *vec* to contain only non-negative values. --- Modules/mathmodule.c | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 37934f6..8015a95 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2032,14 +2032,14 @@ math_fmod_impl(PyObject *module, double x, double y) } /* -Given an *n* length *vec* of non-negative values -where *max* is the largest value in the vector, compute: +Given an *n* length *vec* of values and a value *max*, compute: max * sqrt(sum((x / max) ** 2 for x in vec)) -The value of the *max* variable must be present in *vec* -or should equal to 0.0 when n==0. Likewise, *max* will -be INF if an infinity is present in the vec. +The value of the *max* variable must be non-negative and +at least equal to the absolute value of the largest magnitude +entry in the vector. If n==0, then *max* should be 0.0. +If an infinity is present in the vec, *max* should be INF. The *found_nan* variable indicates whether some member of the *vec* is a NaN. @@ -2053,16 +2053,19 @@ The *csum* variable tracks the cumulative sum and *frac* tracks the cumulative fractional errors at each step. Since this variant assumes that |csum| >= |x| at each step, we establish the precondition by starting the accumulation from 1.0 which -represents an entry equal to *max*. This also provides a nice -side benefit in that it lets us skip over a *max* entry (which -is swapped into *last*) saving us one iteration through the loop. +represents the largest possible value of (x/max)**2. + +After the loop is finished, the initial 1.0 is subtracted out +for a net zero effect on the final sum. Since *csum* will be +greater than 1.0, the subtraction of 1.0 will not cause +fractional digits to be dropped from *csum*. */ static inline double vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) { - double x, csum = 1.0, oldcsum, frac = 0.0, last; + double x, csum = 1.0, oldcsum, frac = 0.0; Py_ssize_t i; if (Py_IS_INFINITY(max)) { @@ -2071,27 +2074,20 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) if (found_nan) { return Py_NAN; } - if (max == 0.0) { - return 0.0; + if (max == 0.0 || n == 1) { + return max; } - assert(n > 0); - last = vec[n-1]; - for (i=0 ; i < n-1 ; i++) { + for (i=0 ; i < n ; i++) { x = vec[i]; - assert(Py_IS_FINITE(x) && x >= 0.0 && x <= max); - if (x == max) { - x = last; - last = max; - } + assert(Py_IS_FINITE(x) && fabs(x) <= max); x /= max; x = x*x; - assert(csum >= x); oldcsum = csum; csum += x; + assert(csum >= x); frac += (oldcsum - csum) + x; } - assert(last == max); - return max * sqrt(csum + frac); + return max * sqrt(csum - 1.0 + frac); } #define NUM_STACK_ELEMS 16 -- cgit v0.12