summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Lib/statistics.py 26
-rw-r--r-- Lib/test/test_statistics.py 12
-rw-r--r-- Misc/NEWS.d/next/Library/2023-03-13-18-27-00.gh-issue-102670.GyoThv.rst 2
3 files changed, 27 insertions, 13 deletions
diff --git a/Lib/statistics.py b/Lib/statistics.py
index 7d5d750..6bd214b 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1036,7 +1036,7 @@ def covariance(x, y, /):
raise StatisticsError('covariance requires at least two data points')
xbar = fsum(x) / n
ybar = fsum(y) / n
- sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
+ sxy = sumprod((xi - xbar for xi in x), (yi - ybar for yi in y))
return sxy / (n - 1)
@@ -1074,11 +1074,14 @@ def correlation(x, y, /, *, method='linear'):
start = (n - 1) / -2 # Center rankings around zero
x = _rank(x, start=start)
y = _rank(y, start=start)
- xbar = fsum(x) / n
- ybar = fsum(y) / n
- sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
- sxx = fsum((d := xi - xbar) * d for xi in x)
- syy = fsum((d := yi - ybar) * d for yi in y)
+ else:
+ xbar = fsum(x) / n
+ ybar = fsum(y) / n
+ x = [xi - xbar for xi in x]
+ y = [yi - ybar for yi in y]
+ sxy = sumprod(x, y)
+ sxx = sumprod(x, x)
+ syy = sumprod(y, y)
try:
return sxy / sqrt(sxx * syy)
except ZeroDivisionError:
@@ -1131,14 +1134,13 @@ def linear_regression(x, y, /, *, proportional=False):
raise StatisticsError('linear regression requires that both inputs have same number of data points')
if n < 2:
raise StatisticsError('linear regression requires at least two data points')
- if proportional:
- sxy = fsum(xi * yi for xi, yi in zip(x, y))
- sxx = fsum(xi * xi for xi in x)
- else:
+ if not proportional:
xbar = fsum(x) / n
ybar = fsum(y) / n
- sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
- sxx = fsum((d := xi - xbar) * d for xi in x)
+ x = [xi - xbar for xi in x] # List because used three times below
+ y = (yi - ybar for yi in y) # Generator because only used once below
+ sxy = sumprod(x, y) + 0.0 # Add zero to coerce result to a float
+ sxx = sumprod(x, x)
try:
slope = sxy / sxx # equivalent to: covariance(x, y) / variance(x)
except ZeroDivisionError:
diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py
index 31a3cb6..f0fa645 100644
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -1,4 +1,4 @@
-"""Test suite for statistics module, including helper NumericTestCase and
+x = """Test suite for statistics module, including helper NumericTestCase and
approx_equal function.
"""
@@ -2610,6 +2610,16 @@ class TestLinearRegression(unittest.TestCase):
self.assertAlmostEqual(slope, 20 + 1/150)
self.assertEqual(intercept, 0.0)
+ def test_float_output(self):
+ x = [Fraction(2, 3), Fraction(3, 4)]
+ y = [Fraction(4, 5), Fraction(5, 6)]
+ slope, intercept = statistics.linear_regression(x, y)
+ self.assertTrue(isinstance(slope, float))
+ self.assertTrue(isinstance(intercept, float))
+ slope, intercept = statistics.linear_regression(x, y, proportional=True)
+ self.assertTrue(isinstance(slope, float))
+ self.assertTrue(isinstance(intercept, float))
+
class TestNormalDist:
# General note on precision: The pdf(), cdf(), and overlap() methods
diff --git a/Misc/NEWS.d/next/Library/2023-03-13-18-27-00.gh-issue-102670.GyoThv.rst b/Misc/NEWS.d/next/Library/2023-03-13-18-27-00.gh-issue-102670.GyoThv.rst
new file mode 100644
index 0000000..3de09f8
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-03-13-18-27-00.gh-issue-102670.GyoThv.rst
@@ -0,0 +1,2 @@
+Optimized fmean(), correlation(), covariance(), and linear_regression()
+using the new math.sumprod() function.