summary | refs | log | tree | commit | diff | stats
path: root/Lib/statistics.py
diff options
context:
space:
mode:
author: Raymond Hettinger <rhettinger@users.noreply.github.com> 2021-05-06 14:43:13 (GMT)
committer: GitHub <noreply@github.com> 2021-05-06 14:43:13 (GMT)
commit: 55b78ce3c4e23abe4f27bf16d7968f8851532e47 (patch)
tree: 1a9adefac69771279476b1f5d7e2dd0332b59318 /Lib/statistics.py
parent: e8525567dd325527e00b3c4ce7c4ce31ff3f1a8c (diff)
download: cpython-55b78ce3c4e23abe4f27bf16d7968f8851532e47.zip
cpython-55b78ce3c4e23abe4f27bf16d7968f8851532e47.tar.gz
cpython-55b78ce3c4e23abe4f27bf16d7968f8851532e47.tar.bz2
Eliminate duplicated calculations and unnecessary work for linear regression (GH-25922)
Diffstat (limited to 'Lib/statistics.py')
-rw-r--r-- Lib/statistics.py 9
1 file changed, 7 insertions, 2 deletions
diff --git a/Lib/statistics.py b/Lib/statistics.py
index edb11c8..db8c581 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -952,11 +952,16 @@ def linear_regression(regressor, dependent_variable, /):
raise StatisticsError('linear regression requires that both inputs have same number of data points')
if n < 2:
raise StatisticsError('linear regression requires at least two data points')
+ x, y = regressor, dependent_variable
+ xbar = fsum(x) / n
+ ybar = fsum(y) / n
+ sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
+ s2x = fsum((xi - xbar) ** 2.0 for xi in x)
try:
- slope = covariance(regressor, dependent_variable) / variance(regressor)
+ slope = sxy / s2x
except ZeroDivisionError:
raise StatisticsError('regressor is constant')
- intercept = fmean(dependent_variable) - slope * fmean(regressor)
+ intercept = ybar - slope * xbar
return LinearRegression(intercept=intercept, slope=slope)