summary | refs | log | tree | commit | diff | stats
path: root/Lib/statistics.py
diff options
context:
space:
mode:
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> 2021-05-25 01:11:12 (GMT)
committer: GitHub <noreply@github.com> 2021-05-25 01:11:12 (GMT)
commit: 86779878dfc0bcb74b4721aba7fd9a84e9cbd5c7 (patch)
tree: dd1e7141aad2006d9863236dea71617efe297c52 /Lib/statistics.py
parent: 1c454eb2e4eb9e08ee94920c0e1ca7c8896371ec (diff)
download: cpython-86779878dfc0bcb74b4721aba7fd9a84e9cbd5c7.zip
cpython-86779878dfc0bcb74b4721aba7fd9a84e9cbd5c7.tar.gz
cpython-86779878dfc0bcb74b4721aba7fd9a84e9cbd5c7.tar.bz2
bpo-44151: linear_regression() minor API improvements (GH-26199) (GH-26338)
Diffstat (limited to 'Lib/statistics.py')
-rw-r--r-- Lib/statistics.py 29
1 files changed, 14 insertions, 15 deletions
diff --git a/Lib/statistics.py b/Lib/statistics.py
index c2f8dcd..f164210a 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -94,7 +94,7 @@ for two inputs:
>>> correlation(x, y) #doctest: +ELLIPSIS
0.31622776601...
>>> linear_regression(x, y) #doctest:
-LinearRegression(intercept=1.5, slope=0.1)
+LinearRegression(slope=0.1, intercept=1.5)
Exceptions
@@ -919,18 +919,18 @@ def correlation(x, y, /):
raise StatisticsError('at least one of the inputs is constant')
-LinearRegression = namedtuple('LinearRegression', ['intercept', 'slope'])
+LinearRegression = namedtuple('LinearRegression', ('slope', 'intercept'))
-def linear_regression(regressor, dependent_variable, /):
+def linear_regression(x, y, /):
"""Slope and intercept for simple linear regression
Return the slope and intercept of simple linear regression
parameters estimated using ordinary least squares. Simple linear
- regression describes relationship between *regressor* and
- *dependent variable* in terms of linear function:
+ regression describes relationship between *x* and
+ *y* in terms of linear function:
- dependent_variable = intercept + slope * regressor + noise
+ y = intercept + slope * x + noise
where *intercept* and *slope* are the regression parameters that are
estimated, and noise represents the variability of the data that was
@@ -940,19 +940,18 @@ def linear_regression(regressor, dependent_variable, /):
The parameters are returned as a named tuple.
- >>> regressor = [1, 2, 3, 4, 5]
+ >>> x = [1, 2, 3, 4, 5]
>>> noise = NormalDist().samples(5, seed=42)
- >>> dependent_variable = [2 + 3 * regressor[i] + noise[i] for i in range(5)]
- >>> linear_regression(regressor, dependent_variable) #doctest: +ELLIPSIS
- LinearRegression(intercept=1.75684970486..., slope=3.09078914170...)
+ >>> y = [2 + 3 * x[i] + noise[i] for i in range(5)]
+ >>> linear_regression(x, y) #doctest: +ELLIPSIS
+ LinearRegression(slope=3.09078914170..., intercept=1.75684970486...)
"""
- n = len(regressor)
- if len(dependent_variable) != n:
+ n = len(x)
+ if len(y) != n:
raise StatisticsError('linear regression requires that both inputs have same number of data points')
if n < 2:
raise StatisticsError('linear regression requires at least two data points')
- x, y = regressor, dependent_variable
xbar = fsum(x) / n
ybar = fsum(y) / n
sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
@@ -960,9 +959,9 @@ def linear_regression(regressor, dependent_variable, /):
try:
slope = sxy / s2x
except ZeroDivisionError:
- raise StatisticsError('regressor is constant')
+ raise StatisticsError('x is constant')
intercept = ybar - slope * xbar
- return LinearRegression(intercept=intercept, slope=slope)
+ return LinearRegression(slope=slope, intercept=intercept)
## Normal Distribution #####################################################