diff options
author | Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> | 2021-05-25 01:11:12 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-05-25 01:11:12 (GMT) |
commit | 86779878dfc0bcb74b4721aba7fd9a84e9cbd5c7 (patch) | |
tree | dd1e7141aad2006d9863236dea71617efe297c52 /Lib/statistics.py | |
parent | 1c454eb2e4eb9e08ee94920c0e1ca7c8896371ec (diff) | |
download | cpython-86779878dfc0bcb74b4721aba7fd9a84e9cbd5c7.zip cpython-86779878dfc0bcb74b4721aba7fd9a84e9cbd5c7.tar.gz cpython-86779878dfc0bcb74b4721aba7fd9a84e9cbd5c7.tar.bz2 |
bpo-44151: linear_regression() minor API improvements (GH-26199) (GH-26338)
Diffstat (limited to 'Lib/statistics.py')
-rw-r--r-- | Lib/statistics.py | 29 |
1 file changed, 14 insertions, 15 deletions
diff --git a/Lib/statistics.py b/Lib/statistics.py index c2f8dcd..f164210a 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -94,7 +94,7 @@ for two inputs: >>> correlation(x, y) #doctest: +ELLIPSIS 0.31622776601... >>> linear_regression(x, y) #doctest: -LinearRegression(intercept=1.5, slope=0.1) +LinearRegression(slope=0.1, intercept=1.5) Exceptions @@ -919,18 +919,18 @@ def correlation(x, y, /): raise StatisticsError('at least one of the inputs is constant') -LinearRegression = namedtuple('LinearRegression', ['intercept', 'slope']) +LinearRegression = namedtuple('LinearRegression', ('slope', 'intercept')) -def linear_regression(regressor, dependent_variable, /): +def linear_regression(x, y, /): """Intercept and slope for simple linear regression Return the intercept and slope of simple linear regression parameters estimated using ordinary least squares. Simple linear - regression describes relationship between *regressor* and - *dependent variable* in terms of linear function: + regression describes relationship between *x* and + *y* in terms of linear function: - dependent_variable = intercept + slope * regressor + noise + y = intercept + slope * x + noise where *intercept* and *slope* are the regression parameters that are estimated, and noise represents the variability of the data that was @@ -940,19 +940,18 @@ def linear_regression(regressor, dependent_variable, /): The parameters are returned as a named tuple. - >>> regressor = [1, 2, 3, 4, 5] + >>> x = [1, 2, 3, 4, 5] >>> noise = NormalDist().samples(5, seed=42) - >>> dependent_variable = [2 + 3 * regressor[i] + noise[i] for i in range(5)] - >>> linear_regression(regressor, dependent_variable) #doctest: +ELLIPSIS - LinearRegression(intercept=1.75684970486..., slope=3.09078914170...) + >>> y = [2 + 3 * x[i] + noise[i] for i in range(5)] + >>> linear_regression(x, y) #doctest: +ELLIPSIS + LinearRegression(slope=3.09078914170..., intercept=1.75684970486...) 
""" - n = len(regressor) - if len(dependent_variable) != n: + n = len(x) + if len(y) != n: raise StatisticsError('linear regression requires that both inputs have same number of data points') if n < 2: raise StatisticsError('linear regression requires at least two data points') - x, y = regressor, dependent_variable xbar = fsum(x) / n ybar = fsum(y) / n sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y)) @@ -960,9 +959,9 @@ def linear_regression(regressor, dependent_variable, /): try: slope = sxy / s2x except ZeroDivisionError: - raise StatisticsError('regressor is constant') + raise StatisticsError('x is constant') intercept = ybar - slope * xbar - return LinearRegression(intercept=intercept, slope=slope) + return LinearRegression(slope=slope, intercept=intercept) ## Normal Distribution ##################################################### |