author    | Zack Kneupper <zachary.kneupper@gmail.com> | 2021-05-25 00:30:58 (GMT)
committer | GitHub <noreply@github.com>                | 2021-05-25 00:30:58 (GMT)
commit    | 2f3a87856c7033227577b9ed0c77ed75311430b7 (patch)
tree      | b3c06f5f87a54c4e4464426c7ba6af8071e2d4f3 /Lib/statistics.py
parent    | 8450e8a81f6d54f45e1fc5c13a03878c9978750d (diff)
bpo-44151: linear_regression() minor API improvements (GH-26199)
Diffstat (limited to 'Lib/statistics.py')
-rw-r--r-- | Lib/statistics.py | 29
1 file changed, 14 insertions(+), 15 deletions(-)
diff --git a/Lib/statistics.py b/Lib/statistics.py
index bd3813c..c505a05 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -94,7 +94,7 @@ for two inputs:
 >>> correlation(x, y)  #doctest: +ELLIPSIS
 0.31622776601...
 >>> linear_regression(x, y)  #doctest:
-LinearRegression(intercept=1.5, slope=0.1)
+LinearRegression(slope=0.1, intercept=1.5)
 
 
 Exceptions
@@ -932,18 +932,18 @@ def correlation(x, y, /):
         raise StatisticsError('at least one of the inputs is constant')
 
 
-LinearRegression = namedtuple('LinearRegression', ['intercept', 'slope'])
+LinearRegression = namedtuple('LinearRegression', ('slope', 'intercept'))
 
 
-def linear_regression(regressor, dependent_variable, /):
+def linear_regression(x, y, /):
     """Intercept and slope for simple linear regression
 
     Return the intercept and slope of simple linear regression
     parameters estimated using ordinary least squares. Simple linear
-    regression describes relationship between *regressor* and
-    *dependent variable* in terms of linear function:
+    regression describes relationship between *x* and
+    *y* in terms of linear function:
 
-        dependent_variable = intercept + slope * regressor + noise
+        y = intercept + slope * x + noise
 
     where *intercept* and *slope* are the regression parameters that are
     estimated, and noise represents the variability of the data that was
@@ -953,19 +953,18 @@ def linear_regression(regressor, dependent_variable, /):
 
     The parameters are returned as a named tuple.
 
-    >>> regressor = [1, 2, 3, 4, 5]
+    >>> x = [1, 2, 3, 4, 5]
     >>> noise = NormalDist().samples(5, seed=42)
-    >>> dependent_variable = [2 + 3 * regressor[i] + noise[i] for i in range(5)]
-    >>> linear_regression(regressor, dependent_variable)  #doctest: +ELLIPSIS
-    LinearRegression(intercept=1.75684970486..., slope=3.09078914170...)
+    >>> y = [2 + 3 * x[i] + noise[i] for i in range(5)]
+    >>> linear_regression(x, y)  #doctest: +ELLIPSIS
+    LinearRegression(slope=3.09078914170..., intercept=1.75684970486...)
 
     """
-    n = len(regressor)
-    if len(dependent_variable) != n:
+    n = len(x)
+    if len(y) != n:
         raise StatisticsError('linear regression requires that both inputs have same number of data points')
     if n < 2:
         raise StatisticsError('linear regression requires at least two data points')
-    x, y = regressor, dependent_variable
     xbar = fsum(x) / n
     ybar = fsum(y) / n
     sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
@@ -973,9 +972,9 @@ def linear_regression(regressor, dependent_variable, /):
     try:
         slope = sxy / s2x  # equivalent to: covariance(x, y) / variance(x)
     except ZeroDivisionError:
-        raise StatisticsError('regressor is constant')
+        raise StatisticsError('x is constant')
     intercept = ybar - slope * xbar
-    return LinearRegression(intercept=intercept, slope=slope)
+    return LinearRegression(slope=slope, intercept=intercept)
 
 
 ## Normal Distribution #####################################################
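For readers tracking the API change, below is a small usage sketch of the revised interface. It is not part of the patch and assumes a CPython build that already contains this commit (statistics.linear_regression is new in Python 3.10); the sample data is invented for illustration.

    from statistics import linear_regression

    x = [1, 2, 3, 4, 5]
    y = [1 + 2 * xi for xi in x]   # exactly y = 1 + 2*x, no noise term

    result = linear_regression(x, y)

    # The named tuple now reports the slope first, then the intercept.
    print(result)            # LinearRegression(slope=2.0, intercept=1.0)
    print(result.slope)      # 2.0
    print(result.intercept)  # 1.0

    # Positional unpacking follows the new field order; code written
    # against the pre-patch (intercept, slope) order needs updating.
    slope, intercept = linear_regression(x, y)

    # Both parameters are positional-only (note the "/" in the signature),
    # so they are passed by position, not by keyword.

Because the result is a named tuple, both attribute access and positional unpacking work, but only the attribute names are insulated from the field reordering made in this commit.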