diff options
| author | Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> | 2021-05-06 15:26:55 (GMT) |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-05-06 15:26:55 (GMT) |
| commit | 8e3cb61da9981847d5ac846f32f817c8dbfbeef3 (patch) | |
| tree | 9d0c850bf90e85c6650be2a10652dd4a4a3d4cc4 /Lib/statistics.py | |
| parent | f8778f96e8b2864093bc8b283598e82c0dd00133 (diff) | |
| download | cpython-8e3cb61da9981847d5ac846f32f817c8dbfbeef3.zip cpython-8e3cb61da9981847d5ac846f32f817c8dbfbeef3.tar.gz cpython-8e3cb61da9981847d5ac846f32f817c8dbfbeef3.tar.bz2 | |
Eliminate duplicated calculations and unnecessary work for linear regression (GH-25922) (GH-25945)
Diffstat (limited to 'Lib/statistics.py')
| -rw-r--r-- | Lib/statistics.py | 9 |
1 file changed, 7 insertions, 2 deletions
```diff
diff --git a/Lib/statistics.py b/Lib/statistics.py
index edb11c8..db8c581 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -952,11 +952,16 @@ def linear_regression(regressor, dependent_variable, /):
         raise StatisticsError('linear regression requires that both inputs have same number of data points')
     if n < 2:
         raise StatisticsError('linear regression requires at least two data points')
+    x, y = regressor, dependent_variable
+    xbar = fsum(x) / n
+    ybar = fsum(y) / n
+    sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
+    s2x = fsum((xi - xbar) ** 2.0 for xi in x)
     try:
-        slope = covariance(regressor, dependent_variable) / variance(regressor)
+        slope = sxy / s2x
     except ZeroDivisionError:
         raise StatisticsError('regressor is constant')
-    intercept = fmean(dependent_variable) - slope * fmean(regressor)
+    intercept = ybar - slope * xbar
     return LinearRegression(intercept=intercept, slope=slope)
 
 
```
