diff options
-rw-r--r-- | Doc/library/statistics.rst | 12 | ||||
-rw-r--r-- | Lib/statistics.py | 31 | ||||
-rw-r--r-- | Lib/test/test_statistics.py | 6 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2021-11-09-09-18-06.bpo-45766.dvbcMf.rst | 1 |
4 files changed, 42 insertions, 8 deletions
diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst index bb03a2c..8638abf 100644 --- a/Doc/library/statistics.rst +++ b/Doc/library/statistics.rst @@ -643,7 +643,7 @@ However, for reading convenience, most of the examples show sorted sequences. .. versionadded:: 3.10 -.. function:: linear_regression(x, y, /) +.. function:: linear_regression(x, y, /, *, proportional=False) Return the slope and intercept of `simple linear regression <https://en.wikipedia.org/wiki/Simple_linear_regression>`_ @@ -677,8 +677,18 @@ However, for reading convenience, most of the examples show sorted sequences. >>> round(slope * 2019 + intercept) 16 + If *proportional* is true, the independent variable *x* and the + dependent variable *y* are assumed to be directly proportional. + The data is fit to a line passing through the origin. + Since the *intercept* will always be 0.0, the underlying linear + function simplifies to: + + *y = slope \* x + noise* + .. versionadded:: 3.10 + .. versionchanged:: 3.11 + Added support for *proportional*. Exceptions ---------- diff --git a/Lib/statistics.py b/Lib/statistics.py index 4f3ab49..5c3f77d 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -937,13 +937,13 @@ def correlation(x, y, /): LinearRegression = namedtuple('LinearRegression', ('slope', 'intercept')) -def linear_regression(x, y, /): +def linear_regression(x, y, /, *, proportional=False): """Slope and intercept for simple linear regression. Return the slope and intercept of simple linear regression parameters estimated using ordinary least squares. Simple linear regression describes relationship between an independent variable - *x* and a dependent variable *y* in terms of linear function: + *x* and a dependent variable *y* in terms of a linear function: y = slope * x + intercept + noise @@ -961,21 +961,38 @@ def linear_regression(x, y, /): >>> linear_regression(x, y) #doctest: +ELLIPSIS LinearRegression(slope=3.09078914170..., intercept=1.75684970486...) 
+ If *proportional* is true, the independent variable *x* and the + dependent variable *y* are assumed to be directly proportional. + The data is fit to a line passing through the origin. + + Since the *intercept* will always be 0.0, the underlying linear + function simplifies to: + + y = slope * x + noise + + >>> y = [3 * x[i] + noise[i] for i in range(5)] + >>> linear_regression(x, y, proportional=True) #doctest: +ELLIPSIS + LinearRegression(slope=3.02447542484..., intercept=0.0) + """ n = len(x) if len(y) != n: raise StatisticsError('linear regression requires that both inputs have same number of data points') if n < 2: raise StatisticsError('linear regression requires at least two data points') - xbar = fsum(x) / n - ybar = fsum(y) / n - sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y)) - sxx = fsum((d := xi - xbar) * d for xi in x) + if proportional: + sxy = fsum(xi * yi for xi, yi in zip(x, y)) + sxx = fsum(xi * xi for xi in x) + else: + xbar = fsum(x) / n + ybar = fsum(y) / n + sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y)) + sxx = fsum((d := xi - xbar) * d for xi in x) try: slope = sxy / sxx # equivalent to: covariance(x, y) / variance(x) except ZeroDivisionError: raise StatisticsError('x is constant') - intercept = ybar - slope * xbar + intercept = 0.0 if proportional else ybar - slope * xbar return LinearRegression(slope=slope, intercept=intercept) diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index fbc6a07..c0e427d 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -2527,6 +2527,12 @@ class TestLinearRegression(unittest.TestCase): self.assertAlmostEqual(intercept, true_intercept) self.assertAlmostEqual(slope, true_slope) + def test_proportional(self): + x = [10, 20, 30, 40] + y = [180, 398, 610, 799] + slope, intercept = statistics.linear_regression(x, y, proportional=True) + self.assertAlmostEqual(slope, 20 + 1/150) + self.assertEqual(intercept, 0.0) class TestNormalDist: 
diff --git a/Misc/NEWS.d/next/Library/2021-11-09-09-18-06.bpo-45766.dvbcMf.rst b/Misc/NEWS.d/next/Library/2021-11-09-09-18-06.bpo-45766.dvbcMf.rst new file mode 100644 index 0000000..b2e9c7e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-11-09-09-18-06.bpo-45766.dvbcMf.rst @@ -0,0 +1 @@ +Added *proportional* option to :func:`statistics.linear_regression`. |