summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Raymond Hettinger <rhettinger@users.noreply.github.com> 2023-08-27 13:59:40 (GMT)
committer GitHub <noreply@github.com> 2023-08-27 13:59:40 (GMT)
commit 042aa88bcc6541cb8b312f1119452f7a58a5b4df (patch)
tree a9cc25f87cf66e82fbd0277db1ab1644a0b14d27
parent 09343dba44cdb5c279ec51df34552ef451434958 (diff)
download cpython-042aa88bcc6541cb8b312f1119452f7a58a5b4df.zip
cpython-042aa88bcc6541cb8b312f1119452f7a58a5b4df.tar.gz
cpython-042aa88bcc6541cb8b312f1119452f7a58a5b4df.tar.bz2
gh-108322: Optimize statistics.NormalDist.samples() (gh-108324)
-rw-r--r-- Doc/library/statistics.rst 5
-rw-r--r-- Lib/statistics.py 12
-rw-r--r-- Misc/NEWS.d/next/Library/2023-08-22-12-05-47.gh-issue-108322.kf3NJX.rst 2
3 files changed, 14 insertions, 5 deletions
diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst
index 483ebea..368b2a1 100644
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -828,6 +828,11 @@ of applications in statistics.
number generator. This is useful for creating reproducible results,
even in a multi-threading context.
+ .. versionchanged:: 3.13
+
+ Switched to a faster algorithm. To reproduce samples from previous
+ versions, use :func:`random.seed` and :func:`random.gauss`.
+
.. method:: NormalDist.pdf(x)
Using a `probability density function (pdf)
diff --git a/Lib/statistics.py b/Lib/statistics.py
index a8036e9..96c8034 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1135,7 +1135,7 @@ def linear_regression(x, y, /, *, proportional=False):
>>> noise = NormalDist().samples(5, seed=42)
>>> y = [3 * x[i] + 2 + noise[i] for i in range(5)]
>>> linear_regression(x, y) #doctest: +ELLIPSIS
- LinearRegression(slope=3.09078914170..., intercept=1.75684970486...)
+ LinearRegression(slope=3.17495..., intercept=1.00925...)
If *proportional* is true, the independent variable *x* and the
dependent variable *y* are assumed to be directly proportional.
@@ -1148,7 +1148,7 @@ def linear_regression(x, y, /, *, proportional=False):
>>> y = [3 * x[i] + noise[i] for i in range(5)]
>>> linear_regression(x, y, proportional=True) #doctest: +ELLIPSIS
- LinearRegression(slope=3.02447542484..., intercept=0.0)
+ LinearRegression(slope=2.90475..., intercept=0.0)
"""
n = len(x)
@@ -1279,9 +1279,11 @@ class NormalDist:
def samples(self, n, *, seed=None):
"Generate *n* samples for a given mean and standard deviation."
- gauss = random.gauss if seed is None else random.Random(seed).gauss
- mu, sigma = self._mu, self._sigma
- return [gauss(mu, sigma) for _ in repeat(None, n)]
+ rnd = random.random if seed is None else random.Random(seed).random
+ inv_cdf = _normal_dist_inv_cdf
+ mu = self._mu
+ sigma = self._sigma
+ return [inv_cdf(rnd(), mu, sigma) for _ in repeat(None, n)]
def pdf(self, x):
"Probability density function. P(x <= X < x+dx) / dx"
diff --git a/Misc/NEWS.d/next/Library/2023-08-22-12-05-47.gh-issue-108322.kf3NJX.rst b/Misc/NEWS.d/next/Library/2023-08-22-12-05-47.gh-issue-108322.kf3NJX.rst
new file mode 100644
index 0000000..5416c01
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-08-22-12-05-47.gh-issue-108322.kf3NJX.rst
@@ -0,0 +1,2 @@
+Speed-up NormalDist.samples() by using the inverse CDF method instead of
+calling random.gauss().