summaryrefslogtreecommitdiffstats
path: root/Doc
diff options
context:
space:
mode:
authorRaymond Hettinger <rhettinger@users.noreply.github.com>2024-05-04 04:13:36 (GMT)
committerGitHub <noreply@github.com>2024-05-04 04:13:36 (GMT)
commit42dc5b4ace39a3983cd9853719527f4724693adc (patch)
tree9c19d33694a49367e64864111e57bafa8e86b88b /Doc
parent1b7e5e6e60e0d22b2a928cbbb36ebb989183450f (diff)
downloadcpython-42dc5b4ace39a3983cd9853719527f4724693adc.zip
cpython-42dc5b4ace39a3983cd9853719527f4724693adc.tar.gz
cpython-42dc5b4ace39a3983cd9853719527f4724693adc.tar.bz2
gh-115532 Add kde_random() to the statistics module (#118210)
Diffstat (limited to 'Doc')
-rw-r--r--Doc/library/statistics.rst84
-rw-r--r--Doc/whatsnew/3.13.rst3
2 files changed, 27 insertions, 60 deletions
diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst
index cc72396..d5a316e 100644
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -77,6 +77,7 @@ or sample.
:func:`geometric_mean` Geometric mean of data.
:func:`harmonic_mean` Harmonic mean of data.
:func:`kde` Estimate the probability density distribution of the data.
+:func:`kde_random` Random sampling from the PDF generated by kde().
:func:`median` Median (middle value) of data.
:func:`median_low` Low median of data.
:func:`median_high` High median of data.
@@ -311,6 +312,30 @@ However, for reading convenience, most of the examples show sorted sequences.
.. versionadded:: 3.13
+.. function:: kde_random(data, h, kernel='normal', *, seed=None)
+
+ Return a function that makes a random selection from the estimated
+ probability density function produced by ``kde(data, h, kernel)``.
+
+ Providing a *seed* allows reproducible selections. In the future, the
+ values may change slightly as more accurate kernel inverse CDF estimates
+ are implemented. The seed may be an integer, float, str, or bytes.
+
+ A :exc:`StatisticsError` will be raised if the *data* sequence is empty.
+
+ Continuing the example for :func:`kde`, we can use
+ :func:`kde_random` to generate new random selections from an
+ estimated probability density function:
+
+ >>> data = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2]
+ >>> rand = kde_random(data, h=1.5, seed=8675309)
+ >>> new_selections = [rand() for i in range(10)]
+ >>> [round(x, 1) for x in new_selections]
+ [0.7, 6.2, 1.2, 6.9, 7.0, 1.8, 2.5, -0.5, -1.8, 5.6]
+
+ .. versionadded:: 3.13
+
+
.. function:: median(data)
Return the median (middle value) of numeric data, using the common "mean of
@@ -1148,65 +1173,6 @@ The final prediction goes to the largest posterior. This is known as the
'female'
-Sampling from kernel density estimation
-***************************************
-
-The :func:`kde()` function creates a continuous probability density
-function from discrete samples. Some applications need a way to make
-random selections from that distribution.
-
-The technique is to pick a sample from a bandwidth scaled kernel
-function and recenter the result around a randomly chosen point from
-the input data. This can be done with any kernel that has a known or
-accurately approximated inverse cumulative distribution function.
-
-.. testcode::
-
- from random import choice, random, seed
- from math import sqrt, log, pi, tan, asin, cos, acos
- from statistics import NormalDist
-
- kernel_invcdfs = {
- 'normal': NormalDist().inv_cdf,
- 'logistic': lambda p: log(p / (1 - p)),
- 'sigmoid': lambda p: log(tan(p * pi/2)),
- 'rectangular': lambda p: 2*p - 1,
- 'triangular': lambda p: sqrt(2*p) - 1 if p < 0.5 else 1 - sqrt(2 - 2*p),
- 'parabolic': lambda p: 2 * cos((acos(2*p-1) + pi) / 3),
- 'cosine': lambda p: 2*asin(2*p - 1)/pi,
- }
-
- def kde_random(data, h, kernel='normal'):
- 'Return a function that samples from kde() smoothed data.'
- kernel_invcdf = kernel_invcdfs[kernel]
- def rand():
- return h * kernel_invcdf(random()) + choice(data)
- return rand
-
-For example:
-
-.. doctest::
-
- >>> discrete_samples = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2]
- >>> rand = kde_random(discrete_samples, h=1.5)
- >>> seed(8675309)
- >>> selections = [rand() for i in range(10)]
- >>> [round(x, 1) for x in selections]
- [4.7, 7.4, 1.2, 7.8, 6.9, -1.3, 5.8, 0.2, -1.4, 5.7]
-
-.. testcode::
- :hide:
-
- from statistics import kde
- from math import isclose
-
- # Verify that cdf / invcdf will round trip
- xarr = [i/100 for i in range(-100, 101)]
- for kernel, invcdf in kernel_invcdfs.items():
- cdf = kde([0.0], h=1.0, kernel=kernel, cumulative=True)
- for x in xarr:
- assert isclose(invcdf(cdf(x)), x, abs_tol=1E-9)
-
..
# This modeline must appear within the last ten lines of the file.
kate: indent-width 3; remove-trailing-space on; replace-tabs on; encoding utf-8;
diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst
index d996cf6..269a7cc 100644
--- a/Doc/whatsnew/3.13.rst
+++ b/Doc/whatsnew/3.13.rst
@@ -745,7 +745,8 @@ statistics
* Add :func:`statistics.kde` for kernel density estimation.
This makes it possible to estimate a continuous probability density function
- from a fixed number of discrete samples.
+ from a fixed number of discrete samples. Also add :func:`statistics.kde_random`
+ for sampling from the estimated probability density function.
(Contributed by Raymond Hettinger in :gh:`115863`.)
.. _whatsnew313-subprocess: