bpo-36018: Add the NormalDist class to the statistics module (GH-11973)

author: Raymond Hettinger <rhettinger@users.noreply.github.com> 2019-02-23 22:44:07 (GMT)
committer: GitHub <noreply@github.com> 2019-02-23 22:44:07 (GMT)
commit: 11c79531655a4aa3f82c20ff562ac571f40040cc (patch)
tree: 6af6cf3108204156c7b66022044514d75fca134e /Lib/statistics.py
parent: 64d6cc826dacebc2493b1bb5e8cb97828eb76f81 (diff)
download: cpython-11c79531655a4aa3f82c20ff562ac571f40040cc.zip
cpython-11c79531655a4aa3f82c20ff562ac571f40040cc.tar.gz
cpython-11c79531655a4aa3f82c20ff562ac571f40040cc.tar.bz2
1 files changed, 155 insertions, 1 deletions
diff --git a/Lib/statistics.py b/Lib/statistics.py
index 8ecb906..a73001a 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -76,7 +76,7 @@ A single exception is defined: StatisticsError is a subclass of ValueError.
 
 """
 
-__all__ = [ 'StatisticsError',
+__all__ = [ 'StatisticsError', 'NormalDist',
             'pstdev', 'pvariance', 'stdev', 'variance',
             'median',  'median_low', 'median_high', 'median_grouped',
             'mean', 'mode', 'harmonic_mean', 'fmean',
@@ -85,11 +85,13 @@ __all__ = [ 'StatisticsError',
 import collections
 import math
 import numbers
+import random
 
 from fractions import Fraction
 from decimal import Decimal
 from itertools import groupby
 from bisect import bisect_left, bisect_right
+from math import hypot, sqrt, fabs, exp, erf, tau
 
 
 
@@ -694,3 +696,155 @@ def pstdev(data, mu=None):
         return var.sqrt()
     except AttributeError:
         return math.sqrt(var)
+
+## Normal Distribution #####################################################
+
+class NormalDist:
+    'Normal distribution of a random variable'
+    # https://en.wikipedia.org/wiki/Normal_distribution
+    # https://en.wikipedia.org/wiki/Variance#Properties
+
+    __slots__ = ('mu', 'sigma')
+
+    def __init__(self, mu=0.0, sigma=1.0):
+        'NormalDist where mu is the mean and sigma is the standard deviation'
+        if sigma < 0.0:
+            raise StatisticsError('sigma must be non-negative')
+        self.mu = mu
+        self.sigma = sigma
+
+    @classmethod
+    def from_samples(cls, data):
+        'Make a normal distribution instance from sample data'
+        if not isinstance(data, (list, tuple)):
+            data = list(data)
+        xbar = fmean(data)
+        return cls(xbar, stdev(data, xbar))
+
+    def samples(self, n, seed=None):
+        'Generate *n* samples for a given mean and standard deviation'
+        gauss = random.gauss if seed is None else random.Random(seed).gauss
+        mu, sigma = self.mu, self.sigma
+        return [gauss(mu, sigma) for i in range(n)]
+
+    def pdf(self, x):
+        'Probability density function:  P(x <= X < x+dx) / dx'
+        variance = self.sigma ** 2.0
+        if not variance:
+            raise StatisticsError('pdf() not defined when sigma is zero')
+        return exp((x - self.mu)**2.0 / (-2.0*variance)) / sqrt(tau * variance)
+
+    def cdf(self, x):
+        'Cumulative density function:  P(X <= x)'
+        if not self.sigma:
+            raise StatisticsError('cdf() not defined when sigma is zero')
+        return 0.5 * (1.0 + erf((x - self.mu) / (self.sigma * sqrt(2.0))))
+
+    @property
+    def variance(self):
+        'Square of the standard deviation'
+        return self.sigma ** 2.0
+
+    def __add__(x1, x2):
+        if isinstance(x2, NormalDist):
+            return NormalDist(x1.mu + x2.mu, hypot(x1.sigma, x2.sigma))
+        return NormalDist(x1.mu + x2, x1.sigma)
+
+    def __sub__(x1, x2):
+        if isinstance(x2, NormalDist):
+            return NormalDist(x1.mu - x2.mu, hypot(x1.sigma, x2.sigma))
+        return NormalDist(x1.mu - x2, x1.sigma)
+
+    def __mul__(x1, x2):
+        return NormalDist(x1.mu * x2, x1.sigma * fabs(x2))
+
+    def __truediv__(x1, x2):
+        return NormalDist(x1.mu / x2, x1.sigma / fabs(x2))
+
+    def __pos__(x1):
+        return x1
+
+    def __neg__(x1):
+        return NormalDist(-x1.mu, x1.sigma)
+
+    __radd__ = __add__
+
+    def __rsub__(x1, x2):
+        return -(x1 - x2)
+
+    __rmul__ = __mul__
+
+    def __eq__(x1, x2):
+        if not isinstance(x2, NormalDist):
+            return NotImplemented
+        return (x1.mu, x2.sigma) == (x2.mu, x2.sigma)
+
+    def __repr__(self):
+        return f'{type(self).__name__}(mu={self.mu!r}, sigma={self.sigma!r})'
+
+
+if __name__ == '__main__':
+
+    # Show math operations computed analytically in comparsion
+    # to a monte carlo simulation of the same operations
+
+    from math import isclose
+    from operator import add, sub, mul, truediv
+    from itertools import repeat
+
+    g1 = NormalDist(10, 20)
+    g2 = NormalDist(-5, 25)
+
+    # Test scaling by a constant
+    assert (g1 * 5 / 5).mu == g1.mu
+    assert (g1 * 5 / 5).sigma == g1.sigma
+
+    n = 100_000
+    G1 = g1.samples(n)
+    G2 = g2.samples(n)
+
+    for func in (add, sub):
+        print(f'\nTest {func.__name__} with another NormalDist:')
+        print(func(g1, g2))
+        print(NormalDist.from_samples(map(func, G1, G2)))
+
+    const = 11
+    for func in (add, sub, mul, truediv):
+        print(f'\nTest {func.__name__} with a constant:')
+        print(func(g1, const))
+        print(NormalDist.from_samples(map(func, G1, repeat(const))))
+
+    const = 19
+    for func in (add, sub, mul):
+        print(f'\nTest constant with {func.__name__}:')
+        print(func(const, g1))
+        print(NormalDist.from_samples(map(func, repeat(const), G1)))
+
+    def assert_close(G1, G2):
+        assert isclose(G1.mu, G1.mu, rel_tol=0.01), (G1, G2)
+        assert isclose(G1.sigma, G2.sigma, rel_tol=0.01), (G1, G2)
+
+    X = NormalDist(-105, 73)
+    Y = NormalDist(31, 47)
+    s = 32.75
+    n = 100_000
+
+    S = NormalDist.from_samples([x + s for x in X.samples(n)])
+    assert_close(X + s, S)
+
+    S = NormalDist.from_samples([x - s for x in X.samples(n)])
+    assert_close(X - s, S)
+
+    S = NormalDist.from_samples([x * s for x in X.samples(n)])
+    assert_close(X * s, S)
+
+    S = NormalDist.from_samples([x / s for x in X.samples(n)])
+    assert_close(X / s, S)
+
+    S = NormalDist.from_samples([x + y for x, y in zip(X.samples(n),
+                                                       Y.samples(n))])
+    assert_close(X + Y, S)
+
+    S = NormalDist.from_samples([x - y for x, y in zip(X.samples(n),
+                                                       Y.samples(n))])
+    assert_close(X - Y, S)
author	Raymond Hettinger <rhettinger@users.noreply.github.com>	2019-02-23 22:44:07 (GMT)
committer	GitHub <noreply@github.com>	2019-02-23 22:44:07 (GMT)
commit	11c79531655a4aa3f82c20ff562ac571f40040cc (patch)
tree	6af6cf3108204156c7b66022044514d75fca134e /Lib/statistics.py
parent	64d6cc826dacebc2493b1bb5e8cb97828eb76f81 (diff)
download	cpython-11c79531655a4aa3f82c20ff562ac571f40040cc.zip cpython-11c79531655a4aa3f82c20ff562ac571f40040cc.tar.gz cpython-11c79531655a4aa3f82c20ff562ac571f40040cc.tar.bz2