summaryrefslogtreecommitdiffstats
path: root/Lib/statistics.py
diff options
context:
space:
mode:
author Raymond Hettinger <rhettinger@users.noreply.github.com> 2019-03-07 06:59:40 (GMT)
committer GitHub <noreply@github.com> 2019-03-07 06:59:40 (GMT)
commit 318d537daabf2bd5f781255c7e25bfce260cf227 (patch)
tree 05255317e7fd489c1fc22bd4164285e9234d1a11 /Lib/statistics.py
parent e942e7b5c91995ae1ad967ef2c0f116a5d8555de (diff)
download cpython-318d537daabf2bd5f781255c7e25bfce260cf227.zip
cpython-318d537daabf2bd5f781255c7e25bfce260cf227.tar.gz
cpython-318d537daabf2bd5f781255c7e25bfce260cf227.tar.bz2
bpo-36169: Add overlap() method to statistics.NormalDist (GH-12149)
Diffstat (limited to 'Lib/statistics.py')
-rw-r--r-- Lib/statistics.py 37
1 files changed, 36 insertions, 1 deletions
diff --git a/Lib/statistics.py b/Lib/statistics.py
index e917a5d..e85aaa9 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -91,7 +91,7 @@ from fractions import Fraction
from decimal import Decimal
from itertools import groupby
from bisect import bisect_left, bisect_right
-from math import hypot, sqrt, fabs, exp, erf, tau
+from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum
@@ -740,6 +740,41 @@ class NormalDist:
raise StatisticsError('cdf() not defined when sigma is zero')
return 0.5 * (1.0 + erf((x - self.mu) / (self.sigma * sqrt(2.0))))
+    def overlap(self, other):
+        '''Return the overlapping coefficient (OVL) with another NormalDist.
+
+        The OVL measures the agreement between two normal probability
+        distributions.  The result is a value between 0.0 and 1.0 giving
+        the area common to the two probability density functions.
+
+        Raises TypeError if *other* is not a NormalDist instance, and
+        StatisticsError if the variance of either distribution is zero.
+
+            >>> N1 = NormalDist(2.4, 1.6)
+            >>> N2 = NormalDist(3.2, 2.0)
+            >>> N1.overlap(N2)
+            0.8035050657330205
+
+        '''
+        # Reference: Henry F. Inman and Edwin L. Bradley Jr, "The overlapping
+        # coefficient as a measure of agreement between probability
+        # distributions and point estimation of the overlap of two normal
+        # densities" -- http://dx.doi.org/10.1080/03610928908830127
+        if not isinstance(other, NormalDist):
+            raise TypeError('Expected another NormalDist instance')
+        # Order by (sigma, mu) so the result is the same for either operand.
+        swap = (other.sigma, other.mu) < (self.sigma, self.mu)
+        narrow, wide = (other, self) if swap else (self, other)
+        narrow_var, wide_var = narrow.variance, wide.variance
+        if not (narrow_var and wide_var):
+            raise StatisticsError('overlap() not defined when sigma is zero')
+        var_gap = wide_var - narrow_var
+        mean_gap = fabs(wide.mu - narrow.mu)
+        if not var_gap:
+            # Equal variances: 2 * Phi(-mean_gap / (2 * sigma)), via cdf().
+            return 2.0 * NormalDist(mean_gap, 2.0 * narrow.sigma).cdf(0)
+        # Unequal variances: compare the two cdfs at x1 and x2, the values
+        # (a + b) / var_gap and (a - b) / var_gap.
+        a = narrow.mu * wide_var - wide.mu * narrow_var
+        b = narrow.sigma * wide.sigma * sqrt(
+            mean_gap**2.0 + var_gap * log(wide_var / narrow_var))
+        x1, x2 = (a + b) / var_gap, (a - b) / var_gap
+        gap1 = fabs(wide.cdf(x1) - narrow.cdf(x1))
+        gap2 = fabs(wide.cdf(x2) - narrow.cdf(x2))
+        return 1.0 - (gap1 + gap2)
+
@property
def mean(self):
'Arithmetic mean of the normal distribution'