bpo-37798: Add C fastpath for statistics.NormalDist.inv_cdf() (GH-15266)

author: Dong-hee Na <donghee.na92@gmail.com> 2019-08-23 22:20:30 (GMT)
committer: Raymond Hettinger <rhettinger@users.noreply.github.com> 2019-08-23 22:20:30 (GMT)
commit: 0a18ee4be7ba215f414bef04598e0849504f9f1e (patch)
tree: 02b4a3f5f9cd481ce73e4aa934b5bf13b600504a /Lib/statistics.py
parent: 5be666010e4df65dc4d831435cc92340ea369f94 (diff)
download: cpython-0a18ee4be7ba215f414bef04598e0849504f9f1e.zip
cpython-0a18ee4be7ba215f414bef04598e0849504f9f1e.tar.gz
cpython-0a18ee4be7ba215f414bef04598e0849504f9f1e.tar.bz2
1 files changed, 82 insertions, 73 deletions
diff --git a/Lib/statistics.py b/Lib/statistics.py
index 77291dd6..c7d6568 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -824,6 +824,81 @@ def pstdev(data, mu=None):
 
 ## Normal Distribution #####################################################
 
+
+def _normal_dist_inv_cdf(p, mu, sigma):
+    # There is no closed-form solution to the inverse CDF for the normal
+    # distribution, so we use a rational approximation instead:
+    # Wichura, M.J. (1988). "Algorithm AS241: The Percentage Points of the
+    # Normal Distribution".  Applied Statistics. Blackwell Publishing. 37
+    # (3): 477–484. doi:10.2307/2347330. JSTOR 2347330.
+    q = p - 0.5
+    if fabs(q) <= 0.425:
+        r = 0.180625 - q * q
+        # Hash sum: 55.88319_28806_14901_4439
+        num = (((((((2.50908_09287_30122_6727e+3 * r +
+                     3.34305_75583_58812_8105e+4) * r +
+                     6.72657_70927_00870_0853e+4) * r +
+                     4.59219_53931_54987_1457e+4) * r +
+                     1.37316_93765_50946_1125e+4) * r +
+                     1.97159_09503_06551_4427e+3) * r +
+                     1.33141_66789_17843_7745e+2) * r +
+                     3.38713_28727_96366_6080e+0) * q
+        den = (((((((5.22649_52788_52854_5610e+3 * r +
+                     2.87290_85735_72194_2674e+4) * r +
+                     3.93078_95800_09271_0610e+4) * r +
+                     2.12137_94301_58659_5867e+4) * r +
+                     5.39419_60214_24751_1077e+3) * r +
+                     6.87187_00749_20579_0830e+2) * r +
+                     4.23133_30701_60091_1252e+1) * r +
+                     1.0)
+        x = num / den
+        return mu + (x * sigma)
+    r = p if q <= 0.0 else 1.0 - p
+    r = sqrt(-log(r))
+    if r <= 5.0:
+        r = r - 1.6
+        # Hash sum: 49.33206_50330_16102_89036
+        num = (((((((7.74545_01427_83414_07640e-4 * r +
+                     2.27238_44989_26918_45833e-2) * r +
+                     2.41780_72517_74506_11770e-1) * r +
+                     1.27045_82524_52368_38258e+0) * r +
+                     3.64784_83247_63204_60504e+0) * r +
+                     5.76949_72214_60691_40550e+0) * r +
+                     4.63033_78461_56545_29590e+0) * r +
+                     1.42343_71107_49683_57734e+0)
+        den = (((((((1.05075_00716_44416_84324e-9 * r +
+                     5.47593_80849_95344_94600e-4) * r +
+                     1.51986_66563_61645_71966e-2) * r +
+                     1.48103_97642_74800_74590e-1) * r +
+                     6.89767_33498_51000_04550e-1) * r +
+                     1.67638_48301_83803_84940e+0) * r +
+                     2.05319_16266_37758_82187e+0) * r +
+                     1.0)
+    else:
+        r = r - 5.0
+        # Hash sum: 47.52583_31754_92896_71629
+        num = (((((((2.01033_43992_92288_13265e-7 * r +
+                     2.71155_55687_43487_57815e-5) * r +
+                     1.24266_09473_88078_43860e-3) * r +
+                     2.65321_89526_57612_30930e-2) * r +
+                     2.96560_57182_85048_91230e-1) * r +
+                     1.78482_65399_17291_33580e+0) * r +
+                     5.46378_49111_64114_36990e+0) * r +
+                     6.65790_46435_01103_77720e+0)
+        den = (((((((2.04426_31033_89939_78564e-15 * r +
+                     1.42151_17583_16445_88870e-7) * r +
+                     1.84631_83175_10054_68180e-5) * r +
+                     7.86869_13114_56132_59100e-4) * r +
+                     1.48753_61290_85061_48525e-2) * r +
+                     1.36929_88092_27358_05310e-1) * r +
+                     5.99832_20655_58879_37690e-1) * r +
+                     1.0)
+    x = num / den
+    if q < 0.0:
+        x = -x
+    return mu + (x * sigma)
+
+
 class NormalDist:
     "Normal distribution of a random variable"
     # https://en.wikipedia.org/wiki/Normal_distribution
@@ -882,79 +957,7 @@ class NormalDist:
             raise StatisticsError('p must be in the range 0.0 < p < 1.0')
         if self._sigma <= 0.0:
             raise StatisticsError('cdf() not defined when sigma at or below zero')
-
-        # There is no closed-form solution to the inverse CDF for the normal
-        # distribution, so we use a rational approximation instead:
-        # Wichura, M.J. (1988). "Algorithm AS241: The Percentage Points of the
-        # Normal Distribution".  Applied Statistics. Blackwell Publishing. 37
-        # (3): 477–484. doi:10.2307/2347330. JSTOR 2347330.
-
-        q = p - 0.5
-        if fabs(q) <= 0.425:
-            r = 0.180625 - q * q
-            # Hash sum: 55.88319_28806_14901_4439
-            num = (((((((2.50908_09287_30122_6727e+3 * r +
-                         3.34305_75583_58812_8105e+4) * r +
-                         6.72657_70927_00870_0853e+4) * r +
-                         4.59219_53931_54987_1457e+4) * r +
-                         1.37316_93765_50946_1125e+4) * r +
-                         1.97159_09503_06551_4427e+3) * r +
-                         1.33141_66789_17843_7745e+2) * r +
-                         3.38713_28727_96366_6080e+0) * q
-            den = (((((((5.22649_52788_52854_5610e+3 * r +
-                         2.87290_85735_72194_2674e+4) * r +
-                         3.93078_95800_09271_0610e+4) * r +
-                         2.12137_94301_58659_5867e+4) * r +
-                         5.39419_60214_24751_1077e+3) * r +
-                         6.87187_00749_20579_0830e+2) * r +
-                         4.23133_30701_60091_1252e+1) * r +
-                         1.0)
-            x = num / den
-            return self._mu + (x * self._sigma)
-        r = p if q <= 0.0 else 1.0 - p
-        r = sqrt(-log(r))
-        if r <= 5.0:
-            r = r - 1.6
-            # Hash sum: 49.33206_50330_16102_89036
-            num = (((((((7.74545_01427_83414_07640e-4 * r +
-                         2.27238_44989_26918_45833e-2) * r +
-                         2.41780_72517_74506_11770e-1) * r +
-                         1.27045_82524_52368_38258e+0) * r +
-                         3.64784_83247_63204_60504e+0) * r +
-                         5.76949_72214_60691_40550e+0) * r +
-                         4.63033_78461_56545_29590e+0) * r +
-                         1.42343_71107_49683_57734e+0)
-            den = (((((((1.05075_00716_44416_84324e-9 * r +
-                         5.47593_80849_95344_94600e-4) * r +
-                         1.51986_66563_61645_71966e-2) * r +
-                         1.48103_97642_74800_74590e-1) * r +
-                         6.89767_33498_51000_04550e-1) * r +
-                         1.67638_48301_83803_84940e+0) * r +
-                         2.05319_16266_37758_82187e+0) * r +
-                         1.0)
-        else:
-            r = r - 5.0
-            # Hash sum: 47.52583_31754_92896_71629
-            num = (((((((2.01033_43992_92288_13265e-7 * r +
-                         2.71155_55687_43487_57815e-5) * r +
-                         1.24266_09473_88078_43860e-3) * r +
-                         2.65321_89526_57612_30930e-2) * r +
-                         2.96560_57182_85048_91230e-1) * r +
-                         1.78482_65399_17291_33580e+0) * r +
-                         5.46378_49111_64114_36990e+0) * r +
-                         6.65790_46435_01103_77720e+0)
-            den = (((((((2.04426_31033_89939_78564e-15 * r +
-                         1.42151_17583_16445_88870e-7) * r +
-                         1.84631_83175_10054_68180e-5) * r +
-                         7.86869_13114_56132_59100e-4) * r +
-                         1.48753_61290_85061_48525e-2) * r +
-                         1.36929_88092_27358_05310e-1) * r +
-                         5.99832_20655_58879_37690e-1) * r +
-                         1.0)
-        x = num / den
-        if q < 0.0:
-            x = -x
-        return self._mu + (x * self._sigma)
+        return _normal_dist_inv_cdf(p, self._mu, self._sigma)
 
     def overlap(self, other):
         """Compute the overlapping coefficient (OVL) between two normal distributions.
@@ -1078,6 +1081,12 @@ class NormalDist:
     def __repr__(self):
         return f'{type(self).__name__}(mu={self._mu!r}, sigma={self._sigma!r})'
 
+# If available, use C implementation
+try:
+    from _statistics import _normal_dist_inv_cdf
+except ImportError:
+    pass
+
 
 if __name__ == '__main__':
author	Dong-hee Na <donghee.na92@gmail.com>	2019-08-23 22:20:30 (GMT)
committer	Raymond Hettinger <rhettinger@users.noreply.github.com>	2019-08-23 22:20:30 (GMT)
commit	0a18ee4be7ba215f414bef04598e0849504f9f1e (patch)
tree	02b4a3f5f9cd481ce73e4aa934b5bf13b600504a /Lib/statistics.py
parent	5be666010e4df65dc4d831435cc92340ea369f94 (diff)
download	cpython-0a18ee4be7ba215f414bef04598e0849504f9f1e.zip cpython-0a18ee4be7ba215f414bef04598e0849504f9f1e.tar.gz cpython-0a18ee4be7ba215f414bef04598e0849504f9f1e.tar.bz2