summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRaymond Hettinger <rhettinger@users.noreply.github.com>2019-09-08 23:57:58 (GMT)
committerGitHub <noreply@github.com>2019-09-08 23:57:58 (GMT)
commit4db25d5c39e369f4b55eab52dc8f87f390233892 (patch)
treeb7e29ea0eb1eb55bbe9a5a5b888e76d436ffb551
parent3c87a667bb367ace1de6bd1577fdb4f66947da52 (diff)
downloadcpython-4db25d5c39e369f4b55eab52dc8f87f390233892.zip
cpython-4db25d5c39e369f4b55eab52dc8f87f390233892.tar.gz
cpython-4db25d5c39e369f4b55eab52dc8f87f390233892.tar.bz2
bpo-36018: Address more reviewer feedback (GH-15733)
-rw-r--r--Doc/library/statistics.rst41
-rw-r--r--Lib/statistics.py32
-rw-r--r--Lib/test/test_statistics.py35
3 files changed, 69 insertions, 39 deletions
diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst
index 0798ae2..bdd706d 100644
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -514,15 +514,14 @@ However, for reading convenience, most of the examples show sorted sequences.
Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles. Set
*n* to 100 for percentiles which gives the 99 cuts points that separate
- *data* in to 100 equal sized groups. Raises :exc:`StatisticsError` if *n*
+ *data* into 100 equal sized groups. Raises :exc:`StatisticsError` if *n*
is not least 1.
- The *data* can be any iterable containing sample data or it can be an
- instance of a class that defines an :meth:`~inv_cdf` method. For meaningful
+ The *data* can be any iterable containing sample data. For meaningful
results, the number of data points in *data* should be larger than *n*.
Raises :exc:`StatisticsError` if there are not at least two data points.
- For sample data, the cut points are linearly interpolated from the
+ The cut points are linearly interpolated from the
two nearest data points. For example, if a cut point falls one-third
of the distance between two sample values, ``100`` and ``112``, the
cut-point will evaluate to ``104``.
@@ -547,9 +546,6 @@ However, for reading convenience, most of the examples show sorted sequences.
values, the method sorts them and assigns the following percentiles:
0%, 10%, 20%, 30%, 40%, 50%, 60%, 70%, 80%, 90%, 100%.
- If *data* is an instance of a class that defines an
- :meth:`~inv_cdf` method, setting *method* has no effect.
-
.. doctest::
# Decile cut points for empirically sampled data
@@ -561,11 +557,6 @@ However, for reading convenience, most of the examples show sorted sequences.
>>> [round(q, 1) for q in quantiles(data, n=10)]
[81.0, 86.2, 89.0, 99.4, 102.5, 103.6, 106.0, 109.8, 111.0]
- >>> # Quartile cut points for the standard normal distribution
- >>> Z = NormalDist()
- >>> [round(q, 4) for q in quantiles(Z, n=4)]
- [-0.6745, 0.0, 0.6745]
-
.. versionadded:: 3.8
@@ -607,6 +598,18 @@ of applications in statistics.
<https://en.wikipedia.org/wiki/Arithmetic_mean>`_ of a normal
distribution.
+ .. attribute:: median
+
+ A read-only property for the `median
+ <https://en.wikipedia.org/wiki/Median>`_ of a normal
+ distribution.
+
+ .. attribute:: mode
+
+ A read-only property for the `mode
+ <https://en.wikipedia.org/wiki/Mode_(statistics)>`_ of a normal
+ distribution.
+
.. attribute:: stdev
A read-only property for the `standard deviation
@@ -678,6 +681,16 @@ of applications in statistics.
the two probability density functions
<https://www.rasch.org/rmt/rmt101r.htm>`_.
+ .. method:: NormalDist.quantiles(n=4)
+
+ Divide the normal distribution into *n* continuous intervals with
+ equal probability. Returns a list of (n - 1) cut points separating
+ the intervals.
+
+ Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles.
+ Set *n* to 100 for percentiles which gives the 99 cut points that
+ separate the normal distribution into 100 equal sized groups.
+
Instances of :class:`NormalDist` support addition, subtraction,
multiplication and division by a constant. These operations
are used for translation and scaling. For example:
@@ -733,9 +746,9 @@ Find the `quartiles <https://en.wikipedia.org/wiki/Quartile>`_ and `deciles
.. doctest::
- >>> list(map(round, quantiles(sat)))
+ >>> list(map(round, sat.quantiles()))
[928, 1060, 1192]
- >>> list(map(round, quantiles(sat, n=10)))
+ >>> list(map(round, sat.quantiles(n=10)))
[810, 896, 958, 1011, 1060, 1109, 1162, 1224, 1310]
To estimate the distribution for a model than isn't easy to solve
diff --git a/Lib/statistics.py b/Lib/statistics.py
index 4b17266..70c48d6 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -624,9 +624,8 @@ def quantiles(data, /, *, n=4, method='exclusive'):
Set *n* to 100 for percentiles which gives the 99 cuts points that
separate *data* in to 100 equal sized groups.
- The *data* can be any iterable containing sample data or it can be
- an instance of a class that defines an inv_cdf() method. For sample
- data, the cut points are linearly interpolated between data points.
+ The *data* can be any iterable containing sample data.
+ The cut points are linearly interpolated between data points.
If *method* is set to *inclusive*, *data* is treated as population
data. The minimum value is treated as the 0th percentile and the
@@ -634,8 +633,6 @@ def quantiles(data, /, *, n=4, method='exclusive'):
"""
if n < 1:
raise StatisticsError('n must be at least 1')
- if hasattr(data, 'inv_cdf'):
- return [data.inv_cdf(i / n) for i in range(1, n)]
data = sorted(data)
ld = len(data)
if ld < 2:
@@ -955,6 +952,17 @@ class NormalDist:
raise StatisticsError('cdf() not defined when sigma at or below zero')
return _normal_dist_inv_cdf(p, self._mu, self._sigma)
+ def quantiles(self, n=4):
+ """Divide into *n* continuous intervals with equal probability.
+
+ Returns a list of (n - 1) cut points separating the intervals.
+
+ Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles.
+ Set *n* to 100 for percentiles which gives the 99 cut points that
+ separate the normal distribution into 100 equal sized groups.
+ """
+ return [self.inv_cdf(i / n) for i in range(1, n)]
+
def overlap(self, other):
"""Compute the overlapping coefficient (OVL) between two normal distributions.
@@ -995,6 +1003,20 @@ class NormalDist:
return self._mu
@property
+ def median(self):
+ "Return the median of the normal distribution"
+ return self._mu
+
+ @property
+ def mode(self):
+ """Return the mode of the normal distribution
+
+ The mode is the value x at which the probability density
+ function (pdf) takes its maximum value.
+ """
+ return self._mu
+
+ @property
def stdev(self):
"Standard deviation of the normal distribution."
return self._sigma
diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py
index 01b317c..af26473 100644
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -2198,16 +2198,6 @@ class TestQuantiles(unittest.TestCase):
exp = list(map(f, expected))
act = quantiles(map(f, data), n=n)
self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act)))
- # Quartiles of a standard normal distribution
- for n, expected in [
- (1, []),
- (2, [0.0]),
- (3, [-0.4307, 0.4307]),
- (4 ,[-0.6745, 0.0, 0.6745]),
- ]:
- actual = quantiles(statistics.NormalDist(), n=n)
- self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
- for e, a in zip(expected, actual)))
# Q2 agrees with median()
for k in range(2, 60):
data = random.choices(range(100), k=k)
@@ -2248,16 +2238,6 @@ class TestQuantiles(unittest.TestCase):
exp = list(map(f, expected))
act = quantiles(map(f, data), n=n, method="inclusive")
self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act)))
- # Quartiles of a standard normal distribution
- for n, expected in [
- (1, []),
- (2, [0.0]),
- (3, [-0.4307, 0.4307]),
- (4 ,[-0.6745, 0.0, 0.6745]),
- ]:
- actual = quantiles(statistics.NormalDist(), n=n, method="inclusive")
- self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
- for e, a in zip(expected, actual)))
# Natural deciles
self.assertEqual(quantiles([0, 100], n=10, method='inclusive'),
[10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0])
@@ -2546,6 +2526,19 @@ class TestNormalDist:
# Special values
self.assertTrue(math.isnan(Z.inv_cdf(float('NaN'))))
+ def test_quantiles(self):
+ # Quartiles of a standard normal distribution
+ Z = self.module.NormalDist()
+ for n, expected in [
+ (1, []),
+ (2, [0.0]),
+ (3, [-0.4307, 0.4307]),
+ (4 ,[-0.6745, 0.0, 0.6745]),
+ ]:
+ actual = Z.quantiles(n=n)
+ self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
+ for e, a in zip(expected, actual)))
+
def test_overlap(self):
NormalDist = self.module.NormalDist
@@ -2612,6 +2605,8 @@ class TestNormalDist:
def test_properties(self):
X = self.module.NormalDist(100, 15)
self.assertEqual(X.mean, 100)
+ self.assertEqual(X.median, 100)
+ self.assertEqual(X.mode, 100)
self.assertEqual(X.stdev, 15)
self.assertEqual(X.variance, 225)