summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Doc/library/random.rst 4
-rw-r--r-- Doc/library/statistics.rst 18
-rw-r--r-- Doc/whatsnew/3.8.rst 9
-rw-r--r-- Lib/statistics.py 29
-rw-r--r-- Lib/test/test_statistics.py 45
-rw-r--r-- Misc/NEWS.d/next/Library/2019-02-16-00-55-52.bpo-35904.V88MCD.rst 2
6 files changed, 104 insertions, 3 deletions
diff --git a/Doc/library/random.rst b/Doc/library/random.rst
index 7d051e1..79a7bdd 100644
--- a/Doc/library/random.rst
+++ b/Doc/library/random.rst
@@ -404,7 +404,7 @@ with replacement to estimate a confidence interval for the mean of a sample of
size five::
# http://statistics.about.com/od/Applications/a/Example-Of-Bootstrapping.htm
- from statistics import mean
+ from statistics import fmean as mean
from random import choices
data = 1, 2, 4, 4, 10
@@ -419,7 +419,7 @@ to determine the statistical significance or `p-value
between the effects of a drug versus a placebo::
# Example from "Statistics is Easy" by Dennis Shasha and Manda Wilson
- from statistics import mean
+ from statistics import fmean as mean
from random import shuffle
drug = [54, 73, 53, 70, 73, 68, 52, 65, 65]
diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst
index 26bb592..20a2c1c 100644
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -39,6 +39,7 @@ or sample.
======================= =============================================
:func:`mean` Arithmetic mean ("average") of data.
+:func:`fmean` Fast, floating point arithmetic mean.
:func:`harmonic_mean` Harmonic mean of data.
:func:`median` Median (middle value) of data.
:func:`median_low` Low median of data.
@@ -111,6 +112,23 @@ However, for reading convenience, most of the examples show sorted sequences.
``mean(data)`` is equivalent to calculating the true population mean μ.
+.. function:: fmean(data)
+
+ Convert *data* to floats and compute the arithmetic mean.
+
+ This runs faster than the :func:`mean` function and it always returns a
+ :class:`float`. The result is highly accurate but not as perfect as
+ :func:`mean`. If the input dataset is empty, raises a
+ :exc:`StatisticsError`.
+
+ .. doctest::
+
+ >>> fmean([3.5, 4.0, 5.25])
+ 4.25
+
+ .. versionadded:: 3.8
+
+
.. function:: harmonic_mean(data)
Return the harmonic mean of *data*, a sequence or iterator of
diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst
index 2f759f3..bf7300d 100644
--- a/Doc/whatsnew/3.8.rst
+++ b/Doc/whatsnew/3.8.rst
@@ -254,6 +254,15 @@ Added :attr:`SSLContext.post_handshake_auth` to enable and
post-handshake authentication.
(Contributed by Christian Heimes in :issue:`34670`.)
+
+statistics
+----------
+
+Added :func:`statistics.fmean` as a faster, floating point variant of
+:func:`statistics.mean`. (Contributed by Raymond Hettinger and
+Steven D'Aprano in :issue:`35904`.)
+
+
tokenize
--------
diff --git a/Lib/statistics.py b/Lib/statistics.py
index 47c2bb4..8ecb906 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -79,7 +79,7 @@ A single exception is defined: StatisticsError is a subclass of ValueError.
__all__ = [ 'StatisticsError',
'pstdev', 'pvariance', 'stdev', 'variance',
'median', 'median_low', 'median_high', 'median_grouped',
- 'mean', 'mode', 'harmonic_mean',
+ 'mean', 'mode', 'harmonic_mean', 'fmean',
]
import collections
@@ -312,6 +312,33 @@ def mean(data):
assert count == n
return _convert(total/n, T)
+def fmean(data):
+ """Convert data to floats and compute the arithmetic mean.
+
+ This runs faster than the mean() function and it always returns a float.
+ The result is highly accurate but not as perfect as mean().
+ If the input dataset is empty, it raises a StatisticsError.
+
+ >>> fmean([3.5, 4.0, 5.25])
+ 4.25
+
+ """
+ try:
+ n = len(data)
+ except TypeError:
+ # No len() available (e.g. an iterator): count items as fsum() consumes them.
+ n = 0
+ def count(x):  # pass-through helper that tallies each item it sees
+ nonlocal n
+ n += 1
+ return x
+ total = math.fsum(map(count, data))
+ else:
+ total = math.fsum(data)  # length already known; sum the data directly
+ try:
+ return total / n
+ except ZeroDivisionError:  # n == 0, i.e. no data points were supplied
+ raise StatisticsError('fmean requires at least one data point') from None
def harmonic_mean(data):
"""Return the harmonic mean of data.
diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py
index b577433..e351446 100644
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -1810,6 +1810,51 @@ class TestMode(NumericTestCase, AverageMixin, UnivariateTypeMixin):
# counts, this should raise.
self.assertRaises(statistics.StatisticsError, self.func, data)
+class TestFMean(unittest.TestCase):
+
+ def test_basics(self):
+ fmean = statistics.fmean
+ D = Decimal
+ F = Fraction
+ for data, expected_mean, kind in [
+ ([3.5, 4.0, 5.25], 4.25, 'floats'),
+ ([D('3.5'), D('4.0'), D('5.25')], 4.25, 'decimals'),
+ ([F(7, 2), F(4, 1), F(21, 4)], 4.25, 'fractions'),
+ ([True, False, True, True, False], 0.60, 'booleans'),
+ ([3.5, 4, F(21, 4)], 4.25, 'mixed types'),
+ ((3.5, 4.0, 5.25), 4.25, 'tuple'),
+ (iter([3.5, 4.0, 5.25]), 4.25, 'iterator'),  # list iterators have no __len__()
+ ]:
+ actual_mean = fmean(data)
+ self.assertIs(type(actual_mean), float, kind)  # result type is float for every input kind
+ self.assertEqual(actual_mean, expected_mean, kind)
+
+ def test_error_cases(self):
+ fmean = statistics.fmean
+ StatisticsError = statistics.StatisticsError
+ with self.assertRaises(StatisticsError):
+ fmean([]) # empty input
+ with self.assertRaises(StatisticsError):
+ fmean(iter([])) # empty iterator
+ with self.assertRaises(TypeError):
+ fmean(None) # non-iterable input
+ with self.assertRaises(TypeError):
+ fmean([10, None, 20]) # non-numeric input
+ with self.assertRaises(TypeError):
+ fmean() # missing data argument
+ with self.assertRaises(TypeError):
+ fmean([10, 20, 60], 70) # too many arguments
+
+ def test_special_values(self):
+ # Rules for special values are inherited from math.fsum()
+ fmean = statistics.fmean
+ NaN = float('Nan')  # float() accepts any letter case for 'nan' and 'inf'
+ Inf = float('Inf')
+ self.assertTrue(math.isnan(fmean([10, NaN])), 'nan')
+ self.assertTrue(math.isnan(fmean([NaN, Inf])), 'nan and infinity')
+ self.assertTrue(math.isinf(fmean([10, Inf])), 'infinity')
+ with self.assertRaises(ValueError):
+ fmean([Inf, -Inf])  # opposite-signed infinities are rejected
# === Tests for variances and standard deviations ===
diff --git a/Misc/NEWS.d/next/Library/2019-02-16-00-55-52.bpo-35904.V88MCD.rst b/Misc/NEWS.d/next/Library/2019-02-16-00-55-52.bpo-35904.V88MCD.rst
new file mode 100644
index 0000000..c40c861
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-02-16-00-55-52.bpo-35904.V88MCD.rst
@@ -0,0 +1,2 @@
+Added statistics.fmean() as a faster, floating point variant of the existing
+mean() function.