From 47d9987247bcc45983a6d51fd1ae46d5d356d0f8 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 21 Feb 2019 15:06:29 -0800 Subject: bpo-35904: Add statistics.fmean() (GH-11892) --- Doc/library/random.rst | 4 +- Doc/library/statistics.rst | 18 +++++++++ Doc/whatsnew/3.8.rst | 9 +++++ Lib/statistics.py | 29 +++++++++++++- Lib/test/test_statistics.py | 45 ++++++++++++++++++++++ .../2019-02-16-00-55-52.bpo-35904.V88MCD.rst | 2 + 6 files changed, 104 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2019-02-16-00-55-52.bpo-35904.V88MCD.rst diff --git a/Doc/library/random.rst b/Doc/library/random.rst index 7d051e1..79a7bdd 100644 --- a/Doc/library/random.rst +++ b/Doc/library/random.rst @@ -404,7 +404,7 @@ with replacement to estimate a confidence interval for the mean of a sample of size five:: # http://statistics.about.com/od/Applications/a/Example-Of-Bootstrapping.htm - from statistics import mean + from statistics import fmean as mean from random import choices data = 1, 2, 4, 4, 10 @@ -419,7 +419,7 @@ to determine the statistical significance or `p-value between the effects of a drug versus a placebo:: # Example from "Statistics is Easy" by Dennis Shasha and Manda Wilson - from statistics import mean + from statistics import fmean as mean from random import shuffle drug = [54, 73, 53, 70, 73, 68, 52, 65, 65] diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst index 26bb592..20a2c1c 100644 --- a/Doc/library/statistics.rst +++ b/Doc/library/statistics.rst @@ -39,6 +39,7 @@ or sample. ======================= ============================================= :func:`mean` Arithmetic mean ("average") of data. +:func:`fmean` Fast, floating point arithmetic mean. :func:`harmonic_mean` Harmonic mean of data. :func:`median` Median (middle value) of data. :func:`median_low` Low median of data. @@ -111,6 +112,23 @@ However, for reading convenience, most of the examples show sorted sequences. 
# ---- Lib/statistics.py ----

def fmean(data):
    """Convert data to floats and compute the arithmetic mean.

    This runs faster than the mean() function and it always returns a float.
    The result is highly accurate but not as perfect as mean().
    If the input dataset is empty, it raises a StatisticsError.

    >>> fmean([3.5, 4.0, 5.25])
    4.25

    *data* may be a sized container or a one-shot iterator.  The summation
    is delegated to math.fsum(), so its rules for special values apply and
    any exception it raises (for example TypeError for a non-numeric item,
    or ValueError when both Inf and -Inf are present) propagates unchanged.

    """
    try:
        n = len(data)
    except TypeError:
        # Iterators and generators do not define __len__(): count the
        # items while math.fsum() consumes them, in a single pass.
        n = 0

        def counted(iterable):
            nonlocal n
            # enumerate() rebinds n to the running count, replacing a
            # Python-level function call plus increment per element.
            for n, x in enumerate(iterable, start=1):
                yield x

        total = math.fsum(counted(data))
    else:
        total = math.fsum(data)
    try:
        return total / n
    except ZeroDivisionError:
        # n == 0 means no data points were seen on either path.
        raise StatisticsError('fmean requires at least one data point') from None


# ---- Lib/test/test_statistics.py ----

class TestFMean(unittest.TestCase):
    """Tests for statistics.fmean()."""

    def test_basics(self):
        """Every supported input kind yields a float equal to the true mean."""
        fmean = statistics.fmean
        D = Decimal
        F = Fraction
        for data, expected_mean, kind in [
            ([3.5, 4.0, 5.25], 4.25, 'floats'),
            ([D('3.5'), D('4.0'), D('5.25')], 4.25, 'decimals'),
            ([F(7, 2), F(4, 1), F(21, 4)], 4.25, 'fractions'),
            ([True, False, True, True, False], 0.60, 'booleans'),
            ([3.5, 4, F(21, 4)], 4.25, 'mixed types'),
            ((3.5, 4.0, 5.25), 4.25, 'tuple'),
            (iter([3.5, 4.0, 5.25]), 4.25, 'iterator'),
        ]:
            # subTest reports each failing row by name and keeps going,
            # instead of stopping at the first failure.
            with self.subTest(kind=kind):
                actual_mean = fmean(data)
                self.assertIs(type(actual_mean), float, kind)
                self.assertEqual(actual_mean, expected_mean, kind)

    def test_error_cases(self):
        """Empty input raises StatisticsError; bad arguments raise TypeError."""
        fmean = statistics.fmean
        StatisticsError = statistics.StatisticsError
        with self.assertRaises(StatisticsError):
            fmean([])                       # empty input
        with self.assertRaises(StatisticsError):
            fmean(iter([]))                 # empty iterator
        with self.assertRaises(TypeError):
            fmean(None)                     # non-iterable input
        with self.assertRaises(TypeError):
            fmean([10, None, 20])           # non-numeric input
        with self.assertRaises(TypeError):
            fmean()                         # missing data argument
        with self.assertRaises(TypeError):
            fmean([10, 20, 60], 70)         # too many arguments

    def test_special_values(self):
        """NaN and infinities follow the rules of math.fsum()."""
        # Rules for special values are inherited from math.fsum()
        fmean = statistics.fmean
        NaN = float('nan')
        Inf = float('inf')
        self.assertTrue(math.isnan(fmean([10, NaN])), 'nan')
        self.assertTrue(math.isnan(fmean([NaN, Inf])), 'nan and infinity')
        self.assertTrue(math.isinf(fmean([10, Inf])), 'infinity')
        with self.assertRaises(ValueError):
            fmean([Inf, -Inf])