bpo-35904: Add statistics.fmean() (GH-11892)

author: Raymond Hettinger <rhettinger@users.noreply.github.com> 2019-02-21 23:06:29 (GMT)
committer: GitHub <noreply@github.com> 2019-02-21 23:06:29 (GMT)
commit: 47d9987247bcc45983a6d51fd1ae46d5d356d0f8 (patch)
tree: 16b7e88590f9a28ff47e8a0e041510c4a2d86756 /Lib
parent: f36f89257b30e0bf88e8aaff6da14a9a96f57b9e (diff)
download: cpython-47d9987247bcc45983a6d51fd1ae46d5d356d0f8.zip
cpython-47d9987247bcc45983a6d51fd1ae46d5d356d0f8.tar.gz
cpython-47d9987247bcc45983a6d51fd1ae46d5d356d0f8.tar.bz2
2 files changed, 73 insertions, 1 deletion
diff --git a/Lib/statistics.py b/Lib/statistics.py
index 47c2bb4..8ecb906 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -79,7 +79,7 @@ A single exception is defined: StatisticsError is a subclass of ValueError.
 __all__ = [ 'StatisticsError',
             'pstdev', 'pvariance', 'stdev', 'variance',
             'median',  'median_low', 'median_high', 'median_grouped',
-            'mean', 'mode', 'harmonic_mean',
+            'mean', 'mode', 'harmonic_mean', 'fmean',
           ]
 
 import collections
@@ -312,6 +312,33 @@ def mean(data):
     assert count == n
     return _convert(total/n, T)
 
+def fmean(data):
+    """Convert data to floats and compute the arithmetic mean.
+
+    This runs faster than the mean() function and it always returns a float.
+    The result is highly accurate but not as perfect as mean().
+    Accepts sized containers or iterators; empty data raises StatisticsError.
+
+    >>> fmean([3.5, 4.0, 5.25])
+    4.25
+
+    """
+    try:
+        n = len(data)
+    except TypeError:
+        # No len() (e.g. an iterator): count items as fsum() consumes them.
+        n = 0
+        def count(x):
+            nonlocal n
+            n += 1
+            return x
+        total = math.fsum(map(count, data))
+    else:
+        total = math.fsum(data)  # sized input: n is already known
+    try:
+        return total / n
+    except ZeroDivisionError:
+        raise StatisticsError('fmean requires at least one data point') from None
 
 def harmonic_mean(data):
     """Return the harmonic mean of data.
diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py
index b577433..e351446 100644
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -1810,6 +1810,51 @@ class TestMode(NumericTestCase, AverageMixin, UnivariateTypeMixin):
         # counts, this should raise.
         self.assertRaises(statistics.StatisticsError, self.func, data)
 
+class TestFMean(unittest.TestCase):
+
+    def test_basics(self):
+        fmean = statistics.fmean
+        D = Decimal
+        F = Fraction
+        for data, expected_mean, kind in [
+            ([3.5, 4.0, 5.25], 4.25, 'floats'),
+            ([D('3.5'), D('4.0'), D('5.25')], 4.25, 'decimals'),
+            ([F(7, 2), F(4, 1), F(21, 4)], 4.25, 'fractions'),
+            ([True, False, True, True, False], 0.60, 'booleans'),
+            ([3.5, 4, F(21, 4)], 4.25, 'mixed types'),
+            ((3.5, 4.0, 5.25), 4.25, 'tuple'),
+            (iter([3.5, 4.0, 5.25]), 4.25, 'iterator'),  # input without len()
+                ]:
+            actual_mean = fmean(data)
+            self.assertIs(type(actual_mean), float, kind)  # exactly float
+            self.assertEqual(actual_mean, expected_mean, kind)
+
+    def test_error_cases(self):
+        fmean = statistics.fmean
+        StatisticsError = statistics.StatisticsError
+        with self.assertRaises(StatisticsError):
+            fmean([])                               # empty input
+        with self.assertRaises(StatisticsError):
+            fmean(iter([]))                         # empty iterator
+        with self.assertRaises(TypeError):
+            fmean(None)                             # non-iterable input
+        with self.assertRaises(TypeError):
+            fmean([10, None, 20])                   # non-numeric input
+        with self.assertRaises(TypeError):
+            fmean()                                 # missing data argument
+        with self.assertRaises(TypeError):
+            fmean([10, 20, 60], 70)                 # too many arguments
+
+    def test_special_values(self):
+        # Rules for special values are inherited from math.fsum()
+        fmean = statistics.fmean
+        NaN = float('Nan')
+        Inf = float('Inf')
+        self.assertTrue(math.isnan(fmean([10, NaN])), 'nan')
+        self.assertTrue(math.isnan(fmean([NaN, Inf])), 'nan and infinity')
+        self.assertTrue(math.isinf(fmean([10, Inf])), 'infinity')
+        with self.assertRaises(ValueError):
+            fmean([Inf, -Inf])                      # opposite infinities
 
 
 # === Tests for variances and standard deviations ===
author	Raymond Hettinger <rhettinger@users.noreply.github.com>	2019-02-21 23:06:29 (GMT)
committer	GitHub <noreply@github.com>	2019-02-21 23:06:29 (GMT)
commit	47d9987247bcc45983a6d51fd1ae46d5d356d0f8 (patch)
tree	16b7e88590f9a28ff47e8a0e041510c4a2d86756 /Lib
parent	f36f89257b30e0bf88e8aaff6da14a9a96f57b9e (diff)
download	cpython-47d9987247bcc45983a6d51fd1ae46d5d356d0f8.zip cpython-47d9987247bcc45983a6d51fd1ae46d5d356d0f8.tar.gz cpython-47d9987247bcc45983a6d51fd1ae46d5d356d0f8.tar.bz2