diff options
-rw-r--r-- | Lib/statistics.py | 27 | ||||
-rw-r--r-- | Lib/test/test_statistics.py | 6 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2022-05-09-01-27-25.gh-issue-92531.vV7S_O.rst | 3 |
3 files changed, 22 insertions, 14 deletions
diff --git a/Lib/statistics.py b/Lib/statistics.py index 54f4e13..2d66b05 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -611,7 +611,7 @@ def median_high(data): return data[n // 2] -def median_grouped(data, interval=1): +def median_grouped(data, interval=1.0): """Estimates the median for numeric data binned around the midpoints of consecutive, fixed-width intervals. @@ -650,35 +650,34 @@ def median_grouped(data, interval=1): by exact multiples of *interval*. This is essential for getting a correct result. The function does not check this precondition. + Inputs may be any numeric type that can be coerced to a float during + the interpolation step. + """ data = sorted(data) n = len(data) - if n == 0: + if not n: raise StatisticsError("no median for empty data") - elif n == 1: - return data[0] # Find the value at the midpoint. Remember this corresponds to the # midpoint of the class interval. x = data[n // 2] - # Generate a clear error message for non-numeric data - for obj in (x, interval): - if isinstance(obj, (str, bytes)): - raise TypeError(f'expected a number but got {obj!r}') - # Using O(log n) bisection, find where all the x values occur in the data. # All x will lie within data[i:j]. i = bisect_left(data, x) j = bisect_right(data, x, lo=i) + # Coerce to floats, raising a TypeError if not possible + try: + interval = float(interval) + x = float(x) + except ValueError: + raise TypeError(f'Value cannot be converted to a float') + # Interpolate the median using the formula found at: # https://www.cuemath.com/data/median-of-grouped-data/ - try: - L = x - interval / 2 # The lower limit of the median interval. - except TypeError: - # Coerce mixed types to float. 
- L = float(x) - float(interval) / 2 + L = x - interval / 2.0 # Lower limit of the median interval cf = i # Cumulative frequency of the preceding interval f = j - i # Number of elements in the median interval return L + interval * (n / 2 - cf) / f diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index ed6021d..6de9824 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -1742,6 +1742,12 @@ class TestMedianGrouped(TestMedian): data = [x]*count self.assertEqual(self.func(data), float(x)) + def test_single_value(self): + # Override method from AverageMixin. + # Average of a single value is the value as a float. + for x in (23, 42.5, 1.3e15, Fraction(15, 19), Decimal('0.28')): + self.assertEqual(self.func([x]), float(x)) + def test_odd_fractions(self): # Test median_grouped works with an odd number of Fractions. F = Fraction diff --git a/Misc/NEWS.d/next/Library/2022-05-09-01-27-25.gh-issue-92531.vV7S_O.rst b/Misc/NEWS.d/next/Library/2022-05-09-01-27-25.gh-issue-92531.vV7S_O.rst new file mode 100644 index 0000000..574fa6c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-05-09-01-27-25.gh-issue-92531.vV7S_O.rst @@ -0,0 +1,3 @@ +The statistics.median_grouped() function now always returns a float. +Formerly, it did not convert the input type for sequences of length +one. |