diff options
author | Raymond Hettinger <rhettinger@users.noreply.github.com> | 2022-05-09 07:08:41 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-05-09 07:08:41 (GMT) |
commit | e01eeb7b4b8d00b9f5c6acb48957f46ac4e252c0 (patch) | |
tree | ddbd5234dd8bc3083003567836c2699c3696b19a /Lib/statistics.py | |
parent | 5bc2390229bbcb4f13359e867fd8a140a1d5496b (diff) | |
download | cpython-e01eeb7b4b8d00b9f5c6acb48957f46ac4e252c0.zip cpython-e01eeb7b4b8d00b9f5c6acb48957f46ac4e252c0.tar.gz cpython-e01eeb7b4b8d00b9f5c6acb48957f46ac4e252c0.tar.bz2 |
Fix inconsistent return type for statistics median_grouped() gh-92531 (#92533)
Diffstat (limited to 'Lib/statistics.py')
-rw-r--r-- | Lib/statistics.py | 27 |
1 file changed, 13 insertions, 14 deletions
diff --git a/Lib/statistics.py b/Lib/statistics.py
index 54f4e13..2d66b05 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -611,7 +611,7 @@ def median_high(data):
     return data[n // 2]
 
 
-def median_grouped(data, interval=1):
+def median_grouped(data, interval=1.0):
     """Estimates the median for numeric data binned around the midpoints
     of consecutive, fixed-width intervals.
 
@@ -650,35 +650,34 @@ def median_grouped(data, interval=1):
     by exact multiples of *interval*. This is essential for getting a
     correct result. The function does not check this precondition.
 
+    Inputs may be any numeric type that can be coerced to a float during
+    the interpolation step.
+
     """
     data = sorted(data)
     n = len(data)
-    if n == 0:
+    if not n:
         raise StatisticsError("no median for empty data")
-    elif n == 1:
-        return data[0]
 
     # Find the value at the midpoint. Remember this corresponds to the
     # midpoint of the class interval.
     x = data[n // 2]
 
-    # Generate a clear error message for non-numeric data
-    for obj in (x, interval):
-        if isinstance(obj, (str, bytes)):
-            raise TypeError(f'expected a number but got {obj!r}')
-
     # Using O(log n) bisection, find where all the x values occur in the data.
     # All x will lie within data[i:j].
     i = bisect_left(data, x)
     j = bisect_right(data, x, lo=i)
 
+    # Coerce to floats, raising a TypeError if not possible
+    try:
+        interval = float(interval)
+        x = float(x)
+    except ValueError:
+        raise TypeError(f'Value cannot be converted to a float')
+
     # Interpolate the median using the formula found at:
     # https://www.cuemath.com/data/median-of-grouped-data/
-    try:
-        L = x - interval / 2 # The lower limit of the median interval.
-    except TypeError:
-        # Coerce mixed types to float.
-        L = float(x) - float(interval) / 2
+    L = x - interval / 2.0 # Lower limit of the median interval
     cf = i # Cumulative frequency of the preceding interval
     f = j - i # Number of elements in the median internal
     return L + interval * (n / 2 - cf) / f