summary | refs | log | tree | commit | diff | stats
path: root/Lib/statistics.py
diff options
context:
space:
mode:
author: Raymond Hettinger <rhettinger@users.noreply.github.com> 2022-05-09 07:08:41 (GMT)
committer: GitHub <noreply@github.com> 2022-05-09 07:08:41 (GMT)
commit: e01eeb7b4b8d00b9f5c6acb48957f46ac4e252c0 (patch)
tree: ddbd5234dd8bc3083003567836c2699c3696b19a /Lib/statistics.py
parent: 5bc2390229bbcb4f13359e867fd8a140a1d5496b (diff)
download: cpython-e01eeb7b4b8d00b9f5c6acb48957f46ac4e252c0.zip
cpython-e01eeb7b4b8d00b9f5c6acb48957f46ac4e252c0.tar.gz
cpython-e01eeb7b4b8d00b9f5c6acb48957f46ac4e252c0.tar.bz2
Fix inconsistent return type for statistics median_grouped() gh-92531 (#92533)
Diffstat (limited to 'Lib/statistics.py')
-rw-r--r-- Lib/statistics.py 27
1 files changed, 13 insertions, 14 deletions
diff --git a/Lib/statistics.py b/Lib/statistics.py
index 54f4e13..2d66b05 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -611,7 +611,7 @@ def median_high(data):
return data[n // 2]
-def median_grouped(data, interval=1):
+def median_grouped(data, interval=1.0):
"""Estimates the median for numeric data binned around the midpoints
of consecutive, fixed-width intervals.
@@ -650,35 +650,34 @@ def median_grouped(data, interval=1):
by exact multiples of *interval*. This is essential for getting a
correct result. The function does not check this precondition.
+ Inputs may be any numeric type that can be coerced to a float during
+ the interpolation step.
+
"""
data = sorted(data)
n = len(data)
- if n == 0:
+ if not n:
raise StatisticsError("no median for empty data")
- elif n == 1:
- return data[0]
# Find the value at the midpoint. Remember this corresponds to the
# midpoint of the class interval.
x = data[n // 2]
- # Generate a clear error message for non-numeric data
- for obj in (x, interval):
- if isinstance(obj, (str, bytes)):
- raise TypeError(f'expected a number but got {obj!r}')
-
# Using O(log n) bisection, find where all the x values occur in the data.
# All x will lie within data[i:j].
i = bisect_left(data, x)
j = bisect_right(data, x, lo=i)
+ # Coerce to floats, raising a TypeError if not possible
+ try:
+ interval = float(interval)
+ x = float(x)
+ except ValueError:
+ raise TypeError(f'Value cannot be converted to a float')
+
# Interpolate the median using the formula found at:
# https://www.cuemath.com/data/median-of-grouped-data/
- try:
- L = x - interval / 2 # The lower limit of the median interval.
- except TypeError:
- # Coerce mixed types to float.
- L = float(x) - float(interval) / 2
+ L = x - interval / 2.0 # Lower limit of the median interval
cf = i # Cumulative frequency of the preceding interval
f = j - i # Number of elements in the median interval
return L + interval * (n / 2 - cf) / f