diff options
author | Raymond Hettinger <rhettinger@users.noreply.github.com> | 2020-06-14 02:17:28 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-06-14 02:17:28 (GMT) |
commit | 5aad027db9618f22f6fa2274e05dd50f928d2ed7 (patch) | |
tree | 52d6c6fedf6a2b4a0609fe33248bd02debab9c9b /Lib/statistics.py | |
parent | d71ab4f73887a6e2b380ddbbfe35b600d236fd4a (diff) | |
download | cpython-5aad027db9618f22f6fa2274e05dd50f928d2ed7.zip cpython-5aad027db9618f22f6fa2274e05dd50f928d2ed7.tar.gz cpython-5aad027db9618f22f6fa2274e05dd50f928d2ed7.tar.bz2 |
Some reformatting (suggested by Black) and minor factoring. (GH-20865)
Diffstat (limited to 'Lib/statistics.py')
-rw-r--r-- | Lib/statistics.py | 60 |
1 files changed, 29 insertions, 31 deletions
diff --git a/Lib/statistics.py b/Lib/statistics.py index 93a4633..f9d3802 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -163,7 +163,7 @@ def _sum(data, start=0): T = _coerce(int, type(start)) for typ, values in groupby(data, type): T = _coerce(T, typ) # or raise TypeError - for n,d in map(_exact_ratio, values): + for n, d in map(_exact_ratio, values): count += 1 partials[d] = partials_get(d, 0) + n if None in partials: @@ -261,7 +261,7 @@ def _convert(value, T): return T(value) except TypeError: if issubclass(T, Decimal): - return T(value.numerator)/T(value.denominator) + return T(value.numerator) / T(value.denominator) else: raise @@ -277,8 +277,8 @@ def _find_lteq(a, x): def _find_rteq(a, l, x): 'Locate the rightmost value exactly equal to x' i = bisect_right(a, x, lo=l) - if i != (len(a)+1) and a[i-1] == x: - return i-1 + if i != (len(a) + 1) and a[i - 1] == x: + return i - 1 raise ValueError @@ -315,7 +315,7 @@ def mean(data): raise StatisticsError('mean requires at least one data point') T, total, count = _sum(data) assert count == n - return _convert(total/n, T) + return _convert(total / n, T) def fmean(data): @@ -403,11 +403,11 @@ def harmonic_mean(data): else: raise TypeError('unsupported type') try: - T, total, count = _sum(1/x for x in _fail_neg(data, errmsg)) + T, total, count = _sum(1 / x for x in _fail_neg(data, errmsg)) except ZeroDivisionError: return 0 assert count == n - return _convert(n/total, T) + return _convert(n / total, T) # FIXME: investigate ways to calculate medians without sorting? Quickselect? @@ -428,11 +428,11 @@ def median(data): n = len(data) if n == 0: raise StatisticsError("no median for empty data") - if n%2 == 1: - return data[n//2] + if n % 2 == 1: + return data[n // 2] else: - i = n//2 - return (data[i - 1] + data[i])/2 + i = n // 2 + return (data[i - 1] + data[i]) / 2 def median_low(data): @@ -451,10 +451,10 @@ def median_low(data): n = len(data) if n == 0: raise StatisticsError("no median for empty data") - if n%2 == 1: - return data[n//2] + if n % 2 == 1: + return data[n // 2] else: - return data[n//2 - 1] + return data[n // 2 - 1] def median_high(data): @@ -473,7 +473,7 @@ def median_high(data): n = len(data) if n == 0: raise StatisticsError("no median for empty data") - return data[n//2] + return data[n // 2] def median_grouped(data, interval=1): @@ -510,15 +510,15 @@ def median_grouped(data, interval=1): return data[0] # Find the value at the midpoint. Remember this corresponds to the # centre of the class interval. - x = data[n//2] + x = data[n // 2] for obj in (x, interval): if isinstance(obj, (str, bytes)): raise TypeError('expected number but got %r' % obj) try: - L = x - interval/2 # The lower limit of the median interval. + L = x - interval / 2 # The lower limit of the median interval. except TypeError: # Mixed type. For now we just coerce to float. - L = float(x) - float(interval)/2 + L = float(x) - float(interval) / 2 # Uses bisection search to search for x in data with log(n) time complexity # Find the position of leftmost occurrence of x in data @@ -528,7 +528,7 @@ def median_grouped(data, interval=1): l2 = _find_rteq(data, l1, x) cf = l1 f = l2 - l1 + 1 - return L + interval*(n/2 - cf)/f + return L + interval * (n / 2 - cf) / f def mode(data): @@ -554,8 +554,7 @@ def mode(data): If *data* is empty, ``mode``, raises StatisticsError. """ - data = iter(data) - pairs = Counter(data).most_common(1) + pairs = Counter(iter(data)).most_common(1) try: return pairs[0][0] except IndexError: @@ -597,7 +596,7 @@ def multimode(data): # For sample data where there is a positive probability for values # beyond the range of the data, the R6 exclusive method is a # reasonable choice. Consider a random sample of nine values from a -# population with a uniform distribution from 0.0 to 100.0. The +# population with a uniform distribution from 0.0 to 1.0. The # distribution of the third ranked sample point is described by # betavariate(alpha=3, beta=7) which has mode=0.250, median=0.286, and # mean=0.300. Only the latter (which corresponds with R6) gives the @@ -643,9 +642,8 @@ def quantiles(data, *, n=4, method='exclusive'): m = ld - 1 result = [] for i in range(1, n): - j = i * m // n - delta = i*m - j*n - interpolated = (data[j] * (n - delta) + data[j+1] * delta) / n + j, delta = divmod(i * m, n) + interpolated = (data[j] * (n - delta) + data[j + 1] * delta) / n result.append(interpolated) return result if method == 'exclusive': @@ -655,7 +653,7 @@ def quantiles(data, *, n=4, method='exclusive'): j = i * m // n # rescale i to m/n j = 1 if j < 1 else ld-1 if j > ld-1 else j # clamp to 1 .. ld-1 delta = i*m - j*n # exact integer math - interpolated = (data[j-1] * (n - delta) + data[j] * delta) / n + interpolated = (data[j - 1] * (n - delta) + data[j] * delta) / n result.append(interpolated) return result raise ValueError(f'Unknown method: {method!r}') @@ -689,9 +687,9 @@ def _ss(data, c=None): T, total, count = _sum((x-c)**2 for x in data) # The following sum should mathematically equal zero, but due to rounding # error may not. - U, total2, count2 = _sum((x-c) for x in data) + U, total2, count2 = _sum((x - c) for x in data) assert T == U and count == count2 - total -= total2**2/len(data) + total -= total2 ** 2 / len(data) assert not total < 0, 'negative sum of square deviations: %f' % total return (T, total) @@ -740,7 +738,7 @@ def variance(data, xbar=None): if n < 2: raise StatisticsError('variance requires at least two data points') T, ss = _ss(data, xbar) - return _convert(ss/(n-1), T) + return _convert(ss / (n - 1), T) def pvariance(data, mu=None): @@ -784,7 +782,7 @@ def pvariance(data, mu=None): if n < 1: raise StatisticsError('pvariance requires at least one data point') T, ss = _ss(data, mu) - return _convert(ss/n, T) + return _convert(ss / n, T) def stdev(data, xbar=None): @@ -993,7 +991,7 @@ class NormalDist: if not isinstance(other, NormalDist): raise TypeError('Expected another NormalDist instance') X, Y = self, other - if (Y._sigma, Y._mu) < (X._sigma, X._mu): # sort to assure commutativity + if (Y._sigma, Y._mu) < (X._sigma, X._mu): # sort to assure commutativity X, Y = Y, X X_var, Y_var = X.variance, Y.variance if not X_var or not Y_var: |