diff options
author | Raymond Hettinger <rhettinger@users.noreply.github.com> | 2022-08-29 17:19:48 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-08-29 17:19:48 (GMT) |
commit | 3d180e3ab21c5d41d1c46e3ef349b30ba409f300 (patch) | |
tree | 919e38af24350c09fbe4f91d45268d6aa79c8b9e /Lib/statistics.py | |
parent | 873554ef84011773618911ffa698cea181cec9fd (diff) | |
download | cpython-3d180e3ab21c5d41d1c46e3ef349b30ba409f300.zip cpython-3d180e3ab21c5d41d1c46e3ef349b30ba409f300.tar.gz cpython-3d180e3ab21c5d41d1c46e3ef349b30ba409f300.tar.bz2 |
Improve accuracy for Spearman's rank correlation coefficient. (#96392)
Diffstat (limited to 'Lib/statistics.py')
-rw-r--r-- | Lib/statistics.py | 7 |
1 file changed, 4 insertions, 3 deletions
```diff
diff --git a/Lib/statistics.py b/Lib/statistics.py
index b4676fe..b4adabd 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -379,7 +379,7 @@ def _rank(data, /, *, key=None, reverse=False, ties='average', start=1) -> list[
         [2.0, 1.0, 3.0]
 
     Ranks are conventionally numbered starting from one; however,
-    setting *start* to zero allow the ranks to be used as array indices:
+    setting *start* to zero allows the ranks to be used as array indices:
 
         >>> prize = ['Gold', 'Silver', 'Bronze', 'Certificate']
         >>> scores = [8.1, 7.3, 9.4, 8.3]
@@ -1073,8 +1073,9 @@ def correlation(x, y, /, *, method='linear'):
     if method not in {'linear', 'ranked'}:
         raise ValueError(f'Unknown method: {method!r}')
     if method == 'ranked':
-        x = _rank(x)
-        y = _rank(y)
+        start = (n - 1) / -2            # Center rankings around zero
+        x = _rank(x, start=start)
+        y = _rank(y, start=start)
     xbar = fsum(x) / n
     ybar = fsum(y) / n
     sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
```