summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Raymond Hettinger <rhettinger@users.noreply.github.com> 2022-08-29 17:19:48 (GMT)
committer: GitHub <noreply@github.com> 2022-08-29 17:19:48 (GMT)
commit: 3d180e3ab21c5d41d1c46e3ef349b30ba409f300 (patch)
tree: 919e38af24350c09fbe4f91d45268d6aa79c8b9e /Lib
parent: 873554ef84011773618911ffa698cea181cec9fd (diff)
download: cpython-3d180e3ab21c5d41d1c46e3ef349b30ba409f300.zip
cpython-3d180e3ab21c5d41d1c46e3ef349b30ba409f300.tar.gz
cpython-3d180e3ab21c5d41d1c46e3ef349b30ba409f300.tar.bz2
Improve accuracy for Spearman's rank correlation coefficient. (#96392)
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/statistics.py 7
1 files changed, 4 insertions, 3 deletions
diff --git a/Lib/statistics.py b/Lib/statistics.py
index b4676fe..b4adabd 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -379,7 +379,7 @@ def _rank(data, /, *, key=None, reverse=False, ties='average', start=1) -> list[
[2.0, 1.0, 3.0]
Ranks are conventionally numbered starting from one; however,
- setting *start* to zero allow the ranks to be used as array indices:
+ setting *start* to zero allows the ranks to be used as array indices:
>>> prize = ['Gold', 'Silver', 'Bronze', 'Certificate']
>>> scores = [8.1, 7.3, 9.4, 8.3]
@@ -1073,8 +1073,9 @@ def correlation(x, y, /, *, method='linear'):
if method not in {'linear', 'ranked'}:
raise ValueError(f'Unknown method: {method!r}')
if method == 'ranked':
- x = _rank(x)
- y = _rank(y)
+ start = (n - 1) / -2 # Center rankings around zero
+ x = _rank(x, start=start)
+ y = _rank(y, start=start)
xbar = fsum(x) / n
ybar = fsum(y) / n
sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))