diff options
author | Raymond Hettinger <rhettinger@users.noreply.github.com> | 2024-05-04 04:13:36 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-04 04:13:36 (GMT) |
commit | 42dc5b4ace39a3983cd9853719527f4724693adc (patch) | |
tree | 9c19d33694a49367e64864111e57bafa8e86b88b /Lib/test | |
parent | 1b7e5e6e60e0d22b2a928cbbb36ebb989183450f (diff) | |
download | cpython-42dc5b4ace39a3983cd9853719527f4724693adc.zip cpython-42dc5b4ace39a3983cd9853719527f4724693adc.tar.gz cpython-42dc5b4ace39a3983cd9853719527f4724693adc.tar.bz2 |
gh-115532 Add kde_random() to the statistic module (#118210)
Diffstat (limited to 'Lib/test')
-rw-r--r-- | Lib/test/test_statistics.py | 80 |
1 files changed, 80 insertions, 0 deletions
diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index 204787a..fe6c59c 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -2426,6 +2426,86 @@ class TestKDE(unittest.TestCase): self.assertEqual(f_hat(-1.0), 1/2) self.assertEqual(f_hat(1.0), 1/2) + def test_kde_kernel_invcdfs(self): + kernel_invcdfs = statistics._kernel_invcdfs + kde = statistics.kde + + # Verify that cdf / invcdf will round trip + xarr = [i/100 for i in range(-100, 101)] + for kernel, invcdf in kernel_invcdfs.items(): + with self.subTest(kernel=kernel): + cdf = kde([0.0], h=1.0, kernel=kernel, cumulative=True) + for x in xarr: + self.assertAlmostEqual(invcdf(cdf(x)), x, places=5) + + def test_kde_random(self): + kde_random = statistics.kde_random + StatisticsError = statistics.StatisticsError + kernels = ['normal', 'gauss', 'logistic', 'sigmoid', 'rectangular', + 'uniform', 'triangular', 'parabolic', 'epanechnikov', + 'quartic', 'biweight', 'triweight', 'cosine'] + sample = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2] + + # Smoke test + + for kernel in kernels: + with self.subTest(kernel=kernel): + rand = kde_random(sample, h=1.5, kernel=kernel) + selections = [rand() for i in range(10)] + + # Check error cases + + with self.assertRaises(StatisticsError): + kde_random([], h=1.0) # Empty dataset + with self.assertRaises(TypeError): + kde_random(['abc', 'def'], 1.5) # Non-numeric data + with self.assertRaises(TypeError): + kde_random(iter(sample), 1.5) # Data is not a sequence + with self.assertRaises(StatisticsError): + kde_random(sample, h=0.0) # Zero bandwidth + with self.assertRaises(StatisticsError): + kde_random(sample, h=0.0) # Negative bandwidth + with self.assertRaises(TypeError): + kde_random(sample, h='str') # Wrong bandwidth type + with self.assertRaises(StatisticsError): + kde_random(sample, h=1.0, kernel='bogus') # Invalid kernel + + # Test name and docstring of the generated function + + h = 1.5 + kernel = 'cosine' + prng = kde_random(sample, h, kernel) + self.assertEqual(prng.__name__, 'rand') + self.assertIn(kernel, prng.__doc__) + self.assertIn(repr(h), prng.__doc__) + + # Approximate distribution test: Compare a random sample to the expected distribution + + data = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2, 7.8, 14.3, 15.1, 15.3, 15.8, 17.0] + n = 1_000_000 + h = 1.75 + dx = 0.1 + + def p_expected(x): + return F_hat(x + dx) - F_hat(x - dx) + + def p_observed(x): + # P(x-dx <= X < x+dx) / (2*dx) + i = bisect.bisect_left(big_sample, x - dx) + j = bisect.bisect_right(big_sample, x + dx) + return (j - i) / len(big_sample) + + for kernel in kernels: + with self.subTest(kernel=kernel): + + F_hat = statistics.kde(data, h, kernel, cumulative=True) + rand = kde_random(data, h, kernel, seed=8675309**2) + big_sample = sorted([rand() for i in range(n)]) + + for x in range(-40, 190): + x /= 10 + self.assertTrue(math.isclose(p_observed(x), p_expected(x), abs_tol=0.001)) + class TestQuantiles(unittest.TestCase): |