diff options
author | Raymond Hettinger <rhettinger@users.noreply.github.com> | 2024-02-25 23:46:47 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-25 23:46:47 (GMT) |
commit | 6d34eb0e36d3a7edd9e7629f21da39b6a74b8f68 (patch) | |
tree | 0ac0ed92cdfe54d2f9ff6b0e387c424c8324dcf0 /Lib/test/test_statistics.py | |
parent | 6a3236fe2e61673cf9f819534afbf14a18678408 (diff) | |
download | cpython-6d34eb0e36d3a7edd9e7629f21da39b6a74b8f68.zip cpython-6d34eb0e36d3a7edd9e7629f21da39b6a74b8f68.tar.gz cpython-6d34eb0e36d3a7edd9e7629f21da39b6a74b8f68.tar.bz2 |
gh-115532: Add kernel density estimation to the statistics module (gh-115863)
Diffstat (limited to 'Lib/test/test_statistics.py')
-rw-r--r-- | Lib/test/test_statistics.py | 60 |
1 files changed, 60 insertions, 0 deletions
diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index bf2c254..1cf4163 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -2353,6 +2353,66 @@ class TestGeometricMean(unittest.TestCase): self.assertAlmostEqual(actual_mean, expected_mean, places=5) +class TestKDE(unittest.TestCase): + + def test_kde(self): + kde = statistics.kde + StatisticsError = statistics.StatisticsError + + kernels = ['normal', 'gauss', 'logistic', 'sigmoid', 'rectangular', + 'uniform', 'triangular', 'parabolic', 'epanechnikov', + 'quartic', 'biweight', 'triweight', 'cosine'] + + sample = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2] + + # The approximate integral of a PDF should be close to 1.0 + + def integrate(func, low, high, steps=10_000): + "Numeric approximation of a definite function integral." + dx = (high - low) / steps + midpoints = (low + (i + 1/2) * dx for i in range(steps)) + return sum(map(func, midpoints)) * dx + + for kernel in kernels: + with self.subTest(kernel=kernel): + f_hat = kde(sample, h=1.5, kernel=kernel) + area = integrate(f_hat, -20, 20) + self.assertAlmostEqual(area, 1.0, places=4) + + # Check error cases + + with self.assertRaises(StatisticsError): + kde([], h=1.0) # Empty dataset + with self.assertRaises(TypeError): + kde(['abc', 'def'], 1.5) # Non-numeric data + with self.assertRaises(TypeError): + kde(iter(sample), 1.5) # Data is not a sequence + with self.assertRaises(StatisticsError): + kde(sample, h=0.0) # Zero bandwidth + with self.assertRaises(StatisticsError): + kde(sample, h=0.0) # Negative bandwidth + with self.assertRaises(TypeError): + kde(sample, h='str') # Wrong bandwidth type + with self.assertRaises(StatisticsError): + kde(sample, h=1.0, kernel='bogus') # Invalid kernel + + # Test name and docstring of the generated function + + h = 1.5 + kernel = 'cosine' + f_hat = kde(sample, h, kernel) + self.assertEqual(f_hat.__name__, 'pdf') + self.assertIn(kernel, f_hat.__doc__) + self.assertIn(str(h), f_hat.__doc__) + + # Test closed interval for the support boundaries. + # In particular, 'uniform' should non-zero at the boundaries. + + f_hat = kde([0], 1.0, 'uniform') + self.assertEqual(f_hat(-1.0), 1/2) + self.assertEqual(f_hat(1.0), 1/2) + + class TestQuantiles(unittest.TestCase): def test_specific_cases(self): |