summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNick Coghlan <ncoghlan@gmail.com>2014-02-08 09:58:04 (GMT)
committerNick Coghlan <ncoghlan@gmail.com>2014-02-08 09:58:04 (GMT)
commit73afe2a972e30a3e0f87401be2fa38c67e2cb964 (patch)
treebf900c405b1584f00248558dad103050d5a31aaa
parent57e41277813163a90129a8c4d5f62f0ca68793b6 (diff)
downloadcpython-73afe2a972e30a3e0f87401be2fa38c67e2cb964.zip
cpython-73afe2a972e30a3e0f87401be2fa38c67e2cb964.tar.gz
cpython-73afe2a972e30a3e0f87401be2fa38c67e2cb964.tar.bz2
Close #20481: Disallow mixed type input in statistics
The most appropriate coercion rules are not yet clear, so mixed type input is simply disallowed for 3.4. (Committed on Steven's behalf)
-rw-r--r--Doc/library/statistics.rst10
-rw-r--r--Lib/statistics.py67
-rw-r--r--Lib/test/test_statistics.py59
-rw-r--r--Misc/NEWS6
4 files changed, 66 insertions, 76 deletions
diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst
index e6c5959..4e77838 100644
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -20,6 +20,16 @@
This module provides functions for calculating mathematical statistics of
numeric (:class:`Real`-valued) data.
+.. note::
+
+ Unless explicitly noted otherwise, these functions support :class:`int`,
+ :class:`float`, :class:`decimal.Decimal` and :class:`fractions.Fraction`.
+ Behaviour with other types (whether in the numeric tower or not) is
+ currently unsupported. Behaviour with mixed types is also undefined and
+ implementation-dependent. If your input data consists of mixed types,
+ you may be able to use :func:`map` to ensure a consistent result, e.g.
+ ``map(float, input_data)``.
+
Averages and measures of central location
-----------------------------------------
diff --git a/Lib/statistics.py b/Lib/statistics.py
index 9359ed7..e1dfbd4 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -144,19 +144,31 @@ def _sum(data, start=0):
>>> _sum(data)
Decimal('0.6963')
+ Mixed types are currently treated as an error, except that int is
+ allowed.
"""
+ # We fail as soon as we reach a value that is not an int or the type of
+ # the first value which is not an int. E.g. _sum([int, int, float, int])
+ # is okay, but _sum([int, int, float, Fraction]) is not.
+ allowed_types = set([int, type(start)])
n, d = _exact_ratio(start)
- T = type(start)
partials = {d: n} # map {denominator: sum of numerators}
# Micro-optimizations.
- coerce_types = _coerce_types
exact_ratio = _exact_ratio
partials_get = partials.get
- # Add numerators for each denominator, and track the "current" type.
+ # Add numerators for each denominator.
for x in data:
- T = _coerce_types(T, type(x))
+ _check_type(type(x), allowed_types)
n, d = exact_ratio(x)
partials[d] = partials_get(d, 0) + n
+ # Find the expected result type. If allowed_types has only one item, it
+ # will be int; if it has two, use the one which isn't int.
+ assert len(allowed_types) in (1, 2)
+ if len(allowed_types) == 1:
+ assert allowed_types.pop() is int
+ T = int
+ else:
+ T = (allowed_types - set([int])).pop()
if None in partials:
assert issubclass(T, (float, Decimal))
assert not math.isfinite(partials[None])
@@ -172,6 +184,15 @@ def _sum(data, start=0):
return T(total)
+def _check_type(T, allowed):
+ if T not in allowed:
+ if len(allowed) == 1:
+ allowed.add(T)
+ else:
+ types = ', '.join([t.__name__ for t in allowed] + [T.__name__])
+ raise TypeError("unsupported mixed types: %s" % types)
+
+
def _exact_ratio(x):
"""Convert Real number x exactly to (numerator, denominator) pair.
@@ -228,44 +249,6 @@ def _decimal_to_ratio(d):
return (num, den)
-def _coerce_types(T1, T2):
- """Coerce types T1 and T2 to a common type.
-
- >>> _coerce_types(int, float)
- <class 'float'>
-
- Coercion is performed according to this table, where "N/A" means
- that a TypeError exception is raised.
-
- +----------+-----------+-----------+-----------+----------+
- | | int | Fraction | Decimal | float |
- +----------+-----------+-----------+-----------+----------+
- | int | int | Fraction | Decimal | float |
- | Fraction | Fraction | Fraction | N/A | float |
- | Decimal | Decimal | N/A | Decimal | float |
- | float | float | float | float | float |
- +----------+-----------+-----------+-----------+----------+
-
- Subclasses trump their parent class; two subclasses of the same
- base class will be coerced to the second of the two.
-
- """
- # Get the common/fast cases out of the way first.
- if T1 is T2: return T1
- if T1 is int: return T2
- if T2 is int: return T1
- # Subclasses trump their parent class.
- if issubclass(T2, T1): return T2
- if issubclass(T1, T2): return T1
- # Floats trump everything else.
- if issubclass(T2, float): return T2
- if issubclass(T1, float): return T1
- # Subclasses of the same base class give priority to the second.
- if T1.__base__ is T2.__base__: return T2
- # Otherwise, just give up.
- raise TypeError('cannot coerce types %r and %r' % (T1, T2))
-
-
def _counts(data):
# Generate a table of sorted (value, frequency) pairs.
table = collections.Counter(iter(data)).most_common()
diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py
index 6db821f..49b8597 100644
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -687,6 +687,26 @@ class DecimalToRatioTest(unittest.TestCase):
self.assertRaises(ValueError, statistics._decimal_to_ratio, d)
+class CheckTypeTest(unittest.TestCase):
+ # Test _check_type private function.
+
+ def test_allowed(self):
+ # Test that a type which should be allowed is allowed.
+ allowed = set([int, float])
+ statistics._check_type(int, allowed)
+ statistics._check_type(float, allowed)
+
+ def test_not_allowed(self):
+ # Test that a type which should not be allowed raises.
+ allowed = set([int, float])
+ self.assertRaises(TypeError, statistics._check_type, Decimal, allowed)
+
+ def test_add_to_allowed(self):
+ # Test that a second type will be added to the allowed set.
+ allowed = set([int])
+ statistics._check_type(float, allowed)
+ self.assertEqual(allowed, set([int, float]))
+
# === Tests for public functions ===
@@ -881,40 +901,11 @@ class TestSum(NumericTestCase, UnivariateCommonMixin, UnivariateTypeMixin):
self.assertRaises(TypeError, self.func, [1, 2, 3, b'999'])
def test_mixed_sum(self):
- # Mixed sums are allowed.
-
- # Careful here: order matters. Can't mix Fraction and Decimal directly,
- # only after they're converted to float.
- data = [1, 2, Fraction(1, 2), 3.0, Decimal("0.25")]
- self.assertEqual(self.func(data), 6.75)
-
-
-class SumInternalsTest(NumericTestCase):
- # Test internals of the sum function.
-
- def test_ignore_instance_float_method(self):
- # Test that __float__ methods on data instances are ignored.
-
- # Python typically calls __dunder__ methods on the class, not the
- # instance. The ``sum`` implementation calls __float__ directly. To
- # better match the behaviour of Python, we call it only on the class,
- # not the instance. This test will fail if somebody "fixes" that code.
-
- # Create a fake __float__ method.
- def __float__(self):
- raise AssertionError('test fails')
-
- # Inject it into an instance.
- class MyNumber(Fraction):
- pass
- x = MyNumber(3)
- x.__float__ = types.MethodType(__float__, x)
-
- # Check it works as expected.
- self.assertRaises(AssertionError, x.__float__)
- self.assertEqual(float(x), 3.0)
- # And now test the function.
- self.assertEqual(statistics._sum([1.0, 2.0, x, 4.0]), 10.0)
+ # Mixed input types are not (currently) allowed.
+ # Check that mixed data types fail.
+ self.assertRaises(TypeError, self.func, [1, 2.0, Fraction(1, 2)])
+ # And so does mixed start argument.
+ self.assertRaises(TypeError, self.func, [1, 2.0], Decimal(1))
class SumTortureTest(NumericTestCase):
diff --git a/Misc/NEWS b/Misc/NEWS
index 9f8f43c..c0da6aa 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -24,6 +24,12 @@ Core and Builtins
Library
-------
+- Issue #20481: For at least Python 3.4, the statistics module will require
+ that all inputs for a single operation be of a single consistent type, or
+ else a mix of ints and a single other consistent type. This avoids
+ some interoperability issues that arose with the previous approach of
+ coercing to a suitable common type.
+
- Issue #20478: the statistics module now treats collections.Counter inputs
like any other iterable.