diff options
-rw-r--r-- | Doc/reference/datamodel.rst | 14 | ||||
-rw-r--r-- | Lib/tarfile.py | 2 | ||||
-rw-r--r-- | Lib/test/test_format.py | 5 | ||||
-rw-r--r-- | Lib/test/test_unicode.py | 29 | ||||
-rw-r--r-- | Misc/NEWS | 4 | ||||
-rw-r--r-- | Objects/setobject.c | 15 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 35 |
7 files changed, 86 insertions, 18 deletions
diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 26d93a6..4f19b37 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -2080,9 +2080,17 @@ left undefined. .. method:: object.__index__(self) - Called to implement :func:`operator.index`. Also called whenever Python needs - an integer object (such as in slicing, or in the built-in :func:`bin`, - :func:`hex` and :func:`oct` functions). Must return an integer. + Called to implement :func:`operator.index`, and whenever Python needs to + losslessly convert the numeric object to an integer object (such as in + slicing, or in the built-in :func:`bin`, :func:`hex` and :func:`oct` + functions). Presence of this method indicates that the numeric object is + an integer type. Must return an integer. + + .. note:: + + When :meth:`__index__` is defined, :meth:`__int__` should also be defined, + and both should return the same value, in order to have a coherent integer + type class. .. _context-managers: diff --git a/Lib/tarfile.py b/Lib/tarfile.py index f4df6c7..aec7009 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -196,7 +196,7 @@ def itn(n, digits=8, format=DEFAULT_FORMAT): # A 0o200 byte indicates a positive number, a 0o377 byte a negative # number. 
if 0 <= n < 8 ** (digits - 1): - s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL + s = bytes("%0*o" % (digits - 1, int(n)), "ascii") + NUL elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1): if n >= 0: s = bytearray([0o200]) diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index 29330f9..4b1fdf9 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -142,7 +142,6 @@ class FormatTest(unittest.TestCase): testformat("%#+027.23X", big, "+0X0001234567890ABCDEF12345") # same, except no 0 flag testformat("%#+27.23X", big, " +0X001234567890ABCDEF12345") - testformat("%x", float(big), "123456_______________", 6) big = 0o12345670123456701234567012345670 # 32 octal digits testformat("%o", big, "12345670123456701234567012345670") testformat("%o", -big, "-12345670123456701234567012345670") @@ -182,7 +181,6 @@ class FormatTest(unittest.TestCase): testformat("%034.33o", big, "0012345670123456701234567012345670") # base marker shouldn't change that testformat("%0#34.33o", big, "0o012345670123456701234567012345670") - testformat("%o", float(big), "123456__________________________", 6) # Some small ints, in both Python int and flavors). 
testformat("%d", 42, "42") testformat("%d", -42, "-42") @@ -193,7 +191,6 @@ class FormatTest(unittest.TestCase): testformat("%#x", 1, "0x1") testformat("%#X", 1, "0X1") testformat("%#X", 1, "0X1") - testformat("%#x", 1.0, "0x1") testformat("%#o", 1, "0o1") testformat("%#o", 1, "0o1") testformat("%#o", 0, "0o0") @@ -210,12 +207,10 @@ class FormatTest(unittest.TestCase): testformat("%x", -0x42, "-42") testformat("%x", 0x42, "42") testformat("%x", -0x42, "-42") - testformat("%x", float(0x42), "42") testformat("%o", 0o42, "42") testformat("%o", -0o42, "-42") testformat("%o", 0o42, "42") testformat("%o", -0o42, "-42") - testformat("%o", float(0o42), "42") testformat("%r", "\u0378", "'\\u0378'") # non printable testformat("%a", "\u0378", "'\\u0378'") # non printable testformat("%r", "\u0374", "'\u0374'") # printable diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index c0d5dae..f64a962 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1126,6 +1126,35 @@ class UnicodeTest(string_tests.CommonTest, self.assertEqual('%.1s' % "a\xe9\u20ac", 'a') self.assertEqual('%.2s' % "a\xe9\u20ac", 'a\xe9') + #issue 19995 + class PsuedoInt: + def __init__(self, value): + self.value = int(value) + def __int__(self): + return self.value + def __index__(self): + return self.value + class PsuedoFloat: + def __init__(self, value): + self.value = float(value) + def __int__(self): + return int(self.value) + pi = PsuedoFloat(3.1415) + letter_m = PsuedoInt(109) + self.assertEquals('%x' % 42, '2a') + self.assertEquals('%X' % 15, 'F') + self.assertEquals('%o' % 9, '11') + self.assertEquals('%c' % 109, 'm') + self.assertEquals('%x' % letter_m, '6d') + self.assertEquals('%X' % letter_m, '6D') + self.assertEquals('%o' % letter_m, '155') + self.assertEquals('%c' % letter_m, 'm') + self.assertRaises(TypeError, '%x'.__mod__, pi) + self.assertRaises(TypeError, '%x'.__mod__, 3.14) + self.assertRaises(TypeError, '%X'.__mod__, 2.11) + self.assertRaises(TypeError, 
'%o'.__mod__, 1.79) + self.assertRaises(TypeError, '%c'.__mod__, pi) + def test_formatting_with_enum(self): # issue18780 import enum @@ -30,6 +30,10 @@ Core and Builtins - Issue #19969: PyBytes_FromFormatV() now raises an OverflowError if "%c" argument is not in range [0; 255]. +- Issue #19995: %c, %o, %x, and %X now raise TypeError on non-integer input; + reworded docs to clarify that an integer type should define both __int__ + and __index__. + - Issue #19787: PyThread_set_key_value() now always set the value. In Python 3.3, the function did nothing if the key already exists (if the current value is a non-NULL pointer). diff --git a/Objects/setobject.c b/Objects/setobject.c index b0803f6..fa6a6d0 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -738,6 +738,17 @@ set_traverse(PySetObject *so, visitproc visit, void *arg) static Py_hash_t frozenset_hash(PyObject *self) { + /* Most of the constants in this hash algorithm are randomly chosen + large primes with "interesting bit patterns" and that passed + tests for good collision statistics on a variety of problematic + datasets such as: + + ps = [] + for r in range(21): + ps += itertools.combinations(range(20), r) + num_distinct_hashes = len({hash(frozenset(s)) for s in ps}) + + */ PySetObject *so = (PySetObject *)self; Py_uhash_t h, hash = 1927868237UL; setentry *entry; @@ -754,8 +765,10 @@ frozenset_hash(PyObject *self) hashes so that many distinct combinations collapse to only a handful of distinct hash values. */ h = entry->hash; - hash ^= (h ^ (h << 16) ^ 89869747UL) * 3644798167UL; + hash ^= ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL; } + /* Make the final result spread-out in a different pattern + than the algorithm for tuples or other python objects. 
*/ hash = hash * 69069U + 907133923UL; if (hash == -1) hash = 590923713UL; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index cdbaa0c..fc6f0d0 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13988,7 +13988,7 @@ formatlong(PyObject *val, struct unicode_format_arg_t *arg) return result; } -/* Format an integer. +/* Format an integer or a float as an integer. * Return 1 if the number has been formatted into the writer, * 0 if the number has been formatted into *p_output * -1 and raise an exception on error */ @@ -14005,11 +14005,19 @@ mainformatlong(PyObject *v, goto wrongtype; if (!PyLong_Check(v)) { - iobj = PyNumber_Long(v); - if (iobj == NULL) { - if (PyErr_ExceptionMatches(PyExc_TypeError)) - goto wrongtype; - return -1; + if (type == 'o' || type == 'x' || type == 'X') { + iobj = PyNumber_Index(v); + if (iobj == NULL) { + return -1; + } + } + else { + iobj = PyNumber_Long(v); + if (iobj == NULL ) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) + goto wrongtype; + return -1; + } } assert(PyLong_Check(iobj)); } @@ -14079,8 +14087,18 @@ formatchar(PyObject *v) goto onError; } else { - /* Integer input truncated to a character */ + PyObject *iobj; long x; + /* make sure number is a type of integer */ + if (!PyLong_Check(v)) { + iobj = PyNumber_Index(v); + if (iobj == NULL) { + goto onError; + } + v = iobj; + Py_DECREF(iobj); + } + /* Integer input truncated to a character */ x = PyLong_AsLong(v); if (x == -1 && PyErr_Occurred()) goto onError; @@ -14282,7 +14300,8 @@ unicode_format_arg_parse(struct unicode_formatter_t *ctx, /* Format one argument. Supported conversion specifiers: - "s", "r", "a": any type - - "i", "d", "u", "o", "x", "X": int + - "i", "d", "u": int or float + - "o", "x", "X": int - "e", "E", "f", "F", "g", "G": float - "c": int or str (1 character) |