diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2014-04-05 09:56:37 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2014-04-05 09:56:37 (GMT) |
commit | 4ff33af2574aa09bc8926c0deb116c886cb745d1 (patch) | |
tree | 6e0569571539ed8db712fc7da72fe84ecdcab088 | |
parent | 89a76abf20889551ec1ed64dee1a4161a435db5b (diff) | |
download | cpython-4ff33af2574aa09bc8926c0deb116c886cb745d1.zip cpython-4ff33af2574aa09bc8926c0deb116c886cb745d1.tar.gz cpython-4ff33af2574aa09bc8926c0deb116c886cb745d1.tar.bz2 |
Issue #21118: Add unit test for invalid character replacement (code point higher than U+10ffff)
-rw-r--r-- | Lib/test/test_unicode.py | 8 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 20 |
2 files changed, 18 insertions, 10 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 58dfa20..7fda51c 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -280,6 +280,14 @@ class UnicodeTest(string_tests.CommonTest,
         self.assertEqual("[\xe9]".translate(str.maketrans({'\xe9': None})),
                          "[]")
 
+        # invalid Unicode characters
+        invalid_char = 0x10ffff+1
+        for before in "a\xe9\u20ac\U0010ffff":
+            mapping = str.maketrans({before: invalid_char})
+            text = "[%s]" % before
+            self.assertRaises(ValueError, text.translate, mapping)
+
+        # errors
         self.assertRaises(TypeError, self.type2test.maketrans)
         self.assertRaises(ValueError, self.type2test.maketrans, 'abc', 'defg')
         self.assertRaises(TypeError, self.type2test.maketrans, 2, 'def')
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 0386a87..2183773 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -8473,10 +8473,10 @@ charmaptranslate_lookup(Py_UCS4 c, PyObject *mapping, PyObject **result)
     }
     else if (PyLong_Check(x)) {
         long value = PyLong_AS_LONG(x);
-        long max = PyUnicode_GetMax();
-        if (value < 0 || value > max) {
-            PyErr_Format(PyExc_TypeError,
-                         "character mapping must be in range(0x%x)", max+1);
+        if (value < 0 || value > MAX_UNICODE) {
+            PyErr_Format(PyExc_ValueError,
+                         "character mapping must be in range(0x%x)",
+                         MAX_UNICODE+1);
             Py_DECREF(x);
             return -1;
         }
@@ -8522,7 +8522,9 @@ charmaptranslate_output(Py_UCS4 ch, PyObject *mapping,
     }
 
     if (PyLong_Check(item)) {
-        Py_UCS4 ch = (Py_UCS4)PyLong_AS_LONG(item);
+        long ch = (Py_UCS4)PyLong_AS_LONG(item);
+        /* PyLong_AS_LONG() cannot fail, charmaptranslate_lookup() already
+           used it */
         if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0) {
             Py_DECREF(item);
             return -1;
@@ -8570,11 +8572,9 @@ unicode_fast_translate_lookup(PyObject *mapping, Py_UCS1 ch,
 
     if (PyLong_Check(item)) {
         long replace = (Py_UCS4)PyLong_AS_LONG(item);
-        if (replace == -1) {
-            Py_DECREF(item);
-            return -1;
-        }
-        if (replace < 0 || 127 < replace) {
+        /* PyLong_AS_LONG() cannot fail, charmaptranslate_lookup() already
+           used it */
+        if (127 < replace) {
             /* invalid character or character outside ASCII:
                skip the fast translate */
             goto exit;