summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@gmail.com> 2014-04-05 09:56:37 (GMT)
committer: Victor Stinner <victor.stinner@gmail.com> 2014-04-05 09:56:37 (GMT)
commit: 4ff33af2574aa09bc8926c0deb116c886cb745d1 (patch)
tree: 6e0569571539ed8db712fc7da72fe84ecdcab088
parent: 89a76abf20889551ec1ed64dee1a4161a435db5b (diff)
download: cpython-4ff33af2574aa09bc8926c0deb116c886cb745d1.zip
cpython-4ff33af2574aa09bc8926c0deb116c886cb745d1.tar.gz
cpython-4ff33af2574aa09bc8926c0deb116c886cb745d1.tar.bz2
Issue #21118: Add unit test for invalid character replacement (code point higher than U+10ffff)
-rw-r--r-- Lib/test/test_unicode.py 8
-rw-r--r-- Objects/unicodeobject.c 20
2 files changed, 18 insertions, 10 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 58dfa20..7fda51c 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -280,6 +280,14 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual("[\xe9]".translate(str.maketrans({'\xe9': None})),
"[]")
+ # invalid Unicode characters
+ invalid_char = 0x10ffff+1
+ for before in "a\xe9\u20ac\U0010ffff":
+ mapping = str.maketrans({before: invalid_char})
+ text = "[%s]" % before
+ self.assertRaises(ValueError, text.translate, mapping)
+
+ # errors
self.assertRaises(TypeError, self.type2test.maketrans)
self.assertRaises(ValueError, self.type2test.maketrans, 'abc', 'defg')
self.assertRaises(TypeError, self.type2test.maketrans, 2, 'def')
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 0386a87..2183773 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -8473,10 +8473,10 @@ charmaptranslate_lookup(Py_UCS4 c, PyObject *mapping, PyObject **result)
}
else if (PyLong_Check(x)) {
long value = PyLong_AS_LONG(x);
- long max = PyUnicode_GetMax();
- if (value < 0 || value > max) {
- PyErr_Format(PyExc_TypeError,
- "character mapping must be in range(0x%x)", max+1);
+ if (value < 0 || value > MAX_UNICODE) {
+ PyErr_Format(PyExc_ValueError,
+ "character mapping must be in range(0x%x)",
+ MAX_UNICODE+1);
Py_DECREF(x);
return -1;
}
@@ -8522,7 +8522,9 @@ charmaptranslate_output(Py_UCS4 ch, PyObject *mapping,
}
if (PyLong_Check(item)) {
- Py_UCS4 ch = (Py_UCS4)PyLong_AS_LONG(item);
+ long ch = (Py_UCS4)PyLong_AS_LONG(item);
+ /* PyLong_AS_LONG() cannot fail, charmaptranslate_lookup() already
+ used it */
if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0) {
Py_DECREF(item);
return -1;
@@ -8570,11 +8572,9 @@ unicode_fast_translate_lookup(PyObject *mapping, Py_UCS1 ch,
if (PyLong_Check(item)) {
long replace = (Py_UCS4)PyLong_AS_LONG(item);
- if (replace == -1) {
- Py_DECREF(item);
- return -1;
- }
- if (replace < 0 || 127 < replace) {
+ /* PyLong_AS_LONG() cannot fail, charmaptranslate_lookup() already
+ used it */
+ if (127 < replace) {
/* invalid character or character outside ASCII:
skip the fast translate */
goto exit;