From 5e61f14c6dd29982da9364696bc864604b143a66 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 10 Feb 2013 17:36:00 +0200 Subject: Issue #12983: Bytes literals with invalid \x escape now raise a SyntaxError and a full traceback including line number. --- Lib/test/test_strlit.py | 36 ++++++++++++++++++++++++++++++++++++ Misc/NEWS | 3 +++ Objects/bytesobject.c | 5 +++-- Python/ast.c | 18 +++++++++++------- 4 files changed, 53 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_strlit.py b/Lib/test/test_strlit.py index 6bdc6e4..a4ae198 100644 --- a/Lib/test/test_strlit.py +++ b/Lib/test/test_strlit.py @@ -50,6 +50,10 @@ f = '\u1881' assert ord(f) == 0x1881 g = r'\u1881' assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49] +h = '\U0001d120' +assert ord(h) == 0x1d120 +i = r'\U0001d120' +assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48] """ @@ -82,6 +86,24 @@ class TestLiterals(unittest.TestCase): self.assertEqual(eval(""" '\x81' """), chr(0x81)) self.assertEqual(eval(r""" '\u1881' """), chr(0x1881)) self.assertEqual(eval(""" '\u1881' """), chr(0x1881)) + self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120)) + self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120)) + + def test_eval_str_incomplete(self): + self.assertRaises(SyntaxError, eval, r""" '\x' """) + self.assertRaises(SyntaxError, eval, r""" '\x0' """) + self.assertRaises(SyntaxError, eval, r""" '\u' """) + self.assertRaises(SyntaxError, eval, r""" '\u0' """) + self.assertRaises(SyntaxError, eval, r""" '\u00' """) + self.assertRaises(SyntaxError, eval, r""" '\u000' """) + self.assertRaises(SyntaxError, eval, r""" '\U' """) + self.assertRaises(SyntaxError, eval, r""" '\U0' """) + self.assertRaises(SyntaxError, eval, r""" '\U00' """) + self.assertRaises(SyntaxError, eval, r""" '\U000' """) + self.assertRaises(SyntaxError, eval, r""" '\U0000' """) + self.assertRaises(SyntaxError, eval, r""" '\U00000' """) + self.assertRaises(SyntaxError, eval, r""" '\U000000' """) + self.assertRaises(SyntaxError, eval, r""" '\U0000000' """) def test_eval_str_raw(self): self.assertEqual(eval(""" r'x' """), 'x') @@ -91,6 +113,8 @@ class TestLiterals(unittest.TestCase): self.assertEqual(eval(""" r'\x81' """), chr(0x81)) self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881') self.assertEqual(eval(""" r'\u1881' """), chr(0x1881)) + self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120') + self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120)) def test_eval_bytes_normal(self): self.assertEqual(eval(""" b'x' """), b'x') @@ -100,6 +124,12 @@ class TestLiterals(unittest.TestCase): self.assertRaises(SyntaxError, eval, """ b'\x81' """) self.assertEqual(eval(r""" b'\u1881' """), b'\\' + b'u1881') self.assertRaises(SyntaxError, eval, """ b'\u1881' """) + self.assertEqual(eval(r""" b'\U0001d120' """), b'\\' + b'U0001d120') + self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """) + + def test_eval_bytes_incomplete(self): + self.assertRaises(SyntaxError, eval, r""" b'\x' """) + self.assertRaises(SyntaxError, eval, r""" b'\x0' """) def test_eval_bytes_raw(self): self.assertEqual(eval(""" br'x' """), b'x') @@ -109,6 +139,12 @@ class TestLiterals(unittest.TestCase): self.assertRaises(SyntaxError, eval, """ br'\x81' """) self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881") self.assertRaises(SyntaxError, eval, """ br'\u1881' """) + self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120") + self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """) + self.assertRaises(SyntaxError, eval, """ rb'' """) + self.assertRaises(SyntaxError, eval, """ bb'' """) + self.assertRaises(SyntaxError, eval, """ rr'' """) + self.assertRaises(SyntaxError, eval, """ brr'' """) def check_encoding(self, encoding, extra=""): modname = "xx_" + encoding.replace("-", "_") diff --git a/Misc/NEWS b/Misc/NEWS index c4793c7..6ce6f47 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,9 @@ What's New in Python 3.2.4 Core and Builtins ----------------- +- Issue #12983: Bytes literals with invalid \x escape now raise a SyntaxError + and a full traceback including line number. + - Issue #17173: Remove uses of locale-dependent C functions (isalpha() etc.) in the interpreter. diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index b60a8b0..cb5679b 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -469,8 +469,9 @@ PyObject *PyBytes_DecodeEscape(const char *s, break; } if (!errors || strcmp(errors, "strict") == 0) { - PyErr_SetString(PyExc_ValueError, - "invalid \\x escape"); + PyErr_Format(PyExc_ValueError, + "invalid \\x escape at position %d", + s - 2 - (end - len)); goto failed; } if (strcmp(errors, "replace") == 0) { diff --git a/Python/ast.c b/Python/ast.c index e395c5a..edcd18b 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -1368,20 +1368,24 @@ ast_for_atom(struct compiling *c, const node *n) case STRING: { PyObject *str = parsestrplus(c, n, &bytesmode); if (!str) { - if (PyErr_ExceptionMatches(PyExc_UnicodeError)) { + const char *errtype = NULL; + if (PyErr_ExceptionMatches(PyExc_UnicodeError)) + errtype = "unicode error"; + else if (PyErr_ExceptionMatches(PyExc_ValueError)) + errtype = "value error"; + if (errtype) { + char buf[128]; PyObject *type, *value, *tback, *errstr; PyErr_Fetch(&type, &value, &tback); errstr = PyObject_Str(value); if (errstr) { - char *s = ""; - char buf[128]; - s = _PyUnicode_AsString(errstr); - PyOS_snprintf(buf, sizeof(buf), "(unicode error) %s", s); - ast_error(n, buf); + char *s = _PyUnicode_AsString(errstr); + PyOS_snprintf(buf, sizeof(buf), "(%s) %s", errtype, s); Py_DECREF(errstr); } else { - ast_error(n, "(unicode error) unknown error"); + PyOS_snprintf(buf, sizeof(buf), "(%s) unknown error", errtype); } + ast_error(n, buf); Py_DECREF(type); Py_DECREF(value); Py_XDECREF(tback); -- cgit v0.12