From 729ad5cf561ba644322952b79051269f07bb1ec0 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 9 Jun 2013 16:54:56 +0300 Subject: Issue #18038: SyntaxError raised during compilation of sources with illegal encoding now always contains an encoding name. --- Lib/test/test_pep263.py | 18 ++++++++++++++++++ Misc/NEWS | 3 +++ Parser/tokenizer.c | 14 +++++++------- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_pep263.py b/Lib/test/test_pep263.py index 9286467..4b60624 100644 --- a/Lib/test/test_pep263.py +++ b/Lib/test/test_pep263.py @@ -41,6 +41,24 @@ class PEP263Test(unittest.TestCase): # two bytes in common with the UTF-8 BOM self.assertRaises(SyntaxError, eval, '\xef\xbb\x20') + def test_error_message(self): + compile('# -*- coding: iso-8859-15 -*-\n', 'dummy', 'exec') + compile('\xef\xbb\xbf\n', 'dummy', 'exec') + compile('\xef\xbb\xbf# -*- coding: utf-8 -*-\n', 'dummy', 'exec') + with self.assertRaisesRegexp(SyntaxError, 'fake'): + compile('# -*- coding: fake -*-\n', 'dummy', 'exec') + with self.assertRaisesRegexp(SyntaxError, 'iso-8859-15'): + compile('\xef\xbb\xbf# -*- coding: iso-8859-15 -*-\n', + 'dummy', 'exec') + with self.assertRaisesRegexp(SyntaxError, 'BOM'): + compile('\xef\xbb\xbf# -*- coding: iso-8859-15 -*-\n', + 'dummy', 'exec') + with self.assertRaisesRegexp(SyntaxError, 'fake'): + compile('\xef\xbb\xbf# -*- coding: fake -*-\n', 'dummy', 'exec') + with self.assertRaisesRegexp(SyntaxError, 'BOM'): + compile('\xef\xbb\xbf# -*- coding: fake -*-\n', 'dummy', 'exec') + + def test_main(): test_support.run_unittest(PEP263Test) diff --git a/Misc/NEWS b/Misc/NEWS index 93d2c09..784011a 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -9,6 +9,9 @@ What's New in Python 2.7.6? Core and Builtins ----------------- +- Issue #18038: SyntaxError raised during compilation of sources with illegal + encoding now always contains an encoding name. + - Issue #18019: Fix crash in the repr of dictionaries containing their own views. 
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index ee6313b..46cf9b2 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -277,8 +277,11 @@ check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok, tok->encoding = cs; tok->decoding_state = -1; } - else + else { + PyErr_Format(PyExc_SyntaxError, + "encoding problem: %s", cs); PyMem_FREE(cs); + } #else /* Without Unicode support, we cannot process the coding spec. Since there @@ -289,15 +292,12 @@ check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok, } } else { /* then, compare cs with BOM */ r = (strcmp(tok->encoding, cs) == 0); + if (!r) + PyErr_Format(PyExc_SyntaxError, + "encoding problem: %s with BOM", cs); PyMem_FREE(cs); } } - if (!r) { - cs = tok->encoding; - if (!cs) - cs = "with BOM"; - PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs); - } return r; } -- cgit v0.12