From db83eb3170ebdf55bd1c1add94838a9aefa8c00b Mon Sep 17 00:00:00 2001 From: Neal Norwitz Date: Sun, 18 Dec 2005 05:29:30 +0000 Subject: Fix Bug #1378022, UTF-8 files with a leading BOM crashed the interpreter. Needs backport. --- Lib/test/bad_coding2.py | 2 ++ Lib/test/test_coding.py | 7 +++++++ Misc/NEWS | 2 ++ Parser/tokenizer.c | 6 ++++++ Python/pythonrun.c | 4 ++-- 5 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 Lib/test/bad_coding2.py diff --git a/Lib/test/bad_coding2.py b/Lib/test/bad_coding2.py new file mode 100644 index 0000000..604b122 --- /dev/null +++ b/Lib/test/bad_coding2.py @@ -0,0 +1,2 @@ +#coding: utf8 +print '我' \ No newline at end of file diff --git a/Lib/test/test_coding.py b/Lib/test/test_coding.py index aa7241d..e83015e 100644 --- a/Lib/test/test_coding.py +++ b/Lib/test/test_coding.py @@ -5,6 +5,13 @@ import os class CodingTest(unittest.TestCase): def test_bad_coding(self): module_name = 'bad_coding' + self.verify_bad_module(module_name) + + def test_bad_coding2(self): + module_name = 'bad_coding2' + self.verify_bad_module(module_name) + + def verify_bad_module(self, module_name): self.assertRaises(SyntaxError, __import__, 'test.' + module_name) path = os.path.dirname(__file__) diff --git a/Misc/NEWS b/Misc/NEWS index 1db35f4..7b27e49 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,8 @@ What's New in Python 2.5 alpha 1? Core and builtins ----------------- +- Bug #1378022, UTF-8 files with a leading BOM crashed the interpreter. + - Support for converting hex strings to floats no longer works. This was not portable. float('0x3') now raises a ValueError. diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 37e6c33..a79ea81 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -292,6 +292,12 @@ check_coding_spec(const char* line, int size, struct tok_state *tok, PyMem_DEL(cs); } } + if (!r) { + cs = tok->encoding; + if (!cs) + cs = "with BOM"; + PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs); + } return r; } diff --git a/Python/pythonrun.c b/Python/pythonrun.c index a7f4fe7..30cb518 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -1439,8 +1439,8 @@ err_input(perrdetail *err) } if (msg == NULL) msg = "unknown decode error"; - Py_DECREF(type); - Py_DECREF(value); + Py_XDECREF(type); + Py_XDECREF(value); Py_XDECREF(tb); break; } -- cgit v0.12