From 5f5124e410b4fbd36a6620dfcedb0eaef346b3c4 Mon Sep 17 00:00:00 2001
From: Neal Norwitz
Date: Tue, 21 Feb 2006 09:19:45 +0000
Subject: Backport 41753: Bug #1378022, UTF-8 files with a leading BOM crashed
 the interpreter.

Also bug #1435487 (dup).
---
 Misc/NEWS          | 2 ++
 Parser/tokenizer.c | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/Misc/NEWS b/Misc/NEWS
index 3721a72..d6f064b 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,8 @@ What's New in Python 2.4.3c1?
 Core and builtins
 -----------------
 
+- Bug #1378022, UTF-8 files with a leading BOM crashed the interpreter.
+
 - Patch #1400181, fix unicode string formatting to not use the locale.
   This is how string objects work. u'%f' could use , instead of .
   for the decimal point. Now both strings and unicode always use periods.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index e128e85..43c3ed6 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -289,6 +289,12 @@ check_coding_spec(const char* line, int size, struct tok_state *tok,
 			PyMem_DEL(cs);
 		}
 	}
+	if (!r) {
+		cs = tok->encoding;
+		if (!cs)
+			cs = "with BOM";
+		PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs);
+	}
 	return r;
 }
 
-- 
cgit v0.12