From 38d1715b0da55238e0c984177848f0005ebc98cf Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 21 Jan 2008 18:35:49 +0000 Subject: Issue #1882: when compiling code from a string, encoding cookies in the second line of code were not always recognized correctly. --- Lib/test/test_pep263.py | 10 +++++++++- Parser/tokenizer.c | 15 +++++++++++++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_pep263.py b/Lib/test/test_pep263.py index 3b09c12..1a85f3b 100644 --- a/Lib/test/test_pep263.py +++ b/Lib/test/test_pep263.py @@ -1,5 +1,5 @@ #! -*- coding: koi8-r -*- -# This file is marked as binary in the CVS, to prevent MacCVS from recoding it. +# This file is marked as binary in SVN, to prevent MacCVS from recoding it. import unittest from test import test_support @@ -16,6 +16,14 @@ class PEP263Test(unittest.TestCase): '\\\xd0\x9f' ) + def test_compilestring(self): + # see #1882 + c = compile("\n# coding: utf-8\nu = u'\xc3\xb3'\n", "dummy", "exec") + d = {} + exec c in d + self.assertEqual(d['u'], u'\xf3') + + def test_main(): test_support.run_unittest(PEP263Test) diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 0aaec19..bbfbe7d 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -586,6 +586,7 @@ decode_str(const char *str, struct tok_state *tok) { PyObject* utf8 = NULL; const char *s; + char *newl[2] = {NULL, NULL}; int lineno = 0; tok->enc = NULL; tok->str = str; @@ -604,13 +605,23 @@ decode_str(const char *str, struct tok_state *tok) for (s = str;; s++) { if (*s == '\0') break; else if (*s == '\n') { + newl[lineno] = s; lineno++; if (lineno == 2) break; } } tok->enc = NULL; - if (!check_coding_spec(str, s - str, tok, buf_setreadl)) - return error_ret(tok); + /* need to check line 1 and 2 separately since check_coding_spec + assumes a single line as input */ + if (newl[0]) { + if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl)) + return error_ret(tok); + if (tok->enc == NULL && newl[1]) { + if (!check_coding_spec(newl[0]+1, newl[1] - newl[0], + tok, buf_setreadl)) + return error_ret(tok); + } + } #ifdef Py_USING_UNICODE if (tok->enc != NULL) { assert(utf8 == NULL); -- cgit v0.12