Issue #1882: when compiling code from a string, encoding cookies in the
second line of code were not always recognized correctly.
author: Georg Brandl <georg@python.org> 2008-01-21 18:35:49 (GMT)
committer: Georg Brandl <georg@python.org> 2008-01-21 18:35:49 (GMT)
commit: 38d1715b0da55238e0c984177848f0005ebc98cf (patch)
tree: 44de5e0473c478ee822fe412455a8fffacd2af7d
parent: 2bdc48c6e058f00892d006f36f51b3d1c0fcfc43 (diff)
download: cpython-38d1715b0da55238e0c984177848f0005ebc98cf.zip cpython-38d1715b0da55238e0c984177848f0005ebc98cf.tar.gz cpython-38d1715b0da55238e0c984177848f0005ebc98cf.tar.bz2
2 files changed, 22 insertions, 3 deletions
diff --git a/Lib/test/test_pep263.py b/Lib/test/test_pep263.py
index 3b09c12..1a85f3b 100644
--- a/Lib/test/test_pep263.py
+++ b/Lib/test/test_pep263.py
@@ -1,5 +1,5 @@
 #! -*- coding: koi8-r -*-
-# This file is marked as binary in the CVS, to prevent MacCVS from recoding it.
+# This file is marked as binary in SVN, to prevent MacCVS from recoding it.
 
 import unittest
 from test import test_support
@@ -16,6 +16,14 @@ class PEP263Test(unittest.TestCase):
             '\\\xd0\x9f'
         )
 
+    def test_compilestring(self):
+        # see #1882
+        c = compile("\n# coding: utf-8\nu = u'\xc3\xb3'\n", "dummy", "exec")
+        d = {}
+        exec c in d
+        self.assertEqual(d['u'], u'\xf3')
+
+
 def test_main():
     test_support.run_unittest(PEP263Test)
 
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 0aaec19..bbfbe7d 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -586,6 +586,7 @@ decode_str(const char *str, struct tok_state *tok)
 {
 	PyObject* utf8 = NULL;
 	const char *s;
+	char *newl[2] = {NULL, NULL};
 	int lineno = 0;
 	tok->enc = NULL;
 	tok->str = str;
@@ -604,13 +605,23 @@ decode_str(const char *str, struct tok_state *tok)
 	for (s = str;; s++) {
 		if (*s == '\0') break;
 		else if (*s == '\n') {
+			newl[lineno] = s;
 			lineno++;
 			if (lineno == 2) break;
 		}
 	}
 	tok->enc = NULL;
-	if (!check_coding_spec(str, s - str, tok, buf_setreadl))
-		return error_ret(tok);
+	/* need to check line 1 and 2 separately since check_coding_spec
+	   assumes a single line as input */
+	if (newl[0]) {
+		if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl))
+			return error_ret(tok);
+		if (tok->enc == NULL && newl[1]) {
+			if (!check_coding_spec(newl[0]+1, newl[1] - newl[0],
+					       tok, buf_setreadl))
+				return error_ret(tok);
+		}
+	}
 #ifdef Py_USING_UNICODE
 	if (tok->enc != NULL) {
 		assert(utf8 == NULL);
author	Georg Brandl <georg@python.org>	2008-01-21 18:35:49 (GMT)
committer	Georg Brandl <georg@python.org>	2008-01-21 18:35:49 (GMT)
commit	38d1715b0da55238e0c984177848f0005ebc98cf (patch)
tree	44de5e0473c478ee822fe412455a8fffacd2af7d
parent	2bdc48c6e058f00892d006f36f51b3d1c0fcfc43 (diff)
download	cpython-38d1715b0da55238e0c984177848f0005ebc98cf.zip cpython-38d1715b0da55238e0c984177848f0005ebc98cf.tar.gz cpython-38d1715b0da55238e0c984177848f0005ebc98cf.tar.bz2