merge 3.2: issue 14629

author: Martin v. Löwis <martin@v.loewis.de> 2012-04-20 12:37:17 (GMT)
committer: Martin v. Löwis <martin@v.loewis.de> 2012-04-20 12:37:17 (GMT)
commit: 63c39fe38e54c986a70dd9f97acf444837d1d244 (patch)
tree: 038aad8667c580ef236848618548010de9e14b4d /Lib/test/test_tokenize.py
parent: 7b17a4e117fa6ad9f0063aa2f039930f40d91820 (diff)
parent: 63674f4b52aa7c2832fec09a026e24cd521e491b (diff)
download: cpython-63c39fe38e54c986a70dd9f97acf444837d1d244.zip
cpython-63c39fe38e54c986a70dd9f97acf444837d1d244.tar.gz
cpython-63c39fe38e54c986a70dd9f97acf444837d1d244.tar.bz2
1 files changed, 10 insertions, 0 deletions
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index db87e11..11590ea 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -838,6 +838,16 @@ class TestDetectEncoding(TestCase):
                 found, consumed_lines = detect_encoding(rl)
                 self.assertEqual(found, "iso-8859-1")
 
+    def test_syntaxerror_latin1(self):
+        # Issue 14629: detect_encoding() must raise SyntaxError if the
+        # first line(s) hold non-UTF-8 bytes and declare no other encoding
+        lines = (
+            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
+            )
+        readline = self.get_readline(lines)
+        self.assertRaises(SyntaxError, detect_encoding, readline)
+
+
     def test_utf8_normalization(self):
         # See get_normal_name() in tokenizer.c.
         encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
author	Martin v. Löwis <martin@v.loewis.de>	2012-04-20 12:37:17 (GMT)
committer	Martin v. Löwis <martin@v.loewis.de>	2012-04-20 12:37:17 (GMT)
commit	63c39fe38e54c986a70dd9f97acf444837d1d244 (patch)
tree	038aad8667c580ef236848618548010de9e14b4d /Lib/test/test_tokenize.py
parent	7b17a4e117fa6ad9f0063aa2f039930f40d91820 (diff)
parent	63674f4b52aa7c2832fec09a026e24cd521e491b (diff)
download	cpython-63c39fe38e54c986a70dd9f97acf444837d1d244.zip cpython-63c39fe38e54c986a70dd9f97acf444837d1d244.tar.gz cpython-63c39fe38e54c986a70dd9f97acf444837d1d244.tar.bz2