diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2012-04-20 12:37:17 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2012-04-20 12:37:17 (GMT) |
commit | 63c39fe38e54c986a70dd9f97acf444837d1d244 (patch) | |
tree | 038aad8667c580ef236848618548010de9e14b4d /Lib | |
parent | 7b17a4e117fa6ad9f0063aa2f039930f40d91820 (diff) | |
parent | 63674f4b52aa7c2832fec09a026e24cd521e491b (diff) | |
download | cpython-63c39fe38e54c986a70dd9f97acf444837d1d244.zip cpython-63c39fe38e54c986a70dd9f97acf444837d1d244.tar.gz cpython-63c39fe38e54c986a70dd9f97acf444837d1d244.tar.bz2 |
merge 3.2: issue 14629
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/test_tokenize.py | 10 | ||||
-rw-r--r-- | Lib/tokenize.py | 7 |
2 files changed, 15 insertions, 2 deletions
```diff
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index db87e11..11590ea 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -838,6 +838,16 @@ class TestDetectEncoding(TestCase):
         found, consumed_lines = detect_encoding(rl)
         self.assertEqual(found, "iso-8859-1")
 
+    def test_syntaxerror_latin1(self):
+        # Issue 14629: need to raise SyntaxError if the first
+        # line(s) have non-UTF-8 characters
+        lines = (
+            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
+        )
+        readline = self.get_readline(lines)
+        self.assertRaises(SyntaxError, detect_encoding, readline)
+
+
     def test_utf8_normalization(self):
         # See get_normal_name() in tokenizer.c.
         encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 741417a..c05f764 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -364,9 +364,12 @@ def detect_encoding(readline):
 
     def find_cookie(line):
         try:
-            line_string = line.decode('ascii')
+            # Decode as UTF-8. Either the line is an encoding declaration,
+            # in which case it should be pure ASCII, or it must be UTF-8
+            # per default encoding.
+            line_string = line.decode('utf-8')
         except UnicodeDecodeError:
-            return None
+            raise SyntaxError("invalid or missing encoding declaration")
 
         matches = cookie_re.findall(line_string)
         if not matches:
```