diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2014-01-09 16:36:09 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2014-01-09 16:36:09 (GMT) |
commit | 768c16ce0273a74fa846cc388753280b17b02cfc (patch) | |
tree | d2fc7f94a08fb20f882e3e0b299a59fea1251aa8 /Lib/test | |
parent | 21e7d4cd5eb5a1ee153baf4c7915db80e6ca59e1 (diff) | |
download | cpython-768c16ce0273a74fa846cc388753280b17b02cfc.zip cpython-768c16ce0273a74fa846cc388753280b17b02cfc.tar.gz cpython-768c16ce0273a74fa846cc388753280b17b02cfc.tar.bz2 |
Issue #18960: Fix bugs with Python source code encoding in the second line.
* The first line of Python script could be executed twice when the source
encoding (not equal to 'utf-8') was specified on the second line.
* Now the source encoding declaration on the second line isn't effective if
the first line contains anything except a comment.
* As a consequence, 'python -x' works now again with files with the source
encoding declarations specified on the second line, and can be used again
to make Python batch files on Windows.
* The tokenize module now ignores the source encoding declaration on the second
line if the first line contains anything except a comment.
* IDLE now ignores the source encoding declaration on the second line if the
first line contains anything except a comment.
* 2to3 and the findnocoding.py script now ignore the source encoding
declaration on the second line if the first line contains anything except
a comment.
Diffstat (limited to 'Lib/test')
-rw-r--r-- | Lib/test/test_tokenize.py | 33 |
1 files changed, 33 insertions, 0 deletions
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 1765085..6ed8597 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -885,6 +885,39 @@ class TestDetectEncoding(TestCase):
         readline = self.get_readline(lines)
         self.assertRaises(SyntaxError, detect_encoding, readline)
 
+    def test_cookie_second_line_noncommented_first_line(self):
+        lines = (
+            b"print('\xc2\xa3')\n",
+            b'# vim: set fileencoding=iso8859-15 :\n',
+            b"print('\xe2\x82\xac')\n"
+        )
+        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
+        self.assertEqual(encoding, 'utf-8')
+        expected = [b"print('\xc2\xa3')\n"]
+        self.assertEqual(consumed_lines, expected)
+
+    def test_cookie_second_line_commented_first_line(self):
+        lines = (
+            b"#print('\xc2\xa3')\n",
+            b'# vim: set fileencoding=iso8859-15 :\n',
+            b"print('\xe2\x82\xac')\n"
+        )
+        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
+        self.assertEqual(encoding, 'iso8859-15')
+        expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
+        self.assertEqual(consumed_lines, expected)
+
+    def test_cookie_second_line_empty_first_line(self):
+        lines = (
+            b'\n',
+            b'# vim: set fileencoding=iso8859-15 :\n',
+            b"print('\xe2\x82\xac')\n"
+        )
+        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
+        self.assertEqual(encoding, 'iso8859-15')
+        expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
+        self.assertEqual(consumed_lines, expected)
+
     def test_latin1_normalization(self):
         # See get_normal_name() in tokenizer.c.
         encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",