summaryrefslogtreecommitdiffstats
path: root/Lib/test/test_tokenize.py
diff options
context:
space:
mode:
authorMartin v. Löwis <martin@v.loewis.de>2012-04-20 12:36:47 (GMT)
committerMartin v. Löwis <martin@v.loewis.de>2012-04-20 12:36:47 (GMT)
commit63674f4b52aa7c2832fec09a026e24cd521e491b (patch)
tree666adcb3196093e019e256d5ab9408df9436ea22 /Lib/test/test_tokenize.py
parent8e6e0fdb7fee3796df8b578c1311b5e46005f2d9 (diff)
downloadcpython-63674f4b52aa7c2832fec09a026e24cd521e491b.zip
cpython-63674f4b52aa7c2832fec09a026e24cd521e491b.tar.gz
cpython-63674f4b52aa7c2832fec09a026e24cd521e491b.tar.bz2
Issue #14629: Raise SyntaxError in tokenize.detect_encoding
if the first two lines contain non-UTF-8 bytes without an encoding declaration.
Diffstat (limited to 'Lib/test/test_tokenize.py')
-rw-r--r--Lib/test/test_tokenize.py10
1 files changed, 10 insertions, 0 deletions
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 9e9656c..63d084d 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -825,6 +825,16 @@ class TestDetectEncoding(TestCase):
found, consumed_lines = detect_encoding(rl)
self.assertEqual(found, "iso-8859-1")
+ def test_syntaxerror_latin1(self):
+ # Issue 14629: detect_encoding() must raise SyntaxError when the
+ # first line(s) hold non-UTF-8 bytes and declare no encoding.
+ lines = (
+ b'print("\xdf")', # \xdf is LATIN SMALL LETTER SHARP S in Latin-1; not valid UTF-8 here
+ )
+ readline = self.get_readline(lines)
+ self.assertRaises(SyntaxError, detect_encoding, readline)
+
+
def test_utf8_normalization(self):
# See get_normal_name() in tokenizer.c.
encodings = ("utf-8", "utf-8-mac", "utf-8-unix")