summary | refs | log | tree | commit | diff | stats
path: root/Lib/test/test_source_encoding.py
diff options
context:
space:
mode:
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> 2022-09-07 21:49:17 (GMT)
committer: GitHub <noreply@github.com> 2022-09-07 21:49:17 (GMT)
commit: ffafa9b91da8731d21958209dd1478f48eaa2d09 (patch)
tree: 91145f611b810169911fa11620ebd838532f2484 /Lib/test/test_source_encoding.py
parent: 9fa21d050abf5ba2b39762e320cb6e6bb8b905c2 (diff)
download: cpython-ffafa9b91da8731d21958209dd1478f48eaa2d09.zip
cpython-ffafa9b91da8731d21958209dd1478f48eaa2d09.tar.gz
cpython-ffafa9b91da8731d21958209dd1478f48eaa2d09.tar.bz2
gh-96268: Fix loading invalid UTF-8 (GH-96270)
This makes tokenizer.c:valid_utf8 match stringlib/codecs.h:decode_utf8.
It also fixes an off-by-one error introduced in 3.10 for the line number when the tokenizer reports bad UTF8.

(cherry picked from commit 8bc356a7dd50cbdb46d10b8c7e457832431f5d9e)

Co-authored-by: Michael Droettboom <mdboom@gmail.com>
Diffstat (limited to 'Lib/test/test_source_encoding.py')
-rw-r--r-- Lib/test/test_source_encoding.py | 13
1 files changed, 10 insertions, 3 deletions
diff --git a/Lib/test/test_source_encoding.py b/Lib/test/test_source_encoding.py
index d37914d..e357264 100644
--- a/Lib/test/test_source_encoding.py
+++ b/Lib/test/test_source_encoding.py
@@ -248,8 +248,10 @@ class UTF8ValidatorTest(unittest.TestCase):
# test it is to write actual files to disk.
# Each example is put inside a string at the top of the file so
- # it's an otherwise valid Python source file.
- template = b'"%s"\n'
+ # it's an otherwise valid Python source file. Put some newlines
+ # beforehand so we can assert that the error is reported on the
+ # correct line.
+ template = b'\n\n\n"%s"\n'
fn = TESTFN
self.addCleanup(unlink, fn)
@@ -257,7 +259,12 @@ class UTF8ValidatorTest(unittest.TestCase):
def check(content):
with open(fn, 'wb') as fp:
fp.write(template % content)
- script_helper.assert_python_failure(fn)
+ rc, stdout, stderr = script_helper.assert_python_failure(fn)
+ # We want to assert that the python subprocess failed gracefully,
+ # not via a signal.
+ self.assertGreaterEqual(rc, 1)
+ self.assertIn(b"Non-UTF-8 code starting with", stderr)
+ self.assertIn(b"on line 4", stderr)
# continuation bytes in a sequence of 2, 3, or 4 bytes
continuation_bytes = [bytes([x]) for x in range(0x80, 0xC0)]