From ef0df5284f929719b2ef3955b1b569ade0a5193c Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Thu, 4 May 2023 14:26:23 +0200 Subject: gh-97556: Raise null bytes syntax error upon null in multiline string (GH-104136) --- Lib/test/test_cmd_line_script.py | 13 +++++++++++++ Parser/tokenizer.c | 9 ++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py index d98e238..8bf2993 100644 --- a/Lib/test/test_cmd_line_script.py +++ b/Lib/test/test_cmd_line_script.py @@ -669,6 +669,19 @@ class CmdLineTest(unittest.TestCase): ], ) + def test_syntaxerror_null_bytes_in_multiline_string(self): + scripts = ["\n'''\nmultilinestring\0\n'''", "\nf'''\nmultilinestring\0\n'''"] # Both normal and f-strings + with os_helper.temp_dir() as script_dir: + for script in scripts: + script_name = _make_test_script(script_dir, 'script', script) + _, _, stderr = assert_python_failure(script_name) + self.assertEqual( + stderr.splitlines()[-2:], + [ b" multilinestring", + b'SyntaxError: source code cannot contain null bytes' + ] + ) + def test_consistent_sys_path_for_direct_execution(self): # This test case ensures that the following all give the same # sys.path configuration: diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index d2f9fee..7c07d20 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -2301,8 +2301,12 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t /* Get rest of string */ while (end_quote_size != quote_size) { c = tok_nextc(tok); - if (tok->done == E_DECODE) + if (tok->done == E_ERROR) { + return MAKE_TOKEN(ERRORTOKEN); + } + if (tok->done == E_DECODE) { break; + } if (c == EOF || (quote_size == 1 && c == '\n')) { assert(tok->multi_line_start != NULL); // shift the tok_state's location into @@ -2554,6 +2558,9 @@ f_string_middle: while (end_quote_size != current_tok->f_string_quote_size) { int c = tok_nextc(tok); + if (tok->done == E_ERROR) { + return MAKE_TOKEN(ERRORTOKEN); + } if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) { if (tok->decoding_erred) { return MAKE_TOKEN(ERRORTOKEN); -- cgit v0.12