diff options
author | Pablo Galindo Salgado <Pablogsal@gmail.com> | 2021-11-20 18:28:28 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-11-20 18:28:28 (GMT) |
commit | 81f4e116ef7d30ef6e2041c2d6cf29af511a3a02 (patch) | |
tree | 73b411df7a7dc4c50c78b0fa19e063d1bdbd72f1 | |
parent | 7a1d9325287a39528b795b1e8037146777abfe3e (diff) | |
download | cpython-81f4e116ef7d30ef6e2041c2d6cf29af511a3a02.zip cpython-81f4e116ef7d30ef6e2041c2d6cf29af511a3a02.tar.gz cpython-81f4e116ef7d30ef6e2041c2d6cf29af511a3a02.tar.bz2 |
bpo-45811: Improve error message when source code contains invisible control characters (GH-29654)
-rw-r--r-- | Lib/test/test_syntax.py | 3 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2021-11-20-02-25-06.bpo-45811.B-1Gsr.rst | 2 | ||||
-rw-r--r-- | Parser/tokenizer.c | 6 |
3 files changed, 11 insertions, 0 deletions
diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py
index 28414ba..fc3c629 100644
--- a/Lib/test/test_syntax.py
+++ b/Lib/test/test_syntax.py
@@ -1566,6 +1566,9 @@ def func2():
         for paren in ")]}":
             self._check_error(paren + "1 + 2", f"unmatched '\\{paren}'")
 
+    def test_invisible_characters(self):
+        self._check_error('print\x17("Hello")', "invalid non-printable character")
+
     def test_match_call_does_not_raise_syntax_error(self):
         code = """
 def match(x):
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-11-20-02-25-06.bpo-45811.B-1Gsr.rst b/Misc/NEWS.d/next/Core and Builtins/2021-11-20-02-25-06.bpo-45811.B-1Gsr.rst
new file mode 100644
index 0000000..4b31414
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-11-20-02-25-06.bpo-45811.B-1Gsr.rst
@@ -0,0 +1,2 @@
+Improve the tokenizer errors when encountering invisible control characters
+in the parser. Patch by Pablo Galindo
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index f281c42..69d2c08 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -2045,6 +2045,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
         break;
     }
 
+    if (!Py_UNICODE_ISPRINTABLE(c)) {
+        char hex[9];
+        (void)PyOS_snprintf(hex, sizeof(hex), "%04X", c);
+        return syntaxerror(tok, "invalid non-printable character U+%s", hex);
+    }
+
     /* Punctuation character */
     *p_start = tok->start;
     *p_end = tok->cur;