summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Pablo Galindo Salgado <Pablogsal@gmail.com>, 2021-11-20 18:28:28 (GMT)
committer: GitHub <noreply@github.com>, 2021-11-20 18:28:28 (GMT)
commit: 81f4e116ef7d30ef6e2041c2d6cf29af511a3a02 (patch)
tree: 73b411df7a7dc4c50c78b0fa19e063d1bdbd72f1
parent: 7a1d9325287a39528b795b1e8037146777abfe3e (diff)
download: cpython-81f4e116ef7d30ef6e2041c2d6cf29af511a3a02.zip
          cpython-81f4e116ef7d30ef6e2041c2d6cf29af511a3a02.tar.gz
          cpython-81f4e116ef7d30ef6e2041c2d6cf29af511a3a02.tar.bz2
bpo-45811: Improve error message when source code contains invisible control characters (GH-29654)
-rw-r--r--  Lib/test/test_syntax.py | 3
-rw-r--r--  Misc/NEWS.d/next/Core and Builtins/2021-11-20-02-25-06.bpo-45811.B-1Gsr.rst | 2
-rw-r--r--  Parser/tokenizer.c | 6
3 files changed, 11 insertions, 0 deletions
diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py
index 28414ba..fc3c629 100644
--- a/Lib/test/test_syntax.py
+++ b/Lib/test/test_syntax.py
@@ -1566,6 +1566,9 @@ def func2():
for paren in ")]}":
self._check_error(paren + "1 + 2", f"unmatched '\\{paren}'")
+ def test_invisible_characters(self):
+ self._check_error('print\x17("Hello")', "invalid non-printable character")
+
def test_match_call_does_not_raise_syntax_error(self):
code = """
def match(x):
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-11-20-02-25-06.bpo-45811.B-1Gsr.rst b/Misc/NEWS.d/next/Core and Builtins/2021-11-20-02-25-06.bpo-45811.B-1Gsr.rst
new file mode 100644
index 0000000..4b31414
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-11-20-02-25-06.bpo-45811.B-1Gsr.rst
@@ -0,0 +1,2 @@
+Improve the tokenizer errors when encountering invisible control characters
+in the parser. Patch by Pablo Galindo.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index f281c42..69d2c08 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -2045,6 +2045,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
break;
}
+ if (!Py_UNICODE_ISPRINTABLE(c)) {
+ char hex[9];
+ (void)PyOS_snprintf(hex, sizeof(hex), "%04X", c);
+ return syntaxerror(tok, "invalid non-printable character U+%s", hex);
+ }
+
/* Punctuation character */
*p_start = tok->start;
*p_end = tok->cur;