summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Lib/test/test_grammar.py 4
-rw-r--r-- Misc/NEWS.d/next/Core and Builtins/2023-09-07-16-05-36.gh-issue-88943.rH_X3W.rst 3
-rw-r--r-- Parser/tokenizer.c 2
3 files changed, 8 insertions, 1 deletions
diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py
index 7c15a23..8501006 100644
--- a/Lib/test/test_grammar.py
+++ b/Lib/test/test_grammar.py
@@ -236,6 +236,10 @@ class TokenTests(unittest.TestCase):
check(f"[{num}for x in ()]")
check(f"{num}spam", error=True)
+ # gh-88943: Invalid non-ASCII character following a numerical literal.
+ with self.assertRaisesRegex(SyntaxError, r"invalid character '⁄' \(U\+2044\)"):
+ compile(f"{num}⁄7", "<testcase>", "eval")
+
with self.assertWarnsRegex(SyntaxWarning, r'invalid \w+ literal'):
compile(f"{num}is x", "<testcase>", "eval")
with warnings.catch_warnings():
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-09-07-16-05-36.gh-issue-88943.rH_X3W.rst b/Misc/NEWS.d/next/Core and Builtins/2023-09-07-16-05-36.gh-issue-88943.rH_X3W.rst
new file mode 100644
index 0000000..a99830f
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-09-07-16-05-36.gh-issue-88943.rH_X3W.rst
@@ -0,0 +1,3 @@
+Improve the syntax error for a non-ASCII character that follows a numerical
+literal. It now points to the invalid non-ASCII character, not to the valid
+numerical literal.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 6ec2489..46b7159 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1642,7 +1642,7 @@ verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
tok_nextc(tok);
}
else /* In future releases, only error will remain. */
- if (is_potential_identifier_char(c)) {
+ if (c < 128 && is_potential_identifier_char(c)) {
tok_backup(tok, c);
syntaxerror(tok, "invalid %s literal", kind);
return 0;