diff options
-rw-r--r-- | Lib/test/test_grammar.py | 4 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2023-09-07-16-05-36.gh-issue-88943.rH_X3W.rst | 3 | ||||
-rw-r--r-- | Parser/tokenizer.c | 2 |
3 files changed, 8 insertions, 1 deletion
diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py index 7c15a23..8501006 100644 --- a/Lib/test/test_grammar.py +++ b/Lib/test/test_grammar.py @@ -236,6 +236,10 @@ class TokenTests(unittest.TestCase): check(f"[{num}for x in ()]") check(f"{num}spam", error=True) + # gh-88943: Invalid non-ASCII character following a numerical literal. + with self.assertRaisesRegex(SyntaxError, r"invalid character '⁄' \(U\+2044\)"): + compile(f"{num}⁄7", "<testcase>", "eval") + with self.assertWarnsRegex(SyntaxWarning, r'invalid \w+ literal'): compile(f"{num}is x", "<testcase>", "eval") with warnings.catch_warnings(): diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-09-07-16-05-36.gh-issue-88943.rH_X3W.rst b/Misc/NEWS.d/next/Core and Builtins/2023-09-07-16-05-36.gh-issue-88943.rH_X3W.rst new file mode 100644 index 0000000..a99830f --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-09-07-16-05-36.gh-issue-88943.rH_X3W.rst @@ -0,0 +1,3 @@ +Improve syntax error for non-ASCII character that follows a numerical +literal. It now points on the invalid non-ASCII character, not on the valid +numerical literal. diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 6ec2489..46b7159 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1642,7 +1642,7 @@ verify_end_of_number(struct tok_state *tok, int c, const char *kind) { tok_nextc(tok); } else /* In future releases, only error will remain. */ - if (is_potential_identifier_char(c)) { + if (c < 128 && is_potential_identifier_char(c)) { tok_backup(tok, c); syntaxerror(tok, "invalid %s literal", kind); return 0; |