From 6ccfa31421393910b52936e0447625db06f2a655 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 22 Apr 2022 18:35:28 +0300 Subject: gh-90568: Fix exception type for \N with a named sequence in RE (GH-91665) re.error is now raised instead of TypeError. --- Lib/re/_parser.py | 4 ++-- Lib/test/test_re.py | 4 ++++ .../NEWS.d/next/Library/2022-04-18-16-31-33.gh-issue-90568.9kiU7o.rst | 3 +++ 3 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-04-18-16-31-33.gh-issue-90568.9kiU7o.rst diff --git a/Lib/re/_parser.py b/Lib/re/_parser.py index f191f80..6588862 100644 --- a/Lib/re/_parser.py +++ b/Lib/re/_parser.py @@ -333,7 +333,7 @@ def _class_escape(source, escape): charname = source.getuntil('}', 'character name') try: c = ord(unicodedata.lookup(charname)) - except KeyError: + except (KeyError, TypeError): raise source.error("undefined character name %r" % charname, len(charname) + len(r'\N{}')) from None return LITERAL, c @@ -393,7 +393,7 @@ def _escape(source, escape, state): charname = source.getuntil('}', 'character name') try: c = ord(unicodedata.lookup(charname)) - except KeyError: + except (KeyError, TypeError): raise source.error("undefined character name %r" % charname, len(charname) + len(r'\N{}')) from None return LITERAL, c diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 781bfd6..2d3fef8 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -772,6 +772,10 @@ class ReTests(unittest.TestCase): "undefined character name 'SPAM'", 0) self.checkPatternError(r'[\N{SPAM}]', "undefined character name 'SPAM'", 1) + self.checkPatternError(r'\N{KEYCAP NUMBER SIGN}', + "undefined character name 'KEYCAP NUMBER SIGN'", 0) + self.checkPatternError(r'[\N{KEYCAP NUMBER SIGN}]', + "undefined character name 'KEYCAP NUMBER SIGN'", 1) self.checkPatternError(br'\N{LESS-THAN SIGN}', r'bad escape \N', 0) self.checkPatternError(br'[\N{LESS-THAN SIGN}]', r'bad escape \N', 1) diff --git 
a/Misc/NEWS.d/next/Library/2022-04-18-16-31-33.gh-issue-90568.9kiU7o.rst b/Misc/NEWS.d/next/Library/2022-04-18-16-31-33.gh-issue-90568.9kiU7o.rst new file mode 100644 index 0000000..4411c71 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-04-18-16-31-33.gh-issue-90568.9kiU7o.rst @@ -0,0 +1,3 @@ +Parsing ``\N`` escapes of Unicode Named Character Sequences in a +:mod:`regular expression <re>` raises now :exc:`re.error` instead of +``TypeError``. -- cgit v0.12