From d180b507c4929be399395bfd7946948f98ffc4f7 Mon Sep 17 00:00:00 2001 From: Zackery Spytz Date: Fri, 15 Mar 2024 07:38:13 -0700 Subject: gh-63283: IDNA prefix should be case insensitive (GH-17726) Any capitalization of "xn--" should be acceptable for the ACE prefix (see https://tools.ietf.org/html/rfc3490#section-5). Co-authored-by: Pepijn de Vos Co-authored-by: Erlend E. Aasland Co-authored-by: Petr Viktorin --- Lib/encodings/idna.py | 6 +++--- Lib/test/test_codecs.py | 7 +++++++ .../next/Library/2024-03-13-15-45-54.gh-issue-63283.OToJnG.rst | 2 ++ 3 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-03-13-15-45-54.gh-issue-63283.OToJnG.rst diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py index 5396047..d0f70c0 100644 --- a/Lib/encodings/idna.py +++ b/Lib/encodings/idna.py @@ -86,7 +86,7 @@ def ToASCII(label): raise UnicodeError("label empty or too long") # Step 5: Check ACE prefix - if label.startswith(sace_prefix): + if label[:4].lower() == sace_prefix: raise UnicodeError("Label starts with ACE prefix") # Step 6: Encode with PUNYCODE @@ -129,7 +129,7 @@ def ToUnicode(label): except UnicodeError: raise UnicodeError("Invalid character in IDN label") # Step 3: Check for ACE prefix - if not label.startswith(ace_prefix): + if not label[:4].lower() == ace_prefix: return str(label, "ascii") # Step 4: Remove ACE prefix @@ -202,7 +202,7 @@ class Codec(codecs.Codec): # XXX obviously wrong, see #3232 input = bytes(input) - if ace_prefix not in input: + if ace_prefix not in input.lower(): # Fast path try: return input.decode('ascii'), len(input) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index ff511a6..9585f94 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1547,6 +1547,13 @@ class IDNACodecTest(unittest.TestCase): self.assertEqual(str(b"python.org.", "idna"), "python.org.") self.assertEqual(str(b"xn--pythn-mua.org", "idna"), "pyth\xf6n.org") 
self.assertEqual(str(b"xn--pythn-mua.org.", "idna"), "pyth\xf6n.org.") + self.assertEqual(str(b"XN--pythn-mua.org.", "idna"), "pyth\xf6n.org.") + self.assertEqual(str(b"xN--pythn-mua.org.", "idna"), "pyth\xf6n.org.") + self.assertEqual(str(b"Xn--pythn-mua.org.", "idna"), "pyth\xf6n.org.") + self.assertEqual(str(b"bugs.xn--pythn-mua.org.", "idna"), + "bugs.pyth\xf6n.org.") + self.assertEqual(str(b"bugs.XN--pythn-mua.org.", "idna"), + "bugs.pyth\xf6n.org.") def test_builtin_encode(self): self.assertEqual("python.org".encode("idna"), b"python.org") diff --git a/Misc/NEWS.d/next/Library/2024-03-13-15-45-54.gh-issue-63283.OToJnG.rst b/Misc/NEWS.d/next/Library/2024-03-13-15-45-54.gh-issue-63283.OToJnG.rst new file mode 100644 index 0000000..bb4c3a4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-03-13-15-45-54.gh-issue-63283.OToJnG.rst @@ -0,0 +1,2 @@ +In :mod:`encodings.idna`, any capitalization of the ACE prefix +(``xn--``) is now acceptable. Patch by Pepijn de Vos and Zackery Spytz. -- cgit v0.12