summary refs log tree commit diff stats
path: root/Lib
diff options
context:
space:
mode:
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/encodings/idna.py 42
-rw-r--r-- Lib/test/test_codecs.py 6
2 files changed, 31 insertions, 17 deletions
diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py
index ea40585..5396047 100644
--- a/Lib/encodings/idna.py
+++ b/Lib/encodings/idna.py
@@ -39,23 +39,21 @@ def nameprep(label):
# Check bidi
RandAL = [stringprep.in_table_d1(x) for x in label]
- for c in RandAL:
- if c:
- # There is a RandAL char in the string. Must perform further
- # tests:
- # 1) The characters in section 5.8 MUST be prohibited.
- # This is table C.8, which was already checked
- # 2) If a string contains any RandALCat character, the string
- # MUST NOT contain any LCat character.
- if any(stringprep.in_table_d2(x) for x in label):
- raise UnicodeError("Violation of BIDI requirement 2")
-
- # 3) If a string contains any RandALCat character, a
- # RandALCat character MUST be the first character of the
- # string, and a RandALCat character MUST be the last
- # character of the string.
- if not RandAL[0] or not RandAL[-1]:
- raise UnicodeError("Violation of BIDI requirement 3")
+ if any(RandAL):
+ # There is a RandAL char in the string. Must perform further
+ # tests:
+ # 1) The characters in section 5.8 MUST be prohibited.
+ # This is table C.8, which was already checked
+ # 2) If a string contains any RandALCat character, the string
+ # MUST NOT contain any LCat character.
+ if any(stringprep.in_table_d2(x) for x in label):
+ raise UnicodeError("Violation of BIDI requirement 2")
+ # 3) If a string contains any RandALCat character, a
+ # RandALCat character MUST be the first character of the
+ # string, and a RandALCat character MUST be the last
+ # character of the string.
+ if not RandAL[0] or not RandAL[-1]:
+ raise UnicodeError("Violation of BIDI requirement 3")
return label
@@ -103,6 +101,16 @@ def ToASCII(label):
raise UnicodeError("label empty or too long")
def ToUnicode(label):
+ if len(label) > 1024:
+ # Protection from https://github.com/python/cpython/issues/98433.
+ # https://datatracker.ietf.org/doc/html/rfc5894#section-6
+ # doesn't specify a label size limit prior to NAMEPREP. But having
+ # one makes practical sense.
+ # This leaves ample room for nameprep() to remove Nothing characters
+ # per https://www.rfc-editor.org/rfc/rfc3454#section-3.1 while still
+ # preventing us from wasting time decoding a big thing that'll just
+ # hit the actual <= 63 length limit in Step 6.
+ raise UnicodeError("label way too long")
# Step 1: Check for ASCII
if isinstance(label, bytes):
pure_ascii = True
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 32a704f..e3add0c 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1552,6 +1552,12 @@ class IDNACodecTest(unittest.TestCase):
self.assertEqual("pyth\xf6n.org".encode("idna"), b"xn--pythn-mua.org")
self.assertEqual("pyth\xf6n.org.".encode("idna"), b"xn--pythn-mua.org.")
+ def test_builtin_decode_length_limit(self):
+ with self.assertRaisesRegex(UnicodeError, "way too long"):
+ (b"xn--016c"+b"a"*1100).decode("idna")
+ with self.assertRaisesRegex(UnicodeError, "too long"):
+ (b"xn--016c"+b"a"*70).decode("idna")
+
def test_stream(self):
r = codecs.getreader("idna")(io.BytesIO(b"abc"))
r.read(3)