[3.14] gh-133767: Fix use-after-free in the unicode-escape decoder with an error handler (GH-129648) (GH-133942)

If the error handler is used, a new bytes object is created to set as the object attribute of UnicodeDecodeError, and that bytes object then replaces the original data. A pointer to the decoded data will became invalid after destroying that temporary bytes object. So we need other way to return the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal(). _PyBytes_DecodeEscape() does not have such issue, because it does not use the error handlers registry, but it should be changed for compatibility with _PyUnicode_DecodeUnicodeEscapeInternal(). (cherry picked from commit 9f69a58623bd01349a18ba0c7a9cb1dad6a51e8e) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> 2025-05-13 13:25:08 (GMT)
committer: GitHub <noreply@github.com> 2025-05-13 13:25:08 (GMT)
commit: 69b4387f78f413e8c47572a85b3478c47eba8142 (patch)
tree: 30bc6e580011bda809575c500cf3692edcb6a220 /Lib/test/test_codecs.py
parent: f0a7a6c2cc066523fc03d312cfebff8135d81aa2 (diff)
download: cpython-69b4387f78f413e8c47572a85b3478c47eba8142.zip
cpython-69b4387f78f413e8c47572a85b3478c47eba8142.tar.gz
cpython-69b4387f78f413e8c47572a85b3478c47eba8142.tar.bz2
1 files changed, 42 insertions, 10 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 94fcf98..d42270d 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1196,23 +1196,39 @@ class EscapeDecodeTest(unittest.TestCase):
         check(br"[\1010]", b"[A0]")
         check(br"[\x41]", b"[A]")
         check(br"[\x410]", b"[A0]")
+
+    def test_warnings(self):
+        decode = codecs.escape_decode
+        check = coding_checker(self, decode)
         for i in range(97, 123):
             b = bytes([i])
             if b not in b'abfnrtvx':
-                with self.assertWarns(DeprecationWarning):
+                with self.assertWarnsRegex(DeprecationWarning,
+                        r'"\\%c" is an invalid escape sequence' % i):
                     check(b"\\" + b, b"\\" + b)
-            with self.assertWarns(DeprecationWarning):
+            with self.assertWarnsRegex(DeprecationWarning,
+                    r'"\\%c" is an invalid escape sequence' % (i-32)):
                 check(b"\\" + b.upper(), b"\\" + b.upper())
-        with self.assertWarns(DeprecationWarning):
+        with self.assertWarnsRegex(DeprecationWarning,
+                r'"\\8" is an invalid escape sequence'):
             check(br"\8", b"\\8")
         with self.assertWarns(DeprecationWarning):
             check(br"\9", b"\\9")
-        with self.assertWarns(DeprecationWarning):
+        with self.assertWarnsRegex(DeprecationWarning,
+                r'"\\\xfa" is an invalid escape sequence') as cm:
             check(b"\\\xfa", b"\\\xfa")
         for i in range(0o400, 0o1000):
-            with self.assertWarns(DeprecationWarning):
+            with self.assertWarnsRegex(DeprecationWarning,
+                    r'"\\%o" is an invalid octal escape sequence' % i):
                 check(rb'\%o' % i, bytes([i & 0o377]))
 
+        with self.assertWarnsRegex(DeprecationWarning,
+                r'"\\z" is an invalid escape sequence'):
+            self.assertEqual(decode(br'\x\z', 'ignore'), (b'\\z', 4))
+        with self.assertWarnsRegex(DeprecationWarning,
+                r'"\\501" is an invalid octal escape sequence'):
+            self.assertEqual(decode(br'\x\501', 'ignore'), (b'A', 6))
+
     def test_errors(self):
         decode = codecs.escape_decode
         self.assertRaises(ValueError, decode, br"\x")
@@ -2661,24 +2677,40 @@ class UnicodeEscapeTest(ReadTest, unittest.TestCase):
         check(br"[\x410]", "[A0]")
         check(br"\u20ac", "\u20ac")
         check(br"\U0001d120", "\U0001d120")
+
+    def test_decode_warnings(self):
+        decode = codecs.unicode_escape_decode
+        check = coding_checker(self, decode)
         for i in range(97, 123):
             b = bytes([i])
             if b not in b'abfnrtuvx':
-                with self.assertWarns(DeprecationWarning):
+                with self.assertWarnsRegex(DeprecationWarning,
+                        r'"\\%c" is an invalid escape sequence' % i):
                     check(b"\\" + b, "\\" + chr(i))
             if b.upper() not in b'UN':
-                with self.assertWarns(DeprecationWarning):
+                with self.assertWarnsRegex(DeprecationWarning,
+                        r'"\\%c" is an invalid escape sequence' % (i-32)):
                     check(b"\\" + b.upper(), "\\" + chr(i-32))
-        with self.assertWarns(DeprecationWarning):
+        with self.assertWarnsRegex(DeprecationWarning,
+                r'"\\8" is an invalid escape sequence'):
             check(br"\8", "\\8")
         with self.assertWarns(DeprecationWarning):
             check(br"\9", "\\9")
-        with self.assertWarns(DeprecationWarning):
+        with self.assertWarnsRegex(DeprecationWarning,
+                r'"\\\xfa" is an invalid escape sequence') as cm:
             check(b"\\\xfa", "\\\xfa")
         for i in range(0o400, 0o1000):
-            with self.assertWarns(DeprecationWarning):
+            with self.assertWarnsRegex(DeprecationWarning,
+                    r'"\\%o" is an invalid octal escape sequence' % i):
                 check(rb'\%o' % i, chr(i))
 
+        with self.assertWarnsRegex(DeprecationWarning,
+                r'"\\z" is an invalid escape sequence'):
+            self.assertEqual(decode(br'\x\z', 'ignore'), ('\\z', 4))
+        with self.assertWarnsRegex(DeprecationWarning,
+                r'"\\501" is an invalid octal escape sequence'):
+            self.assertEqual(decode(br'\x\501', 'ignore'), ('\u0141', 6))
+
     def test_decode_errors(self):
         decode = codecs.unicode_escape_decode
         for c, d in (b'x', 2), (b'u', 4), (b'U', 4):
author	Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>	2025-05-13 13:25:08 (GMT)
committer	GitHub <noreply@github.com>	2025-05-13 13:25:08 (GMT)
commit	69b4387f78f413e8c47572a85b3478c47eba8142 (patch)
tree	30bc6e580011bda809575c500cf3692edcb6a220 /Lib/test/test_codecs.py
parent	f0a7a6c2cc066523fc03d312cfebff8135d81aa2 (diff)
download	cpython-69b4387f78f413e8c47572a85b3478c47eba8142.zip cpython-69b4387f78f413e8c47572a85b3478c47eba8142.tar.gz cpython-69b4387f78f413e8c47572a85b3478c47eba8142.tar.bz2