summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Nice Zombies <nineteendo19d0@gmail.com> 2024-10-18 12:29:47 (GMT)
committer GitHub <noreply@github.com> 2024-10-18 12:29:47 (GMT)
commit df751363e386d1f77c5ba9515a5539902457d386 (patch)
tree c2e52eb71b251d96a0f741de46351952e785d7fe
parent d358425e6968858e52908794d15f37e62abc74ec (diff)
download cpython-df751363e386d1f77c5ba9515a5539902457d386.zip
cpython-df751363e386d1f77c5ba9515a5539902457d386.tar.gz
cpython-df751363e386d1f77c5ba9515a5539902457d386.tar.bz2
gh-125660: Reject invalid unicode escapes for Python implementation of JSON decoder (GH-125683)
-rw-r--r-- Lib/json/decoder.py 9
-rw-r--r-- Lib/test/test_json/test_scanstring.py 10
-rw-r--r-- Misc/NEWS.d/next/Library/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst 1
3 files changed, 16 insertions, 4 deletions
diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py
index d69a45d..ff4bfcd 100644
--- a/Lib/json/decoder.py
+++ b/Lib/json/decoder.py
@@ -50,17 +50,18 @@ _CONSTANTS = {
}
+HEXDIGITS = re.compile(r'[0-9A-Fa-f]{4}', FLAGS)
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
BACKSLASH = {
'"': '"', '\\': '\\', '/': '/',
'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
}
-def _decode_uXXXX(s, pos):
- esc = s[pos + 1:pos + 5]
- if len(esc) == 4 and esc[1] not in 'xX':
+def _decode_uXXXX(s, pos, _m=HEXDIGITS.match):
+ esc = _m(s, pos + 1)
+ if esc is not None:
try:
- return int(esc, 16)
+ return int(esc.group(), 16)
except ValueError:
pass
msg = "Invalid \\uXXXX escape"
diff --git a/Lib/test/test_json/test_scanstring.py b/Lib/test/test_json/test_scanstring.py
index 2d3ee8a..cca556a 100644
--- a/Lib/test/test_json/test_scanstring.py
+++ b/Lib/test/test_json/test_scanstring.py
@@ -116,6 +116,11 @@ class TestScanstring:
'"\\u012z"',
'"\\u0x12"',
'"\\u0X12"',
+ '"\\u{0}"'.format("\uff10" * 4),
+ '"\\u 123"',
+ '"\\u-123"',
+ '"\\u+123"',
+ '"\\u1_23"',
'"\\ud834\\"',
'"\\ud834\\u"',
'"\\ud834\\ud"',
@@ -127,6 +132,11 @@ class TestScanstring:
'"\\ud834\\udd2z"',
'"\\ud834\\u0x20"',
'"\\ud834\\u0X20"',
+ '"\\ud834\\u{0}"'.format("\uff10" * 4),
+ '"\\ud834\\u 123"',
+ '"\\ud834\\u-123"',
+ '"\\ud834\\u+123"',
+ '"\\ud834\\u1_23"',
]
for s in bad_escapes:
with self.assertRaises(self.JSONDecodeError, msg=s):
diff --git a/Misc/NEWS.d/next/Library/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst b/Misc/NEWS.d/next/Library/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst
new file mode 100644
index 0000000..74d76c7
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst
@@ -0,0 +1 @@
+Reject invalid unicode escapes for Python implementation of :func:`json.loads`.