summary | refs | log | tree | commit | diff | stats
path: root/Lib/json
diff options
context:
space:
mode:
author: Antoine Pitrou <solipsis@pitrou.net> 2010-09-04 20:16:53 (GMT)
committer: Antoine Pitrou <solipsis@pitrou.net> 2010-09-04 20:16:53 (GMT)
commit: 7d6e076f6d8dd48cfd748b02dad17dbeb0b346a3 (patch)
tree: 3d355bd888b6e2aaba52f33807d183055d61b700 /Lib/json
parent: d9107aadeda663173eb68c412396e5e3353a4059 (diff)
download: cpython-7d6e076f6d8dd48cfd748b02dad17dbeb0b346a3.zip
cpython-7d6e076f6d8dd48cfd748b02dad17dbeb0b346a3.tar.gz
cpython-7d6e076f6d8dd48cfd748b02dad17dbeb0b346a3.tar.bz2
Issue #7451: Improve decoding performance of JSON objects, and reduce
the memory consumption of said decoded objects when they use the same strings as keys.
Diffstat (limited to 'Lib/json')
-rw-r--r-- Lib/json/decoder.py 10
-rw-r--r-- Lib/json/scanner.py 10
-rw-r--r-- Lib/json/tests/test_decode.py 28
3 files changed, 45 insertions, 3 deletions
diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py
index 3e7405b..6596154 100644
--- a/Lib/json/decoder.py
+++ b/Lib/json/decoder.py
@@ -147,10 +147,14 @@ WHITESPACE_STR = ' \t\n\r'
def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
- _w=WHITESPACE.match, _ws=WHITESPACE_STR):
+ memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
s, end = s_and_end
pairs = []
pairs_append = pairs.append
+ # Backwards compatibility
+ if memo is None:
+ memo = {}
+ memo_get = memo.setdefault
# Use a slice to prevent IndexError from being raised, the following
# check will raise a more specific ValueError if the string is empty
nextchar = s[end:end + 1]
@@ -167,6 +171,7 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
end += 1
while True:
key, end = scanstring(s, end, strict)
+ key = memo_get(key, key)
# To skip some function call overhead we optimize the fast paths where
# the JSON key separator is ": " or just ":".
if s[end:end + 1] != ':':
@@ -214,7 +219,7 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
pairs = object_hook(pairs)
return pairs, end
-def JSONArray(s_and_end, scan_once, context, _w=WHITESPACE.match):
+def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
s, end = s_and_end
values = []
nextchar = s[end:end + 1]
@@ -314,6 +319,7 @@ class JSONDecoder(object):
self.parse_object = JSONObject
self.parse_array = JSONArray
self.parse_string = scanstring
+ self.memo = {}
self.scan_once = make_scanner(self)
diff --git a/Lib/json/scanner.py b/Lib/json/scanner.py
index b4f3561..23eef61 100644
--- a/Lib/json/scanner.py
+++ b/Lib/json/scanner.py
@@ -22,6 +22,8 @@ def py_make_scanner(context):
parse_int = context.parse_int
parse_constant = context.parse_constant
object_hook = context.object_hook
+ object_pairs_hook = context.object_pairs_hook
+ memo = context.memo
def _scan_once(string, idx):
try:
@@ -33,7 +35,7 @@ def py_make_scanner(context):
return parse_string(string, idx + 1, strict)
elif nextchar == '{':
return parse_object((string, idx + 1), strict,
- _scan_once, object_hook, object_pairs_hook)
+ _scan_once, object_hook, object_pairs_hook, memo)
elif nextchar == '[':
return parse_array((string, idx + 1), _scan_once)
elif nextchar == 'n' and string[idx:idx + 4] == 'null':
@@ -60,6 +62,12 @@ def py_make_scanner(context):
else:
raise StopIteration
+ def scan_once(string, idx):
+ try:
+ return _scan_once(string, idx)
+ finally:
+ memo.clear()
+
return _scan_once
make_scanner = c_make_scanner or py_make_scanner
diff --git a/Lib/json/tests/test_decode.py b/Lib/json/tests/test_decode.py
index 4610c6c..beb82a7 100644
--- a/Lib/json/tests/test_decode.py
+++ b/Lib/json/tests/test_decode.py
@@ -1,10 +1,25 @@
import decimal
from unittest import TestCase
from io import StringIO
+from contextlib import contextmanager
import json
+import json.decoder
+import json.scanner
from collections import OrderedDict
+
+@contextmanager
+def use_python_scanner():
+ py_scanner = json.scanner.py_make_scanner
+ old_scanner = json.decoder.make_scanner
+ json.decoder.make_scanner = py_scanner
+ try:
+ yield
+ finally:
+ json.decoder.make_scanner = old_scanner
+
+
class TestDecode(TestCase):
def test_decimal(self):
rval = json.loads('1.1', parse_float=decimal.Decimal)
@@ -39,3 +54,16 @@ class TestDecode(TestCase):
# exercise the uncommon cases. The array cases are already covered.
rval = json.loads('{ "key" : "value" , "k":"v" }')
self.assertEquals(rval, {"key":"value", "k":"v"})
+
+ def check_keys_reuse(self, source, loads):
+ rval = loads(source)
+ (a, b), (c, d) = sorted(rval[0]), sorted(rval[1])
+ self.assertIs(a, c)
+ self.assertIs(b, d)
+
+ def test_keys_reuse(self):
+ s = '[{"a_key": 1, "b_\xe9": 2}, {"a_key": 3, "b_\xe9": 4}]'
+ self.check_keys_reuse(s, json.loads)
+ # Disabled: the pure Python version of json simply doesn't work
+ with use_python_scanner():
+ self.check_keys_reuse(s, json.decoder.JSONDecoder().decode)