diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2010-09-04 20:16:53 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2010-09-04 20:16:53 (GMT) |
commit | 7d6e076f6d8dd48cfd748b02dad17dbeb0b346a3 (patch) | |
tree | 3d355bd888b6e2aaba52f33807d183055d61b700 /Lib/json | |
parent | d9107aadeda663173eb68c412396e5e3353a4059 (diff) | |
download | cpython-7d6e076f6d8dd48cfd748b02dad17dbeb0b346a3.zip cpython-7d6e076f6d8dd48cfd748b02dad17dbeb0b346a3.tar.gz cpython-7d6e076f6d8dd48cfd748b02dad17dbeb0b346a3.tar.bz2 |
Issue #7451: Improve decoding performance of JSON objects, and reduce
the memory consumption of said decoded objects when they use the same
strings as keys.
Diffstat (limited to 'Lib/json')
-rw-r--r-- | Lib/json/decoder.py | 10 | ||||
-rw-r--r-- | Lib/json/scanner.py | 10 | ||||
-rw-r--r-- | Lib/json/tests/test_decode.py | 28 |
3 files changed, 45 insertions, 3 deletions
```diff
diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py
index 3e7405b..6596154 100644
--- a/Lib/json/decoder.py
+++ b/Lib/json/decoder.py
@@ -147,10 +147,14 @@ WHITESPACE_STR = ' \t\n\r'
 
 
 def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
-               _w=WHITESPACE.match, _ws=WHITESPACE_STR):
+               memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
     s, end = s_and_end
     pairs = []
     pairs_append = pairs.append
+    # Backwards compatibility
+    if memo is None:
+        memo = {}
+    memo_get = memo.setdefault
     # Use a slice to prevent IndexError from being raised, the following
     # check will raise a more specific ValueError if the string is empty
     nextchar = s[end:end + 1]
@@ -167,6 +171,7 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
     end += 1
     while True:
         key, end = scanstring(s, end, strict)
+        key = memo_get(key, key)
         # To skip some function call overhead we optimize the fast paths where
         # the JSON key separator is ": " or just ":".
         if s[end:end + 1] != ':':
@@ -214,7 +219,7 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
         pairs = object_hook(pairs)
     return pairs, end
 
-def JSONArray(s_and_end, scan_once, context, _w=WHITESPACE.match):
+def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
     s, end = s_and_end
     values = []
     nextchar = s[end:end + 1]
@@ -314,6 +319,7 @@ class JSONDecoder(object):
         self.parse_object = JSONObject
         self.parse_array = JSONArray
         self.parse_string = scanstring
+        self.memo = {}
         self.scan_once = make_scanner(self)
 
 
diff --git a/Lib/json/scanner.py b/Lib/json/scanner.py
index b4f3561..23eef61 100644
--- a/Lib/json/scanner.py
+++ b/Lib/json/scanner.py
@@ -22,6 +22,8 @@ def py_make_scanner(context):
     parse_int = context.parse_int
     parse_constant = context.parse_constant
     object_hook = context.object_hook
+    object_pairs_hook = context.object_pairs_hook
+    memo = context.memo
 
     def _scan_once(string, idx):
         try:
@@ -33,7 +35,7 @@ def py_make_scanner(context):
             return parse_string(string, idx + 1, strict)
         elif nextchar == '{':
             return parse_object((string, idx + 1), strict,
-                _scan_once, object_hook, object_pairs_hook)
+                _scan_once, object_hook, object_pairs_hook, memo)
         elif nextchar == '[':
             return parse_array((string, idx + 1), _scan_once)
         elif nextchar == 'n' and string[idx:idx + 4] == 'null':
@@ -60,6 +62,12 @@ def py_make_scanner(context):
         else:
             raise StopIteration
 
+    def scan_once(string, idx):
+        try:
+            return _scan_once(string, idx)
+        finally:
+            memo.clear()
+
     return _scan_once
 
 make_scanner = c_make_scanner or py_make_scanner
diff --git a/Lib/json/tests/test_decode.py b/Lib/json/tests/test_decode.py
index 4610c6c..beb82a7 100644
--- a/Lib/json/tests/test_decode.py
+++ b/Lib/json/tests/test_decode.py
@@ -1,10 +1,25 @@
 import decimal
 from unittest import TestCase
 from io import StringIO
+from contextlib import contextmanager
 
 import json
+import json.decoder
+import json.scanner
 from collections import OrderedDict
 
+
+@contextmanager
+def use_python_scanner():
+    py_scanner = json.scanner.py_make_scanner
+    old_scanner = json.decoder.make_scanner
+    json.decoder.make_scanner = py_scanner
+    try:
+        yield
+    finally:
+        json.decoder.make_scanner = old_scanner
+
+
 class TestDecode(TestCase):
     def test_decimal(self):
         rval = json.loads('1.1', parse_float=decimal.Decimal)
@@ -39,3 +54,16 @@ class TestDecode(TestCase):
         # exercise the uncommon cases. The array cases are already covered.
         rval = json.loads('{ "key" : "value" , "k":"v" }')
         self.assertEquals(rval, {"key":"value", "k":"v"})
+
+    def check_keys_reuse(self, source, loads):
+        rval = loads(source)
+        (a, b), (c, d) = sorted(rval[0]), sorted(rval[1])
+        self.assertIs(a, c)
+        self.assertIs(b, d)
+
+    def test_keys_reuse(self):
+        s = '[{"a_key": 1, "b_\xe9": 2}, {"a_key": 3, "b_\xe9": 4}]'
+        self.check_keys_reuse(s, json.loads)
+        # Disabled: the pure Python version of json simply doesn't work
+        with use_python_scanner():
+            self.check_keys_reuse(s, json.decoder.JSONDecoder().decode)
```