diff options
Diffstat (limited to 'Lib/json/__init__.py')
-rw-r--r-- | Lib/json/__init__.py | 212 |
1 files changed, 97 insertions, 115 deletions
diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py index 1ba8b48..ce62361 100644 --- a/Lib/json/__init__.py +++ b/Lib/json/__init__.py @@ -3,23 +3,26 @@ JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data interchange format. :mod:`json` exposes an API familiar to users of the standard library -:mod:`marshal` and :mod:`pickle` modules. It is derived from a -version of the externally maintained simplejson library. +:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained +version of the :mod:`json` library contained in Python 2.6, but maintains +compatibility with Python 2.4 and Python 2.5 and (currently) has +significant performance advantages, even without using the optional C +extension for speedups. Encoding basic Python object hierarchies:: >>> import json >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) '["foo", {"bar": ["baz", null, 1.0, 2]}]' - >>> print(json.dumps("\"foo\bar")) + >>> print json.dumps("\"foo\bar") "\"foo\bar" - >>> print(json.dumps('\u1234')) + >>> print json.dumps(u'\u1234') "\u1234" - >>> print(json.dumps('\\')) + >>> print json.dumps('\\') "\\" - >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)) + >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) {"a": 0, "b": 0, "c": 0} - >>> from io import StringIO + >>> from StringIO import StringIO >>> io = StringIO() >>> json.dump(['streaming API'], io) >>> io.getvalue() @@ -28,14 +31,14 @@ Encoding basic Python object hierarchies:: Compact encoding:: >>> import json - >>> mydict = {'4': 5, '6': 7} - >>> json.dumps([1,2,3,mydict], separators=(',', ':')) + >>> json.dumps([1,2,3,{'4': 5, '6': 7}], sort_keys=True, separators=(',',':')) '[1,2,3,{"4":5,"6":7}]' Pretty printing:: >>> import json - >>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4)) + >>> print json.dumps({'4': 5, '6': 7}, sort_keys=True, + ... indent=4, separators=(',', ': ')) { "4": 5, "6": 7 @@ -44,12 +47,12 @@ Pretty printing:: Decoding JSON:: >>> import json - >>> obj = ['foo', {'bar': ['baz', None, 1.0, 2]}] + >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj True - >>> json.loads('"\\"foo\\bar"') == '"foo\x08ar' + >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' True - >>> from io import StringIO + >>> from StringIO import StringIO >>> io = StringIO('["streaming API"]') >>> json.load(io)[0] == 'streaming API' True @@ -75,8 +78,7 @@ Specializing JSON object encoding:: >>> def encode_complex(obj): ... if isinstance(obj, complex): ... return [obj.real, obj.imag] - ... raise TypeError(f'Object of type {obj.__class__.__name__} ' - ... f'is not JSON serializable') + ... raise TypeError(repr(obj) + " is not JSON serializable") ... >>> json.dumps(2 + 1j, default=encode_complex) '[2.0, 1.0]' @@ -98,14 +100,13 @@ Using json.tool from the shell to validate and pretty-print:: __version__ = '2.0.9' __all__ = [ 'dump', 'dumps', 'load', 'loads', - 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', + 'JSONDecoder', 'JSONEncoder', ] __author__ = 'Bob Ippolito <bob@redivi.com>' -from .decoder import JSONDecoder, JSONDecodeError +from .decoder import JSONDecoder from .encoder import JSONEncoder -import codecs _default_encoder = JSONEncoder( skipkeys=False, @@ -114,22 +115,28 @@ _default_encoder = JSONEncoder( allow_nan=True, indent=None, separators=None, + encoding='utf-8', default=None, ) -def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, +def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, cls=None, indent=None, separators=None, - default=None, sort_keys=False, **kw): + encoding='utf-8', default=None, sort_keys=False, **kw): """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a ``.write()``-supporting file-like object). If ``skipkeys`` is true then ``dict`` keys that are not basic types - (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped - instead of raising a ``TypeError``. - - If ``ensure_ascii`` is false, then the strings written to ``fp`` can - contain non-ASCII characters if they appear in strings contained in - ``obj``. Otherwise, all such characters are escaped in JSON strings. + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is true (the default), all non-ASCII characters in the + output are escaped with ``\uXXXX`` sequences, and the result is a ``str`` + instance consisting of ASCII characters only. If ``ensure_ascii`` is + false, some chunks written to ``fp`` may be ``unicode`` instances. + This usually happens because the input contains unicode strings or the + ``encoding`` parameter is used. Unless ``fp.write()`` explicitly + understands ``unicode`` (as in ``codecs.getwriter``) this is likely to + cause an error. If ``check_circular`` is false, then the circular reference check for container types will be skipped and a circular reference will @@ -143,12 +150,15 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, If ``indent`` is a non-negative integer, then JSON array elements and object members will be pretty-printed with that indent level. An indent level of 0 will only insert newlines. ``None`` is the most compact - representation. + representation. Since the default item separator is ``', '``, the + output might include trailing whitespace when ``indent`` is specified. + You can use ``separators=(',', ': ')`` to avoid this. + + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. - If specified, ``separators`` should be an ``(item_separator, key_separator)`` - tuple. The default is ``(', ', ': ')`` if *indent* is ``None`` and - ``(',', ': ')`` otherwise. To get the most compact JSON representation, - you should specify ``(',', ':')`` to eliminate whitespace. + ``encoding`` is the character encoding for str instances, default is UTF-8. ``default(obj)`` is a function that should return a serializable version of obj or raise TypeError. The default simply raises TypeError. @@ -165,14 +175,14 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, if (not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and - default is None and not sort_keys and not kw): + encoding == 'utf-8' and default is None and not sort_keys and not kw): iterable = _default_encoder.iterencode(obj) else: if cls is None: cls = JSONEncoder iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, - separators=separators, + separators=separators, encoding=encoding, default=default, sort_keys=sort_keys, **kw).iterencode(obj) # could accelerate with writelines in some versions of Python, at # a debuggability cost @@ -180,18 +190,18 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, fp.write(chunk) -def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True, +def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, cls=None, indent=None, separators=None, - default=None, sort_keys=False, **kw): + encoding='utf-8', default=None, sort_keys=False, **kw): """Serialize ``obj`` to a JSON formatted ``str``. If ``skipkeys`` is true then ``dict`` keys that are not basic types - (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped - instead of raising a ``TypeError``. + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + - If ``ensure_ascii`` is false, then the return value can contain non-ASCII - characters if they appear in strings contained in ``obj``. Otherwise, all - such characters are escaped in JSON strings. + If ``ensure_ascii`` is false, all non-ASCII characters are not escaped, and + the return value may be a ``unicode`` instance. See ``dump`` for details. If ``check_circular`` is false, then the circular reference check for container types will be skipped and a circular reference will @@ -205,12 +215,15 @@ def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True, If ``indent`` is a non-negative integer, then JSON array elements and object members will be pretty-printed with that indent level. An indent level of 0 will only insert newlines. ``None`` is the most compact - representation. + representation. Since the default item separator is ``', '``, the + output might include trailing whitespace when ``indent`` is specified. + You can use ``separators=(',', ': ')`` to avoid this. - If specified, ``separators`` should be an ``(item_separator, key_separator)`` - tuple. The default is ``(', ', ': ')`` if *indent* is ``None`` and - ``(',', ': ')`` otherwise. To get the most compact JSON representation, - you should specify ``(',', ':')`` to eliminate whitespace. + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. ``default(obj)`` is a function that should return a serializable version of obj or raise TypeError. The default simply raises TypeError. @@ -227,55 +240,33 @@ def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True, if (not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and - default is None and not sort_keys and not kw): + encoding == 'utf-8' and default is None and not sort_keys and not kw): return _default_encoder.encode(obj) if cls is None: cls = JSONEncoder return cls( skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, - separators=separators, default=default, sort_keys=sort_keys, - **kw).encode(obj) - - -_default_decoder = JSONDecoder(object_hook=None, object_pairs_hook=None) - - -def detect_encoding(b): - bstartswith = b.startswith - if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)): - return 'utf-32' - if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)): - return 'utf-16' - if bstartswith(codecs.BOM_UTF8): - return 'utf-8-sig' - - if len(b) >= 4: - if not b[0]: - # 00 00 -- -- - utf-32-be - # 00 XX -- -- - utf-16-be - return 'utf-16-be' if b[1] else 'utf-32-be' - if not b[1]: - # XX 00 00 00 - utf-32-le - # XX 00 00 XX - utf-16-le - # XX 00 XX -- - utf-16-le - return 'utf-16-le' if b[2] or b[3] else 'utf-32-le' - elif len(b) == 2: - if not b[0]: - # 00 XX - utf-16-be - return 'utf-16-be' - if not b[1]: - # XX 00 - utf-16-le - return 'utf-16-le' - # default - return 'utf-8' - - -def load(fp, *, cls=None, object_hook=None, parse_float=None, + separators=separators, encoding=encoding, default=default, + sort_keys=sort_keys, **kw).encode(obj) + + +_default_decoder = JSONDecoder(encoding=None, object_hook=None, + object_pairs_hook=None) + + +def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw): """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing a JSON document) to a Python object. + If the contents of ``fp`` is encoded with an ASCII based encoding other + than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must + be specified. Encodings that are not ASCII based (such as UCS-2) are + not allowed, and should be wrapped with + ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` + object and passed to ``loads()`` + ``object_hook`` is an optional function that will be called with the result of any object literal decode (a ``dict``). The return value of ``object_hook`` will be used instead of the ``dict``. This feature @@ -284,22 +275,31 @@ def load(fp, *, cls=None, object_hook=None, parse_float=None, ``object_pairs_hook`` is an optional function that will be called with the result of any object literal decoded with an ordered list of pairs. The return value of ``object_pairs_hook`` will be used instead of the ``dict``. - This feature can be used to implement custom decoders. If ``object_hook`` - is also defined, the ``object_pairs_hook`` takes priority. + This feature can be used to implement custom decoders that rely on the + order that the key and value pairs are decoded (for example, + collections.OrderedDict will remember the order of insertion). If + ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` kwarg; otherwise ``JSONDecoder`` is used. + """ return loads(fp.read(), - cls=cls, object_hook=object_hook, + encoding=encoding, cls=cls, object_hook=object_hook, parse_float=parse_float, parse_int=parse_int, - parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw) + parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, + **kw) -def loads(s, *, cls=None, object_hook=None, parse_float=None, +def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw): - """Deserialize ``s`` (a ``str``, ``bytes`` or ``bytearray`` instance - containing a JSON document) to a Python object. + """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON + document) to a Python object. + + If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding + other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name + must be specified. Encodings that are not ASCII based (such as UCS-2) + are not allowed and should be decoded to ``unicode`` first. ``object_hook`` is an optional function that will be called with the result of any object literal decode (a ``dict``). The return value of @@ -309,8 +309,10 @@ def loads(s, *, cls=None, object_hook=None, parse_float=None, ``object_pairs_hook`` is an optional function that will be called with the result of any object literal decoded with an ordered list of pairs. The return value of ``object_pairs_hook`` will be used instead of the ``dict``. - This feature can be used to implement custom decoders. If ``object_hook`` - is also defined, the ``object_pairs_hook`` takes priority. + This feature can be used to implement custom decoders that rely on the + order that the key and value pairs are decoded (for example, + collections.OrderedDict will remember the order of insertion). If + ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. ``parse_float``, if specified, will be called with the string of every JSON float to be decoded. By default this is equivalent to @@ -330,28 +332,8 @@ def loads(s, *, cls=None, object_hook=None, parse_float=None, To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` kwarg; otherwise ``JSONDecoder`` is used. - The ``encoding`` argument is ignored and deprecated since Python 3.1. """ - if isinstance(s, str): - if s.startswith('\ufeff'): - raise JSONDecodeError("Unexpected UTF-8 BOM (decode using utf-8-sig)", - s, 0) - else: - if not isinstance(s, (bytes, bytearray)): - raise TypeError(f'the JSON object must be str, bytes or bytearray, ' - f'not {s.__class__.__name__}') - s = s.decode(detect_encoding(s), 'surrogatepass') - - if "encoding" in kw: - import warnings - warnings.warn( - "'encoding' is ignored and deprecated. It will be removed in Python 3.9", - DeprecationWarning, - stacklevel=2 - ) - del kw['encoding'] - - if (cls is None and object_hook is None and + if (cls is None and encoding is None and object_hook is None and parse_int is None and parse_float is None and parse_constant is None and object_pairs_hook is None and not kw): return _default_decoder.decode(s) @@ -367,4 +349,4 @@ def loads(s, *, cls=None, object_hook=None, parse_float=None, kw['parse_int'] = parse_int if parse_constant is not None: kw['parse_constant'] = parse_constant - return cls(**kw).decode(s) + return cls(encoding=encoding, **kw).decode(s) |