1 files changed, 97 insertions, 115 deletions
diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py
index 1ba8b48..ce62361 100644
--- a/Lib/json/__init__.py
+++ b/Lib/json/__init__.py
@@ -3,23 +3,26 @@ JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
 interchange format.
 
 :mod:`json` exposes an API familiar to users of the standard library
-:mod:`marshal` and :mod:`pickle` modules.  It is derived from a
-version of the externally maintained simplejson library.
+:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained
+version of the :mod:`json` library contained in Python 2.6, but maintains
+compatibility with Python 2.4 and Python 2.5 and (currently) has
+significant performance advantages, even without using the optional C
+extension for speedups.
 
 Encoding basic Python object hierarchies::
 
     >>> import json
     >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
     '["foo", {"bar": ["baz", null, 1.0, 2]}]'
-    >>> print(json.dumps("\"foo\bar"))
+    >>> print json.dumps("\"foo\bar")
     "\"foo\bar"
-    >>> print(json.dumps('\u1234'))
+    >>> print json.dumps(u'\u1234')
     "\u1234"
-    >>> print(json.dumps('\\'))
+    >>> print json.dumps('\\')
     "\\"
-    >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True))
+    >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)
     {"a": 0, "b": 0, "c": 0}
-    >>> from io import StringIO
+    >>> from StringIO import StringIO
     >>> io = StringIO()
     >>> json.dump(['streaming API'], io)
     >>> io.getvalue()
@@ -28,14 +31,14 @@ Encoding basic Python object hierarchies::
 Compact encoding::
 
     >>> import json
-    >>> mydict = {'4': 5, '6': 7}
-    >>> json.dumps([1,2,3,mydict], separators=(',', ':'))
+    >>> json.dumps([1,2,3,{'4': 5, '6': 7}], sort_keys=True, separators=(',',':'))
     '[1,2,3,{"4":5,"6":7}]'
 
 Pretty printing::
 
     >>> import json
-    >>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4))
+    >>> print json.dumps({'4': 5, '6': 7}, sort_keys=True,
+    ...                  indent=4, separators=(',', ': '))
     {
         "4": 5,
         "6": 7
@@ -44,12 +47,12 @@ Pretty printing::
 Decoding JSON::
 
     >>> import json
-    >>> obj = ['foo', {'bar': ['baz', None, 1.0, 2]}]
+    >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
     >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj
     True
-    >>> json.loads('"\\"foo\\bar"') == '"foo\x08ar'
+    >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
     True
-    >>> from io import StringIO
+    >>> from StringIO import StringIO
     >>> io = StringIO('["streaming API"]')
     >>> json.load(io)[0] == 'streaming API'
     True
@@ -75,8 +78,7 @@ Specializing JSON object encoding::
     >>> def encode_complex(obj):
     ...     if isinstance(obj, complex):
     ...         return [obj.real, obj.imag]
-    ...     raise TypeError(f'Object of type {obj.__class__.__name__} '
-    ...                     f'is not JSON serializable')
+    ...     raise TypeError(repr(obj) + " is not JSON serializable")
     ...
     >>> json.dumps(2 + 1j, default=encode_complex)
     '[2.0, 1.0]'
@@ -98,14 +100,13 @@ Using json.tool from the shell to validate and pretty-print::
 __version__ = '2.0.9'
 __all__ = [
     'dump', 'dumps', 'load', 'loads',
-    'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
+    'JSONDecoder', 'JSONEncoder',
 ]
 
 __author__ = 'Bob Ippolito <bob@redivi.com>'
 
-from .decoder import JSONDecoder, JSONDecodeError
+from .decoder import JSONDecoder
 from .encoder import JSONEncoder
-import codecs
 
 _default_encoder = JSONEncoder(
     skipkeys=False,
@@ -114,22 +115,28 @@ _default_encoder = JSONEncoder(
     allow_nan=True,
     indent=None,
     separators=None,
+    encoding='utf-8',
     default=None,
 )
 
-def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True,
+def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
         allow_nan=True, cls=None, indent=None, separators=None,
-        default=None, sort_keys=False, **kw):
+        encoding='utf-8', default=None, sort_keys=False, **kw):
     """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
     ``.write()``-supporting file-like object).
 
     If ``skipkeys`` is true then ``dict`` keys that are not basic types
-    (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped
-    instead of raising a ``TypeError``.
-
-    If ``ensure_ascii`` is false, then the strings written to ``fp`` can
-    contain non-ASCII characters if they appear in strings contained in
-    ``obj``. Otherwise, all such characters are escaped in JSON strings.
+    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
+    will be skipped instead of raising a ``TypeError``.
+
+    If ``ensure_ascii`` is true (the default), all non-ASCII characters in the
+    output are escaped with ``\uXXXX`` sequences, and the result is a ``str``
+    instance consisting of ASCII characters only.  If ``ensure_ascii`` is
+    false, some chunks written to ``fp`` may be ``unicode`` instances.
+    This usually happens because the input contains unicode strings or the
+    ``encoding`` parameter is used. Unless ``fp.write()`` explicitly
+    understands ``unicode`` (as in ``codecs.getwriter``) this is likely to
+    cause an error.
 
     If ``check_circular`` is false, then the circular reference check
     for container types will be skipped and a circular reference will
@@ -143,12 +150,15 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True,
     If ``indent`` is a non-negative integer, then JSON array elements and
     object members will be pretty-printed with that indent level. An indent
     level of 0 will only insert newlines. ``None`` is the most compact
-    representation.
+    representation.  Since the default item separator is ``', '``, the
+    output might include trailing whitespace when ``indent`` is specified.
+    You can use ``separators=(',', ': ')`` to avoid this.
+
+    If ``separators`` is an ``(item_separator, dict_separator)`` tuple
+    then it will be used instead of the default ``(', ', ': ')`` separators.
+    ``(',', ':')`` is the most compact JSON representation.
 
-    If specified, ``separators`` should be an ``(item_separator, key_separator)``
-    tuple.  The default is ``(', ', ': ')`` if *indent* is ``None`` and
-    ``(',', ': ')`` otherwise.  To get the most compact JSON representation,
-    you should specify ``(',', ':')`` to eliminate whitespace.
+    ``encoding`` is the character encoding for str instances, default is UTF-8.
 
     ``default(obj)`` is a function that should return a serializable version
     of obj or raise TypeError. The default simply raises TypeError.
@@ -165,14 +175,14 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True,
     if (not skipkeys and ensure_ascii and
         check_circular and allow_nan and
         cls is None and indent is None and separators is None and
-        default is None and not sort_keys and not kw):
+        encoding == 'utf-8' and default is None and not sort_keys and not kw):
         iterable = _default_encoder.iterencode(obj)
     else:
         if cls is None:
             cls = JSONEncoder
         iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
             check_circular=check_circular, allow_nan=allow_nan, indent=indent,
-            separators=separators,
+            separators=separators, encoding=encoding,
             default=default, sort_keys=sort_keys, **kw).iterencode(obj)
     # could accelerate with writelines in some versions of Python, at
     # a debuggability cost
@@ -180,18 +190,18 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True,
         fp.write(chunk)
 
 
-def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True,
+def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
         allow_nan=True, cls=None, indent=None, separators=None,
-        default=None, sort_keys=False, **kw):
+        encoding='utf-8', default=None, sort_keys=False, **kw):
     """Serialize ``obj`` to a JSON formatted ``str``.
 
     If ``skipkeys`` is true then ``dict`` keys that are not basic types
-    (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped
-    instead of raising a ``TypeError``.
+    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
+    will be skipped instead of raising a ``TypeError``.
+
 
-    If ``ensure_ascii`` is false, then the return value can contain non-ASCII
-    characters if they appear in strings contained in ``obj``. Otherwise, all
-    such characters are escaped in JSON strings.
+    If ``ensure_ascii`` is false, non-ASCII characters are not escaped, and
+    the return value may be a ``unicode`` instance. See ``dump`` for details.
 
     If ``check_circular`` is false, then the circular reference check
     for container types will be skipped and a circular reference will
@@ -205,12 +215,15 @@ def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True,
     If ``indent`` is a non-negative integer, then JSON array elements and
     object members will be pretty-printed with that indent level. An indent
     level of 0 will only insert newlines. ``None`` is the most compact
-    representation.
+    representation.  Since the default item separator is ``', '``, the
+    output might include trailing whitespace when ``indent`` is specified.
+    You can use ``separators=(',', ': ')`` to avoid this.
 
-    If specified, ``separators`` should be an ``(item_separator, key_separator)``
-    tuple.  The default is ``(', ', ': ')`` if *indent* is ``None`` and
-    ``(',', ': ')`` otherwise.  To get the most compact JSON representation,
-    you should specify ``(',', ':')`` to eliminate whitespace.
+    If ``separators`` is an ``(item_separator, dict_separator)`` tuple
+    then it will be used instead of the default ``(', ', ': ')`` separators.
+    ``(',', ':')`` is the most compact JSON representation.
+
+    ``encoding`` is the character encoding for str instances, default is UTF-8.
 
     ``default(obj)`` is a function that should return a serializable version
     of obj or raise TypeError. The default simply raises TypeError.
@@ -227,55 +240,33 @@ def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True,
     if (not skipkeys and ensure_ascii and
         check_circular and allow_nan and
         cls is None and indent is None and separators is None and
-        default is None and not sort_keys and not kw):
+        encoding == 'utf-8' and default is None and not sort_keys and not kw):
         return _default_encoder.encode(obj)
     if cls is None:
         cls = JSONEncoder
     return cls(
         skipkeys=skipkeys, ensure_ascii=ensure_ascii,
         check_circular=check_circular, allow_nan=allow_nan, indent=indent,
-        separators=separators, default=default, sort_keys=sort_keys,
-        **kw).encode(obj)
-
-
-_default_decoder = JSONDecoder(object_hook=None, object_pairs_hook=None)
-
-
-def detect_encoding(b):
-    bstartswith = b.startswith
-    if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
-        return 'utf-32'
-    if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
-        return 'utf-16'
-    if bstartswith(codecs.BOM_UTF8):
-        return 'utf-8-sig'
-
-    if len(b) >= 4:
-        if not b[0]:
-            # 00 00 -- -- - utf-32-be
-            # 00 XX -- -- - utf-16-be
-            return 'utf-16-be' if b[1] else 'utf-32-be'
-        if not b[1]:
-            # XX 00 00 00 - utf-32-le
-            # XX 00 00 XX - utf-16-le
-            # XX 00 XX -- - utf-16-le
-            return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
-    elif len(b) == 2:
-        if not b[0]:
-            # 00 XX - utf-16-be
-            return 'utf-16-be'
-        if not b[1]:
-            # XX 00 - utf-16-le
-            return 'utf-16-le'
-    # default
-    return 'utf-8'
-
-
-def load(fp, *, cls=None, object_hook=None, parse_float=None,
+        separators=separators, encoding=encoding, default=default,
+        sort_keys=sort_keys, **kw).encode(obj)
+
+
+_default_decoder = JSONDecoder(encoding=None, object_hook=None,
+                               object_pairs_hook=None)
+
+
+def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
         parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
     """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
     a JSON document) to a Python object.
 
+    If the contents of ``fp`` are encoded with an ASCII based encoding other
+    than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must
+    be specified. Encodings that are not ASCII based (such as UCS-2) are
+    not allowed, and should be wrapped with
+    ``codecs.getreader(encoding)(fp)``, or simply decoded to a ``unicode``
+    object and passed to ``loads()``.
+
     ``object_hook`` is an optional function that will be called with the
     result of any object literal decode (a ``dict``). The return value of
     ``object_hook`` will be used instead of the ``dict``. This feature
@@ -284,22 +275,31 @@ def load(fp, *, cls=None, object_hook=None, parse_float=None,
     ``object_pairs_hook`` is an optional function that will be called with the
     result of any object literal decoded with an ordered list of pairs.  The
     return value of ``object_pairs_hook`` will be used instead of the ``dict``.
-    This feature can be used to implement custom decoders.  If ``object_hook``
-    is also defined, the ``object_pairs_hook`` takes priority.
+    This feature can be used to implement custom decoders that rely on the
+    order that the key and value pairs are decoded (for example,
+    collections.OrderedDict will remember the order of insertion). If
+    ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority.
 
     To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
     kwarg; otherwise ``JSONDecoder`` is used.
+
     """
     return loads(fp.read(),
-        cls=cls, object_hook=object_hook,
+        encoding=encoding, cls=cls, object_hook=object_hook,
         parse_float=parse_float, parse_int=parse_int,
-        parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw)
+        parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
+        **kw)
 
 
-def loads(s, *, cls=None, object_hook=None, parse_float=None,
+def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
         parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
-    """Deserialize ``s`` (a ``str``, ``bytes`` or ``bytearray`` instance
-    containing a JSON document) to a Python object.
+    """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
+    document) to a Python object.
+
+    If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding
+    other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name
+    must be specified. Encodings that are not ASCII based (such as UCS-2)
+    are not allowed and should be decoded to ``unicode`` first.
 
     ``object_hook`` is an optional function that will be called with the
     result of any object literal decode (a ``dict``). The return value of
@@ -309,8 +309,10 @@ def loads(s, *, cls=None, object_hook=None, parse_float=None,
     ``object_pairs_hook`` is an optional function that will be called with the
     result of any object literal decoded with an ordered list of pairs.  The
     return value of ``object_pairs_hook`` will be used instead of the ``dict``.
-    This feature can be used to implement custom decoders.  If ``object_hook``
-    is also defined, the ``object_pairs_hook`` takes priority.
+    This feature can be used to implement custom decoders that rely on the
+    order that the key and value pairs are decoded (for example,
+    collections.OrderedDict will remember the order of insertion). If
+    ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority.
 
     ``parse_float``, if specified, will be called with the string
     of every JSON float to be decoded. By default this is equivalent to
@@ -330,28 +332,8 @@ def loads(s, *, cls=None, object_hook=None, parse_float=None,
     To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
     kwarg; otherwise ``JSONDecoder`` is used.
 
-    The ``encoding`` argument is ignored and deprecated since Python 3.1.
     """
-    if isinstance(s, str):
-        if s.startswith('\ufeff'):
-            raise JSONDecodeError("Unexpected UTF-8 BOM (decode using utf-8-sig)",
-                                  s, 0)
-    else:
-        if not isinstance(s, (bytes, bytearray)):
-            raise TypeError(f'the JSON object must be str, bytes or bytearray, '
-                            f'not {s.__class__.__name__}')
-        s = s.decode(detect_encoding(s), 'surrogatepass')
-
-    if "encoding" in kw:
-        import warnings
-        warnings.warn(
-            "'encoding' is ignored and deprecated. It will be removed in Python 3.9",
-            DeprecationWarning,
-            stacklevel=2
-        )
-        del kw['encoding']
-
-    if (cls is None and object_hook is None and
+    if (cls is None and encoding is None and object_hook is None and
             parse_int is None and parse_float is None and
             parse_constant is None and object_pairs_hook is None and not kw):
         return _default_decoder.decode(s)
@@ -367,4 +349,4 @@ def loads(s, *, cls=None, object_hook=None, parse_float=None,
         kw['parse_int'] = parse_int
     if parse_constant is not None:
         kw['parse_constant'] = parse_constant
-    return cls(**kw).decode(s)
+    return cls(encoding=encoding, **kw).decode(s)