1 files changed, 76 insertions, 70 deletions
diff --git a/Lib/json/encoder.py b/Lib/json/encoder.py
index c8c78b9..97ffe8e 100644
--- a/Lib/json/encoder.py
+++ b/Lib/json/encoder.py
@@ -7,17 +7,13 @@ try:
 except ImportError:
     c_encode_basestring_ascii = None
 try:
-    from _json import encode_basestring as c_encode_basestring
-except ImportError:
-    c_encode_basestring = None
-try:
     from _json import make_encoder as c_make_encoder
 except ImportError:
     c_make_encoder = None
 
 ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
 ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
-HAS_UTF8 = re.compile(b'[\x80-\xff]')
+HAS_UTF8 = re.compile(r'[\x80-\xff]')
 ESCAPE_DCT = {
     '\\': '\\\\',
     '"': '\\"',
@@ -32,8 +28,9 @@ for i in range(0x20):
     #ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
 
 INFINITY = float('inf')
+FLOAT_REPR = float.__repr__
 
-def py_encode_basestring(s):
+def encode_basestring(s):
     """Return a JSON representation of a Python string
 
     """
@@ -42,13 +39,12 @@ def py_encode_basestring(s):
     return '"' + ESCAPE.sub(replace, s) + '"'
 
 
-encode_basestring = (c_encode_basestring or py_encode_basestring)
-
-
 def py_encode_basestring_ascii(s):
     """Return an ASCII-only JSON representation of a Python string
 
     """
+    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
+        s = s.decode('utf-8')
     def replace(match):
         s = match.group(0)
         try:
@@ -64,7 +60,8 @@ def py_encode_basestring_ascii(s):
                 s1 = 0xd800 | ((n >> 10) & 0x3ff)
                 s2 = 0xdc00 | (n & 0x3ff)
                 return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
-    return '"' + ESCAPE_ASCII.sub(replace, s) + '"'
+                #return '\\u%04x\\u%04x' % (s1, s2)
+    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
 
 
 encode_basestring_ascii = (
@@ -82,9 +79,9 @@ class JSONEncoder(object):
     +-------------------+---------------+
     | list, tuple       | array         |
     +-------------------+---------------+
-    | str               | string        |
+    | str, unicode      | string        |
     +-------------------+---------------+
-    | int, float        | number        |
+    | int, long, float  | number        |
     +-------------------+---------------+
     | True              | true          |
     +-------------------+---------------+
@@ -101,18 +98,21 @@ class JSONEncoder(object):
     """
     item_separator = ', '
     key_separator = ': '
-    def __init__(self, *, skipkeys=False, ensure_ascii=True,
+    def __init__(self, skipkeys=False, ensure_ascii=True,
             check_circular=True, allow_nan=True, sort_keys=False,
-            indent=None, separators=None, default=None):
+            indent=None, separators=None, encoding='utf-8', default=None):
         """Constructor for JSONEncoder, with sensible defaults.
 
         If skipkeys is false, then it is a TypeError to attempt
-        encoding of keys that are not str, int, float or None.  If
+        encoding of keys that are not str, int, long, float or None.  If
         skipkeys is True, such items are simply skipped.
 
-        If ensure_ascii is true, the output is guaranteed to be str
-        objects with all incoming non-ASCII characters escaped.  If
-        ensure_ascii is false, the output can contain non-ASCII characters.
+        If *ensure_ascii* is true (the default), all non-ASCII
+        characters in the output are escaped with \uXXXX sequences,
+        and the results are str instances consisting of ASCII
+        characters only.  If ensure_ascii is False, a result may be a
+        unicode instance.  This usually happens if the input contains
+        unicode strings or the *encoding* parameter is used.
 
         If check_circular is true, then lists, dicts, and custom encoded
         objects will be checked for circular references during encoding to
@@ -131,17 +131,23 @@ class JSONEncoder(object):
         If indent is a non-negative integer, then JSON array
         elements and object members will be pretty-printed with that
         indent level.  An indent level of 0 will only insert newlines.
-        None is the most compact representation.
+        None is the most compact representation.  Since the default
+        item separator is ', ', the output might include trailing
+        whitespace when indent is specified.  You can use
+        separators=(',', ': ') to avoid this.
 
-        If specified, separators should be an (item_separator, key_separator)
-        tuple.  The default is (', ', ': ') if *indent* is ``None`` and
-        (',', ': ') otherwise.  To get the most compact JSON representation,
-        you should specify (',', ':') to eliminate whitespace.
+        If specified, separators should be an (item_separator, key_separator)
+        tuple.  The default is (', ', ': ').  To get the most compact JSON
+        representation you should specify (',', ':') to eliminate whitespace.
 
         If specified, default is a function that gets called for objects
         that can't otherwise be serialized.  It should return a JSON encodable
         version of the object or raise a ``TypeError``.
 
+        If encoding is not None, then all input strings will be
+        transformed into unicode using that encoding prior to JSON-encoding.
+        The default is UTF-8.
+
         """
 
         self.skipkeys = skipkeys
@@ -152,10 +158,9 @@ class JSONEncoder(object):
         self.indent = indent
         if separators is not None:
             self.item_separator, self.key_separator = separators
-        elif indent is not None:
-            self.item_separator = ','
         if default is not None:
             self.default = default
+        self.encoding = encoding
 
     def default(self, o):
         """Implement this method in a subclass such that it returns
@@ -176,19 +181,22 @@ class JSONEncoder(object):
                 return JSONEncoder.default(self, o)
 
         """
-        raise TypeError(f'Object of type {o.__class__.__name__} '
-                        f'is not JSON serializable')
+        raise TypeError(repr(o) + " is not JSON serializable")
 
     def encode(self, o):
         """Return a JSON string representation of a Python data structure.
 
-        >>> from json.encoder import JSONEncoder
         >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
         '{"foo": ["bar", "baz"]}'
 
         """
         # This is for extremely simple cases and benchmarks.
-        if isinstance(o, str):
+        if isinstance(o, basestring):
+            if isinstance(o, str):
+                _encoding = self.encoding
+                if (_encoding is not None
+                        and not (_encoding == 'utf-8')):
+                    o = o.decode(_encoding)
             if self.ensure_ascii:
                 return encode_basestring_ascii(o)
             else:
@@ -219,9 +227,14 @@ class JSONEncoder(object):
             _encoder = encode_basestring_ascii
         else:
             _encoder = encode_basestring
+        if self.encoding != 'utf-8':
+            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
+                if isinstance(o, str):
+                    o = o.decode(_encoding)
+                return _orig_encoder(o)
 
         def floatstr(o, allow_nan=self.allow_nan,
-                _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY):
+                _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
             # Check for specials.  Note that this type of test is processor
             # and/or platform-specific, so do tests which don't depend on the
             # internals.
@@ -244,7 +257,7 @@ class JSONEncoder(object):
 
 
         if (_one_shot and c_make_encoder is not None
-                and self.indent is None):
+                and self.indent is None and not self.sort_keys):
             _iterencode = c_make_encoder(
                 markers, self.default, _encoder, self.indent,
                 self.key_separator, self.item_separator, self.sort_keys,
@@ -260,20 +273,18 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
         _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
         ## HACK: hand-optimized bytecode; turn globals into locals
         ValueError=ValueError,
+        basestring=basestring,
         dict=dict,
         float=float,
         id=id,
         int=int,
         isinstance=isinstance,
         list=list,
+        long=long,
         str=str,
         tuple=tuple,
-        _intstr=int.__repr__,
     ):
 
-    if _indent is not None and not isinstance(_indent, str):
-        _indent = ' ' * _indent
-
     def _iterencode_list(lst, _current_indent_level):
         if not lst:
             yield '[]'
@@ -286,7 +297,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
         buf = '['
         if _indent is not None:
             _current_indent_level += 1
-            newline_indent = '\n' + _indent * _current_indent_level
+            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
             separator = _item_separator + newline_indent
             buf += newline_indent
         else:
@@ -298,7 +309,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
                 first = False
             else:
                 buf = separator
-            if isinstance(value, str):
+            if isinstance(value, basestring):
                 yield buf + _encoder(value)
             elif value is None:
                 yield buf + 'null'
@@ -306,13 +317,9 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
                 yield buf + 'true'
             elif value is False:
                 yield buf + 'false'
-            elif isinstance(value, int):
-                # Subclasses of int/float may override __repr__, but we still
-                # want to encode them as integers/floats in JSON. One example
-                # within the standard library is IntEnum.
-                yield buf + _intstr(value)
+            elif isinstance(value, (int, long)):
+                yield buf + str(value)
             elif isinstance(value, float):
-                # see comment above for int
                 yield buf + _floatstr(value)
             else:
                 yield buf
@@ -322,10 +329,11 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
                     chunks = _iterencode_dict(value, _current_indent_level)
                 else:
                     chunks = _iterencode(value, _current_indent_level)
-                yield from chunks
+                for chunk in chunks:
+                    yield chunk
         if newline_indent is not None:
             _current_indent_level -= 1
-            yield '\n' + _indent * _current_indent_level
+            yield '\n' + (' ' * (_indent * _current_indent_level))
         yield ']'
         if markers is not None:
             del markers[markerid]
@@ -342,7 +350,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
         yield '{'
         if _indent is not None:
             _current_indent_level += 1
-            newline_indent = '\n' + _indent * _current_indent_level
+            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
             item_separator = _item_separator + newline_indent
             yield newline_indent
         else:
@@ -350,16 +358,15 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
             item_separator = _item_separator
         first = True
         if _sort_keys:
-            items = sorted(dct.items())
+            items = sorted(dct.items(), key=lambda kv: kv[0])
         else:
-            items = dct.items()
+            items = dct.iteritems()
         for key, value in items:
-            if isinstance(key, str):
+            if isinstance(key, basestring):
                 pass
             # JavaScript is weakly typed for these, so it makes sense to
             # also allow them.  Many encoders seem to do something like this.
             elif isinstance(key, float):
-                # see comment for int/float in _make_iterencode
                 key = _floatstr(key)
             elif key is True:
                 key = 'true'
@@ -367,21 +374,19 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
                 key = 'false'
             elif key is None:
                 key = 'null'
-            elif isinstance(key, int):
-                # see comment for int/float in _make_iterencode
-                key = _intstr(key)
+            elif isinstance(key, (int, long)):
+                key = str(key)
             elif _skipkeys:
                 continue
             else:
-                raise TypeError(f'keys must be str, int, float, bool or None, '
-                                f'not {key.__class__.__name__}')
+                raise TypeError("key " + repr(key) + " is not a string")
             if first:
                 first = False
             else:
                 yield item_separator
             yield _encoder(key)
             yield _key_separator
-            if isinstance(value, str):
+            if isinstance(value, basestring):
                 yield _encoder(value)
             elif value is None:
                 yield 'null'
@@ -389,11 +394,9 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
                 yield 'true'
             elif value is False:
                 yield 'false'
-            elif isinstance(value, int):
-                # see comment for int/float in _make_iterencode
-                yield _intstr(value)
+            elif isinstance(value, (int, long)):
+                yield str(value)
             elif isinstance(value, float):
-                # see comment for int/float in _make_iterencode
                 yield _floatstr(value)
             else:
                 if isinstance(value, (list, tuple)):
@@ -402,16 +405,17 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
                     chunks = _iterencode_dict(value, _current_indent_level)
                 else:
                     chunks = _iterencode(value, _current_indent_level)
-                yield from chunks
+                for chunk in chunks:
+                    yield chunk
         if newline_indent is not None:
             _current_indent_level -= 1
-            yield '\n' + _indent * _current_indent_level
+            yield '\n' + (' ' * (_indent * _current_indent_level))
         yield '}'
         if markers is not None:
             del markers[markerid]
 
     def _iterencode(o, _current_indent_level):
-        if isinstance(o, str):
+        if isinstance(o, basestring):
             yield _encoder(o)
         elif o is None:
             yield 'null'
@@ -419,16 +423,16 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
             yield 'true'
         elif o is False:
             yield 'false'
-        elif isinstance(o, int):
-            # see comment for int/float in _make_iterencode
-            yield _intstr(o)
+        elif isinstance(o, (int, long)):
+            yield str(o)
         elif isinstance(o, float):
-            # see comment for int/float in _make_iterencode
             yield _floatstr(o)
         elif isinstance(o, (list, tuple)):
-            yield from _iterencode_list(o, _current_indent_level)
+            for chunk in _iterencode_list(o, _current_indent_level):
+                yield chunk
         elif isinstance(o, dict):
-            yield from _iterencode_dict(o, _current_indent_level)
+            for chunk in _iterencode_dict(o, _current_indent_level):
+                yield chunk
         else:
             if markers is not None:
                 markerid = id(o)
@@ -436,7 +440,9 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
                     raise ValueError("Circular reference detected")
                 markers[markerid] = o
             o = _default(o)
-            yield from _iterencode(o, _current_indent_level)
+            for chunk in _iterencode(o, _current_indent_level):
+                yield chunk
             if markers is not None:
                 del markers[markerid]
+
     return _iterencode