diff options
Diffstat (limited to 'Lib/json/encoder.py')
-rw-r--r-- | Lib/json/encoder.py | 146 |
1 files changed, 76 insertions, 70 deletions
diff --git a/Lib/json/encoder.py b/Lib/json/encoder.py index c8c78b9..97ffe8e 100644 --- a/Lib/json/encoder.py +++ b/Lib/json/encoder.py @@ -7,17 +7,13 @@ try: except ImportError: c_encode_basestring_ascii = None try: - from _json import encode_basestring as c_encode_basestring -except ImportError: - c_encode_basestring = None -try: from _json import make_encoder as c_make_encoder except ImportError: c_make_encoder = None ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') -HAS_UTF8 = re.compile(b'[\x80-\xff]') +HAS_UTF8 = re.compile(r'[\x80-\xff]') ESCAPE_DCT = { '\\': '\\\\', '"': '\\"', @@ -32,8 +28,9 @@ for i in range(0x20): #ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) INFINITY = float('inf') +FLOAT_REPR = float.__repr__ -def py_encode_basestring(s): +def encode_basestring(s): """Return a JSON representation of a Python string """ @@ -42,13 +39,12 @@ def py_encode_basestring(s): return '"' + ESCAPE.sub(replace, s) + '"' -encode_basestring = (c_encode_basestring or py_encode_basestring) - - def py_encode_basestring_ascii(s): """Return an ASCII-only JSON representation of a Python string """ + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') def replace(match): s = match.group(0) try: @@ -64,7 +60,8 @@ def py_encode_basestring_ascii(s): s1 = 0xd800 | ((n >> 10) & 0x3ff) s2 = 0xdc00 | (n & 0x3ff) return '\\u{0:04x}\\u{1:04x}'.format(s1, s2) - return '"' + ESCAPE_ASCII.sub(replace, s) + '"' + #return '\\u%04x\\u%04x' % (s1, s2) + return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' encode_basestring_ascii = ( @@ -82,9 +79,9 @@ class JSONEncoder(object): +-------------------+---------------+ | list, tuple | array | +-------------------+---------------+ - | str | string | + | str, unicode | string | +-------------------+---------------+ - | int, float | number | + | int, long, float | number | +-------------------+---------------+ | True | true | +-------------------+---------------+ @@ -101,18 +98,21 @@ class JSONEncoder(object): """ item_separator = ', ' key_separator = ': ' - def __init__(self, *, skipkeys=False, ensure_ascii=True, + def __init__(self, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, - indent=None, separators=None, default=None): + indent=None, separators=None, encoding='utf-8', default=None): """Constructor for JSONEncoder, with sensible defaults. If skipkeys is false, then it is a TypeError to attempt - encoding of keys that are not str, int, float or None. If + encoding of keys that are not str, int, long, float or None. If skipkeys is True, such items are simply skipped. - If ensure_ascii is true, the output is guaranteed to be str - objects with all incoming non-ASCII characters escaped. If - ensure_ascii is false, the output can contain non-ASCII characters. + If *ensure_ascii* is true (the default), all non-ASCII + characters in the output are escaped with \uXXXX sequences, + and the results are str instances consisting of ASCII + characters only. If ensure_ascii is False, a result may be a + unicode instance. This usually happens if the input contains + unicode strings or the *encoding* parameter is used. If check_circular is true, then lists, dicts, and custom encoded objects will be checked for circular references during encoding to @@ -131,17 +131,23 @@ class JSONEncoder(object): If indent is a non-negative integer, then JSON array elements and object members will be pretty-printed with that indent level. An indent level of 0 will only insert newlines. - None is the most compact representation. + None is the most compact representation. Since the default + item separator is ', ', the output might include trailing + whitespace when indent is specified. You can use + separators=(',', ': ') to avoid this. - If specified, separators should be an (item_separator, key_separator) - tuple. The default is (', ', ': ') if *indent* is ``None`` and - (',', ': ') otherwise. To get the most compact JSON representation, - you should specify (',', ':') to eliminate whitespace. + If specified, separators should be a (item_separator, key_separator) + tuple. The default is (', ', ': '). To get the most compact JSON + representation you should specify (',', ':') to eliminate whitespace. If specified, default is a function that gets called for objects that can't otherwise be serialized. It should return a JSON encodable version of the object or raise a ``TypeError``. + If encoding is not None, then all input strings will be + transformed into unicode using that encoding prior to JSON-encoding. + The default is UTF-8. + """ self.skipkeys = skipkeys @@ -152,10 +158,9 @@ class JSONEncoder(object): self.indent = indent if separators is not None: self.item_separator, self.key_separator = separators - elif indent is not None: - self.item_separator = ',' if default is not None: self.default = default + self.encoding = encoding def default(self, o): """Implement this method in a subclass such that it returns @@ -176,19 +181,22 @@ class JSONEncoder(object): return JSONEncoder.default(self, o) """ - raise TypeError(f'Object of type {o.__class__.__name__} ' - f'is not JSON serializable') + raise TypeError(repr(o) + " is not JSON serializable") def encode(self, o): """Return a JSON string representation of a Python data structure. - >>> from json.encoder import JSONEncoder >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) '{"foo": ["bar", "baz"]}' """ # This is for extremely simple cases and benchmarks. - if isinstance(o, str): + if isinstance(o, basestring): + if isinstance(o, str): + _encoding = self.encoding + if (_encoding is not None + and not (_encoding == 'utf-8')): + o = o.decode(_encoding) if self.ensure_ascii: return encode_basestring_ascii(o) else: @@ -219,9 +227,14 @@ class JSONEncoder(object): _encoder = encode_basestring_ascii else: _encoder = encode_basestring + if self.encoding != 'utf-8': + def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): + if isinstance(o, str): + o = o.decode(_encoding) + return _orig_encoder(o) def floatstr(o, allow_nan=self.allow_nan, - _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY): + _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY): # Check for specials. Note that this type of test is processor # and/or platform-specific, so do tests which don't depend on the # internals. @@ -244,7 +257,7 @@ class JSONEncoder(object): if (_one_shot and c_make_encoder is not None - and self.indent is None): + and self.indent is None and not self.sort_keys): _iterencode = c_make_encoder( markers, self.default, _encoder, self.indent, self.key_separator, self.item_separator, self.sort_keys, @@ -260,20 +273,18 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, ## HACK: hand-optimized bytecode; turn globals into locals ValueError=ValueError, + basestring=basestring, dict=dict, float=float, id=id, int=int, isinstance=isinstance, list=list, + long=long, str=str, tuple=tuple, - _intstr=int.__repr__, ): - if _indent is not None and not isinstance(_indent, str): - _indent = ' ' * _indent - def _iterencode_list(lst, _current_indent_level): if not lst: yield '[]' @@ -286,7 +297,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, buf = '[' if _indent is not None: _current_indent_level += 1 - newline_indent = '\n' + _indent * _current_indent_level + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) separator = _item_separator + newline_indent buf += newline_indent else: @@ -298,7 +309,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, first = False else: buf = separator - if isinstance(value, str): + if isinstance(value, basestring): yield buf + _encoder(value) elif value is None: yield buf + 'null' @@ -306,13 +317,9 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, yield buf + 'true' elif value is False: yield buf + 'false' - elif isinstance(value, int): - # Subclasses of int/float may override __repr__, but we still - # want to encode them as integers/floats in JSON. One example - # within the standard library is IntEnum. - yield buf + _intstr(value) + elif isinstance(value, (int, long)): + yield buf + str(value) elif isinstance(value, float): - # see comment above for int yield buf + _floatstr(value) else: yield buf @@ -322,10 +329,11 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, chunks = _iterencode_dict(value, _current_indent_level) else: chunks = _iterencode(value, _current_indent_level) - yield from chunks + for chunk in chunks: + yield chunk if newline_indent is not None: _current_indent_level -= 1 - yield '\n' + _indent * _current_indent_level + yield '\n' + (' ' * (_indent * _current_indent_level)) yield ']' if markers is not None: del markers[markerid] @@ -342,7 +350,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, yield '{' if _indent is not None: _current_indent_level += 1 - newline_indent = '\n' + _indent * _current_indent_level + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) item_separator = _item_separator + newline_indent yield newline_indent else: @@ -350,16 +358,15 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, item_separator = _item_separator first = True if _sort_keys: - items = sorted(dct.items()) + items = sorted(dct.items(), key=lambda kv: kv[0]) else: - items = dct.items() + items = dct.iteritems() for key, value in items: - if isinstance(key, str): + if isinstance(key, basestring): pass # JavaScript is weakly typed for these, so it makes sense to # also allow them. Many encoders seem to do something like this. elif isinstance(key, float): - # see comment for int/float in _make_iterencode key = _floatstr(key) elif key is True: key = 'true' @@ -367,21 +374,19 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, key = 'false' elif key is None: key = 'null' - elif isinstance(key, int): - # see comment for int/float in _make_iterencode - key = _intstr(key) + elif isinstance(key, (int, long)): + key = str(key) elif _skipkeys: continue else: - raise TypeError(f'keys must be str, int, float, bool or None, ' - f'not {key.__class__.__name__}') + raise TypeError("key " + repr(key) + " is not a string") if first: first = False else: yield item_separator yield _encoder(key) yield _key_separator - if isinstance(value, str): + if isinstance(value, basestring): yield _encoder(value) elif value is None: yield 'null' @@ -389,11 +394,9 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, yield 'true' elif value is False: yield 'false' - elif isinstance(value, int): - # see comment for int/float in _make_iterencode - yield _intstr(value) + elif isinstance(value, (int, long)): + yield str(value) elif isinstance(value, float): - # see comment for int/float in _make_iterencode yield _floatstr(value) else: if isinstance(value, (list, tuple)): @@ -402,16 +405,17 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, chunks = _iterencode_dict(value, _current_indent_level) else: chunks = _iterencode(value, _current_indent_level) - yield from chunks + for chunk in chunks: + yield chunk if newline_indent is not None: _current_indent_level -= 1 - yield '\n' + _indent * _current_indent_level + yield '\n' + (' ' * (_indent * _current_indent_level)) yield '}' if markers is not None: del markers[markerid] def _iterencode(o, _current_indent_level): - if isinstance(o, str): + if isinstance(o, basestring): yield _encoder(o) elif o is None: yield 'null' @@ -419,16 +423,16 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, yield 'true' elif o is False: yield 'false' - elif isinstance(o, int): - # see comment for int/float in _make_iterencode - yield _intstr(o) + elif isinstance(o, (int, long)): + yield str(o) elif isinstance(o, float): - # see comment for int/float in _make_iterencode yield _floatstr(o) elif isinstance(o, (list, tuple)): - yield from _iterencode_list(o, _current_indent_level) + for chunk in _iterencode_list(o, _current_indent_level): + yield chunk elif isinstance(o, dict): - yield from _iterencode_dict(o, _current_indent_level) + for chunk in _iterencode_dict(o, _current_indent_level): + yield chunk else: if markers is not None: markerid = id(o) @@ -436,7 +440,9 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, raise ValueError("Circular reference detected") markers[markerid] = o o = _default(o) - yield from _iterencode(o, _current_indent_level) + for chunk in _iterencode(o, _current_indent_level): + yield chunk if markers is not None: del markers[markerid] + return _iterencode |