summary | refs | log | tree | commit | diff | stats
path: root/Lib/json/encoder.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/json/encoder.py')
-rw-r--r-- Lib/json/encoder.py 146
1 files changed, 76 insertions, 70 deletions
diff --git a/Lib/json/encoder.py b/Lib/json/encoder.py
index c8c78b9..97ffe8e 100644
--- a/Lib/json/encoder.py
+++ b/Lib/json/encoder.py
@@ -7,17 +7,13 @@ try:
except ImportError:
c_encode_basestring_ascii = None
try:
- from _json import encode_basestring as c_encode_basestring
-except ImportError:
- c_encode_basestring = None
-try:
from _json import make_encoder as c_make_encoder
except ImportError:
c_make_encoder = None
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
-HAS_UTF8 = re.compile(b'[\x80-\xff]')
+HAS_UTF8 = re.compile(r'[\x80-\xff]')
ESCAPE_DCT = {
'\\': '\\\\',
'"': '\\"',
@@ -32,8 +28,9 @@ for i in range(0x20):
#ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
INFINITY = float('inf')
+FLOAT_REPR = float.__repr__
-def py_encode_basestring(s):
+def encode_basestring(s):
"""Return a JSON representation of a Python string
"""
@@ -42,13 +39,12 @@ def py_encode_basestring(s):
return '"' + ESCAPE.sub(replace, s) + '"'
-encode_basestring = (c_encode_basestring or py_encode_basestring)
-
-
def py_encode_basestring_ascii(s):
"""Return an ASCII-only JSON representation of a Python string
"""
+ if isinstance(s, str) and HAS_UTF8.search(s) is not None:
+ s = s.decode('utf-8')
def replace(match):
s = match.group(0)
try:
@@ -64,7 +60,8 @@ def py_encode_basestring_ascii(s):
s1 = 0xd800 | ((n >> 10) & 0x3ff)
s2 = 0xdc00 | (n & 0x3ff)
return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
- return '"' + ESCAPE_ASCII.sub(replace, s) + '"'
+ #return '\\u%04x\\u%04x' % (s1, s2)
+ return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
encode_basestring_ascii = (
@@ -82,9 +79,9 @@ class JSONEncoder(object):
+-------------------+---------------+
| list, tuple | array |
+-------------------+---------------+
- | str | string |
+ | str, unicode | string |
+-------------------+---------------+
- | int, float | number |
+ | int, long, float | number |
+-------------------+---------------+
| True | true |
+-------------------+---------------+
@@ -101,18 +98,21 @@ class JSONEncoder(object):
"""
item_separator = ', '
key_separator = ': '
- def __init__(self, *, skipkeys=False, ensure_ascii=True,
+ def __init__(self, skipkeys=False, ensure_ascii=True,
check_circular=True, allow_nan=True, sort_keys=False,
- indent=None, separators=None, default=None):
+ indent=None, separators=None, encoding='utf-8', default=None):
"""Constructor for JSONEncoder, with sensible defaults.
If skipkeys is false, then it is a TypeError to attempt
- encoding of keys that are not str, int, float or None. If
+ encoding of keys that are not str, int, long, float or None. If
skipkeys is True, such items are simply skipped.
- If ensure_ascii is true, the output is guaranteed to be str
- objects with all incoming non-ASCII characters escaped. If
- ensure_ascii is false, the output can contain non-ASCII characters.
+ If *ensure_ascii* is true (the default), all non-ASCII
+ characters in the output are escaped with \uXXXX sequences,
+ and the results are str instances consisting of ASCII
+ characters only. If ensure_ascii is False, a result may be a
+ unicode instance. This usually happens if the input contains
+ unicode strings or the *encoding* parameter is used.
If check_circular is true, then lists, dicts, and custom encoded
objects will be checked for circular references during encoding to
@@ -131,17 +131,23 @@ class JSONEncoder(object):
If indent is a non-negative integer, then JSON array
elements and object members will be pretty-printed with that
indent level. An indent level of 0 will only insert newlines.
- None is the most compact representation.
+ None is the most compact representation. Since the default
+ item separator is ', ', the output might include trailing
+ whitespace when indent is specified. You can use
+ separators=(',', ': ') to avoid this.
- If specified, separators should be an (item_separator, key_separator)
- tuple. The default is (', ', ': ') if *indent* is ``None`` and
- (',', ': ') otherwise. To get the most compact JSON representation,
- you should specify (',', ':') to eliminate whitespace.
+ If specified, separators should be a (item_separator, key_separator)
+ tuple. The default is (', ', ': '). To get the most compact JSON
+ representation you should specify (',', ':') to eliminate whitespace.
If specified, default is a function that gets called for objects
that can't otherwise be serialized. It should return a JSON encodable
version of the object or raise a ``TypeError``.
+ If encoding is not None, then all input strings will be
+ transformed into unicode using that encoding prior to JSON-encoding.
+ The default is UTF-8.
+
"""
self.skipkeys = skipkeys
@@ -152,10 +158,9 @@ class JSONEncoder(object):
self.indent = indent
if separators is not None:
self.item_separator, self.key_separator = separators
- elif indent is not None:
- self.item_separator = ','
if default is not None:
self.default = default
+ self.encoding = encoding
def default(self, o):
"""Implement this method in a subclass such that it returns
@@ -176,19 +181,22 @@ class JSONEncoder(object):
return JSONEncoder.default(self, o)
"""
- raise TypeError(f'Object of type {o.__class__.__name__} '
- f'is not JSON serializable')
+ raise TypeError(repr(o) + " is not JSON serializable")
def encode(self, o):
"""Return a JSON string representation of a Python data structure.
- >>> from json.encoder import JSONEncoder
>>> JSONEncoder().encode({"foo": ["bar", "baz"]})
'{"foo": ["bar", "baz"]}'
"""
# This is for extremely simple cases and benchmarks.
- if isinstance(o, str):
+ if isinstance(o, basestring):
+ if isinstance(o, str):
+ _encoding = self.encoding
+ if (_encoding is not None
+ and not (_encoding == 'utf-8')):
+ o = o.decode(_encoding)
if self.ensure_ascii:
return encode_basestring_ascii(o)
else:
@@ -219,9 +227,14 @@ class JSONEncoder(object):
_encoder = encode_basestring_ascii
else:
_encoder = encode_basestring
+ if self.encoding != 'utf-8':
+ def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
+ if isinstance(o, str):
+ o = o.decode(_encoding)
+ return _orig_encoder(o)
def floatstr(o, allow_nan=self.allow_nan,
- _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY):
+ _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
# Check for specials. Note that this type of test is processor
# and/or platform-specific, so do tests which don't depend on the
# internals.
@@ -244,7 +257,7 @@ class JSONEncoder(object):
if (_one_shot and c_make_encoder is not None
- and self.indent is None):
+ and self.indent is None and not self.sort_keys):
_iterencode = c_make_encoder(
markers, self.default, _encoder, self.indent,
self.key_separator, self.item_separator, self.sort_keys,
@@ -260,20 +273,18 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
_key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
## HACK: hand-optimized bytecode; turn globals into locals
ValueError=ValueError,
+ basestring=basestring,
dict=dict,
float=float,
id=id,
int=int,
isinstance=isinstance,
list=list,
+ long=long,
str=str,
tuple=tuple,
- _intstr=int.__repr__,
):
- if _indent is not None and not isinstance(_indent, str):
- _indent = ' ' * _indent
-
def _iterencode_list(lst, _current_indent_level):
if not lst:
yield '[]'
@@ -286,7 +297,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
buf = '['
if _indent is not None:
_current_indent_level += 1
- newline_indent = '\n' + _indent * _current_indent_level
+ newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
separator = _item_separator + newline_indent
buf += newline_indent
else:
@@ -298,7 +309,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
first = False
else:
buf = separator
- if isinstance(value, str):
+ if isinstance(value, basestring):
yield buf + _encoder(value)
elif value is None:
yield buf + 'null'
@@ -306,13 +317,9 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
yield buf + 'true'
elif value is False:
yield buf + 'false'
- elif isinstance(value, int):
- # Subclasses of int/float may override __repr__, but we still
- # want to encode them as integers/floats in JSON. One example
- # within the standard library is IntEnum.
- yield buf + _intstr(value)
+ elif isinstance(value, (int, long)):
+ yield buf + str(value)
elif isinstance(value, float):
- # see comment above for int
yield buf + _floatstr(value)
else:
yield buf
@@ -322,10 +329,11 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
chunks = _iterencode_dict(value, _current_indent_level)
else:
chunks = _iterencode(value, _current_indent_level)
- yield from chunks
+ for chunk in chunks:
+ yield chunk
if newline_indent is not None:
_current_indent_level -= 1
- yield '\n' + _indent * _current_indent_level
+ yield '\n' + (' ' * (_indent * _current_indent_level))
yield ']'
if markers is not None:
del markers[markerid]
@@ -342,7 +350,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
yield '{'
if _indent is not None:
_current_indent_level += 1
- newline_indent = '\n' + _indent * _current_indent_level
+ newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
item_separator = _item_separator + newline_indent
yield newline_indent
else:
@@ -350,16 +358,15 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
item_separator = _item_separator
first = True
if _sort_keys:
- items = sorted(dct.items())
+ items = sorted(dct.items(), key=lambda kv: kv[0])
else:
- items = dct.items()
+ items = dct.iteritems()
for key, value in items:
- if isinstance(key, str):
+ if isinstance(key, basestring):
pass
# JavaScript is weakly typed for these, so it makes sense to
# also allow them. Many encoders seem to do something like this.
elif isinstance(key, float):
- # see comment for int/float in _make_iterencode
key = _floatstr(key)
elif key is True:
key = 'true'
@@ -367,21 +374,19 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
key = 'false'
elif key is None:
key = 'null'
- elif isinstance(key, int):
- # see comment for int/float in _make_iterencode
- key = _intstr(key)
+ elif isinstance(key, (int, long)):
+ key = str(key)
elif _skipkeys:
continue
else:
- raise TypeError(f'keys must be str, int, float, bool or None, '
- f'not {key.__class__.__name__}')
+ raise TypeError("key " + repr(key) + " is not a string")
if first:
first = False
else:
yield item_separator
yield _encoder(key)
yield _key_separator
- if isinstance(value, str):
+ if isinstance(value, basestring):
yield _encoder(value)
elif value is None:
yield 'null'
@@ -389,11 +394,9 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
yield 'true'
elif value is False:
yield 'false'
- elif isinstance(value, int):
- # see comment for int/float in _make_iterencode
- yield _intstr(value)
+ elif isinstance(value, (int, long)):
+ yield str(value)
elif isinstance(value, float):
- # see comment for int/float in _make_iterencode
yield _floatstr(value)
else:
if isinstance(value, (list, tuple)):
@@ -402,16 +405,17 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
chunks = _iterencode_dict(value, _current_indent_level)
else:
chunks = _iterencode(value, _current_indent_level)
- yield from chunks
+ for chunk in chunks:
+ yield chunk
if newline_indent is not None:
_current_indent_level -= 1
- yield '\n' + _indent * _current_indent_level
+ yield '\n' + (' ' * (_indent * _current_indent_level))
yield '}'
if markers is not None:
del markers[markerid]
def _iterencode(o, _current_indent_level):
- if isinstance(o, str):
+ if isinstance(o, basestring):
yield _encoder(o)
elif o is None:
yield 'null'
@@ -419,16 +423,16 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
yield 'true'
elif o is False:
yield 'false'
- elif isinstance(o, int):
- # see comment for int/float in _make_iterencode
- yield _intstr(o)
+ elif isinstance(o, (int, long)):
+ yield str(o)
elif isinstance(o, float):
- # see comment for int/float in _make_iterencode
yield _floatstr(o)
elif isinstance(o, (list, tuple)):
- yield from _iterencode_list(o, _current_indent_level)
+ for chunk in _iterencode_list(o, _current_indent_level):
+ yield chunk
elif isinstance(o, dict):
- yield from _iterencode_dict(o, _current_indent_level)
+ for chunk in _iterencode_dict(o, _current_indent_level):
+ yield chunk
else:
if markers is not None:
markerid = id(o)
@@ -436,7 +440,9 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
raise ValueError("Circular reference detected")
markers[markerid] = o
o = _default(o)
- yield from _iterencode(o, _current_indent_level)
+ for chunk in _iterencode(o, _current_indent_level):
+ yield chunk
if markers is not None:
del markers[markerid]
+
return _iterencode