diff options
Diffstat (limited to 'Lib/json')
-rw-r--r-- | Lib/json/__init__.py | 212 | ||||
-rw-r--r-- | Lib/json/decoder.py | 185 | ||||
-rw-r--r-- | Lib/json/encoder.py | 146 | ||||
-rw-r--r-- | Lib/json/scanner.py | 20 | ||||
-rw-r--r-- | Lib/json/tests/__init__.py | 73 | ||||
-rw-r--r-- | Lib/json/tests/test_check_circular.py | 34 | ||||
-rw-r--r-- | Lib/json/tests/test_decode.py | 69 | ||||
-rw-r--r-- | Lib/json/tests/test_default.py | 12 | ||||
-rw-r--r-- | Lib/json/tests/test_dump.py | 32 | ||||
-rw-r--r-- | Lib/json/tests/test_encode_basestring_ascii.py | 41 | ||||
-rw-r--r-- | Lib/json/tests/test_fail.py | 105 | ||||
-rw-r--r-- | Lib/json/tests/test_float.py | 48 | ||||
-rw-r--r-- | Lib/json/tests/test_indent.py | 60 | ||||
-rw-r--r-- | Lib/json/tests/test_pass1.py | 75 | ||||
-rw-r--r-- | Lib/json/tests/test_pass2.py | 18 | ||||
-rw-r--r-- | Lib/json/tests/test_pass3.py | 24 | ||||
-rw-r--r-- | Lib/json/tests/test_recursion.py | 108 | ||||
-rw-r--r-- | Lib/json/tests/test_scanstring.py | 157 | ||||
-rw-r--r-- | Lib/json/tests/test_separators.py | 44 | ||||
-rw-r--r-- | Lib/json/tests/test_speedups.py | 51 | ||||
-rw-r--r-- | Lib/json/tests/test_tool.py | 69 | ||||
-rw-r--r-- | Lib/json/tests/test_unicode.py | 89 | ||||
-rw-r--r-- | Lib/json/tool.py | 73 |
23 files changed, 1414 insertions, 331 deletions
diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py index 1ba8b48..ce62361 100644 --- a/Lib/json/__init__.py +++ b/Lib/json/__init__.py @@ -3,23 +3,26 @@ JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data interchange format. :mod:`json` exposes an API familiar to users of the standard library -:mod:`marshal` and :mod:`pickle` modules. It is derived from a -version of the externally maintained simplejson library. +:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained +version of the :mod:`json` library contained in Python 2.6, but maintains +compatibility with Python 2.4 and Python 2.5 and (currently) has +significant performance advantages, even without using the optional C +extension for speedups. Encoding basic Python object hierarchies:: >>> import json >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) '["foo", {"bar": ["baz", null, 1.0, 2]}]' - >>> print(json.dumps("\"foo\bar")) + >>> print json.dumps("\"foo\bar") "\"foo\bar" - >>> print(json.dumps('\u1234')) + >>> print json.dumps(u'\u1234') "\u1234" - >>> print(json.dumps('\\')) + >>> print json.dumps('\\') "\\" - >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)) + >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) {"a": 0, "b": 0, "c": 0} - >>> from io import StringIO + >>> from StringIO import StringIO >>> io = StringIO() >>> json.dump(['streaming API'], io) >>> io.getvalue() @@ -28,14 +31,14 @@ Encoding basic Python object hierarchies:: Compact encoding:: >>> import json - >>> mydict = {'4': 5, '6': 7} - >>> json.dumps([1,2,3,mydict], separators=(',', ':')) + >>> json.dumps([1,2,3,{'4': 5, '6': 7}], sort_keys=True, separators=(',',':')) '[1,2,3,{"4":5,"6":7}]' Pretty printing:: >>> import json - >>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4)) + >>> print json.dumps({'4': 5, '6': 7}, sort_keys=True, + ... indent=4, separators=(',', ': ')) { "4": 5, "6": 7 @@ -44,12 +47,12 @@ Pretty printing:: Decoding JSON:: >>> import json - >>> obj = ['foo', {'bar': ['baz', None, 1.0, 2]}] + >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj True - >>> json.loads('"\\"foo\\bar"') == '"foo\x08ar' + >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' True - >>> from io import StringIO + >>> from StringIO import StringIO >>> io = StringIO('["streaming API"]') >>> json.load(io)[0] == 'streaming API' True @@ -75,8 +78,7 @@ Specializing JSON object encoding:: >>> def encode_complex(obj): ... if isinstance(obj, complex): ... return [obj.real, obj.imag] - ... raise TypeError(f'Object of type {obj.__class__.__name__} ' - ... f'is not JSON serializable') + ... raise TypeError(repr(obj) + " is not JSON serializable") ... >>> json.dumps(2 + 1j, default=encode_complex) '[2.0, 1.0]' @@ -98,14 +100,13 @@ Using json.tool from the shell to validate and pretty-print:: __version__ = '2.0.9' __all__ = [ 'dump', 'dumps', 'load', 'loads', - 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', + 'JSONDecoder', 'JSONEncoder', ] __author__ = 'Bob Ippolito <bob@redivi.com>' -from .decoder import JSONDecoder, JSONDecodeError +from .decoder import JSONDecoder from .encoder import JSONEncoder -import codecs _default_encoder = JSONEncoder( skipkeys=False, @@ -114,22 +115,28 @@ _default_encoder = JSONEncoder( allow_nan=True, indent=None, separators=None, + encoding='utf-8', default=None, ) -def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, +def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, cls=None, indent=None, separators=None, - default=None, sort_keys=False, **kw): + encoding='utf-8', default=None, sort_keys=False, **kw): """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a ``.write()``-supporting file-like object). If ``skipkeys`` is true then ``dict`` keys that are not basic types - (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped - instead of raising a ``TypeError``. - - If ``ensure_ascii`` is false, then the strings written to ``fp`` can - contain non-ASCII characters if they appear in strings contained in - ``obj``. Otherwise, all such characters are escaped in JSON strings. + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is true (the default), all non-ASCII characters in the + output are escaped with ``\uXXXX`` sequences, and the result is a ``str`` + instance consisting of ASCII characters only. If ``ensure_ascii`` is + false, some chunks written to ``fp`` may be ``unicode`` instances. + This usually happens because the input contains unicode strings or the + ``encoding`` parameter is used. Unless ``fp.write()`` explicitly + understands ``unicode`` (as in ``codecs.getwriter``) this is likely to + cause an error. If ``check_circular`` is false, then the circular reference check for container types will be skipped and a circular reference will @@ -143,12 +150,15 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, If ``indent`` is a non-negative integer, then JSON array elements and object members will be pretty-printed with that indent level. An indent level of 0 will only insert newlines. ``None`` is the most compact - representation. + representation. Since the default item separator is ``', '``, the + output might include trailing whitespace when ``indent`` is specified. + You can use ``separators=(',', ': ')`` to avoid this. + + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. - If specified, ``separators`` should be an ``(item_separator, key_separator)`` - tuple. The default is ``(', ', ': ')`` if *indent* is ``None`` and - ``(',', ': ')`` otherwise. To get the most compact JSON representation, - you should specify ``(',', ':')`` to eliminate whitespace. + ``encoding`` is the character encoding for str instances, default is UTF-8. ``default(obj)`` is a function that should return a serializable version of obj or raise TypeError. The default simply raises TypeError. @@ -165,14 +175,14 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, if (not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and - default is None and not sort_keys and not kw): + encoding == 'utf-8' and default is None and not sort_keys and not kw): iterable = _default_encoder.iterencode(obj) else: if cls is None: cls = JSONEncoder iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, - separators=separators, + separators=separators, encoding=encoding, default=default, sort_keys=sort_keys, **kw).iterencode(obj) # could accelerate with writelines in some versions of Python, at # a debuggability cost @@ -180,18 +190,18 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, fp.write(chunk) -def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True, +def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, cls=None, indent=None, separators=None, - default=None, sort_keys=False, **kw): + encoding='utf-8', default=None, sort_keys=False, **kw): """Serialize ``obj`` to a JSON formatted ``str``. If ``skipkeys`` is true then ``dict`` keys that are not basic types - (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped - instead of raising a ``TypeError``. + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + - If ``ensure_ascii`` is false, then the return value can contain non-ASCII - characters if they appear in strings contained in ``obj``. Otherwise, all - such characters are escaped in JSON strings. + If ``ensure_ascii`` is false, all non-ASCII characters are not escaped, and + the return value may be a ``unicode`` instance. See ``dump`` for details. If ``check_circular`` is false, then the circular reference check for container types will be skipped and a circular reference will @@ -205,12 +215,15 @@ def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True, If ``indent`` is a non-negative integer, then JSON array elements and object members will be pretty-printed with that indent level. An indent level of 0 will only insert newlines. ``None`` is the most compact - representation. + representation. Since the default item separator is ``', '``, the + output might include trailing whitespace when ``indent`` is specified. + You can use ``separators=(',', ': ')`` to avoid this. - If specified, ``separators`` should be an ``(item_separator, key_separator)`` - tuple. The default is ``(', ', ': ')`` if *indent* is ``None`` and - ``(',', ': ')`` otherwise. To get the most compact JSON representation, - you should specify ``(',', ':')`` to eliminate whitespace. + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. ``default(obj)`` is a function that should return a serializable version of obj or raise TypeError. The default simply raises TypeError. @@ -227,55 +240,33 @@ def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True, if (not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and - default is None and not sort_keys and not kw): + encoding == 'utf-8' and default is None and not sort_keys and not kw): return _default_encoder.encode(obj) if cls is None: cls = JSONEncoder return cls( skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, - separators=separators, default=default, sort_keys=sort_keys, - **kw).encode(obj) - - -_default_decoder = JSONDecoder(object_hook=None, object_pairs_hook=None) - - -def detect_encoding(b): - bstartswith = b.startswith - if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)): - return 'utf-32' - if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)): - return 'utf-16' - if bstartswith(codecs.BOM_UTF8): - return 'utf-8-sig' - - if len(b) >= 4: - if not b[0]: - # 00 00 -- -- - utf-32-be - # 00 XX -- -- - utf-16-be - return 'utf-16-be' if b[1] else 'utf-32-be' - if not b[1]: - # XX 00 00 00 - utf-32-le - # XX 00 00 XX - utf-16-le - # XX 00 XX -- - utf-16-le - return 'utf-16-le' if b[2] or b[3] else 'utf-32-le' - elif len(b) == 2: - if not b[0]: - # 00 XX - utf-16-be - return 'utf-16-be' - if not b[1]: - # XX 00 - utf-16-le - return 'utf-16-le' - # default - return 'utf-8' - - -def load(fp, *, cls=None, object_hook=None, parse_float=None, + separators=separators, encoding=encoding, default=default, + sort_keys=sort_keys, **kw).encode(obj) + + +_default_decoder = JSONDecoder(encoding=None, object_hook=None, + object_pairs_hook=None) + + +def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw): """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing a JSON document) to a Python object. + If the contents of ``fp`` is encoded with an ASCII based encoding other + than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must + be specified. Encodings that are not ASCII based (such as UCS-2) are + not allowed, and should be wrapped with + ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` + object and passed to ``loads()`` + ``object_hook`` is an optional function that will be called with the result of any object literal decode (a ``dict``). The return value of ``object_hook`` will be used instead of the ``dict``. This feature @@ -284,22 +275,31 @@ def load(fp, *, cls=None, object_hook=None, parse_float=None, ``object_pairs_hook`` is an optional function that will be called with the result of any object literal decoded with an ordered list of pairs. The return value of ``object_pairs_hook`` will be used instead of the ``dict``. - This feature can be used to implement custom decoders. If ``object_hook`` - is also defined, the ``object_pairs_hook`` takes priority. + This feature can be used to implement custom decoders that rely on the + order that the key and value pairs are decoded (for example, + collections.OrderedDict will remember the order of insertion). If + ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` kwarg; otherwise ``JSONDecoder`` is used. + """ return loads(fp.read(), - cls=cls, object_hook=object_hook, + encoding=encoding, cls=cls, object_hook=object_hook, parse_float=parse_float, parse_int=parse_int, - parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw) + parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, + **kw) -def loads(s, *, cls=None, object_hook=None, parse_float=None, +def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw): - """Deserialize ``s`` (a ``str``, ``bytes`` or ``bytearray`` instance - containing a JSON document) to a Python object. + """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON + document) to a Python object. + + If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding + other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name + must be specified. Encodings that are not ASCII based (such as UCS-2) + are not allowed and should be decoded to ``unicode`` first. ``object_hook`` is an optional function that will be called with the result of any object literal decode (a ``dict``). The return value of @@ -309,8 +309,10 @@ def loads(s, *, cls=None, object_hook=None, parse_float=None, ``object_pairs_hook`` is an optional function that will be called with the result of any object literal decoded with an ordered list of pairs. The return value of ``object_pairs_hook`` will be used instead of the ``dict``. - This feature can be used to implement custom decoders. If ``object_hook`` - is also defined, the ``object_pairs_hook`` takes priority. + This feature can be used to implement custom decoders that rely on the + order that the key and value pairs are decoded (for example, + collections.OrderedDict will remember the order of insertion). If + ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. ``parse_float``, if specified, will be called with the string of every JSON float to be decoded. By default this is equivalent to @@ -330,28 +332,8 @@ def loads(s, *, cls=None, object_hook=None, parse_float=None, To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` kwarg; otherwise ``JSONDecoder`` is used. - The ``encoding`` argument is ignored and deprecated since Python 3.1. """ - if isinstance(s, str): - if s.startswith('\ufeff'): - raise JSONDecodeError("Unexpected UTF-8 BOM (decode using utf-8-sig)", - s, 0) - else: - if not isinstance(s, (bytes, bytearray)): - raise TypeError(f'the JSON object must be str, bytes or bytearray, ' - f'not {s.__class__.__name__}') - s = s.decode(detect_encoding(s), 'surrogatepass') - - if "encoding" in kw: - import warnings - warnings.warn( - "'encoding' is ignored and deprecated. It will be removed in Python 3.9", - DeprecationWarning, - stacklevel=2 - ) - del kw['encoding'] - - if (cls is None and object_hook is None and + if (cls is None and encoding is None and object_hook is None and parse_int is None and parse_float is None and parse_constant is None and object_pairs_hook is None and not kw): return _default_decoder.decode(s) @@ -367,4 +349,4 @@ def loads(s, *, cls=None, object_hook=None, parse_float=None, kw['parse_int'] = parse_int if parse_constant is not None: kw['parse_constant'] = parse_constant - return cls(**kw).decode(s) + return cls(encoding=encoding, **kw).decode(s) diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py index d7d8244..5141f87 100644 --- a/Lib/json/decoder.py +++ b/Lib/json/decoder.py @@ -1,6 +1,8 @@ """Implementation of JSONDecoder """ import re +import sys +import struct from json import scanner try: @@ -8,39 +10,40 @@ try: except ImportError: c_scanstring = None -__all__ = ['JSONDecoder', 'JSONDecodeError'] +__all__ = ['JSONDecoder'] FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL -NaN = float('nan') -PosInf = float('inf') -NegInf = float('-inf') +def _floatconstants(): + nan, = struct.unpack('>d', b'\x7f\xf8\x00\x00\x00\x00\x00\x00') + inf, = struct.unpack('>d', b'\x7f\xf0\x00\x00\x00\x00\x00\x00') + return nan, inf, -inf +NaN, PosInf, NegInf = _floatconstants() -class JSONDecodeError(ValueError): - """Subclass of ValueError with the following additional properties: - msg: The unformatted error message - doc: The JSON document being parsed - pos: The start index of doc where parsing failed - lineno: The line corresponding to pos - colno: The column corresponding to pos +def linecol(doc, pos): + lineno = doc.count('\n', 0, pos) + 1 + if lineno == 1: + colno = pos + 1 + else: + colno = pos - doc.rindex('\n', 0, pos) + return lineno, colno - """ - # Note that this exception is used from _json - def __init__(self, msg, doc, pos): - lineno = doc.count('\n', 0, pos) + 1 - colno = pos - doc.rfind('\n', 0, pos) - errmsg = '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos) - ValueError.__init__(self, errmsg) - self.msg = msg - self.doc = doc - self.pos = pos - self.lineno = lineno - self.colno = colno - - def __reduce__(self): - return self.__class__, (self.msg, self.doc, self.pos) + +def errmsg(msg, doc, pos, end=None): + # Note that this function is called from _json + lineno, colno = linecol(doc, pos) + if end is None: + fmt = '{0}: line {1} column {2} (char {3})' + return fmt.format(msg, lineno, colno, pos) + #fmt = '%s: line %d column %d (char %d)' + #return fmt % (msg, lineno, colno, pos) + endlineno, endcolno = linecol(doc, end) + fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' + return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) + #fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' + #return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) _CONSTANTS = { @@ -49,13 +52,14 @@ _CONSTANTS = { 'NaN': NaN, } - STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) BACKSLASH = { - '"': '"', '\\': '\\', '/': '/', - 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t', + '"': u'"', '\\': u'\\', '/': u'/', + 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', } +DEFAULT_ENCODING = "utf-8" + def _decode_uXXXX(s, pos): esc = s[pos + 1:pos + 5] if len(esc) == 4 and esc[1] not in 'xX': @@ -64,9 +68,9 @@ def _decode_uXXXX(s, pos): except ValueError: pass msg = "Invalid \\uXXXX escape" - raise JSONDecodeError(msg, s, pos) + raise ValueError(errmsg(msg, s, pos)) -def py_scanstring(s, end, strict=True, +def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): """Scan the string s for a JSON string. End is the index of the character in s after the quote that started the JSON string. @@ -76,17 +80,22 @@ def py_scanstring(s, end, strict=True, Returns a tuple of the decoded string and the index of the character in s after the end quote.""" + if encoding is None: + encoding = DEFAULT_ENCODING chunks = [] _append = chunks.append begin = end - 1 while 1: chunk = _m(s, end) if chunk is None: - raise JSONDecodeError("Unterminated string starting at", s, begin) + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) end = chunk.end() content, terminator = chunk.groups() # Content is contains zero or more unescaped string characters if content: + if not isinstance(content, unicode): + content = unicode(content, encoding) _append(content) # Terminator is the end of string, a literal control character, # or a backslash denoting that an escape sequence follows @@ -96,34 +105,38 @@ def py_scanstring(s, end, strict=True, if strict: #msg = "Invalid control character %r at" % (terminator,) msg = "Invalid control character {0!r} at".format(terminator) - raise JSONDecodeError(msg, s, end) + raise ValueError(errmsg(msg, s, end)) else: _append(terminator) continue try: esc = s[end] except IndexError: - raise JSONDecodeError("Unterminated string starting at", - s, begin) from None + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) # If not a unicode escape sequence, must be in the lookup table if esc != 'u': try: char = _b[esc] except KeyError: - msg = "Invalid \\escape: {0!r}".format(esc) - raise JSONDecodeError(msg, s, end) + msg = "Invalid \\escape: " + repr(esc) + raise ValueError(errmsg(msg, s, end)) end += 1 else: + # Unicode escape sequence uni = _decode_uXXXX(s, end) end += 5 - if 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u': + # Check for surrogate pair on UCS-4 systems + if sys.maxunicode > 65535 and \ + 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u': uni2 = _decode_uXXXX(s, end + 1) if 0xdc00 <= uni2 <= 0xdfff: uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) end += 6 - char = chr(uni) + char = unichr(uni) + # Append the unescaped character _append(char) - return ''.join(chunks), end + return u''.join(chunks), end # Use speedup if available @@ -132,16 +145,11 @@ scanstring = c_scanstring or py_scanstring WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) WHITESPACE_STR = ' \t\n\r' - -def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook, - memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR): +def JSONObject(s_and_end, encoding, strict, scan_once, object_hook, + object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR): s, end = s_and_end pairs = [] pairs_append = pairs.append - # Backwards compatibility - if memo is None: - memo = {} - memo_get = memo.setdefault # Use a slice to prevent IndexError from being raised, the following # check will raise a more specific ValueError if the string is empty nextchar = s[end:end + 1] @@ -160,18 +168,18 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook, pairs = object_hook(pairs) return pairs, end + 1 elif nextchar != '"': - raise JSONDecodeError( - "Expecting property name enclosed in double quotes", s, end) + raise ValueError(errmsg( + "Expecting property name enclosed in double quotes", s, end)) end += 1 while True: - key, end = scanstring(s, end, strict) - key = memo_get(key, key) + key, end = scanstring(s, end, encoding, strict) + # To skip some function call overhead we optimize the fast paths where # the JSON key separator is ": " or just ":". if s[end:end + 1] != ':': end = _w(s, end).end() if s[end:end + 1] != ':': - raise JSONDecodeError("Expecting ':' delimiter", s, end) + raise ValueError(errmsg("Expecting ':' delimiter", s, end)) end += 1 try: @@ -184,9 +192,10 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook, try: value, end = scan_once(s, end) - except StopIteration as err: - raise JSONDecodeError("Expecting value", s, err.value) from None + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) pairs_append((key, value)) + try: nextchar = s[end] if nextchar in _ws: @@ -199,13 +208,23 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook, if nextchar == '}': break elif nextchar != ',': - raise JSONDecodeError("Expecting ',' delimiter", s, end - 1) - end = _w(s, end).end() - nextchar = s[end:end + 1] + raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1)) + + try: + nextchar = s[end] + if nextchar in _ws: + end += 1 + nextchar = s[end] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end] + except IndexError: + nextchar = '' + end += 1 if nextchar != '"': - raise JSONDecodeError( - "Expecting property name enclosed in double quotes", s, end - 1) + raise ValueError(errmsg( + "Expecting property name enclosed in double quotes", s, end - 1)) if object_pairs_hook is not None: result = object_pairs_hook(pairs) return result, end @@ -228,8 +247,8 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): while True: try: value, end = scan_once(s, end) - except StopIteration as err: - raise JSONDecodeError("Expecting value", s, err.value) from None + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) _append(value) nextchar = s[end:end + 1] if nextchar in _ws: @@ -239,7 +258,7 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): if nextchar == ']': break elif nextchar != ',': - raise JSONDecodeError("Expecting ',' delimiter", s, end - 1) + raise ValueError(errmsg("Expecting ',' delimiter", s, end)) try: if s[end] in _ws: end += 1 @@ -250,7 +269,6 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): return values, end - class JSONDecoder(object): """Simple JSON <http://json.org> decoder @@ -263,9 +281,9 @@ class JSONDecoder(object): +---------------+-------------------+ | array | list | +---------------+-------------------+ - | string | str | + | string | unicode | +---------------+-------------------+ - | number (int) | int | + | number (int) | int, long | +---------------+-------------------+ | number (real) | float | +---------------+-------------------+ @@ -281,10 +299,17 @@ class JSONDecoder(object): """ - def __init__(self, *, object_hook=None, parse_float=None, + def __init__(self, encoding=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, strict=True, object_pairs_hook=None): - """``object_hook``, if specified, will be called with the result + """``encoding`` determines the encoding used to interpret any ``str`` + objects decoded by this instance (utf-8 by default). It has no + effect when decoding ``unicode`` objects. + + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as ``unicode``. + + ``object_hook``, if specified, will be called with the result of every JSON object decoded and its return value will be used in place of the given ``dict``. This can be used to provide custom deserializations (e.g. to support JSON-RPC class hinting). @@ -292,8 +317,10 @@ class JSONDecoder(object): ``object_pairs_hook``, if specified will be called with the result of every JSON object decoded with an ordered list of pairs. The return value of ``object_pairs_hook`` will be used instead of the ``dict``. - This feature can be used to implement custom decoders. - If ``object_hook`` is also defined, the ``object_pairs_hook`` takes + This feature can be used to implement custom decoders that rely on the + order that the key and value pairs are decoded (for example, + collections.OrderedDict will remember the order of insertion). If + ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. ``parse_float``, if specified, will be called with the string @@ -315,34 +342,34 @@ class JSONDecoder(object): characters will be allowed inside strings. Control characters in this context are those with character codes in the 0-31 range, including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``. + """ + self.encoding = encoding self.object_hook = object_hook + self.object_pairs_hook = object_pairs_hook self.parse_float = parse_float or float self.parse_int = parse_int or int self.parse_constant = parse_constant or _CONSTANTS.__getitem__ self.strict = strict - self.object_pairs_hook = object_pairs_hook self.parse_object = JSONObject self.parse_array = JSONArray self.parse_string = scanstring - self.memo = {} self.scan_once = scanner.make_scanner(self) - def decode(self, s, _w=WHITESPACE.match): - """Return the Python representation of ``s`` (a ``str`` instance - containing a JSON document). + """Return the Python representation of ``s`` (a ``str`` or ``unicode`` + instance containing a JSON document) """ obj, end = self.raw_decode(s, idx=_w(s, 0).end()) end = _w(s, end).end() if end != len(s): - raise JSONDecodeError("Extra data", s, end) + raise ValueError(errmsg("Extra data", s, end, len(s))) return obj def raw_decode(self, s, idx=0): - """Decode a JSON document from ``s`` (a ``str`` beginning with - a JSON document) and return a 2-tuple of the Python + """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` + beginning with a JSON document) and return a 2-tuple of the Python representation and the index in ``s`` where the document ended. This can be used to decode a JSON document from a string that may @@ -351,6 +378,6 @@ class JSONDecoder(object): """ try: obj, end = self.scan_once(s, idx) - except StopIteration as err: - raise JSONDecodeError("Expecting value", s, err.value) from None + except StopIteration: + raise ValueError("No JSON object could be decoded") return obj, end diff --git a/Lib/json/encoder.py b/Lib/json/encoder.py index c8c78b9..97ffe8e 100644 --- a/Lib/json/encoder.py +++ b/Lib/json/encoder.py @@ -7,17 +7,13 @@ try: except ImportError: c_encode_basestring_ascii = None try: - from _json import encode_basestring as c_encode_basestring -except ImportError: - c_encode_basestring = None -try: from _json import make_encoder as c_make_encoder except ImportError: c_make_encoder = None ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') -HAS_UTF8 = re.compile(b'[\x80-\xff]') +HAS_UTF8 = re.compile(r'[\x80-\xff]') ESCAPE_DCT = { '\\': '\\\\', '"': '\\"', @@ -32,8 +28,9 @@ for i in range(0x20): #ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) INFINITY = float('inf') +FLOAT_REPR = float.__repr__ -def py_encode_basestring(s): +def encode_basestring(s): """Return a JSON representation of a Python string """ @@ -42,13 +39,12 @@ def py_encode_basestring(s): return '"' + ESCAPE.sub(replace, s) + '"' -encode_basestring = (c_encode_basestring or py_encode_basestring) - - def py_encode_basestring_ascii(s): """Return an ASCII-only JSON representation of a Python string """ + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') def replace(match): s = match.group(0) try: @@ -64,7 +60,8 @@ def py_encode_basestring_ascii(s): s1 = 0xd800 | ((n >> 10) & 0x3ff) s2 = 0xdc00 | (n & 0x3ff) return '\\u{0:04x}\\u{1:04x}'.format(s1, s2) - return '"' + ESCAPE_ASCII.sub(replace, s) + '"' + #return '\\u%04x\\u%04x' % (s1, s2) + return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' encode_basestring_ascii = ( @@ -82,9 +79,9 @@ class JSONEncoder(object): +-------------------+---------------+ | list, tuple | array | +-------------------+---------------+ - | str | string | + | str, unicode | string | +-------------------+---------------+ - | int, float | number | + | int, long, float | number | +-------------------+---------------+ | True | true | +-------------------+---------------+ @@ -101,18 +98,21 @@ class JSONEncoder(object): """ item_separator = ', ' key_separator = ': ' - def __init__(self, *, skipkeys=False, ensure_ascii=True, + def __init__(self, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, - indent=None, separators=None, default=None): + indent=None, separators=None, encoding='utf-8', default=None): """Constructor for JSONEncoder, with sensible defaults. If skipkeys is false, then it is a TypeError to attempt - encoding of keys that are not str, int, float or None. If + encoding of keys that are not str, int, long, float or None. If skipkeys is True, such items are simply skipped. - If ensure_ascii is true, the output is guaranteed to be str - objects with all incoming non-ASCII characters escaped. If - ensure_ascii is false, the output can contain non-ASCII characters. + If *ensure_ascii* is true (the default), all non-ASCII + characters in the output are escaped with \uXXXX sequences, + and the results are str instances consisting of ASCII + characters only. If ensure_ascii is False, a result may be a + unicode instance. This usually happens if the input contains + unicode strings or the *encoding* parameter is used. If check_circular is true, then lists, dicts, and custom encoded objects will be checked for circular references during encoding to @@ -131,17 +131,23 @@ class JSONEncoder(object): If indent is a non-negative integer, then JSON array elements and object members will be pretty-printed with that indent level. An indent level of 0 will only insert newlines. - None is the most compact representation. + None is the most compact representation. Since the default + item separator is ', ', the output might include trailing + whitespace when indent is specified. You can use + separators=(',', ': ') to avoid this. - If specified, separators should be an (item_separator, key_separator) - tuple. The default is (', ', ': ') if *indent* is ``None`` and - (',', ': ') otherwise. To get the most compact JSON representation, - you should specify (',', ':') to eliminate whitespace. + If specified, separators should be a (item_separator, key_separator) + tuple. The default is (', ', ': '). To get the most compact JSON + representation you should specify (',', ':') to eliminate whitespace. If specified, default is a function that gets called for objects that can't otherwise be serialized. It should return a JSON encodable version of the object or raise a ``TypeError``. + If encoding is not None, then all input strings will be + transformed into unicode using that encoding prior to JSON-encoding. + The default is UTF-8. + """ self.skipkeys = skipkeys @@ -152,10 +158,9 @@ class JSONEncoder(object): self.indent = indent if separators is not None: self.item_separator, self.key_separator = separators - elif indent is not None: - self.item_separator = ',' if default is not None: self.default = default + self.encoding = encoding def default(self, o): """Implement this method in a subclass such that it returns @@ -176,19 +181,22 @@ class JSONEncoder(object): return JSONEncoder.default(self, o) """ - raise TypeError(f'Object of type {o.__class__.__name__} ' - f'is not JSON serializable') + raise TypeError(repr(o) + " is not JSON serializable") def encode(self, o): """Return a JSON string representation of a Python data structure. - >>> from json.encoder import JSONEncoder >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) '{"foo": ["bar", "baz"]}' """ # This is for extremely simple cases and benchmarks. - if isinstance(o, str): + if isinstance(o, basestring): + if isinstance(o, str): + _encoding = self.encoding + if (_encoding is not None + and not (_encoding == 'utf-8')): + o = o.decode(_encoding) if self.ensure_ascii: return encode_basestring_ascii(o) else: @@ -219,9 +227,14 @@ class JSONEncoder(object): _encoder = encode_basestring_ascii else: _encoder = encode_basestring + if self.encoding != 'utf-8': + def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): + if isinstance(o, str): + o = o.decode(_encoding) + return _orig_encoder(o) def floatstr(o, allow_nan=self.allow_nan, - _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY): + _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY): # Check for specials. Note that this type of test is processor # and/or platform-specific, so do tests which don't depend on the # internals. @@ -244,7 +257,7 @@ class JSONEncoder(object): if (_one_shot and c_make_encoder is not None - and self.indent is None): + and self.indent is None and not self.sort_keys): _iterencode = c_make_encoder( markers, self.default, _encoder, self.indent, self.key_separator, self.item_separator, self.sort_keys, @@ -260,20 +273,18 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, ## HACK: hand-optimized bytecode; turn globals into locals ValueError=ValueError, + basestring=basestring, dict=dict, float=float, id=id, int=int, isinstance=isinstance, list=list, + long=long, str=str, tuple=tuple, - _intstr=int.__repr__, ): - if _indent is not None and not isinstance(_indent, str): - _indent = ' ' * _indent - def _iterencode_list(lst, _current_indent_level): if not lst: yield '[]' @@ -286,7 +297,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, buf = '[' if _indent is not None: _current_indent_level += 1 - newline_indent = '\n' + _indent * _current_indent_level + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) separator = _item_separator + newline_indent buf += newline_indent else: @@ -298,7 +309,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, first = False else: buf = separator - if isinstance(value, str): + if isinstance(value, basestring): yield buf + _encoder(value) elif value is None: yield buf + 'null' @@ -306,13 +317,9 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, yield buf + 'true' elif value is False: yield buf + 'false' - elif isinstance(value, int): - # Subclasses of int/float may override __repr__, but we still - # want to encode them as integers/floats in JSON. One example - # within the standard library is IntEnum. - yield buf + _intstr(value) + elif isinstance(value, (int, long)): + yield buf + str(value) elif isinstance(value, float): - # see comment above for int yield buf + _floatstr(value) else: yield buf @@ -322,10 +329,11 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, chunks = _iterencode_dict(value, _current_indent_level) else: chunks = _iterencode(value, _current_indent_level) - yield from chunks + for chunk in chunks: + yield chunk if newline_indent is not None: _current_indent_level -= 1 - yield '\n' + _indent * _current_indent_level + yield '\n' + (' ' * (_indent * _current_indent_level)) yield ']' if markers is not None: del markers[markerid] @@ -342,7 +350,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, yield '{' if _indent is not None: _current_indent_level += 1 - newline_indent = '\n' + _indent * _current_indent_level + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) item_separator = _item_separator + newline_indent yield newline_indent else: @@ -350,16 +358,15 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, item_separator = _item_separator first = True if _sort_keys: - items = sorted(dct.items()) + items = sorted(dct.items(), key=lambda kv: kv[0]) else: - items = dct.items() + items = dct.iteritems() for key, value in items: - if isinstance(key, str): + if isinstance(key, basestring): pass # JavaScript is weakly typed for these, so it makes sense to # also allow them. Many encoders seem to do something like this. elif isinstance(key, float): - # see comment for int/float in _make_iterencode key = _floatstr(key) elif key is True: key = 'true' @@ -367,21 +374,19 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, key = 'false' elif key is None: key = 'null' - elif isinstance(key, int): - # see comment for int/float in _make_iterencode - key = _intstr(key) + elif isinstance(key, (int, long)): + key = str(key) elif _skipkeys: continue else: - raise TypeError(f'keys must be str, int, float, bool or None, ' - f'not {key.__class__.__name__}') + raise TypeError("key " + repr(key) + " is not a string") if first: first = False else: yield item_separator yield _encoder(key) yield _key_separator - if isinstance(value, str): + if isinstance(value, basestring): yield _encoder(value) elif value is None: yield 'null' @@ -389,11 +394,9 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, yield 'true' elif value is False: yield 'false' - elif isinstance(value, int): - # see comment for int/float in _make_iterencode - yield _intstr(value) + elif isinstance(value, (int, long)): + yield str(value) elif isinstance(value, float): - # see comment for int/float in _make_iterencode yield _floatstr(value) else: if isinstance(value, (list, tuple)): @@ -402,16 +405,17 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, chunks = _iterencode_dict(value, _current_indent_level) else: chunks = _iterencode(value, _current_indent_level) - yield from chunks + for chunk in chunks: + yield chunk if newline_indent is not None: _current_indent_level -= 1 - yield '\n' + _indent * _current_indent_level + yield '\n' + (' ' * (_indent * _current_indent_level)) yield '}' if markers is not None: del markers[markerid] def _iterencode(o, _current_indent_level): - if isinstance(o, str): + if isinstance(o, basestring): yield _encoder(o) elif o is None: yield 'null' @@ -419,16 +423,16 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, yield 'true' elif o is False: yield 'false' - elif isinstance(o, int): - # see comment for int/float in _make_iterencode - yield _intstr(o) + elif isinstance(o, (int, long)): + yield str(o) elif isinstance(o, float): - # see comment for int/float in _make_iterencode yield _floatstr(o) elif isinstance(o, (list, tuple)): - yield from _iterencode_list(o, _current_indent_level) + for chunk in _iterencode_list(o, _current_indent_level): + yield chunk elif isinstance(o, dict): - yield from _iterencode_dict(o, _current_indent_level) + for chunk in _iterencode_dict(o, _current_indent_level): + yield chunk else: if markers is not None: markerid = id(o) @@ -436,7 +440,9 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, raise ValueError("Circular reference detected") markers[markerid] = o o = _default(o) - yield from _iterencode(o, _current_indent_level) + for chunk in _iterencode(o, _current_indent_level): + yield chunk if markers is not None: del markers[markerid] + return _iterencode diff --git a/Lib/json/scanner.py b/Lib/json/scanner.py index 7a61cfc..74e6805 100644 --- a/Lib/json/scanner.py +++ b/Lib/json/scanner.py @@ -17,25 +17,25 @@ def py_make_scanner(context): parse_array = context.parse_array parse_string = context.parse_string match_number = NUMBER_RE.match + encoding = context.encoding strict = context.strict parse_float = context.parse_float parse_int = context.parse_int parse_constant = context.parse_constant object_hook = context.object_hook object_pairs_hook = context.object_pairs_hook - memo = context.memo def _scan_once(string, idx): try: nextchar = string[idx] except IndexError: - raise StopIteration(idx) from None + raise StopIteration if nextchar == '"': - return parse_string(string, idx + 1, strict) + return parse_string(string, idx + 1, encoding, strict) elif nextchar == '{': - return parse_object((string, idx + 1), strict, - _scan_once, object_hook, object_pairs_hook, memo) + return parse_object((string, idx + 1), encoding, strict, + _scan_once, object_hook, object_pairs_hook) elif nextchar == '[': return parse_array((string, idx + 1), _scan_once) elif nextchar == 'n' and string[idx:idx + 4] == 'null': @@ -60,14 +60,8 @@ def py_make_scanner(context): elif nextchar == '-' and string[idx:idx + 9] == '-Infinity': return parse_constant('-Infinity'), idx + 9 else: - raise StopIteration(idx) + raise StopIteration - def scan_once(string, idx): - try: - return _scan_once(string, idx) - finally: - memo.clear() - - return scan_once + return _scan_once make_scanner = c_make_scanner or py_make_scanner diff --git a/Lib/json/tests/__init__.py b/Lib/json/tests/__init__.py new file mode 100644 index 0000000..90cb2b7 --- /dev/null +++ b/Lib/json/tests/__init__.py @@ -0,0 +1,73 @@ +import os +import sys +import json +import doctest +import unittest + +from test import test_support + +# import json with and without accelerations +cjson = test_support.import_fresh_module('json', fresh=['_json']) +pyjson = test_support.import_fresh_module('json', blocked=['_json']) + +# create two base classes that will be used by the other tests +class PyTest(unittest.TestCase): + json = pyjson + loads = staticmethod(pyjson.loads) + dumps = staticmethod(pyjson.dumps) + +@unittest.skipUnless(cjson, 'requires _json') +class CTest(unittest.TestCase): + if cjson is not None: + json = cjson + loads = staticmethod(cjson.loads) + dumps = staticmethod(cjson.dumps) + +# test PyTest and CTest checking if the functions come from the right module +class TestPyTest(PyTest): + def test_pyjson(self): + self.assertEqual(self.json.scanner.make_scanner.__module__, + 'json.scanner') + self.assertEqual(self.json.decoder.scanstring.__module__, + 'json.decoder') + self.assertEqual(self.json.encoder.encode_basestring_ascii.__module__, + 'json.encoder') + +class TestCTest(CTest): + def test_cjson(self): + self.assertEqual(self.json.scanner.make_scanner.__module__, '_json') + self.assertEqual(self.json.decoder.scanstring.__module__, '_json') + self.assertEqual(self.json.encoder.c_make_encoder.__module__, '_json') + self.assertEqual(self.json.encoder.encode_basestring_ascii.__module__, + '_json') + + +here = os.path.dirname(__file__) + +def test_suite(): + suite = additional_tests() + loader = unittest.TestLoader() + for fn in os.listdir(here): + if fn.startswith("test") and fn.endswith(".py"): + modname = "json.tests." + fn[:-3] + __import__(modname) + module = sys.modules[modname] + suite.addTests(loader.loadTestsFromModule(module)) + return suite + +def additional_tests(): + suite = unittest.TestSuite() + for mod in (json, json.encoder, json.decoder): + suite.addTest(doctest.DocTestSuite(mod)) + suite.addTest(TestPyTest('test_pyjson')) + suite.addTest(TestCTest('test_cjson')) + return suite + +def main(): + suite = test_suite() + runner = unittest.TextTestRunner() + runner.run(suite) + +if __name__ == '__main__': + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + main() diff --git a/Lib/json/tests/test_check_circular.py b/Lib/json/tests/test_check_circular.py new file mode 100644 index 0000000..3ad3d24 --- /dev/null +++ b/Lib/json/tests/test_check_circular.py @@ -0,0 +1,34 @@ +from json.tests import PyTest, CTest + + +def default_iterable(obj): + return list(obj) + +class TestCheckCircular(object): + def test_circular_dict(self): + dct = {} + dct['a'] = dct + self.assertRaises(ValueError, self.dumps, dct) + + def test_circular_list(self): + lst = [] + lst.append(lst) + self.assertRaises(ValueError, self.dumps, lst) + + def test_circular_composite(self): + dct2 = {} + dct2['a'] = [] + dct2['a'].append(dct2) + self.assertRaises(ValueError, self.dumps, dct2) + + def test_circular_default(self): + self.dumps([set()], default=default_iterable) + self.assertRaises(TypeError, self.dumps, [set()]) + + def test_circular_off_default(self): + self.dumps([set()], default=default_iterable, check_circular=False) + self.assertRaises(TypeError, self.dumps, [set()], check_circular=False) + + +class TestPyCheckCircular(TestCheckCircular, PyTest): pass +class TestCCheckCircular(TestCheckCircular, CTest): pass diff --git a/Lib/json/tests/test_decode.py b/Lib/json/tests/test_decode.py new file mode 100644 index 0000000..0014546 --- /dev/null +++ b/Lib/json/tests/test_decode.py @@ -0,0 +1,69 @@ +import decimal +from StringIO import StringIO +from collections import OrderedDict +from json.tests import PyTest, CTest + + +class TestDecode(object): + def test_decimal(self): + rval = self.loads('1.1', parse_float=decimal.Decimal) + self.assertTrue(isinstance(rval, decimal.Decimal)) + self.assertEqual(rval, decimal.Decimal('1.1')) + + def test_float(self): + rval = self.loads('1', parse_int=float) + self.assertTrue(isinstance(rval, float)) + self.assertEqual(rval, 1.0) + + def test_decoder_optimizations(self): + # Several optimizations were made that skip over calls to + # the whitespace regex, so this test is designed to try and + # exercise the uncommon cases. The array cases are already covered. + rval = self.loads('{ "key" : "value" , "k":"v" }') + self.assertEqual(rval, {"key":"value", "k":"v"}) + + def test_empty_objects(self): + self.assertEqual(self.loads('{}'), {}) + self.assertEqual(self.loads('[]'), []) + self.assertEqual(self.loads('""'), u"") + self.assertIsInstance(self.loads('""'), unicode) + + def test_object_pairs_hook(self): + s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}' + p = [("xkd", 1), ("kcw", 2), ("art", 3), ("hxm", 4), + ("qrt", 5), ("pad", 6), ("hoy", 7)] + self.assertEqual(self.loads(s), eval(s)) + self.assertEqual(self.loads(s, object_pairs_hook=lambda x: x), p) + self.assertEqual(self.json.load(StringIO(s), + object_pairs_hook=lambda x: x), p) + od = self.loads(s, object_pairs_hook=OrderedDict) + self.assertEqual(od, OrderedDict(p)) + self.assertEqual(type(od), OrderedDict) + # the object_pairs_hook takes priority over the object_hook + self.assertEqual(self.loads(s, object_pairs_hook=OrderedDict, + object_hook=lambda x: None), + OrderedDict(p)) + # check that empty object literals work (see #17368) + self.assertEqual(self.loads('{}', object_pairs_hook=OrderedDict), + OrderedDict()) + self.assertEqual(self.loads('{"empty": {}}', + object_pairs_hook=OrderedDict), + OrderedDict([('empty', OrderedDict())])) + + def test_extra_data(self): + s = '[1, 2, 3]5' + msg = 'Extra data' + self.assertRaisesRegexp(ValueError, msg, self.loads, s) + + def test_invalid_escape(self): + s = '["abc\\y"]' + msg = 'escape' + self.assertRaisesRegexp(ValueError, msg, self.loads, s) + + def test_negative_index(self): + d = self.json.JSONDecoder() + self.assertRaises(ValueError, d.raw_decode, 'a'*42, -50000) + self.assertRaises(ValueError, d.raw_decode, u'a'*42, -50000) + +class TestPyDecode(TestDecode, PyTest): pass +class TestCDecode(TestDecode, CTest): pass diff --git a/Lib/json/tests/test_default.py b/Lib/json/tests/test_default.py new file mode 100644 index 0000000..c2a07f6 --- /dev/null +++ b/Lib/json/tests/test_default.py @@ -0,0 +1,12 @@ +from json.tests import PyTest, CTest + + +class TestDefault(object): + def test_default(self): + self.assertEqual( + self.dumps(type, default=repr), + self.dumps(repr(type))) + + +class TestPyDefault(TestDefault, PyTest): pass +class TestCDefault(TestDefault, CTest): pass diff --git a/Lib/json/tests/test_dump.py b/Lib/json/tests/test_dump.py new file mode 100644 index 0000000..cd92569 --- /dev/null +++ b/Lib/json/tests/test_dump.py @@ -0,0 +1,32 @@ +from cStringIO import StringIO +from json.tests import PyTest, CTest + + +class TestDump(object): + def test_dump(self): + sio = StringIO() + self.json.dump({}, sio) + self.assertEqual(sio.getvalue(), '{}') + + def test_dumps(self): + self.assertEqual(self.dumps({}), '{}') + + def test_encode_truefalse(self): + self.assertEqual(self.dumps( + {True: False, False: True}, sort_keys=True), + '{"false": true, "true": false}') + self.assertEqual(self.dumps( + {2: 3.0, 4.0: 5L, False: 1, 6L: True}, sort_keys=True), + '{"false": 1, "2": 3.0, "4.0": 5, "6": true}') + + # Issue 16228: Crash on encoding resized list + def test_encode_mutated(self): + a = [object()] * 10 + def crasher(obj): + del a[-1] + self.assertEqual(self.dumps(a, default=crasher), + '[null, null, null, null, null]') + + +class TestPyDump(TestDump, PyTest): pass +class TestCDump(TestDump, CTest): pass diff --git a/Lib/json/tests/test_encode_basestring_ascii.py b/Lib/json/tests/test_encode_basestring_ascii.py new file mode 100644 index 0000000..9f9d5a5 --- /dev/null +++ b/Lib/json/tests/test_encode_basestring_ascii.py @@ -0,0 +1,41 @@ +from collections import OrderedDict +from json.tests import PyTest, CTest + + +CASES = [ + (u'/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'), + (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), + (u'controls', '"controls"'), + (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), + (u'{"object with 1 member":["array with 1 element"]}', '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'), + (u' s p a c e d ', '" s p a c e d "'), + (u'\U0001d120', '"\\ud834\\udd20"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + (u"`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'), + (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), + (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), +] + +class TestEncodeBasestringAscii(object): + def test_encode_basestring_ascii(self): + fname = self.json.encoder.encode_basestring_ascii.__name__ + for input_string, expect in CASES: + result = self.json.encoder.encode_basestring_ascii(input_string) + self.assertEqual(result, expect, + '{0!r} != {1!r} for {2}({3!r})'.format( + result, expect, fname, input_string)) + + def test_ordered_dict(self): + # See issue 6105 + items = [('one', 1), ('two', 2), ('three', 3), ('four', 4), ('five', 5)] + s = self.dumps(OrderedDict(items)) + self.assertEqual(s, '{"one": 1, "two": 2, "three": 3, "four": 4, "five": 5}') + + +class TestPyEncodeBasestringAscii(TestEncodeBasestringAscii, PyTest): pass +class TestCEncodeBasestringAscii(TestEncodeBasestringAscii, CTest): pass diff --git a/Lib/json/tests/test_fail.py b/Lib/json/tests/test_fail.py new file mode 100644 index 0000000..e31b379 --- /dev/null +++ b/Lib/json/tests/test_fail.py @@ -0,0 +1,105 @@ +from json.tests import PyTest, CTest + +# 2007-10-05 +JSONDOCS = [ + # http://json.org/JSON_checker/test/fail1.json + '"A JSON payload should be an object or array, not a string."', + # http://json.org/JSON_checker/test/fail2.json + '["Unclosed array"', + # http://json.org/JSON_checker/test/fail3.json + '{unquoted_key: "keys must be quoted"}', + # http://json.org/JSON_checker/test/fail4.json + '["extra comma",]', + # http://json.org/JSON_checker/test/fail5.json + '["double extra comma",,]', + # http://json.org/JSON_checker/test/fail6.json + '[ , "<-- missing value"]', + # http://json.org/JSON_checker/test/fail7.json + '["Comma after the close"],', + # http://json.org/JSON_checker/test/fail8.json + '["Extra close"]]', + # http://json.org/JSON_checker/test/fail9.json + '{"Extra comma": true,}', + # http://json.org/JSON_checker/test/fail10.json + '{"Extra value after close": true} "misplaced quoted value"', + # http://json.org/JSON_checker/test/fail11.json + '{"Illegal expression": 1 + 2}', + # http://json.org/JSON_checker/test/fail12.json + '{"Illegal invocation": alert()}', + # http://json.org/JSON_checker/test/fail13.json + '{"Numbers cannot have leading zeroes": 013}', + # http://json.org/JSON_checker/test/fail14.json + '{"Numbers cannot be hex": 0x14}', + # http://json.org/JSON_checker/test/fail15.json + '["Illegal backslash escape: \\x15"]', + # http://json.org/JSON_checker/test/fail16.json + '[\\naked]', + # http://json.org/JSON_checker/test/fail17.json + '["Illegal backslash escape: \\017"]', + # http://json.org/JSON_checker/test/fail18.json + '[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', + # http://json.org/JSON_checker/test/fail19.json + '{"Missing colon" null}', + # http://json.org/JSON_checker/test/fail20.json + '{"Double colon":: null}', + # http://json.org/JSON_checker/test/fail21.json + '{"Comma instead of colon", null}', + # http://json.org/JSON_checker/test/fail22.json + '["Colon instead of comma": false]', + # http://json.org/JSON_checker/test/fail23.json + '["Bad value", truth]', + # http://json.org/JSON_checker/test/fail24.json + "['single quote']", + # http://json.org/JSON_checker/test/fail25.json + '["\ttab\tcharacter\tin\tstring\t"]', + # http://json.org/JSON_checker/test/fail26.json + '["tab\\ character\\ in\\ string\\ "]', + # http://json.org/JSON_checker/test/fail27.json + '["line\nbreak"]', + # http://json.org/JSON_checker/test/fail28.json + '["line\\\nbreak"]', + # http://json.org/JSON_checker/test/fail29.json + '[0e]', + # http://json.org/JSON_checker/test/fail30.json + '[0e+]', + # http://json.org/JSON_checker/test/fail31.json + '[0e+-1]', + # http://json.org/JSON_checker/test/fail32.json + '{"Comma instead if closing brace": true,', + # http://json.org/JSON_checker/test/fail33.json + '["mismatch"}', + # http://code.google.com/p/simplejson/issues/detail?id=3 + u'["A\u001FZ control characters in string"]', +] + +SKIPS = { + 1: "why not have a string payload?", + 18: "spec doesn't specify any nesting limitations", +} + +class TestFail(object): + def test_failures(self): + for idx, doc in enumerate(JSONDOCS): + idx = idx + 1 + if idx in SKIPS: + self.loads(doc) + continue + try: + self.loads(doc) + except ValueError: + pass + else: + self.fail("Expected failure for fail{0}.json: {1!r}".format(idx, doc)) + + def test_non_string_keys_dict(self): + data = {'a' : 1, (1, 2) : 2} + + #This is for c encoder + self.assertRaises(TypeError, self.dumps, data) + + #This is for python encoder + self.assertRaises(TypeError, self.dumps, data, indent=True) + + +class TestPyFail(TestFail, PyTest): pass +class TestCFail(TestFail, CTest): pass diff --git a/Lib/json/tests/test_float.py b/Lib/json/tests/test_float.py new file mode 100644 index 0000000..c10381d --- /dev/null +++ b/Lib/json/tests/test_float.py @@ -0,0 +1,48 @@ +import math +from json.tests import PyTest, CTest + + +class TestFloat(object): + def test_floats(self): + for num in [1617161771.7650001, math.pi, math.pi**100, + math.pi**-100, 3.1]: + self.assertEqual(float(self.dumps(num)), num) + self.assertEqual(self.loads(self.dumps(num)), num) + self.assertEqual(self.loads(unicode(self.dumps(num))), num) + + def test_ints(self): + for num in [1, 1L, 1<<32, 1<<64]: + self.assertEqual(self.dumps(num), str(num)) + self.assertEqual(int(self.dumps(num)), num) + self.assertEqual(self.loads(self.dumps(num)), num) + self.assertEqual(self.loads(unicode(self.dumps(num))), num) + + def test_out_of_range(self): + self.assertEqual(self.loads('[23456789012E666]'), [float('inf')]) + self.assertEqual(self.loads('[-23456789012E666]'), [float('-inf')]) + + def test_allow_nan(self): + for val in (float('inf'), float('-inf'), float('nan')): + out = self.dumps([val]) + if val == val: # inf + self.assertEqual(self.loads(out), [val]) + else: # nan + res = self.loads(out) + self.assertEqual(len(res), 1) + self.assertNotEqual(res[0], res[0]) + self.assertRaises(ValueError, self.dumps, [val], allow_nan=False) + + def test_float_subclasses_use_float_repr(self): + # Issue 27934. + class PeculiarFloat(float): + def __repr__(self): + return "I'm not valid JSON" + def __str__(self): + return "Neither am I" + + val = PeculiarFloat(3.2) + self.assertEqual(self.loads(self.dumps(val)), val) + + +class TestPyFloat(TestFloat, PyTest): pass +class TestCFloat(TestFloat, CTest): pass diff --git a/Lib/json/tests/test_indent.py b/Lib/json/tests/test_indent.py new file mode 100644 index 0000000..9b18761 --- /dev/null +++ b/Lib/json/tests/test_indent.py @@ -0,0 +1,60 @@ +import textwrap +from StringIO import StringIO +from json.tests import PyTest, CTest + + +class TestIndent(object): + def test_indent(self): + h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth', + {'nifty': 87}, {'field': 'yes', 'morefield': False} ] + + expect = textwrap.dedent("""\ + [ + [ + "blorpie" + ], + [ + "whoops" + ], + [], + "d-shtaeou", + "d-nthiouh", + "i-vhbjkhnth", + { + "nifty": 87 + }, + { + "field": "yes", + "morefield": false + } + ]""") + + + d1 = self.dumps(h) + d2 = self.dumps(h, indent=2, sort_keys=True, separators=(',', ': ')) + + h1 = self.loads(d1) + h2 = self.loads(d2) + + self.assertEqual(h1, h) + self.assertEqual(h2, h) + self.assertEqual(d2, expect) + + def test_indent0(self): + h = {3: 1} + def check(indent, expected): + d1 = self.dumps(h, indent=indent) + self.assertEqual(d1, expected) + + sio = StringIO() + self.json.dump(h, sio, indent=indent) + self.assertEqual(sio.getvalue(), expected) + + # indent=0 should emit newlines + check(0, '{\n"3": 1\n}') + # indent=None is more compact + check(None, '{"3": 1}') + + +class TestPyIndent(TestIndent, PyTest): pass +class TestCIndent(TestIndent, CTest): pass diff --git a/Lib/json/tests/test_pass1.py b/Lib/json/tests/test_pass1.py new file mode 100644 index 0000000..df8259b --- /dev/null +++ b/Lib/json/tests/test_pass1.py @@ -0,0 +1,75 @@ +from json.tests import PyTest, CTest + + +# from http://json.org/JSON_checker/test/pass1.json +JSON = r''' +[ + "JSON Test Pattern pass1", + {"object with 1 member":["array with 1 element"]}, + {}, + [], + -42, + true, + false, + null, + { + "integer": 1234567890, + "real": -9876.543210, + "e": 0.123456789e-12, + "E": 1.234567890E+34, + "": 23456789012E66, + "zero": 0, + "one": 1, + "space": " ", + "quote": "\"", + "backslash": "\\", + "controls": "\b\f\n\r\t", + "slash": "/ & \/", + "alpha": "abcdefghijklmnopqrstuvwyz", + "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ", + "digit": "0123456789", + "0123456789": "digit", + "special": "`1~!@#$%^&*()_+-={':[,]}|;.</>?", + "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A", + "true": true, + "false": false, + "null": null, + "array":[ ], + "object":{ }, + "address": "50 St. James Street", + "url": "http://www.JSON.org/", + "comment": "// /* <!-- --", + "# -- --> */": " ", + " s p a c e d " :[1,2 , 3 + +, + +4 , 5 , 6 ,7 ],"compact":[1,2,3,4,5,6,7], + "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}", + "quotes": "" \u0022 %22 0x22 034 "", + "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?" +: "A key can be any string" + }, + 0.5 ,98.6 +, +99.44 +, + +1066, +1e1, +0.1e1, +1e-1, +1e00,2e+00,2e-00 +,"rosebud"] +''' + +class TestPass1(object): + def test_parse(self): + # test in/out equivalence and parsing + res = self.loads(JSON) + out = self.dumps(res) + self.assertEqual(res, self.loads(out)) + + +class TestPyPass1(TestPass1, PyTest): pass +class TestCPass1(TestPass1, CTest): pass diff --git a/Lib/json/tests/test_pass2.py b/Lib/json/tests/test_pass2.py new file mode 100644 index 0000000..a2bb6d7 --- /dev/null +++ b/Lib/json/tests/test_pass2.py @@ -0,0 +1,18 @@ +from json.tests import PyTest, CTest + + +# from http://json.org/JSON_checker/test/pass2.json +JSON = r''' +[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]] +''' + +class TestPass2(object): + def test_parse(self): + # test in/out equivalence and parsing + res = self.loads(JSON) + out = self.dumps(res) + self.assertEqual(res, self.loads(out)) + + +class TestPyPass2(TestPass2, PyTest): pass +class TestCPass2(TestPass2, CTest): pass diff --git a/Lib/json/tests/test_pass3.py b/Lib/json/tests/test_pass3.py new file mode 100644 index 0000000..221f9a0 --- /dev/null +++ b/Lib/json/tests/test_pass3.py @@ -0,0 +1,24 @@ +from json.tests import PyTest, CTest + + +# from http://json.org/JSON_checker/test/pass3.json +JSON = r''' +{ + "JSON Test Pattern pass3": { + "The outermost value": "must be an object or array.", + "In this test": "It is an object." + } +} +''' + + +class TestPass3(object): + def test_parse(self): + # test in/out equivalence and parsing + res = self.loads(JSON) + out = self.dumps(res) + self.assertEqual(res, self.loads(out)) + + +class TestPyPass3(TestPass3, PyTest): pass +class TestCPass3(TestPass3, CTest): pass diff --git a/Lib/json/tests/test_recursion.py b/Lib/json/tests/test_recursion.py new file mode 100644 index 0000000..b5221e5 --- /dev/null +++ b/Lib/json/tests/test_recursion.py @@ -0,0 +1,108 @@ +from json.tests import PyTest, CTest + + +class JSONTestObject: + pass + + +class TestRecursion(object): + def test_listrecursion(self): + x = [] + x.append(x) + try: + self.dumps(x) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on list recursion") + x = [] + y = [x] + x.append(y) + try: + self.dumps(x) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on alternating list recursion") + y = [] + x = [y, y] + # ensure that the marker is cleared + self.dumps(x) + + def test_dictrecursion(self): + x = {} + x["test"] = x + try: + self.dumps(x) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on dict recursion") + x = {} + y = {"a": x, "b": x} + # ensure that the marker is cleared + self.dumps(x) + + def test_defaultrecursion(self): + class RecursiveJSONEncoder(self.json.JSONEncoder): + recurse = False + def default(self, o): + if o is JSONTestObject: + if self.recurse: + return [JSONTestObject] + else: + return 'JSONTestObject' + return pyjson.JSONEncoder.default(o) + + enc = RecursiveJSONEncoder() + self.assertEqual(enc.encode(JSONTestObject), '"JSONTestObject"') + enc.recurse = True + try: + enc.encode(JSONTestObject) + except ValueError: + pass + else: + self.fail("didn't raise ValueError on default recursion") + + + def test_highly_nested_objects_decoding(self): + # test that loading highly-nested objects doesn't segfault when C + # accelerations are used. See #12017 + # str + with self.assertRaises(RuntimeError): + self.loads('{"a":' * 100000 + '1' + '}' * 100000) + with self.assertRaises(RuntimeError): + self.loads('{"a":' * 100000 + '[1]' + '}' * 100000) + with self.assertRaises(RuntimeError): + self.loads('[' * 100000 + '1' + ']' * 100000) + # unicode + with self.assertRaises(RuntimeError): + self.loads(u'{"a":' * 100000 + u'1' + u'}' * 100000) + with self.assertRaises(RuntimeError): + self.loads(u'{"a":' * 100000 + u'[1]' + u'}' * 100000) + with self.assertRaises(RuntimeError): + self.loads(u'[' * 100000 + u'1' + u']' * 100000) + + def test_highly_nested_objects_encoding(self): + # See #12051 + l, d = [], {} + for x in xrange(100000): + l, d = [l], {'k':d} + with self.assertRaises(RuntimeError): + self.dumps(l) + with self.assertRaises(RuntimeError): + self.dumps(d) + + def test_endless_recursion(self): + # See #12051 + class EndlessJSONEncoder(self.json.JSONEncoder): + def default(self, o): + """If check_circular is False, this will keep adding another list.""" + return [o] + + with self.assertRaises(RuntimeError): + EndlessJSONEncoder(check_circular=False).encode(5j) + + +class TestPyRecursion(TestRecursion, PyTest): pass +class TestCRecursion(TestRecursion, CTest): pass diff --git a/Lib/json/tests/test_scanstring.py b/Lib/json/tests/test_scanstring.py new file mode 100644 index 0000000..ed80a41 --- /dev/null +++ b/Lib/json/tests/test_scanstring.py @@ -0,0 +1,157 @@ +import sys +from json.tests import PyTest, CTest + + +class TestScanstring(object): + def test_scanstring(self): + scanstring = self.json.decoder.scanstring + if sys.maxunicode == 65535: + self.assertEqual( + scanstring(u'"z\U0001d120x"', 1, None, True), + (u'z\U0001d120x', 6)) + else: + self.assertEqual( + scanstring(u'"z\U0001d120x"', 1, None, True), + (u'z\U0001d120x', 5)) + + self.assertEqual( + scanstring('"\\u007b"', 1, None, True), + (u'{', 8)) + + self.assertEqual( + scanstring('"A JSON payload should be an object or array, not a string."', 1, None, True), + (u'A JSON payload should be an object or array, not a string.', 60)) + + self.assertEqual( + scanstring('["Unclosed array"', 2, None, True), + (u'Unclosed array', 17)) + + self.assertEqual( + scanstring('["extra comma",]', 2, None, True), + (u'extra comma', 14)) + + self.assertEqual( + scanstring('["double extra comma",,]', 2, None, True), + (u'double extra comma', 21)) + + self.assertEqual( + scanstring('["Comma after the close"],', 2, None, True), + (u'Comma after the close', 24)) + + self.assertEqual( + scanstring('["Extra close"]]', 2, None, True), + (u'Extra close', 14)) + + self.assertEqual( + scanstring('{"Extra comma": true,}', 2, None, True), + (u'Extra comma', 14)) + + self.assertEqual( + scanstring('{"Extra value after close": true} "misplaced quoted value"', 2, None, True), + (u'Extra value after close', 26)) + + self.assertEqual( + scanstring('{"Illegal expression": 1 + 2}', 2, None, True), + (u'Illegal expression', 21)) + + self.assertEqual( + scanstring('{"Illegal invocation": alert()}', 2, None, True), + (u'Illegal invocation', 21)) + + self.assertEqual( + scanstring('{"Numbers cannot have leading zeroes": 013}', 2, None, True), + (u'Numbers cannot have leading zeroes', 37)) + + self.assertEqual( + scanstring('{"Numbers cannot be hex": 0x14}', 2, None, True), + (u'Numbers cannot be hex', 24)) + + self.assertEqual( + scanstring('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21, None, True), + (u'Too deep', 30)) + + self.assertEqual( + scanstring('{"Missing colon" null}', 2, None, True), + (u'Missing colon', 16)) + + self.assertEqual( + scanstring('{"Double colon":: null}', 2, None, True), + (u'Double colon', 15)) + + self.assertEqual( + scanstring('{"Comma instead of colon", null}', 2, None, True), + (u'Comma instead of colon', 25)) + + self.assertEqual( + scanstring('["Colon instead of comma": false]', 2, None, True), + (u'Colon instead of comma', 25)) + + self.assertEqual( + scanstring('["Bad value", truth]', 2, None, True), + (u'Bad value', 12)) + + def test_surrogates(self): + scanstring = self.json.decoder.scanstring + def assertScan(given, expect): + self.assertEqual(scanstring(given, 1, None, True), + (expect, len(given))) + if not isinstance(given, unicode): + given = unicode(given) + self.assertEqual(scanstring(given, 1, None, True), + (expect, len(given))) + + surrogates = unichr(0xd834) + unichr(0xdd20) + assertScan('"z\\ud834\\u0079x"', u'z\ud834yx') + assertScan('"z\\ud834\\udd20x"', u'z\U0001d120x') + assertScan('"z\\ud834\\ud834\\udd20x"', u'z\ud834\U0001d120x') + assertScan('"z\\ud834x"', u'z\ud834x') + assertScan(u'"z\\ud834\udd20x12345"', u'z%sx12345' % surrogates) + assertScan('"z\\udd20x"', u'z\udd20x') + assertScan(u'"z\ud834\udd20x"', u'z\ud834\udd20x') + assertScan(u'"z\ud834\\udd20x"', u'z%sx' % surrogates) + assertScan(u'"z\ud834x"', u'z\ud834x') + + def test_bad_escapes(self): + scanstring = self.json.decoder.scanstring + bad_escapes = [ + '"\\"', + '"\\x"', + '"\\u"', + '"\\u0"', + '"\\u01"', + '"\\u012"', + '"\\uz012"', + '"\\u0z12"', + '"\\u01z2"', + '"\\u012z"', + '"\\u0x12"', + '"\\u0X12"', + '"\\ud834\\"', + '"\\ud834\\u"', + '"\\ud834\\ud"', + '"\\ud834\\udd"', + '"\\ud834\\udd2"', + '"\\ud834\\uzdd2"', + '"\\ud834\\udzd2"', + '"\\ud834\\uddz2"', + '"\\ud834\\udd2z"', + '"\\ud834\\u0x20"', + '"\\ud834\\u0X20"', + ] + for s in bad_escapes: + with self.assertRaises(ValueError): + scanstring(s, 1, None, True) + + def test_issue3623(self): + self.assertRaises(ValueError, self.json.decoder.scanstring, b"xxx", 1, + "xxx") + self.assertRaises(UnicodeDecodeError, + self.json.encoder.encode_basestring_ascii, b"xx\xff") + + def test_overflow(self): + with self.assertRaises(OverflowError): + self.json.decoder.scanstring(b"xxx", sys.maxsize+1) + + +class TestPyScanstring(TestScanstring, PyTest): pass +class TestCScanstring(TestScanstring, CTest): pass diff --git a/Lib/json/tests/test_separators.py b/Lib/json/tests/test_separators.py new file mode 100644 index 0000000..a4246e1 --- /dev/null +++ b/Lib/json/tests/test_separators.py @@ -0,0 +1,44 @@ +import textwrap +from json.tests import PyTest, CTest + + +class TestSeparators(object): + def test_separators(self): + h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth', + {'nifty': 87}, {'field': 'yes', 'morefield': False} ] + + expect = textwrap.dedent("""\ + [ + [ + "blorpie" + ] , + [ + "whoops" + ] , + [] , + "d-shtaeou" , + "d-nthiouh" , + "i-vhbjkhnth" , + { + "nifty" : 87 + } , + { + "field" : "yes" , + "morefield" : false + } + ]""") + + + d1 = self.dumps(h) + d2 = self.dumps(h, indent=2, sort_keys=True, separators=(' ,', ' : ')) + + h1 = self.loads(d1) + h2 = self.loads(d2) + + self.assertEqual(h1, h) + self.assertEqual(h2, h) + self.assertEqual(d2, expect) + + +class TestPySeparators(TestSeparators, PyTest): pass +class TestCSeparators(TestSeparators, CTest): pass diff --git a/Lib/json/tests/test_speedups.py b/Lib/json/tests/test_speedups.py new file mode 100644 index 0000000..a6b7c38 --- /dev/null +++ b/Lib/json/tests/test_speedups.py @@ -0,0 +1,51 @@ +from json.tests import CTest + + +class BadBool: + def __nonzero__(self): + 1/0.0 + + +class TestSpeedups(CTest): + def test_scanstring(self): + self.assertEqual(self.json.decoder.scanstring.__module__, "_json") + self.assertIs(self.json.decoder.scanstring, self.json.decoder.c_scanstring) + + def test_encode_basestring_ascii(self): + self.assertEqual(self.json.encoder.encode_basestring_ascii.__module__, + "_json") + self.assertIs(self.json.encoder.encode_basestring_ascii, + self.json.encoder.c_encode_basestring_ascii) + +class TestDecode(CTest): + def test_make_scanner(self): + self.assertRaises(AttributeError, self.json.scanner.c_make_scanner, 1) + + def test_bad_bool_args(self): + def test(value): + self.json.decoder.JSONDecoder(strict=BadBool()).decode(value) + self.assertRaises(ZeroDivisionError, test, '""') + self.assertRaises(ZeroDivisionError, test, '{}') + self.assertRaises(ZeroDivisionError, test, u'""') + self.assertRaises(ZeroDivisionError, test, u'{}') + + +class TestEncode(CTest): + def test_make_encoder(self): + self.assertRaises(TypeError, self.json.encoder.c_make_encoder, + None, + "\xCD\x7D\x3D\x4E\x12\x4C\xF9\x79\xD7\x52\xBA\x82\xF2\x27\x4A\x7D\xA0\xCA\x75", + None) + + def test_bad_bool_args(self): + def test(name): + self.json.encoder.JSONEncoder(**{name: BadBool()}).encode({'a': 1}) + self.assertRaises(ZeroDivisionError, test, 'skipkeys') + self.assertRaises(ZeroDivisionError, test, 'ensure_ascii') + self.assertRaises(ZeroDivisionError, test, 'check_circular') + self.assertRaises(ZeroDivisionError, test, 'allow_nan') + self.assertRaises(ZeroDivisionError, test, 'sort_keys') + + def test_bad_encoding(self): + with self.assertRaises(UnicodeEncodeError): + self.json.encoder.JSONEncoder(encoding=u'\udcff').encode({'key': 123}) diff --git a/Lib/json/tests/test_tool.py b/Lib/json/tests/test_tool.py new file mode 100644 index 0000000..27dfb84 --- /dev/null +++ b/Lib/json/tests/test_tool.py @@ -0,0 +1,69 @@ +import os +import sys +import textwrap +import unittest +import subprocess +from test import test_support +from test.script_helper import assert_python_ok + +class TestTool(unittest.TestCase): + data = """ + + [["blorpie"],[ "whoops" ] , [ + ],\t"d-shtaeou",\r"d-nthiouh", + "i-vhbjkhnth", {"nifty":87}, {"morefield" :\tfalse,"field" + :"yes"} ] + """ + + expect = textwrap.dedent("""\ + [ + [ + "blorpie" + ], + [ + "whoops" + ], + [], + "d-shtaeou", + "d-nthiouh", + "i-vhbjkhnth", + { + "nifty": 87 + }, + { + "field": "yes", + "morefield": false + } + ] + """) + + def test_stdin_stdout(self): + proc = subprocess.Popen( + (sys.executable, '-m', 'json.tool'), + stdin=subprocess.PIPE, stdout=subprocess.PIPE) + out, err = proc.communicate(self.data.encode()) + self.assertEqual(out.splitlines(), self.expect.encode().splitlines()) + self.assertEqual(err, None) + + def _create_infile(self): + infile = test_support.TESTFN + with open(infile, "w") as fp: + self.addCleanup(os.remove, infile) + fp.write(self.data) + return infile + + def test_infile_stdout(self): + infile = self._create_infile() + rc, out, err = assert_python_ok('-m', 'json.tool', infile) + self.assertEqual(out.splitlines(), self.expect.encode().splitlines()) + self.assertEqual(err, b'') + + def test_infile_outfile(self): + infile = self._create_infile() + outfile = test_support.TESTFN + '.out' + rc, out, err = assert_python_ok('-m', 'json.tool', infile, outfile) + self.addCleanup(os.remove, outfile) + with open(outfile, "r") as fp: + self.assertEqual(fp.read(), self.expect) + self.assertEqual(out, b'') + self.assertEqual(err, b'') diff --git a/Lib/json/tests/test_unicode.py b/Lib/json/tests/test_unicode.py new file mode 100644 index 0000000..e90f158 --- /dev/null +++ b/Lib/json/tests/test_unicode.py @@ -0,0 +1,89 @@ +from collections import OrderedDict +from json.tests import PyTest, CTest + + +class TestUnicode(object): + def test_encoding1(self): + encoder = self.json.JSONEncoder(encoding='utf-8') + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + s = u.encode('utf-8') + ju = encoder.encode(u) + js = encoder.encode(s) + self.assertEqual(ju, js) + + def test_encoding2(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + s = u.encode('utf-8') + ju = self.dumps(u, encoding='utf-8') + js = self.dumps(s, encoding='utf-8') + self.assertEqual(ju, js) + + def test_encoding3(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = self.dumps(u) + self.assertEqual(j, '"\\u03b1\\u03a9"') + + def test_encoding4(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = self.dumps([u]) + self.assertEqual(j, '["\\u03b1\\u03a9"]') + + def test_encoding5(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = self.dumps(u, ensure_ascii=False) + self.assertEqual(j, u'"{0}"'.format(u)) + + def test_encoding6(self): + u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' + j = self.dumps([u], ensure_ascii=False) + self.assertEqual(j, u'["{0}"]'.format(u)) + + def test_big_unicode_encode(self): + u = u'\U0001d120' + self.assertEqual(self.dumps(u), '"\\ud834\\udd20"') + self.assertEqual(self.dumps(u, ensure_ascii=False), u'"\U0001d120"') + + def test_big_unicode_decode(self): + u = u'z\U0001d120x' + self.assertEqual(self.loads('"' + u + '"'), u) + self.assertEqual(self.loads('"z\\ud834\\udd20x"'), u) + + def test_unicode_decode(self): + for i in range(0, 0xd7ff): + u = unichr(i) + s = '"\\u{0:04x}"'.format(i) + self.assertEqual(self.loads(s), u) + + def test_object_pairs_hook_with_unicode(self): + s = u'{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}' + p = [(u"xkd", 1), (u"kcw", 2), (u"art", 3), (u"hxm", 4), + (u"qrt", 5), (u"pad", 6), (u"hoy", 7)] + self.assertEqual(self.loads(s), eval(s)) + self.assertEqual(self.loads(s, object_pairs_hook = lambda x: x), p) + od = self.loads(s, object_pairs_hook = OrderedDict) + self.assertEqual(od, OrderedDict(p)) + self.assertEqual(type(od), OrderedDict) + # the object_pairs_hook takes priority over the object_hook + self.assertEqual(self.loads(s, + object_pairs_hook = OrderedDict, + object_hook = lambda x: None), + OrderedDict(p)) + + def test_default_encoding(self): + self.assertEqual(self.loads(u'{"a": "\xe9"}'.encode('utf-8')), + {'a': u'\xe9'}) + + def test_unicode_preservation(self): + self.assertEqual(type(self.loads(u'""')), unicode) + self.assertEqual(type(self.loads(u'"a"')), unicode) + self.assertEqual(type(self.loads(u'["a"]')[0]), unicode) + # Issue 10038. + self.assertEqual(type(self.loads('"foo"')), unicode) + + def test_bad_encoding(self): + self.assertRaises(UnicodeEncodeError, self.loads, '"a"', u"rat\xe9") + self.assertRaises(TypeError, self.loads, '"a"', 1) + + +class TestPyUnicode(TestUnicode, PyTest): pass +class TestCUnicode(TestUnicode, CTest): pass diff --git a/Lib/json/tool.py b/Lib/json/tool.py index 6d7d9a0..fc5d749 100644 --- a/Lib/json/tool.py +++ b/Lib/json/tool.py @@ -10,65 +10,30 @@ Usage:: Expecting property name enclosed in double quotes: line 1 column 3 (char 2) """ -import argparse -import json import sys - +import json def main(): - prog = 'python -m json.tool' - description = ('A simple command line interface for json module ' - 'to validate and pretty-print JSON objects.') - parser = argparse.ArgumentParser(prog=prog, description=description) - parser.add_argument('infile', nargs='?', - type=argparse.FileType(encoding="utf-8"), - help='a JSON file to be validated or pretty-printed', - default=sys.stdin) - parser.add_argument('outfile', nargs='?', - type=argparse.FileType('w', encoding="utf-8"), - help='write the output of infile to outfile', - default=sys.stdout) - parser.add_argument('--sort-keys', action='store_true', default=False, - help='sort the output of dictionaries alphabetically by key') - parser.add_argument('--no-ensure-ascii', dest='ensure_ascii', action='store_false', - help='disable escaping of non-ASCII characters') - parser.add_argument('--json-lines', action='store_true', default=False, - help='parse input using the JSON Lines format. ' - 'Use with --no-indent or --compact to produce valid JSON Lines output.') - group = parser.add_mutually_exclusive_group() - group.add_argument('--indent', default=4, type=int, - help='separate items with newlines and use this number ' - 'of spaces for indentation') - group.add_argument('--tab', action='store_const', dest='indent', - const='\t', help='separate items with newlines and use ' - 'tabs for indentation') - group.add_argument('--no-indent', action='store_const', dest='indent', - const=None, - help='separate items with spaces rather than newlines') - group.add_argument('--compact', action='store_true', - help='suppress all whitespace separation (most compact)') - options = parser.parse_args() - - dump_args = { - 'sort_keys': options.sort_keys, - 'indent': options.indent, - 'ensure_ascii': options.ensure_ascii, - } - if options.compact: - dump_args['indent'] = None - dump_args['separators'] = ',', ':' - - with options.infile as infile, options.outfile as outfile: + if len(sys.argv) == 1: + infile = sys.stdin + outfile = sys.stdout + elif len(sys.argv) == 2: + infile = open(sys.argv[1], 'rb') + outfile = sys.stdout + elif len(sys.argv) == 3: + infile = open(sys.argv[1], 'rb') + outfile = open(sys.argv[2], 'wb') + else: + raise SystemExit(sys.argv[0] + " [infile [outfile]]") + with infile: try: - if options.json_lines: - objs = (json.loads(line) for line in infile) - else: - objs = (json.load(infile), ) - for obj in objs: - json.dump(obj, outfile, **dump_args) - outfile.write('\n') - except ValueError as e: + obj = json.load(infile) + except ValueError, e: raise SystemExit(e) + with outfile: + json.dump(obj, outfile, sort_keys=True, + indent=4, separators=(',', ': ')) + outfile.write('\n') if __name__ == '__main__': |