diff options
Diffstat (limited to 'Lib/json/decoder.py')
-rw-r--r-- | Lib/json/decoder.py | 185 |
1 files changed, 106 insertions, 79 deletions
diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py index d7d8244..5141f87 100644 --- a/Lib/json/decoder.py +++ b/Lib/json/decoder.py @@ -1,6 +1,8 @@ """Implementation of JSONDecoder """ import re +import sys +import struct from json import scanner try: @@ -8,39 +10,40 @@ try: except ImportError: c_scanstring = None -__all__ = ['JSONDecoder', 'JSONDecodeError'] +__all__ = ['JSONDecoder'] FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL -NaN = float('nan') -PosInf = float('inf') -NegInf = float('-inf') +def _floatconstants(): + nan, = struct.unpack('>d', b'\x7f\xf8\x00\x00\x00\x00\x00\x00') + inf, = struct.unpack('>d', b'\x7f\xf0\x00\x00\x00\x00\x00\x00') + return nan, inf, -inf +NaN, PosInf, NegInf = _floatconstants() -class JSONDecodeError(ValueError): - """Subclass of ValueError with the following additional properties: - msg: The unformatted error message - doc: The JSON document being parsed - pos: The start index of doc where parsing failed - lineno: The line corresponding to pos - colno: The column corresponding to pos +def linecol(doc, pos): + lineno = doc.count('\n', 0, pos) + 1 + if lineno == 1: + colno = pos + 1 + else: + colno = pos - doc.rindex('\n', 0, pos) + return lineno, colno - """ - # Note that this exception is used from _json - def __init__(self, msg, doc, pos): - lineno = doc.count('\n', 0, pos) + 1 - colno = pos - doc.rfind('\n', 0, pos) - errmsg = '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos) - ValueError.__init__(self, errmsg) - self.msg = msg - self.doc = doc - self.pos = pos - self.lineno = lineno - self.colno = colno - - def __reduce__(self): - return self.__class__, (self.msg, self.doc, self.pos) + +def errmsg(msg, doc, pos, end=None): + # Note that this function is called from _json + lineno, colno = linecol(doc, pos) + if end is None: + fmt = '{0}: line {1} column {2} (char {3})' + return fmt.format(msg, lineno, colno, pos) + #fmt = '%s: line %d column %d (char %d)' + #return fmt % (msg, lineno, colno, pos) + endlineno, endcolno = linecol(doc, end) + fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' + return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) + #fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' + #return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) _CONSTANTS = { @@ -49,13 +52,14 @@ _CONSTANTS = { 'NaN': NaN, } - STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) BACKSLASH = { - '"': '"', '\\': '\\', '/': '/', - 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t', + '"': u'"', '\\': u'\\', '/': u'/', + 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', } +DEFAULT_ENCODING = "utf-8" + def _decode_uXXXX(s, pos): esc = s[pos + 1:pos + 5] if len(esc) == 4 and esc[1] not in 'xX': @@ -64,9 +68,9 @@ def _decode_uXXXX(s, pos): except ValueError: pass msg = "Invalid \\uXXXX escape" - raise JSONDecodeError(msg, s, pos) + raise ValueError(errmsg(msg, s, pos)) -def py_scanstring(s, end, strict=True, +def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): """Scan the string s for a JSON string. End is the index of the character in s after the quote that started the JSON string. @@ -76,17 +80,22 @@ def py_scanstring(s, end, strict=True, Returns a tuple of the decoded string and the index of the character in s after the end quote.""" + if encoding is None: + encoding = DEFAULT_ENCODING chunks = [] _append = chunks.append begin = end - 1 while 1: chunk = _m(s, end) if chunk is None: - raise JSONDecodeError("Unterminated string starting at", s, begin) + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) end = chunk.end() content, terminator = chunk.groups() # Content is contains zero or more unescaped string characters if content: + if not isinstance(content, unicode): + content = unicode(content, encoding) _append(content) # Terminator is the end of string, a literal control character, # or a backslash denoting that an escape sequence follows @@ -96,34 +105,38 @@ def py_scanstring(s, end, strict=True, if strict: #msg = "Invalid control character %r at" % (terminator,) msg = "Invalid control character {0!r} at".format(terminator) - raise JSONDecodeError(msg, s, end) + raise ValueError(errmsg(msg, s, end)) else: _append(terminator) continue try: esc = s[end] except IndexError: - raise JSONDecodeError("Unterminated string starting at", - s, begin) from None + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) # If not a unicode escape sequence, must be in the lookup table if esc != 'u': try: char = _b[esc] except KeyError: - msg = "Invalid \\escape: {0!r}".format(esc) - raise JSONDecodeError(msg, s, end) + msg = "Invalid \\escape: " + repr(esc) + raise ValueError(errmsg(msg, s, end)) end += 1 else: + # Unicode escape sequence uni = _decode_uXXXX(s, end) end += 5 - if 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u': + # Check for surrogate pair on UCS-4 systems + if sys.maxunicode > 65535 and \ + 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u': uni2 = _decode_uXXXX(s, end + 1) if 0xdc00 <= uni2 <= 0xdfff: uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) end += 6 - char = chr(uni) + char = unichr(uni) + # Append the unescaped character _append(char) - return ''.join(chunks), end + return u''.join(chunks), end # Use speedup if available @@ -132,16 +145,11 @@ scanstring = c_scanstring or py_scanstring WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) WHITESPACE_STR = ' \t\n\r' - -def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook, - memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR): +def JSONObject(s_and_end, encoding, strict, scan_once, object_hook, + object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR): s, end = s_and_end pairs = [] pairs_append = pairs.append - # Backwards compatibility - if memo is None: - memo = {} - memo_get = memo.setdefault # Use a slice to prevent IndexError from being raised, the following # check will raise a more specific ValueError if the string is empty nextchar = s[end:end + 1] @@ -160,18 +168,18 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook, pairs = object_hook(pairs) return pairs, end + 1 elif nextchar != '"': - raise JSONDecodeError( - "Expecting property name enclosed in double quotes", s, end) + raise ValueError(errmsg( + "Expecting property name enclosed in double quotes", s, end)) end += 1 while True: - key, end = scanstring(s, end, strict) - key = memo_get(key, key) + key, end = scanstring(s, end, encoding, strict) + # To skip some function call overhead we optimize the fast paths where # the JSON key separator is ": " or just ":". if s[end:end + 1] != ':': end = _w(s, end).end() if s[end:end + 1] != ':': - raise JSONDecodeError("Expecting ':' delimiter", s, end) + raise ValueError(errmsg("Expecting ':' delimiter", s, end)) end += 1 try: @@ -184,9 +192,10 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook, try: value, end = scan_once(s, end) - except StopIteration as err: - raise JSONDecodeError("Expecting value", s, err.value) from None + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) pairs_append((key, value)) + try: nextchar = s[end] if nextchar in _ws: @@ -199,13 +208,23 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook, if nextchar == '}': break elif nextchar != ',': - raise JSONDecodeError("Expecting ',' delimiter", s, end - 1) - end = _w(s, end).end() - nextchar = s[end:end + 1] + raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1)) + + try: + nextchar = s[end] + if nextchar in _ws: + end += 1 + nextchar = s[end] + if nextchar in _ws: + end = _w(s, end + 1).end() + nextchar = s[end] + except IndexError: + nextchar = '' + end += 1 if nextchar != '"': - raise JSONDecodeError( - "Expecting property name enclosed in double quotes", s, end - 1) + raise ValueError(errmsg( + "Expecting property name enclosed in double quotes", s, end - 1)) if object_pairs_hook is not None: result = object_pairs_hook(pairs) return result, end @@ -228,8 +247,8 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): while True: try: value, end = scan_once(s, end) - except StopIteration as err: - raise JSONDecodeError("Expecting value", s, err.value) from None + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) _append(value) nextchar = s[end:end + 1] if nextchar in _ws: @@ -239,7 +258,7 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): if nextchar == ']': break elif nextchar != ',': - raise JSONDecodeError("Expecting ',' delimiter", s, end - 1) + raise ValueError(errmsg("Expecting ',' delimiter", s, end)) try: if s[end] in _ws: end += 1 @@ -250,7 +269,6 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): return values, end - class JSONDecoder(object): """Simple JSON <http://json.org> decoder @@ -263,9 +281,9 @@ class JSONDecoder(object): +---------------+-------------------+ | array | list | +---------------+-------------------+ - | string | str | + | string | unicode | +---------------+-------------------+ - | number (int) | int | + | number (int) | int, long | +---------------+-------------------+ | number (real) | float | +---------------+-------------------+ @@ -281,10 +299,17 @@ class JSONDecoder(object): """ - def __init__(self, *, object_hook=None, parse_float=None, + def __init__(self, encoding=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, strict=True, object_pairs_hook=None): - """``object_hook``, if specified, will be called with the result + """``encoding`` determines the encoding used to interpret any ``str`` + objects decoded by this instance (utf-8 by default). It has no + effect when decoding ``unicode`` objects. + + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as ``unicode``. + + ``object_hook``, if specified, will be called with the result of every JSON object decoded and its return value will be used in place of the given ``dict``. This can be used to provide custom deserializations (e.g. to support JSON-RPC class hinting). @@ -292,8 +317,10 @@ class JSONDecoder(object): ``object_pairs_hook``, if specified will be called with the result of every JSON object decoded with an ordered list of pairs. The return value of ``object_pairs_hook`` will be used instead of the ``dict``. - This feature can be used to implement custom decoders. - If ``object_hook`` is also defined, the ``object_pairs_hook`` takes + This feature can be used to implement custom decoders that rely on the + order that the key and value pairs are decoded (for example, + collections.OrderedDict will remember the order of insertion). If + ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. ``parse_float``, if specified, will be called with the string @@ -315,34 +342,34 @@ class JSONDecoder(object): characters will be allowed inside strings. Control characters in this context are those with character codes in the 0-31 range, including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``. + """ + self.encoding = encoding self.object_hook = object_hook + self.object_pairs_hook = object_pairs_hook self.parse_float = parse_float or float self.parse_int = parse_int or int self.parse_constant = parse_constant or _CONSTANTS.__getitem__ self.strict = strict - self.object_pairs_hook = object_pairs_hook self.parse_object = JSONObject self.parse_array = JSONArray self.parse_string = scanstring - self.memo = {} self.scan_once = scanner.make_scanner(self) - def decode(self, s, _w=WHITESPACE.match): - """Return the Python representation of ``s`` (a ``str`` instance - containing a JSON document). + """Return the Python representation of ``s`` (a ``str`` or ``unicode`` + instance containing a JSON document) """ obj, end = self.raw_decode(s, idx=_w(s, 0).end()) end = _w(s, end).end() if end != len(s): - raise JSONDecodeError("Extra data", s, end) + raise ValueError(errmsg("Extra data", s, end, len(s))) return obj def raw_decode(self, s, idx=0): - """Decode a JSON document from ``s`` (a ``str`` beginning with - a JSON document) and return a 2-tuple of the Python + """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` + beginning with a JSON document) and return a 2-tuple of the Python representation and the index in ``s`` where the document ended. This can be used to decode a JSON document from a string that may @@ -351,6 +378,6 @@ class JSONDecoder(object): """ try: obj, end = self.scan_once(s, idx) - except StopIteration as err: - raise JSONDecodeError("Expecting value", s, err.value) from None + except StopIteration: + raise ValueError("No JSON object could be decoded") return obj, end |