summaryrefslogtreecommitdiffstats
path: root/Lib/json/decoder.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/json/decoder.py')
-rw-r--r--Lib/json/decoder.py185
1 files changed, 106 insertions, 79 deletions
diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py
index d7d8244..5141f87 100644
--- a/Lib/json/decoder.py
+++ b/Lib/json/decoder.py
@@ -1,6 +1,8 @@
"""Implementation of JSONDecoder
"""
import re
+import sys
+import struct
from json import scanner
try:
@@ -8,39 +10,40 @@ try:
except ImportError:
c_scanstring = None
-__all__ = ['JSONDecoder', 'JSONDecodeError']
+__all__ = ['JSONDecoder']
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
-NaN = float('nan')
-PosInf = float('inf')
-NegInf = float('-inf')
+def _floatconstants():
+ nan, = struct.unpack('>d', b'\x7f\xf8\x00\x00\x00\x00\x00\x00')
+ inf, = struct.unpack('>d', b'\x7f\xf0\x00\x00\x00\x00\x00\x00')
+ return nan, inf, -inf
+NaN, PosInf, NegInf = _floatconstants()
-class JSONDecodeError(ValueError):
- """Subclass of ValueError with the following additional properties:
- msg: The unformatted error message
- doc: The JSON document being parsed
- pos: The start index of doc where parsing failed
- lineno: The line corresponding to pos
- colno: The column corresponding to pos
+def linecol(doc, pos):
+ lineno = doc.count('\n', 0, pos) + 1
+ if lineno == 1:
+ colno = pos + 1
+ else:
+ colno = pos - doc.rindex('\n', 0, pos)
+ return lineno, colno
- """
- # Note that this exception is used from _json
- def __init__(self, msg, doc, pos):
- lineno = doc.count('\n', 0, pos) + 1
- colno = pos - doc.rfind('\n', 0, pos)
- errmsg = '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
- ValueError.__init__(self, errmsg)
- self.msg = msg
- self.doc = doc
- self.pos = pos
- self.lineno = lineno
- self.colno = colno
-
- def __reduce__(self):
- return self.__class__, (self.msg, self.doc, self.pos)
+
+def errmsg(msg, doc, pos, end=None):
+ # Note that this function is called from _json
+ lineno, colno = linecol(doc, pos)
+ if end is None:
+ fmt = '{0}: line {1} column {2} (char {3})'
+ return fmt.format(msg, lineno, colno, pos)
+ #fmt = '%s: line %d column %d (char %d)'
+ #return fmt % (msg, lineno, colno, pos)
+ endlineno, endcolno = linecol(doc, end)
+ fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
+ return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
+ #fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
+ #return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
_CONSTANTS = {
@@ -49,13 +52,14 @@ _CONSTANTS = {
'NaN': NaN,
}
-
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
BACKSLASH = {
- '"': '"', '\\': '\\', '/': '/',
- 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
+ '"': u'"', '\\': u'\\', '/': u'/',
+ 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}
+DEFAULT_ENCODING = "utf-8"
+
def _decode_uXXXX(s, pos):
esc = s[pos + 1:pos + 5]
if len(esc) == 4 and esc[1] not in 'xX':
@@ -64,9 +68,9 @@ def _decode_uXXXX(s, pos):
except ValueError:
pass
msg = "Invalid \\uXXXX escape"
- raise JSONDecodeError(msg, s, pos)
+ raise ValueError(errmsg(msg, s, pos))
-def py_scanstring(s, end, strict=True,
+def py_scanstring(s, end, encoding=None, strict=True,
_b=BACKSLASH, _m=STRINGCHUNK.match):
"""Scan the string s for a JSON string. End is the index of the
character in s after the quote that started the JSON string.
@@ -76,17 +80,22 @@ def py_scanstring(s, end, strict=True,
Returns a tuple of the decoded string and the index of the character in s
after the end quote."""
+ if encoding is None:
+ encoding = DEFAULT_ENCODING
chunks = []
_append = chunks.append
begin = end - 1
while 1:
chunk = _m(s, end)
if chunk is None:
- raise JSONDecodeError("Unterminated string starting at", s, begin)
+ raise ValueError(
+ errmsg("Unterminated string starting at", s, begin))
end = chunk.end()
content, terminator = chunk.groups()
# Content is contains zero or more unescaped string characters
if content:
+ if not isinstance(content, unicode):
+ content = unicode(content, encoding)
_append(content)
# Terminator is the end of string, a literal control character,
# or a backslash denoting that an escape sequence follows
@@ -96,34 +105,38 @@ def py_scanstring(s, end, strict=True,
if strict:
#msg = "Invalid control character %r at" % (terminator,)
msg = "Invalid control character {0!r} at".format(terminator)
- raise JSONDecodeError(msg, s, end)
+ raise ValueError(errmsg(msg, s, end))
else:
_append(terminator)
continue
try:
esc = s[end]
except IndexError:
- raise JSONDecodeError("Unterminated string starting at",
- s, begin) from None
+ raise ValueError(
+ errmsg("Unterminated string starting at", s, begin))
# If not a unicode escape sequence, must be in the lookup table
if esc != 'u':
try:
char = _b[esc]
except KeyError:
- msg = "Invalid \\escape: {0!r}".format(esc)
- raise JSONDecodeError(msg, s, end)
+ msg = "Invalid \\escape: " + repr(esc)
+ raise ValueError(errmsg(msg, s, end))
end += 1
else:
+ # Unicode escape sequence
uni = _decode_uXXXX(s, end)
end += 5
- if 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u':
+ # Check for surrogate pair on UCS-4 systems
+ if sys.maxunicode > 65535 and \
+ 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u':
uni2 = _decode_uXXXX(s, end + 1)
if 0xdc00 <= uni2 <= 0xdfff:
uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
end += 6
- char = chr(uni)
+ char = unichr(uni)
+ # Append the unescaped character
_append(char)
- return ''.join(chunks), end
+ return u''.join(chunks), end
# Use speedup if available
@@ -132,16 +145,11 @@ scanstring = c_scanstring or py_scanstring
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
WHITESPACE_STR = ' \t\n\r'
-
-def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
- memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
+def JSONObject(s_and_end, encoding, strict, scan_once, object_hook,
+ object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
s, end = s_and_end
pairs = []
pairs_append = pairs.append
- # Backwards compatibility
- if memo is None:
- memo = {}
- memo_get = memo.setdefault
# Use a slice to prevent IndexError from being raised, the following
# check will raise a more specific ValueError if the string is empty
nextchar = s[end:end + 1]
@@ -160,18 +168,18 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
pairs = object_hook(pairs)
return pairs, end + 1
elif nextchar != '"':
- raise JSONDecodeError(
- "Expecting property name enclosed in double quotes", s, end)
+ raise ValueError(errmsg(
+ "Expecting property name enclosed in double quotes", s, end))
end += 1
while True:
- key, end = scanstring(s, end, strict)
- key = memo_get(key, key)
+ key, end = scanstring(s, end, encoding, strict)
+
# To skip some function call overhead we optimize the fast paths where
# the JSON key separator is ": " or just ":".
if s[end:end + 1] != ':':
end = _w(s, end).end()
if s[end:end + 1] != ':':
- raise JSONDecodeError("Expecting ':' delimiter", s, end)
+ raise ValueError(errmsg("Expecting ':' delimiter", s, end))
end += 1
try:
@@ -184,9 +192,10 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
try:
value, end = scan_once(s, end)
- except StopIteration as err:
- raise JSONDecodeError("Expecting value", s, err.value) from None
+ except StopIteration:
+ raise ValueError(errmsg("Expecting object", s, end))
pairs_append((key, value))
+
try:
nextchar = s[end]
if nextchar in _ws:
@@ -199,13 +208,23 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
if nextchar == '}':
break
elif nextchar != ',':
- raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
- end = _w(s, end).end()
- nextchar = s[end:end + 1]
+ raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))
+
+ try:
+ nextchar = s[end]
+ if nextchar in _ws:
+ end += 1
+ nextchar = s[end]
+ if nextchar in _ws:
+ end = _w(s, end + 1).end()
+ nextchar = s[end]
+ except IndexError:
+ nextchar = ''
+
end += 1
if nextchar != '"':
- raise JSONDecodeError(
- "Expecting property name enclosed in double quotes", s, end - 1)
+ raise ValueError(errmsg(
+ "Expecting property name enclosed in double quotes", s, end - 1))
if object_pairs_hook is not None:
result = object_pairs_hook(pairs)
return result, end
@@ -228,8 +247,8 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
while True:
try:
value, end = scan_once(s, end)
- except StopIteration as err:
- raise JSONDecodeError("Expecting value", s, err.value) from None
+ except StopIteration:
+ raise ValueError(errmsg("Expecting object", s, end))
_append(value)
nextchar = s[end:end + 1]
if nextchar in _ws:
@@ -239,7 +258,7 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
if nextchar == ']':
break
elif nextchar != ',':
- raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
+ raise ValueError(errmsg("Expecting ',' delimiter", s, end))
try:
if s[end] in _ws:
end += 1
@@ -250,7 +269,6 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
return values, end
-
class JSONDecoder(object):
"""Simple JSON <http://json.org> decoder
@@ -263,9 +281,9 @@ class JSONDecoder(object):
+---------------+-------------------+
| array | list |
+---------------+-------------------+
- | string | str |
+ | string | unicode |
+---------------+-------------------+
- | number (int) | int |
+ | number (int) | int, long |
+---------------+-------------------+
| number (real) | float |
+---------------+-------------------+
@@ -281,10 +299,17 @@ class JSONDecoder(object):
"""
- def __init__(self, *, object_hook=None, parse_float=None,
+ def __init__(self, encoding=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, strict=True,
object_pairs_hook=None):
- """``object_hook``, if specified, will be called with the result
+ """``encoding`` determines the encoding used to interpret any ``str``
+ objects decoded by this instance (utf-8 by default). It has no
+ effect when decoding ``unicode`` objects.
+
+ Note that currently only encodings that are a superset of ASCII work,
+ strings of other encodings should be passed in as ``unicode``.
+
+ ``object_hook``, if specified, will be called with the result
of every JSON object decoded and its return value will be used in
place of the given ``dict``. This can be used to provide custom
deserializations (e.g. to support JSON-RPC class hinting).
@@ -292,8 +317,10 @@ class JSONDecoder(object):
``object_pairs_hook``, if specified will be called with the result of
every JSON object decoded with an ordered list of pairs. The return
value of ``object_pairs_hook`` will be used instead of the ``dict``.
- This feature can be used to implement custom decoders.
- If ``object_hook`` is also defined, the ``object_pairs_hook`` takes
+ This feature can be used to implement custom decoders that rely on the
+ order that the key and value pairs are decoded (for example,
+ collections.OrderedDict will remember the order of insertion). If
+ ``object_hook`` is also defined, the ``object_pairs_hook`` takes
priority.
``parse_float``, if specified, will be called with the string
@@ -315,34 +342,34 @@ class JSONDecoder(object):
characters will be allowed inside strings. Control characters in
this context are those with character codes in the 0-31 range,
including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.
+
"""
+ self.encoding = encoding
self.object_hook = object_hook
+ self.object_pairs_hook = object_pairs_hook
self.parse_float = parse_float or float
self.parse_int = parse_int or int
self.parse_constant = parse_constant or _CONSTANTS.__getitem__
self.strict = strict
- self.object_pairs_hook = object_pairs_hook
self.parse_object = JSONObject
self.parse_array = JSONArray
self.parse_string = scanstring
- self.memo = {}
self.scan_once = scanner.make_scanner(self)
-
def decode(self, s, _w=WHITESPACE.match):
- """Return the Python representation of ``s`` (a ``str`` instance
- containing a JSON document).
+ """Return the Python representation of ``s`` (a ``str`` or ``unicode``
+ instance containing a JSON document)
"""
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
end = _w(s, end).end()
if end != len(s):
- raise JSONDecodeError("Extra data", s, end)
+ raise ValueError(errmsg("Extra data", s, end, len(s)))
return obj
def raw_decode(self, s, idx=0):
- """Decode a JSON document from ``s`` (a ``str`` beginning with
- a JSON document) and return a 2-tuple of the Python
+ """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
+ beginning with a JSON document) and return a 2-tuple of the Python
representation and the index in ``s`` where the document ended.
This can be used to decode a JSON document from a string that may
@@ -351,6 +378,6 @@ class JSONDecoder(object):
"""
try:
obj, end = self.scan_once(s, idx)
- except StopIteration as err:
- raise JSONDecodeError("Expecting value", s, err.value) from None
+ except StopIteration:
+ raise ValueError("No JSON object could be decoded")
return obj, end