diff options
author | Christian Heimes <christian@cheimes.de> | 2008-05-08 14:29:10 (GMT) |
---|---|---|
committer | Christian Heimes <christian@cheimes.de> | 2008-05-08 14:29:10 (GMT) |
commit | 90540004d3e1cbc0bd5595838bca3d1970198120 (patch) | |
tree | 9006c98a1c71cf39761d3eb35d4c227540b24947 /Lib/json/decoder.py | |
parent | c848655eb05c38f9c31bca0df87f2013670a1efa (diff) | |
download | cpython-90540004d3e1cbc0bd5595838bca3d1970198120.zip cpython-90540004d3e1cbc0bd5595838bca3d1970198120.tar.gz cpython-90540004d3e1cbc0bd5595838bca3d1970198120.tar.bz2 |
Merged revisions 62734,62736,62748,62769 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
........
r62734 | brett.cannon | 2008-05-05 22:21:38 +0200 (Mon, 05 May 2008) | 5 lines
Add the 'json' package. Code taken from simplejson 1.9 and contributed by Bob
Ippolito.
Closes issue #2750.
........
r62736 | georg.brandl | 2008-05-05 22:53:39 +0200 (Mon, 05 May 2008) | 2 lines
Fix JSON module docs.
........
r62748 | benjamin.peterson | 2008-05-06 04:51:10 +0200 (Tue, 06 May 2008) | 2 lines
PEP 8 nits in json package
........
r62769 | christian.heimes | 2008-05-06 18:18:41 +0200 (Tue, 06 May 2008) | 2 lines
Intern static string
Use float constructors instead of magic code for float constants
........
Diffstat (limited to 'Lib/json/decoder.py')
-rw-r--r-- | Lib/json/decoder.py | 339 |
1 files changed, 339 insertions, 0 deletions
diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py new file mode 100644 index 0000000..5283eae --- /dev/null +++ b/Lib/json/decoder.py @@ -0,0 +1,339 @@ +"""Implementation of JSONDecoder +""" + +import re +import sys + +from json.scanner import Scanner, pattern +try: + from _json import scanstring as c_scanstring +except ImportError: + c_scanstring = None + +__all__ = ['JSONDecoder'] + +FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL + +NaN, PosInf, NegInf = float('nan'), float('inf'), float('-inf') + + +def linecol(doc, pos): + lineno = doc.count('\n', 0, pos) + 1 + if lineno == 1: + colno = pos + else: + colno = pos - doc.rindex('\n', 0, pos) + return lineno, colno + + +def errmsg(msg, doc, pos, end=None): + lineno, colno = linecol(doc, pos) + if end is None: + fmt = '{0}: line {1} column {2} (char {3})' + return fmt.format(msg, lineno, colno, pos) + endlineno, endcolno = linecol(doc, end) + fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' + return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) + + +_CONSTANTS = { + '-Infinity': NegInf, + 'Infinity': PosInf, + 'NaN': NaN, + 'true': True, + 'false': False, + 'null': None, +} + + +def JSONConstant(match, context, c=_CONSTANTS): + s = match.group(0) + fn = getattr(context, 'parse_constant', None) + if fn is None: + rval = c[s] + else: + rval = fn(s) + return rval, None +pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant) + + +def JSONNumber(match, context): + match = JSONNumber.regex.match(match.string, *match.span()) + integer, frac, exp = match.groups() + if frac or exp: + fn = getattr(context, 'parse_float', None) or float + res = fn(integer + (frac or '') + (exp or '')) + else: + fn = getattr(context, 'parse_int', None) or int + res = fn(integer) + return res, None +pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber) + + +STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) +BACKSLASH = { + '"': '"', '\\': '\\', '/': '/', + 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t', +} + +DEFAULT_ENCODING = "utf-8" + + +def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): + if encoding is None: + encoding = DEFAULT_ENCODING + chunks = [] + _append = chunks.append + begin = end - 1 + while 1: + chunk = _m(s, end) + if chunk is None: + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) + end = chunk.end() + content, terminator = chunk.groups() + if content: + if not isinstance(content, str): + content = str(content, encoding) + _append(content) + if terminator == '"': + break + elif terminator != '\\': + if strict: + msg = "Invalid control character {0!r} at".format(terminator) + raise ValueError(errmsg(msg, s, end)) + else: + _append(terminator) + continue + try: + esc = s[end] + except IndexError: + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) + if esc != 'u': + try: + m = _b[esc] + except KeyError: + msg = "Invalid \\escape: {0!r}".format(esc) + raise ValueError(errmsg(msg, s, end)) + end += 1 + else: + esc = s[end + 1:end + 5] + next_end = end + 5 + msg = "Invalid \\uXXXX escape" + try: + if len(esc) != 4: + raise ValueError + uni = int(esc, 16) + if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: + msg = "Invalid \\uXXXX\\uXXXX surrogate pair" + if not s[end + 5:end + 7] == '\\u': + raise ValueError + esc2 = s[end + 7:end + 11] + if len(esc2) != 4: + raise ValueError + uni2 = int(esc2, 16) + uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) + next_end += 6 + m = chr(uni) + except ValueError: + raise ValueError(errmsg(msg, s, end)) + end = next_end + _append(m) + return ''.join(chunks), end + + +# Use speedup +if c_scanstring is not None: + scanstring = c_scanstring +else: + scanstring = py_scanstring + +def JSONString(match, context): + encoding = getattr(context, 'encoding', None) + strict = getattr(context, 'strict', True) + return scanstring(match.string, match.end(), encoding, strict) +pattern(r'"')(JSONString) + + +WHITESPACE = re.compile(r'\s*', FLAGS) + + +def JSONObject(match, context, _w=WHITESPACE.match): + pairs = {} + s = match.string + end = _w(s, match.end()).end() + nextchar = s[end:end + 1] + # Trivial empty object + if nextchar == '}': + return pairs, end + 1 + if nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end)) + end += 1 + encoding = getattr(context, 'encoding', None) + strict = getattr(context, 'strict', True) + iterscan = JSONScanner.iterscan + while True: + key, end = scanstring(s, end, encoding, strict) + end = _w(s, end).end() + if s[end:end + 1] != ':': + raise ValueError(errmsg("Expecting : delimiter", s, end)) + end = _w(s, end + 1).end() + try: + value, end = next(iterscan(s, idx=end, context=context)) + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + pairs[key] = value + end = _w(s, end).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar == '}': + break + if nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) + end = _w(s, end).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end - 1)) + object_hook = getattr(context, 'object_hook', None) + if object_hook is not None: + pairs = object_hook(pairs) + return pairs, end +pattern(r'{')(JSONObject) + + +def JSONArray(match, context, _w=WHITESPACE.match): + values = [] + s = match.string + end = _w(s, match.end()).end() + # Look-ahead for trivial empty array + nextchar = s[end:end + 1] + if nextchar == ']': + return values, end + 1 + iterscan = JSONScanner.iterscan + while True: + try: + value, end = next(iterscan(s, idx=end, context=context)) + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + values.append(value) + end = _w(s, end).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar == ']': + break + if nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end)) + end = _w(s, end).end() + return values, end +pattern(r'\[')(JSONArray) + + +ANYTHING = [ + JSONObject, + JSONArray, + JSONString, + JSONConstant, + JSONNumber, +] + +JSONScanner = Scanner(ANYTHING) + + +class JSONDecoder(object): + """Simple JSON <http://json.org> decoder + + Performs the following translations in decoding by default: + + +---------------+-------------------+ + | JSON | Python | + +===============+===================+ + | object | dict | + +---------------+-------------------+ + | array | list | + +---------------+-------------------+ + | string | unicode | + +---------------+-------------------+ + | number (int) | int, long | + +---------------+-------------------+ + | number (real) | float | + +---------------+-------------------+ + | true | True | + +---------------+-------------------+ + | false | False | + +---------------+-------------------+ + | null | None | + +---------------+-------------------+ + + It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as + their corresponding ``float`` values, which is outside the JSON spec. + """ + + _scanner = Scanner(ANYTHING) + __all__ = ['__init__', 'decode', 'raw_decode'] + + def __init__(self, encoding=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, strict=True): + """``encoding`` determines the encoding used to interpret any ``str`` + objects decoded by this instance (utf-8 by default). It has no + effect when decoding ``unicode`` objects. + + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as ``unicode``. + + ``object_hook``, if specified, will be called with the result of + every JSON object decoded and its return value will be used in + place of the given ``dict``. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). + + ``parse_float``, if specified, will be called with the string + of every JSON float to be decoded. By default this is equivalent to + float(num_str). This can be used to use another datatype or parser + for JSON floats (e.g. decimal.Decimal). + + ``parse_int``, if specified, will be called with the string + of every JSON int to be decoded. By default this is equivalent to + int(num_str). This can be used to use another datatype or parser + for JSON integers (e.g. float). + + ``parse_constant``, if specified, will be called with one of the + following strings: -Infinity, Infinity, NaN, null, true, false. + This can be used to raise an exception if invalid JSON numbers + are encountered. + + """ + self.encoding = encoding + self.object_hook = object_hook + self.parse_float = parse_float + self.parse_int = parse_int + self.parse_constant = parse_constant + self.strict = strict + + def decode(self, s, _w=WHITESPACE.match): + """ + Return the Python representation of ``s`` (a ``str`` or ``unicode`` + instance containing a JSON document) + + """ + obj, end = self.raw_decode(s, idx=_w(s, 0).end()) + end = _w(s, end).end() + if end != len(s): + raise ValueError(errmsg("Extra data", s, end, len(s))) + return obj + + def raw_decode(self, s, **kw): + """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning + with a JSON document) and return a 2-tuple of the Python + representation and the index in ``s`` where the document ended. + + This can be used to decode a JSON document from a string that may + have extraneous data at the end. + + """ + kw.setdefault('context', self) + try: + obj, end = next(self._scanner.iterscan(s, **kw)) + except StopIteration: + raise ValueError("No JSON object could be decoded") + return obj, end |