diff options
Diffstat (limited to 'Lib/json/scanner.py')
-rw-r--r-- | Lib/json/scanner.py | 115 |
1 files changed, 56 insertions, 59 deletions
diff --git a/Lib/json/scanner.py b/Lib/json/scanner.py index 4b065ab..00c5470 100644 --- a/Lib/json/scanner.py +++ b/Lib/json/scanner.py @@ -1,69 +1,66 @@ -"""Iterator based sre token scanner - +"""JSON token scanner """ - import re -import sre_parse -import sre_compile -import sre_constants - -from re import VERBOSE, MULTILINE, DOTALL -from sre_constants import BRANCH, SUBPATTERN +try: + from _json import make_scanner as c_make_scanner +except ImportError: + c_make_scanner = None -__all__ = ['Scanner', 'pattern'] +__all__ = ['make_scanner'] -FLAGS = (VERBOSE | MULTILINE | DOTALL) +NUMBER_RE = re.compile( + r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', + (re.VERBOSE | re.MULTILINE | re.DOTALL)) -class Scanner(object): - def __init__(self, lexicon, flags=FLAGS): - self.actions = [None] - # Combine phrases into a compound pattern - s = sre_parse.Pattern() - s.flags = flags - p = [] - for idx, token in enumerate(lexicon): - phrase = token.pattern - try: - subpattern = sre_parse.SubPattern(s, - [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))]) - except sre_constants.error: - raise - p.append(subpattern) - self.actions.append(token) +def py_make_scanner(context): + parse_object = context.parse_object + parse_array = context.parse_array + parse_string = context.parse_string + match_number = NUMBER_RE.match + encoding = context.encoding + strict = context.strict + parse_float = context.parse_float + parse_int = context.parse_int + parse_constant = context.parse_constant + object_hook = context.object_hook - s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work - p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) - self.scanner = sre_compile.compile(p) + def _scan_once(string, idx): + try: + nextchar = string[idx] + except IndexError: + raise StopIteration - def iterscan(self, string, idx=0, context=None): - """Yield match, end_idx for each match + if nextchar == '"': + return parse_string(string, idx + 1, encoding, strict) + elif nextchar == '{': + return parse_object((string, idx + 1), encoding, strict, + _scan_once, object_hook) + elif nextchar == '[': + return parse_array((string, idx + 1), _scan_once) + elif nextchar == 'n' and string[idx:idx + 4] == 'null': + return None, idx + 4 + elif nextchar == 't' and string[idx:idx + 4] == 'true': + return True, idx + 4 + elif nextchar == 'f' and string[idx:idx + 5] == 'false': + return False, idx + 5 - """ - match = self.scanner.scanner(string, idx).match - actions = self.actions - lastend = idx - end = len(string) - while True: - m = match() - if m is None: - break - matchbegin, matchend = m.span() - if lastend == matchend: - break - action = actions[m.lastindex] - if action is not None: - rval, next_pos = action(m, context) - if next_pos is not None and next_pos != matchend: - # "fast forward" the scanner - matchend = next_pos - match = self.scanner.scanner(string, matchend).match - yield rval, matchend - lastend = matchend + m = match_number(string, idx) + if m is not None: + integer, frac, exp = m.groups() + if frac or exp: + res = parse_float(integer + (frac or '') + (exp or '')) + else: + res = parse_int(integer) + return res, m.end() + elif nextchar == 'N' and string[idx:idx + 3] == 'NaN': + return parse_constant('NaN'), idx + 3 + elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity': + return parse_constant('Infinity'), idx + 8 + elif nextchar == '-' and string[idx:idx + 9] == '-Infinity': + return parse_constant('-Infinity'), idx + 9 + else: + raise StopIteration + return _scan_once -def pattern(pattern, flags=FLAGS): - def decorator(fn): - fn.pattern = pattern - fn.regex = re.compile(pattern, flags) - return fn - return decorator +make_scanner = c_make_scanner or py_make_scanner |