diff options
author | Christian Heimes <christian@cheimes.de> | 2008-05-08 14:29:10 (GMT) |
---|---|---|
committer | Christian Heimes <christian@cheimes.de> | 2008-05-08 14:29:10 (GMT) |
commit | 90540004d3e1cbc0bd5595838bca3d1970198120 (patch) | |
tree | 9006c98a1c71cf39761d3eb35d4c227540b24947 /Lib/json/scanner.py | |
parent | c848655eb05c38f9c31bca0df87f2013670a1efa (diff) | |
download | cpython-90540004d3e1cbc0bd5595838bca3d1970198120.zip cpython-90540004d3e1cbc0bd5595838bca3d1970198120.tar.gz cpython-90540004d3e1cbc0bd5595838bca3d1970198120.tar.bz2 |
Merged revisions 62734,62736,62748,62769 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
........
r62734 | brett.cannon | 2008-05-05 22:21:38 +0200 (Mon, 05 May 2008) | 5 lines
Add the 'json' package. Code taken from simplejson 1.9 and contributed by Bob
Ippolito.
Closes issue #2750.
........
r62736 | georg.brandl | 2008-05-05 22:53:39 +0200 (Mon, 05 May 2008) | 2 lines
Fix JSON module docs.
........
r62748 | benjamin.peterson | 2008-05-06 04:51:10 +0200 (Tue, 06 May 2008) | 2 lines
PEP 8 nits in json package
........
r62769 | christian.heimes | 2008-05-06 18:18:41 +0200 (Tue, 06 May 2008) | 2 lines
Intern static string
Use float constructors instead of magic code for float constants
........
Diffstat (limited to 'Lib/json/scanner.py')
-rw-r--r-- | Lib/json/scanner.py | 69 |
1 file changed, 69 insertions, 0 deletions
# Lib/json/scanner.py -- new file (mode 100644, index 0000000..4b065ab),
# reconstructed from the single-line "diff --git" rendering of this commit.
"""Iterator based sre token scanner

"""

import re
import sre_parse
import sre_compile
import sre_constants

from re import VERBOSE, MULTILINE, DOTALL
from sre_constants import BRANCH, SUBPATTERN

__all__ = ['Scanner', 'pattern']

FLAGS = (VERBOSE | MULTILINE | DOTALL)


class Scanner(object):
    """Tokenizer that merges several token patterns into one compiled regex.

    ``lexicon`` is an iterable of token callables.  Each one must expose a
    ``pattern`` attribute holding its regular expression source (the
    ``pattern`` decorator in this module sets it) and is later invoked by
    ``iterscan`` as ``token(match, context)``, returning a
    ``(value, next_pos)`` pair.
    """

    def __init__(self, lexicon, flags=FLAGS):
        """Build the compound pattern and the parallel action table.

        ``flags`` are the ``re`` flags used to parse every token pattern.
        Any ``sre_constants.error`` raised while parsing a token pattern
        propagates to the caller unchanged.
        """
        # Index 0 is a placeholder so that ``match.lastindex`` (1-based group
        # number) can be used directly as an index into ``self.actions``.
        self.actions = [None]
        # NOTE(review): this relies on private sre_parse/sre_compile
        # internals (``Pattern`` and the two-element SUBPATTERN payload),
        # which are not a documented API -- confirm against the target
        # interpreter version.
        # Combine phrases into a compound pattern
        s = sre_parse.Pattern()
        s.flags = flags
        p = []
        for idx, token in enumerate(lexicon):
            phrase = token.pattern
            # Each token becomes capture group ``idx + 1`` of the compound
            # pattern; parse errors simply propagate.
            subpattern = sre_parse.SubPattern(s,
                [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))])
            p.append(subpattern)
            self.actions.append(token)

        s.groups = len(p) + 1  # NOTE(guido): Added to make SRE validation work
        # One top-level alternation: group1 | group2 | ...
        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
        self.scanner = sre_compile.compile(p)

    def iterscan(self, string, idx=0, context=None):
        """Yield ``(value, end_idx)`` for each token matched in ``string``.

        Scanning starts at ``idx``.  For every match, the action registered
        for the matching group is called as ``action(match, context)`` and
        must return ``(value, next_pos)``.  ``end_idx`` is the end of the
        match, or ``next_pos`` when the action returns one that differs from
        the match end (scanning then resumes from ``next_pos``).  Matches
        whose action is ``None`` are consumed without yielding anything.
        Iteration stops at the first position where nothing matches, or when
        a match ends where the previous one ended (no progress).
        """
        match = self.scanner.scanner(string, idx).match
        actions = self.actions
        lastend = idx
        while True:
            m = match()
            if m is None:
                break
            matchend = m.end()
            if lastend == matchend:
                # No progress was made; stop instead of looping forever.
                break
            action = actions[m.lastindex]
            if action is not None:
                rval, next_pos = action(m, context)
                if next_pos is not None and next_pos != matchend:
                    # "fast forward" the scanner
                    matchend = next_pos
                    match = self.scanner.scanner(string, matchend).match
                yield rval, matchend
            lastend = matchend


def pattern(pattern, flags=FLAGS):
    """Return a decorator that tags a token function with its regex.

    The decorated function is returned unchanged apart from two new
    attributes: ``pattern`` (the regex source string) and ``regex`` (that
    source compiled with ``flags``).
    """
    def decorator(fn):
        fn.pattern = pattern
        fn.regex = re.compile(pattern, flags)
        return fn
    return decorator