diff options
author | Fredrik Lundh <fredrik@pythonware.com> | 2001-10-21 18:04:11 (GMT) |
---|---|---|
committer | Fredrik Lundh <fredrik@pythonware.com> | 2001-10-21 18:04:11 (GMT) |
commit | 1296a8d77e6701d18090c24853cd098f12ef069a (patch) | |
tree | 6b23bb96d99a6a016738468514be09dfbacf3f0d /Lib | |
parent | bec95b9d8825b39cff46a8c645fa0eeb8409854e (diff) | |
download | cpython-1296a8d77e6701d18090c24853cd098f12ef069a.zip cpython-1296a8d77e6701d18090c24853cd098f12ef069a.tar.gz cpython-1296a8d77e6701d18090c24853cd098f12ef069a.tar.bz2 |
sre.Scanner fixes (from Greg Chapman). Also added a Scanner sanity
check to the test suite.
Added a few missing exception checks in the _sre module.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/sre.py | 11 | ||||
-rw-r--r-- | Lib/test/test_sre.py | 20 |
2 files changed, 26 insertions, 5 deletions
@@ -330,15 +330,16 @@ copy_reg.pickle(_pattern_type, _pickle, _compile) # experimental stuff (see python-dev discussions for details) class Scanner: - def __init__(self, lexicon): + def __init__(self, lexicon, flags=0): from sre_constants import BRANCH, SUBPATTERN self.lexicon = lexicon # combine phrases into a compound pattern p = [] s = sre_parse.Pattern() + s.flags = flags for phrase, action in lexicon: p.append(sre_parse.SubPattern(s, [ - (SUBPATTERN, (len(p), sre_parse.parse(phrase))), + (SUBPATTERN, (len(p)+1, sre_parse.parse(phrase, flags))), ])) p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) s.groups = len(p) @@ -346,16 +347,16 @@ class Scanner: def scan(self, string): result = [] append = result.append - match = self.scanner.match + match = self.scanner.scanner(string).match i = 0 while 1: - m = match(string, i) + m = match() if not m: break j = m.end() if i == j: break - action = self.lexicon[m.lastindex][1] + action = self.lexicon[m.lastindex-1][1] if callable(action): self.match = m action = action(self, m.group()) diff --git a/Lib/test/test_sre.py b/Lib/test/test_sre.py index 12a66f9..e879151 100644 --- a/Lib/test/test_sre.py +++ b/Lib/test/test_sre.py @@ -224,6 +224,26 @@ test(r"""pat.match(p) is not None""", 1) test(r"""pat.match(p).span()""", (0,256)) if verbose: + print 'Running tests on sre.Scanner' + +def s_ident(scanner, token): return token +def s_operator(scanner, token): return "op%s" % token +def s_float(scanner, token): return float(token) +def s_int(scanner, token): return int(token) + +scanner = sre.Scanner([ + (r"[a-zA-Z_]\w*", s_ident), + (r"\d+\.\d*", s_float), + (r"\d+", s_int), + (r"=|\+|-|\*|/", s_operator), + (r"\s+", None), + ]) + +# sanity check +test('scanner.scan("sum = 3*foo + 312.50 + bar")', + (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5, 'op+', 'bar'], '')) + +if verbose: print 'Pickling a SRE_Pattern instance' try: |