From 1296a8d77e6701d18090c24853cd098f12ef069a Mon Sep 17 00:00:00 2001 From: Fredrik Lundh Date: Sun, 21 Oct 2001 18:04:11 +0000 Subject: sre.Scanner fixes (from Greg Chapman). also added a Scanner sanity check to the test suite. added a few missing exception checks in the _sre module --- Lib/sre.py | 11 ++++++----- Lib/test/test_sre.py | 20 ++++++++++++++++++++ Modules/_sre.c | 17 +++++++++++++++++ 3 files changed, 43 insertions(+), 5 deletions(-) diff --git a/Lib/sre.py b/Lib/sre.py index 9c3f4b3..701334e 100644 --- a/Lib/sre.py +++ b/Lib/sre.py @@ -330,15 +330,16 @@ copy_reg.pickle(_pattern_type, _pickle, _compile) # experimental stuff (see python-dev discussions for details) class Scanner: - def __init__(self, lexicon): + def __init__(self, lexicon, flags=0): from sre_constants import BRANCH, SUBPATTERN self.lexicon = lexicon # combine phrases into a compound pattern p = [] s = sre_parse.Pattern() + s.flags = flags for phrase, action in lexicon: p.append(sre_parse.SubPattern(s, [ - (SUBPATTERN, (len(p), sre_parse.parse(phrase))), + (SUBPATTERN, (len(p)+1, sre_parse.parse(phrase, flags))), ])) p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) s.groups = len(p) @@ -346,16 +347,16 @@ class Scanner: def scan(self, string): result = [] append = result.append - match = self.scanner.match + match = self.scanner.scanner(string).match i = 0 while 1: - m = match(string, i) + m = match() if not m: break j = m.end() if i == j: break - action = self.lexicon[m.lastindex][1] + action = self.lexicon[m.lastindex-1][1] if callable(action): self.match = m action = action(self, m.group()) diff --git a/Lib/test/test_sre.py b/Lib/test/test_sre.py index 12a66f9..e879151 100644 --- a/Lib/test/test_sre.py +++ b/Lib/test/test_sre.py @@ -224,6 +224,26 @@ test(r"""pat.match(p) is not None""", 1) test(r"""pat.match(p).span()""", (0,256)) if verbose: + print 'Running tests on sre.Scanner' + +def s_ident(scanner, token): return token +def s_operator(scanner, token): return "op%s" % token +def s_float(scanner, token): return float(token) +def s_int(scanner, token): return int(token) + +scanner = sre.Scanner([ + (r"[a-zA-Z_]\w*", s_ident), + (r"\d+\.\d*", s_float), + (r"\d+", s_int), + (r"=|\+|-|\*|/", s_operator), + (r"\s+", None), + ]) + +# sanity check +test('scanner.scan("sum = 3*foo + 312.50 + bar")', + (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5, 'op+', 'bar'], '')) + +if verbose: print 'Pickling a SRE_Pattern instance' try: diff --git a/Modules/_sre.c b/Modules/_sre.c index be93d93..3a2d47c 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -1800,6 +1800,11 @@ join(PyObject* list, PyObject* pattern) return NULL; } args = PyTuple_New(1); + if (!args) { + Py_DECREF(function); + Py_DECREF(joiner); + return NULL; + } PyTuple_SET_ITEM(args, 0, list); result = PyObject_CallObject(function, args); Py_DECREF(args); /* also removes list */ @@ -1896,6 +1901,10 @@ pattern_findall(PatternObject* self, PyObject* args, PyObject* kw) return NULL; list = PyList_New(0); + if (!list) { + state_fini(&state); + return NULL; + } while (state.start <= state.end) { @@ -1995,6 +2004,10 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw) return NULL; list = PyList_New(0); + if (!list) { + state_fini(&state); + return NULL; + } n = 0; last = state.start; @@ -2110,6 +2123,10 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string, return NULL; list = PyList_New(0); + if (!list) { + state_fini(&state); + return NULL; + } n = i = 0; -- cgit v0.12