summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Fredrik Lundh <fredrik@pythonware.com>, 2000-10-28 19:30:41 (GMT)
committer: Fredrik Lundh <fredrik@pythonware.com>, 2000-10-28 19:30:41 (GMT)
commit: ebc37b28fa3fd66336116447b7c2b9b1c2614630 (patch)
tree: e977fafcc576a01ac2e489d556990628dd5f4f35
parent: 8fdb6383dc6ade2eba4c402af571f689bee2b610 (diff)
download: cpython-ebc37b28fa3fd66336116447b7c2b9b1c2614630.zip
cpython-ebc37b28fa3fd66336116447b7c2b9b1c2614630.tar.gz
cpython-ebc37b28fa3fd66336116447b7c2b9b1c2614630.tar.bz2
-- properly reset groups in findall (bug #117612)
-- fixed negative lookbehind to work correctly at the beginning of the target string (bug #117242)
-- improved syntax check; you can no longer refer to a group inside itself (bug #110866)
-rw-r--r-- Lib/sre_parse.py 14
-rw-r--r-- Lib/test/test_sre.py 3
-rw-r--r-- Modules/_sre.c 33
3 files changed, 33 insertions, 17 deletions
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index 7c36d4f..5334e06 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -62,14 +62,20 @@ class Pattern:
# master pattern object. keeps track of global attributes
def __init__(self):
self.flags = 0
+ self.open = []
self.groups = 1
self.groupdict = {}
- def getgroup(self, name=None):
+ def opengroup(self, name=None):
gid = self.groups
self.groups = gid + 1
if name:
self.groupdict[name] = gid
+ self.open.append(gid)
return gid
+ def closegroup(self, gid):
+ self.open.remove(gid)
+ def checkgroup(self, gid):
+ return gid < self.groups and gid not in self.open
class SubPattern:
# a subpattern, in intermediate form
@@ -278,6 +284,8 @@ def _escape(source, escape, state):
# got at least one decimal digit; this is a group reference
group = _group(escape, state.groups)
if group:
+ if not state.checkgroup(group):
+ raise error, "cannot refer to open group"
return GROUPREF, group
raise ValueError
if len(escape) == 2:
@@ -547,10 +555,12 @@ def _parse(source, state):
# anonymous group
group = None
else:
- group = state.getgroup(name)
+ group = state.opengroup(name)
p = _parse_sub(source, state)
if not source.match(")"):
raise error, "unbalanced parenthesis"
+ if group is not None:
+ state.closegroup(group)
subpattern.append((SUBPATTERN, (group, p)))
else:
while 1:
diff --git a/Lib/test/test_sre.py b/Lib/test/test_sre.py
index 373efa0..b9692a1 100644
--- a/Lib/test/test_sre.py
+++ b/Lib/test/test_sre.py
@@ -167,6 +167,9 @@ test(r"""sre.findall(r"(:)(:*)", "a:b::c:::d")""",
[(":", ""), (":", ":"), (":", "::")])
test(r"""sre.findall(r"(a)|(b)", "abc")""", [("a", ""), ("", "b")])
+# bug 117612
+test(r"""sre.findall(r"(a|(b))", "aba")""", [("a", ""),("b", "b"),("a", "")])
+
if verbose:
print "Running tests on sre.match"
diff --git a/Modules/_sre.c b/Modules/_sre.c
index b72b8b2..954547f 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -5,14 +5,14 @@
*
* partial history:
* 1999-10-24 fl created (based on existing template matcher code)
- * 2000-03-06 fl first alpha, sort of (0.5)
- * 2000-06-30 fl added fast search optimization (0.9.3)
- * 2000-06-30 fl added assert (lookahead) primitives, etc (0.9.4)
- * 2000-07-02 fl added charset optimizations, etc (0.9.5)
+ * 2000-03-06 fl first alpha, sort of
+ * 2000-06-30 fl added fast search optimization
+ * 2000-06-30 fl added assert (lookahead) primitives, etc
+ * 2000-07-02 fl added charset optimizations, etc
* 2000-07-03 fl store code in pattern object, lookbehind, etc
* 2000-07-08 fl added regs attribute
- * 2000-07-21 fl reset lastindex in scanner methods (0.9.6)
- * 2000-08-01 fl fixes for 1.6b1 (0.9.8)
+ * 2000-07-21 fl reset lastindex in scanner methods
+ * 2000-08-01 fl fixes for 1.6b1
* 2000-08-03 fl added recursion limit
* 2000-08-07 fl use PyOS_CheckStack() if available
* 2000-08-08 fl changed findall to return empty strings instead of None
@@ -21,6 +21,7 @@
* 2000-09-20 fl added expand method
* 2000-09-21 fl don't use the buffer interface for unicode strings
* 2000-10-03 fl fixed assert_not primitive; support keyword arguments
+ * 2000-10-24 fl really fixed assert_not; reset groups in findall
*
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
*
@@ -35,7 +36,7 @@
#ifndef SRE_RECURSIVE
-char copyright[] = " SRE 0.9.8 Copyright (c) 1997-2000 by Secret Labs AB ";
+char copyright[] = " SRE 0.9.9 Copyright (c) 1997-2000 by Secret Labs AB ";
#include "Python.h"
@@ -783,13 +784,13 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
/* <ASSERT_NOT> <skip> <back> <pattern> */
TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
state->ptr = ptr - pattern[1];
- if (state->ptr < state->beginning)
- return 0;
- i = SRE_MATCH(state, pattern + 2, level + 1);
- if (i < 0)
- return i;
- if (i)
- return 0;
+ if (state->ptr >= state->beginning) {
+ i = SRE_MATCH(state, pattern + 2, level + 1);
+ if (i < 0)
+ return i;
+ if (i)
+ return 0;
+ }
pattern += pattern[0];
break;
@@ -1199,7 +1200,7 @@ _compile(PyObject* self_, PyObject* args)
n = PySequence_Length(code);
#endif
- self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, 100*n);
+ self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
if (!self) {
Py_DECREF(code);
return NULL;
@@ -1680,6 +1681,8 @@ pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
PyObject* item;
+ state_reset(&state);
+
state.ptr = state.start;
if (state.charsize == 1) {