7 files changed, 104 insertions, 66 deletions
diff --git a/Lib/sre.py b/Lib/sre.py
index 6dea5c4..8d03e92 100644
--- a/Lib/sre.py
+++ b/Lib/sre.py
@@ -3,7 +3,7 @@
 #
 # re-compatible interface for the sre matching engine
 #
-# Copyright (c) 1998-2000 by Secret Labs AB.  All rights reserved.
+# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
 #
 # This version of the SRE library can be redistributed under CNRI's
 # Python 1.6 license.  For any other use, please contact Secret Labs
@@ -14,23 +14,22 @@
 # other compatibility work.
 #
 
-# FIXME: change all FIXME's to XXX ;-)
-
 import sre_compile
 import sre_parse
 
 import string
 
 # flags
-I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE
-L = LOCALE = sre_compile.SRE_FLAG_LOCALE
-M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE
-S = DOTALL = sre_compile.SRE_FLAG_DOTALL
-X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE
+I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case
+L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale
+U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale
+M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline
+S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline
+X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments
 
-# sre extensions (may or may not be in 1.6/2.0 final)
-T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE
-U = UNICODE = sre_compile.SRE_FLAG_UNICODE
+# sre extensions (experimental, don't rely on these)
+T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE # disable backtracking
+DEBUG = sre_compile.SRE_FLAG_DEBUG # dump pattern after compilation
 
 # sre exception
 error = sre_compile.error
@@ -38,36 +37,60 @@ error = sre_compile.error
 # --------------------------------------------------------------------
 # public interface
 
-# FIXME: add docstrings
-
 def match(pattern, string, flags=0):
+    """Try to apply the pattern at the start of the string, returning
+    a match object, or None if no match was found."""
     return _compile(pattern, flags).match(string)
 
 def search(pattern, string, flags=0):
+    """Scan through string looking for a match to the pattern, returning
+    a match object, or None if no match was found."""
     return _compile(pattern, flags).search(string)
 
 def sub(pattern, repl, string, count=0):
+    """Return the string obtained by replacing the leftmost
+    non-overlapping occurrences of the pattern in string by the
+    replacement repl."""
     return _compile(pattern, 0).sub(repl, string, count)
 
 def subn(pattern, repl, string, count=0):
+    """Return a 2-tuple containing (new_string, number).
+    new_string is the string obtained by replacing the leftmost
+    non-overlapping occurrences of the pattern in the source
+    string by the replacement repl.  number is the number of
+    substitutions that were made."""
     return _compile(pattern, 0).subn(repl, string, count)
 
 def split(pattern, string, maxsplit=0):
+    """Split the source string by the occurrences of the pattern,
+    returning a list containing the resulting substrings."""
     return _compile(pattern, 0).split(string, maxsplit)
 
 def findall(pattern, string, maxsplit=0):
+    """Return a list of all non-overlapping matches in the string.
+
+    If one or more groups are present in the pattern, return a
+    list of groups; this will be a list of tuples if the pattern
+    has more than one group.
+
+    Empty matches are included in the result."""
     return _compile(pattern, 0).findall(string, maxsplit)
 
 def compile(pattern, flags=0):
+    "Compile a regular expression pattern, returning a pattern object."
     return _compile(pattern, flags)
 
 def purge():
+    "Clear the regular expression cache."
     _cache.clear()
 
 def template(pattern, flags=0):
+    "Compile a template pattern, returning a pattern object."
+
     return _compile(pattern, flags|T)
 
 def escape(pattern):
+    "Escape all non-alphanumeric characters in pattern."
     s = list(pattern)
     for i in range(len(pattern)):
         c = pattern[i]
@@ -204,7 +227,7 @@ class Scanner:
                 break
             action = self.lexicon[m.lastindex][1]
             if callable(action):
-                self.match = match
+                self.match = m
                 action = action(self, m.group())
             if action is not None:
                 append(action)
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index dc508e5..adab767 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -3,7 +3,7 @@
 #
 # convert template to internal format
 #
-# Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
+# Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 #
 # See the sre.py file for information on usage and redistribution.
 #
@@ -176,7 +176,7 @@ def _optimize_charset(charset, fixup):
                 for i in range(fixup(av[0]), fixup(av[1])+1):
                     charmap[i] = 1
             elif op is CATEGORY:
-                # FIXME: could append to charmap tail
+                # XXX: could append to charmap tail
                 return charset # cannot compress
     except IndexError:
         # character set contains unicode characters
@@ -364,7 +364,7 @@ def compile(p, flags=0):
 
     # print code
 
-    # FIXME: <fl> get rid of this limitation!
+    # XXX: <fl> get rid of this limitation!
     assert p.pattern.groups <= 100,\
            "sorry, but this version only supports 100 named groups"
 
diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py
index ea649c0..a5e4bb8 100644
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
@@ -4,7 +4,7 @@
 # various symbols used by the regular expression engine.
 # run this script to update the _sre include files!
 #
-# Copyright (c) 1998-2000 by Secret Labs AB.  All rights reserved.
+# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
 #
 # See the sre.py file for information on usage and redistribution.
 #
@@ -54,10 +54,12 @@ SUBPATTERN = "subpattern"
 # positions
 AT_BEGINNING = "at_beginning"
 AT_BEGINNING_LINE = "at_beginning_line"
+AT_BEGINNING_STRING = "at_beginning_string"
 AT_BOUNDARY = "at_boundary"
 AT_NON_BOUNDARY = "at_non_boundary"
 AT_END = "at_end"
 AT_END_LINE = "at_end_line"
+AT_END_STRING = "at_end_string"
 
 # categories
 CATEGORY_DIGIT = "category_digit"
@@ -109,8 +111,8 @@ OPCODES = [
 ]
 
 ATCODES = [
-    AT_BEGINNING, AT_BEGINNING_LINE, AT_BOUNDARY,
-    AT_NON_BOUNDARY, AT_END, AT_END_LINE
+    AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
+    AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING
 ]
 
 CHCODES = [
@@ -178,6 +180,7 @@ SRE_FLAG_MULTILINE = 8 # treat target as multiline string
 SRE_FLAG_DOTALL = 16 # treat target as a single string
 SRE_FLAG_UNICODE = 32 # use unicode locale
 SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
+SRE_FLAG_DEBUG = 128 # debugging
 
 # flags for INFO primitive
 SRE_INFO_PREFIX = 1 # has prefix
@@ -201,7 +204,7 @@ if __name__ == "__main__":
  * NOTE: This file is generated by sre_constants.py.  If you need
  * to change anything in here, edit sre_constants.py and run it.
  *
- * Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
+ * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
  *
  * See the _sre.c file for information on usage and redistribution.
  */
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index 5334e06..a21fd61 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -3,7 +3,7 @@
 #
 # convert re-style regular expression to sre pattern
 #
-# Copyright (c) 1998-2000 by Secret Labs AB.  All rights reserved.
+# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
 #
 # See the sre.py file for information on usage and redistribution.
 #
@@ -34,7 +34,7 @@ ESCAPES = {
 }
 
 CATEGORIES = {
-    r"\A": (AT, AT_BEGINNING), # start of string
+    r"\A": (AT, AT_BEGINNING_STRING), # start of string
     r"\b": (AT, AT_BOUNDARY),
     r"\B": (AT, AT_NON_BOUNDARY),
     r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
@@ -43,7 +43,7 @@ CATEGORIES = {
     r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
     r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
     r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
-    r"\Z": (AT, AT_END), # end of string
+    r"\Z": (AT, AT_END_STRING), # end of string
 }
 
 FLAGS = {
@@ -421,13 +421,13 @@ def _parse(source, state):
                         code1 = code1[1][0]
                     set.append(code1)
 
-            # FIXME: <fl> move set optimization to compiler!
+            # XXX: <fl> should move set optimization to compiler!
             if len(set)==1 and set[0][0] is LITERAL:
                 subpattern.append(set[0]) # optimization
             elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
                 subpattern.append((NOT_LITERAL, set[1][1])) # optimization
             else:
-                # FIXME: <fl> add charmap optimization
+                # XXX: <fl> should add charmap optimization here
                 subpattern.append((IN, set))
 
         elif this and this[0] in REPEAT_CHARS:
@@ -457,7 +457,7 @@ def _parse(source, state):
                     min = int(lo)
                 if hi:
                     max = int(hi)
-                # FIXME: <fl> check that hi >= lo!
+                # XXX: <fl> check that hi >= lo ???
             else:
                 raise error, "not supported"
             # figure out which item to repeat
@@ -601,7 +601,8 @@ def parse(str, flags=0, pattern=None):
     elif tail:
         raise error, "bogus characters at end of regular expression"
 
-    # p.dump()
+    if flags & SRE_FLAG_DEBUG:
+        p.dump()
 
     if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE:
         # the VERBOSE flag was switched on inside the pattern.  to be
@@ -672,8 +673,7 @@ def parse_template(source, pattern):
     return p
 
 def expand_template(template, match):
-    # FIXME: <fl> this is sooooo slow.  drop in the slicelist
-    # code instead
+    # XXX: <fl> this is sooooo slow.  drop in the slicelist code instead
     p = []
     a = p.append
     sep = match.string[:0]
diff --git a/Lib/test/test_sre.py b/Lib/test/test_sre.py
index 9c01c66..b9692a1 100644
--- a/Lib/test/test_sre.py
+++ b/Lib/test/test_sre.py
@@ -47,12 +47,12 @@ if verbose:
     print 'Running tests on character literals'
 
 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
-    test(r"""sre.match(r"\%03o" % i, chr(i)) is not None""", 1)
-    test(r"""sre.match(r"\%03o0" % i, chr(i)+"0") is not None""", 1)
-    test(r"""sre.match(r"\%03o8" % i, chr(i)+"8") is not None""", 1)
-    test(r"""sre.match(r"\x%02x" % i, chr(i)) is not None""", 1)
-    test(r"""sre.match(r"\x%02x0" % i, chr(i)+"0") is not None""", 1)
-    test(r"""sre.match(r"\x%02xz" % i, chr(i)+"z") is not None""", 1)
+    test(r"""sre.match(r"\%03o" % i, chr(i)) != None""", 1)
+    test(r"""sre.match(r"\%03o0" % i, chr(i)+"0") != None""", 1)
+    test(r"""sre.match(r"\%03o8" % i, chr(i)+"8") != None""", 1)
+    test(r"""sre.match(r"\x%02x" % i, chr(i)) != None""", 1)
+    test(r"""sre.match(r"\x%02x0" % i, chr(i)+"0") != None""", 1)
+    test(r"""sre.match(r"\x%02xz" % i, chr(i)+"z") != None""", 1)
 test(r"""sre.match("\911", "")""", None, sre.error)
 
 #
@@ -197,11 +197,11 @@ if verbose:
 p = ""
 for i in range(0, 256):
     p = p + chr(i)
-    test(r"""sre.match(sre.escape(chr(i)), chr(i)) is not None""", 1)
+    test(r"""sre.match(sre.escape(chr(i)), chr(i)) != None""", 1)
     test(r"""sre.match(sre.escape(chr(i)), chr(i)).span()""", (0,1))
 
 pat = sre.compile(sre.escape(p))
-test(r"""pat.match(p) is not None""", 1)
+test(r"""pat.match(p) != None""", 1)
 test(r"""pat.match(p).span()""", (0,256))
 
 if verbose:
diff --git a/Modules/_sre.c b/Modules/_sre.c
index ccbd7b2..28ec61c 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -22,8 +22,10 @@
  * 2000-09-21 fl  don't use the buffer interface for unicode strings
  * 2000-10-03 fl  fixed assert_not primitive; support keyword arguments
  * 2000-10-24 fl  really fixed assert_not; reset groups in findall
+ * 2000-12-21 fl  fixed memory leak in groupdict
+ * 2001-01-02 fl  properly reset pointer after failed assertion in MIN_UNTIL
  *
- * Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
+ * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
  *
  * This version of the SRE library can be redistributed under CNRI's
  * Python 1.6 license.  For any other use, please contact Secret Labs
@@ -355,6 +357,7 @@ SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
     switch (at) {
 
     case SRE_AT_BEGINNING:
+    case SRE_AT_BEGINNING_STRING:
         return ((void*) ptr == state->beginning);
 
     case SRE_AT_BEGINNING_LINE:
@@ -370,6 +373,9 @@ SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
         return ((void*) ptr == state->end ||
                 SRE_IS_LINEBREAK((int) ptr[0]));
 
+    case SRE_AT_END_STRING:
+        return ((void*) ptr == state->end);
+
     case SRE_AT_BOUNDARY:
         if (state->beginning == state->end)
             return 0;
@@ -826,7 +832,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
             /* this operator only works if the repeated item is
                exactly one character wide, and we're not already
                collecting backtracking points.  for other cases,
-               use the MAX_REPEAT operator instead */
+               use the MAX_REPEAT operator */
 
             /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
 
@@ -900,7 +906,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
 
         case SRE_OP_REPEAT:
             /* create repeat context.  all the hard work is done
-               by the UNTIL operator */
+               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
             /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
             TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
                    pattern[1], pattern[2]));
@@ -974,6 +980,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
             if (i)
                 return i;
             state->repeat = rp;
+            state->ptr = ptr;
             return 0;
 
         case SRE_OP_MIN_UNTIL:
@@ -986,7 +993,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
 
             count = rp->count + 1;
 
-            TRACE(("|%p|%p|MIN_UNTIL %d\n", pattern, ptr, count));
+            TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern, ptr, count,
+                   rp->pattern));
 
             state->ptr = ptr;
 
@@ -1009,6 +1017,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
                 /* free(rp); */
                 return i;
             }
+            state->ptr = ptr;
             state->repeat = rp;
 
             if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
@@ -1020,6 +1029,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
             if (i)
                 return i;
             rp->count = count - 1;
+            state->ptr = ptr;
             return 0;
 
         default:
@@ -1965,7 +1975,7 @@ match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
 
     PyObject* def = Py_None;
     static char* kwlist[] = { "default", NULL };
-    if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
+    if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
         return NULL;
 
     result = PyDict_New();
@@ -1973,35 +1983,35 @@ match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
         return result;
 
     keys = PyMapping_Keys(self->pattern->groupindex);
-    if (!keys) {
-        Py_DECREF(result);
-        return NULL;
-    }
+    if (!keys)
+        goto failed;
 
     for (index = 0; index < PyList_GET_SIZE(keys); index++) {
+        int status;
         PyObject* key;
-        PyObject* item;
+        PyObject* value;
         key = PyList_GET_ITEM(keys, index);
-        if (!key) {
-            Py_DECREF(keys);
-            Py_DECREF(result);
-            return NULL;
-        }
-        item = match_getslice(self, key, def);
-        if (!item) {
-            Py_DECREF(key);
-            Py_DECREF(keys);
-            Py_DECREF(result);
-            return NULL;
+        if (!key)
+            goto failed;
+        value = match_getslice(self, key, def);
+        if (!value) {
+            /* key is borrowed from keys: it must not be DECREF'ed here */
+            goto failed;
         }
-        /* FIXME: <fl> this can fail, right? */
-        PyDict_SetItem(result, key, item);
-	Py_DECREF(item);
+        status = PyDict_SetItem(result, key, value);
+        Py_DECREF(value);
+        if (status < 0)
+            goto failed;
     }
 
     Py_DECREF(keys);
 
     return result;
+
+failed:
+    Py_XDECREF(keys); /* keys is NULL when PyMapping_Keys failed */
+    Py_DECREF(result);
+    return NULL;
 }
 
 static PyObject*
diff --git a/Modules/sre_constants.h b/Modules/sre_constants.h
index 5c55c3d..6cad089 100644
--- a/Modules/sre_constants.h
+++ b/Modules/sre_constants.h
@@ -6,7 +6,7 @@
  * NOTE: This file is generated by sre_constants.py.  If you need
  * to change anything in here, edit sre_constants.py and run it.
  *
- * Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
+ * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
  *
  * See the _sre.c file for information on usage and redistribution.
  */
@@ -42,10 +42,12 @@
 #define SRE_OP_SUBPATTERN 28
 #define SRE_AT_BEGINNING 0
 #define SRE_AT_BEGINNING_LINE 1
-#define SRE_AT_BOUNDARY 2
-#define SRE_AT_NON_BOUNDARY 3
-#define SRE_AT_END 4
-#define SRE_AT_END_LINE 5
+#define SRE_AT_BEGINNING_STRING 2
+#define SRE_AT_BOUNDARY 3
+#define SRE_AT_NON_BOUNDARY 4
+#define SRE_AT_END 5
+#define SRE_AT_END_LINE 6
+#define SRE_AT_END_STRING 7
 #define SRE_CATEGORY_DIGIT 0
 #define SRE_CATEGORY_NOT_DIGIT 1
 #define SRE_CATEGORY_SPACE 2