Added findall() to RegexObject -- return a list of all matches in a string. Added groupdict() to MatchObject -- return the named groups as a dict. Added default argument to groups() to specify what to return for unmatched groups; groupdict() also has this.
author: Guido van Rossum <guido@python.org> 1998-06-29 20:29:08 (GMT)
committer: Guido van Rossum <guido@python.org> 1998-06-29 20:29:08 (GMT)
commit: be0b62cab431089cb8693ca859316a86dbae2f94 (patch)
tree: 56c8002b09bb92b1868be0c072bcca4305a9ec1b /Lib/re.py
parent: 80884075f0106f17f34308f342ceffd896a22b33 (diff)
download: cpython-be0b62cab431089cb8693ca859316a86dbae2f94.zip
cpython-be0b62cab431089cb8693ca859316a86dbae2f94.tar.gz
cpython-be0b62cab431089cb8693ca859316a86dbae2f94.tar.bz2
1 files changed, 61 insertions, 16 deletions
diff --git a/Lib/re.py b/Lib/re.py
index 75905c5..c5b71b8 100644
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -57,6 +57,11 @@ def split(pattern, string, maxsplit=0):
         pattern = _cachecompile(pattern)
     return pattern.split(string, maxsplit)
 
+def findall(pattern, string):
+    if type(pattern) == type(''):
+        pattern = _cachecompile(pattern)
+    return pattern.findall(string)
+
 def escape(pattern):
     "Escape all non-alphanumeric characters in pattern."
     result = []
@@ -80,6 +85,7 @@ def compile(pattern, flags=0):
 #
 
 class RegexObject:
+
     def __init__(self, pattern, flags, code, groupindex):
         self.code = code 
         self.flags = flags
@@ -171,7 +177,7 @@ class RegexObject:
         return (string.join(results, ''), n)
                                                                             
     def split(self, source, maxsplit=0):
-        """Split the \var{source} string by the occurrences of the pattern,
+        """Split the source string by the occurrences of the pattern,
         returning a list containing the resulting substrings."""
 
         if maxsplit < 0:
@@ -198,13 +204,38 @@ class RegexObject:
             results.append(source[lastmatch:i])
             g = m.groups()
             if g:
-                if type(g)==type( "" ): g = [g]
                 results[len(results):] = list(g)
             pos = lastmatch = j
             n = n + 1
         results.append(source[lastmatch:])
         return results
 
+    def findall(self, string):
+        """Return a list of all non-overlapping matches in the string.
+
+        If one or more groups are present in the pattern, return a
+        list of groups; this will be a list of tuples if the pattern
+        has more than one group.
+
+        Empty matches are included in the result.
+
+        """
+        pos = 0
+        n = len(string)
+        result = []
+        while pos <= n:
+            m = self.search(string, pos)
+            if not m:
+                break
+            gr = m.groups()
+            if not gr:
+                gr = m.group()
+            elif len(gr) == 1:
+                gr = gr[0]
+            result.append(gr)
+            pos = max(m.end(), pos+1)
+        return result
+
     # The following 3 functions were contributed by Mike Fletcher, and
     # allow pickling and unpickling of RegexObject instances.
     def __getinitargs__(self):
@@ -221,6 +252,7 @@ class RegexObject:
         self.code = apply(pcre_compile, statetuple)
 
 class MatchObject:
+
     def __init__(self, re, string, pos, endpos, regs):
         self.re = re
         self.string = string
@@ -234,7 +266,7 @@ class MatchObject:
             try:
                 g = self.re.groupindex[g]
             except (KeyError, TypeError):
-                raise IndexError, ('group "' + g + '" is undefined')
+                raise IndexError, 'group %s is undefined' % `g`
         return self.regs[g][0]
     
     def end(self, g = 0):
@@ -243,31 +275,31 @@ class MatchObject:
             try:
                 g = self.re.groupindex[g]
             except (KeyError, TypeError):
-                raise IndexError, ('group "' + g + '" is undefined')
+                raise IndexError, 'group %s is undefined' % `g`
         return self.regs[g][1]
     
     def span(self, g = 0):
-        """Return a tuple containing the start,end of the substring 
-        matched by group g"""
+        "Return (start, end) of the substring matched by group g"
         if type(g) == type(''):
             try:
                 g = self.re.groupindex[g]
             except (KeyError, TypeError):
-                raise IndexError, ('group "' + g + '" is undefined')
+                raise IndexError, 'group %s is undefined' % `g`
         return self.regs[g]
     
-    def groups(self):
+    def groups(self, default=None):
         "Return a tuple containing all subgroups of the match object"
         result = []
         for g in range(1, self.re._num_regs):
-            if (self.regs[g][0] == -1) or (self.regs[g][1] == -1):
-                result.append(None)
+            a, b = self.regs[g]
+            if a == -1 or b == -1:
+                result.append(default)
             else:
-                result.append(self.string[self.regs[g][0]:self.regs[g][1]])
+                result.append(self.string[a:b])
         return tuple(result)
 
     def group(self, *groups):
-        "Return one or more groups of the match."
+        "Return one or more groups of the match"
         if len(groups) == 0:
             groups = (0,)
         result = []
@@ -276,15 +308,28 @@ class MatchObject:
                 try:
                     g = self.re.groupindex[g]
                 except (KeyError, TypeError):
-                    raise IndexError, ('group "' + g + '" is undefined')
-            if len(self.regs)<=g: raise IndexError, ('group "' + str(g) + '" is undefined')
-            elif (self.regs[g][0] == -1) or (self.regs[g][1] == -1):
+                    raise IndexError, 'group %s is undefined' % `g`
+            if g >= len(self.regs):
+                raise IndexError, 'group %s is undefined' % `g`
+            a, b = self.regs[g]
+            if a == -1 or b == -1:
                 result.append(None)
             else:
-                result.append(self.string[self.regs[g][0]:self.regs[g][1]])
+                result.append(self.string[a:b])
         if len(result) > 1:
             return tuple(result)
         elif len(result) == 1:
             return result[0]
         else:
             return ()
+
+    def groupdict(self, default=None):
+        "Return a dictionary containing all named subgroups of the match"
+        dict = {}
+        for name, index in self.re.groupindex.items():
+            a, b = self.regs[index]
+            if a == -1 or b == -1:
+                dict[name] = default
+            else:
+                dict[name] = self.string[a:b]
+        return dict
author	Guido van Rossum <guido@python.org>	1998-06-29 20:29:08 (GMT)
committer	Guido van Rossum <guido@python.org>	1998-06-29 20:29:08 (GMT)
commit	be0b62cab431089cb8693ca859316a86dbae2f94 (patch)
tree	56c8002b09bb92b1868be0c072bcca4305a9ec1b /Lib/re.py
parent	80884075f0106f17f34308f342ceffd896a22b33 (diff)
download	cpython-be0b62cab431089cb8693ca859316a86dbae2f94.zip cpython-be0b62cab431089cb8693ca859316a86dbae2f94.tar.gz cpython-be0b62cab431089cb8693ca859316a86dbae2f94.tar.bz2