summary | refs | log | tree | commit | diff | stats
path: root/Lib/sre.py
diff options
context:
space:
mode:
author: Fredrik Lundh <fredrik@pythonware.com> 2001-10-21 21:48:30 (GMT)
committer: Fredrik Lundh <fredrik@pythonware.com> 2001-10-21 21:48:30 (GMT)
commit: dac58492aa3bf476fa0b4b586edababfe3aaca53 (patch)
tree: 2f5e120eafe2802ebdcd02178af67439c982b84c /Lib/sre.py
parent: 0402dd18cb025b7510760142087c97729702e23a (diff)
download: cpython-dac58492aa3bf476fa0b4b586edababfe3aaca53.zip
          cpython-dac58492aa3bf476fa0b4b586edababfe3aaca53.tar.gz
          cpython-dac58492aa3bf476fa0b4b586edababfe3aaca53.tar.bz2
fixed character set description in docstring (SRE uses Python
strings, not C strings)

removed USE_PYTHON defines, and related sre.py helpers

skip calling the subx helper if the template is callable.
interestingly enough, this means that

    def callback(m): return literal
    result = pattern.sub(callback, string)

is much faster than

    result = pattern.sub(literal, string)
Diffstat (limited to 'Lib/sre.py')
-rw-r--r--  Lib/sre.py  95
1 file changed, 15 insertions, 80 deletions
diff --git a/Lib/sre.py b/Lib/sre.py
index 701334e..e7517f5 100644
--- a/Lib/sre.py
+++ b/Lib/sre.py
@@ -17,15 +17,13 @@
r"""Support for regular expressions (RE).
This module provides regular expression matching operations similar to
-those found in Perl. It's 8-bit clean: the strings being processed may
-contain both null bytes and characters whose high bit is set. Regular
-expression pattern strings may not contain null bytes, but can specify
-the null byte using the \\number notation. Characters with the high
-bit set may be included.
-
-Regular expressions can contain both special and ordinary
-characters. Most ordinary characters, like "A", "a", or "0", are the
-simplest regular expressions; they simply match themselves. You can
+those found in Perl. It supports both 8-bit and Unicode strings; both
+the pattern and the strings being processed can contain null bytes and
+characters outside the US ASCII range.
+
+Regular expressions can contain both special and ordinary characters.
+Most ordinary characters, like "A", "a", or "0", are the simplest
+regular expressions; they simply match themselves. You can
concatenate ordinary characters, so last matches the string 'last'.
The special characters are:
@@ -45,7 +43,7 @@ The special characters are:
"|" A|B, creates an RE that will match either A or B.
(...) Matches the RE inside the parentheses.
The contents can be retrieved or matched later in the string.
- (?iLmsx) Set the I, L, M, S, or X flag for the RE (see below).
+ (?iLmsux) Set the I, L, M, S, U, or X flag for the RE (see below).
(?:...) Non-grouping version of regular parentheses.
(?P<name>...) The substring matched by the group is accessible by name.
(?P=name) Matches the text matched earlier by the group named name.
@@ -54,7 +52,7 @@ The special characters are:
(?!...) Matches if ... doesn't match next.
The special sequences consist of "\\" and a character from the list
-below. If the ordinary character is not on the list, then the
+below. If the ordinary character is not on the list, then the
resulting RE will match the second character.
\number Matches the contents of the group of the same number.
\A Matches only at the start of the string.
@@ -246,76 +244,13 @@ def _expand(pattern, match, template):
def _subx(pattern, template):
# internal: pattern.sub/subn implementation helper
- if callable(template):
- filter = template
- else:
- template = _compile_repl(template, pattern)
- if not template[0] and len(template[1]) == 1:
- # literal replacement
- filter = template[1][0]
- else:
- def filter(match, template=template):
- return sre_parse.expand_template(template, match)
- return filter
-
-def _sub(pattern, template, text, count=0):
- # internal: pattern.sub implementation hook
- # FIXME: not used in SRE 2.2.1 and later; will be removed soon
- return _subn(pattern, template, text, count)[0]
-
-def _subn(pattern, template, text, count=0):
- # internal: pattern.subn implementation hook
- # FIXME: not used in SRE 2.2.1 and later; will be removed soon
- filter = _subx(pattern, template)
- if not callable(filter):
+ template = _compile_repl(template, pattern)
+ if not template[0] and len(template[1]) == 1:
# literal replacement
- def filter(match, literal=filter):
- return literal
- n = i = 0
- s = []
- append = s.append
- c = pattern.scanner(text)
- while not count or n < count:
- m = c.search()
- if not m:
- break
- b, e = m.span()
- if i < b:
- append(text[i:b])
- elif i == b == e and n:
- append(text[i:b])
- continue # ignore empty match at previous position
- append(filter(m))
- i = e
- n = n + 1
- append(text[i:])
- return _join(s, text[:0]), n
-
-def _split(pattern, text, maxsplit=0):
- # internal: pattern.split implementation hook
- # FIXME: not used in SRE 2.2.1 and later; will be removed soon
- n = i = 0
- s = []
- append = s.append
- extend = s.extend
- c = pattern.scanner(text)
- g = pattern.groups
- while not maxsplit or n < maxsplit:
- m = c.search()
- if not m:
- break
- b, e = m.span()
- if b == e:
- if i >= len(text):
- break
- continue
- append(text[i:b])
- if g and b != e:
- extend(list(m.groups()))
- i = e
- n = n + 1
- append(text[i:])
- return s
+ return template[1][0]
+ def filter(match, template=template):
+ return sre_parse.expand_template(template, match)
+ return filter
# register myself for pickling