diff options
author | Fredrik Lundh <fredrik@pythonware.com> | 2001-10-21 21:48:30 (GMT) |
---|---|---|
committer | Fredrik Lundh <fredrik@pythonware.com> | 2001-10-21 21:48:30 (GMT) |
commit | dac58492aa3bf476fa0b4b586edababfe3aaca53 (patch) | |
tree | 2f5e120eafe2802ebdcd02178af67439c982b84c /Lib/sre.py | |
parent | 0402dd18cb025b7510760142087c97729702e23a (diff) | |
download | cpython-dac58492aa3bf476fa0b4b586edababfe3aaca53.zip cpython-dac58492aa3bf476fa0b4b586edababfe3aaca53.tar.gz cpython-dac58492aa3bf476fa0b4b586edababfe3aaca53.tar.bz2 |
fixed character set description in docstring (SRE uses Python
strings, not C strings)
removed USE_PYTHON defines, and related sre.py helpers
skip calling the subx helper if the template is callable.
interestingly enough, this means that
    def callback(m):
        return literal
    result = pattern.sub(callback, string)
is much faster than
    result = pattern.sub(literal, string)
Diffstat (limited to 'Lib/sre.py')
-rw-r--r-- | Lib/sre.py | 95 |
1 files changed, 15 insertions, 80 deletions
@@ -17,15 +17,13 @@ r"""Support for regular expressions (RE). This module provides regular expression matching operations similar to -those found in Perl. It's 8-bit clean: the strings being processed may -contain both null bytes and characters whose high bit is set. Regular -expression pattern strings may not contain null bytes, but can specify -the null byte using the \\number notation. Characters with the high -bit set may be included. - -Regular expressions can contain both special and ordinary -characters. Most ordinary characters, like "A", "a", or "0", are the -simplest regular expressions; they simply match themselves. You can +those found in Perl. It supports both 8-bit and Unicode strings; both +the pattern and the strings being processed can contain null bytes and +characters outside the US ASCII range. + +Regular expressions can contain both special and ordinary characters. +Most ordinary characters, like "A", "a", or "0", are the simplest +regular expressions; they simply match themselves. You can concatenate ordinary characters, so last matches the string 'last'. The special characters are: @@ -45,7 +43,7 @@ The special characters are: "|" A|B, creates an RE that will match either A or B. (...) Matches the RE inside the parentheses. The contents can be retrieved or matched later in the string. - (?iLmsx) Set the I, L, M, S, or X flag for the RE (see below). + (?iLmsux) Set the I, L, M, S, U, or X flag for the RE (see below). (?:...) Non-grouping version of regular parentheses. (?P<name>...) The substring matched by the group is accessible by name. (?P=name) Matches the text matched earlier by the group named name. @@ -54,7 +52,7 @@ The special characters are: (?!...) Matches if ... doesn't match next. The special sequences consist of "\\" and a character from the list -below. If the ordinary character is not on the list, then the +below. If the ordinary character is not on the list, then the resulting RE will match the second character. 
\number Matches the contents of the group of the same number. \A Matches only at the start of the string. @@ -246,76 +244,13 @@ def _expand(pattern, match, template): def _subx(pattern, template): # internal: pattern.sub/subn implementation helper - if callable(template): - filter = template - else: - template = _compile_repl(template, pattern) - if not template[0] and len(template[1]) == 1: - # literal replacement - filter = template[1][0] - else: - def filter(match, template=template): - return sre_parse.expand_template(template, match) - return filter - -def _sub(pattern, template, text, count=0): - # internal: pattern.sub implementation hook - # FIXME: not used in SRE 2.2.1 and later; will be removed soon - return _subn(pattern, template, text, count)[0] - -def _subn(pattern, template, text, count=0): - # internal: pattern.subn implementation hook - # FIXME: not used in SRE 2.2.1 and later; will be removed soon - filter = _subx(pattern, template) - if not callable(filter): + template = _compile_repl(template, pattern) + if not template[0] and len(template[1]) == 1: # literal replacement - def filter(match, literal=filter): - return literal - n = i = 0 - s = [] - append = s.append - c = pattern.scanner(text) - while not count or n < count: - m = c.search() - if not m: - break - b, e = m.span() - if i < b: - append(text[i:b]) - elif i == b == e and n: - append(text[i:b]) - continue # ignore empty match at previous position - append(filter(m)) - i = e - n = n + 1 - append(text[i:]) - return _join(s, text[:0]), n - -def _split(pattern, text, maxsplit=0): - # internal: pattern.split implementation hook - # FIXME: not used in SRE 2.2.1 and later; will be removed soon - n = i = 0 - s = [] - append = s.append - extend = s.extend - c = pattern.scanner(text) - g = pattern.groups - while not maxsplit or n < maxsplit: - m = c.search() - if not m: - break - b, e = m.span() - if b == e: - if i >= len(text): - break - continue - append(text[i:b]) - if g and b != e: - 
extend(list(m.groups())) - i = e - n = n + 1 - append(text[i:]) - return s + return template[1][0] + def filter(match, template=template): + return sre_parse.expand_template(template, match) + return filter # register myself for pickling |