diff options
Diffstat (limited to 'Lib/regsub.py')
-rw-r--r-- | Lib/regsub.py | 198 |
1 files changed, 0 insertions, 198 deletions
diff --git a/Lib/regsub.py b/Lib/regsub.py deleted file mode 100644 index 0fc10a5..0000000 --- a/Lib/regsub.py +++ /dev/null @@ -1,198 +0,0 @@ -"""Regexp-based split and replace using the obsolete regex module. - -This module is only for backward compatibility. These operations -are now provided by the new regular expression module, "re". - -sub(pat, repl, str): replace first occurrence of pattern in string -gsub(pat, repl, str): replace all occurrences of pattern in string -split(str, pat, maxsplit): split string using pattern as delimiter -splitx(str, pat, maxsplit): split string using pattern as delimiter plus - return delimiters -""" - -import warnings -warnings.warn("the regsub module is deprecated; please use re.sub()", - DeprecationWarning) - -# Ignore further deprecation warnings about this module -warnings.filterwarnings("ignore", "", DeprecationWarning, __name__) - -import regex - -__all__ = ["sub","gsub","split","splitx","capwords"] - -# Replace first occurrence of pattern pat in string str by replacement -# repl. If the pattern isn't found, the string is returned unchanged. -# The replacement may contain references \digit to subpatterns and -# escaped backslashes. The pattern may be a string or an already -# compiled pattern. - -def sub(pat, repl, str): - prog = compile(pat) - if prog.search(str) >= 0: - regs = prog.regs - a, b = regs[0] - str = str[:a] + expand(repl, regs, str) + str[b:] - return str - - -# Replace all (non-overlapping) occurrences of pattern pat in string -# str by replacement repl. The same rules as for sub() apply. -# Empty matches for the pattern are replaced only when not adjacent to -# a previous match, so e.g. gsub('', '-', 'abc') returns '-a-b-c-'. - -def gsub(pat, repl, str): - prog = compile(pat) - new = '' - start = 0 - first = 1 - while prog.search(str, start) >= 0: - regs = prog.regs - a, b = regs[0] - if a == b == start and not first: - if start >= len(str) or prog.search(str, start+1) < 0: - break - regs = prog.regs - a, b = regs[0] - new = new + str[start:a] + expand(repl, regs, str) - start = b - first = 0 - new = new + str[start:] - return new - - -# Split string str in fields separated by delimiters matching pattern -# pat. Only non-empty matches for the pattern are considered, so e.g. -# split('abc', '') returns ['abc']. -# The optional 3rd argument sets the number of splits that are performed. - -def split(str, pat, maxsplit = 0): - return intsplit(str, pat, maxsplit, 0) - -# Split string str in fields separated by delimiters matching pattern -# pat. Only non-empty matches for the pattern are considered, so e.g. -# split('abc', '') returns ['abc']. The delimiters are also included -# in the list. -# The optional 3rd argument sets the number of splits that are performed. - - -def splitx(str, pat, maxsplit = 0): - return intsplit(str, pat, maxsplit, 1) - -# Internal function used to implement split() and splitx(). - -def intsplit(str, pat, maxsplit, retain): - prog = compile(pat) - res = [] - start = next = 0 - splitcount = 0 - while prog.search(str, next) >= 0: - regs = prog.regs - a, b = regs[0] - if a == b: - next = next + 1 - if next >= len(str): - break - else: - res.append(str[start:a]) - if retain: - res.append(str[a:b]) - start = next = b - splitcount = splitcount + 1 - if (maxsplit and (splitcount >= maxsplit)): - break - res.append(str[start:]) - return res - - -# Capitalize words split using a pattern - -def capwords(str, pat='[^a-zA-Z0-9_]+'): - words = splitx(str, pat) - for i in range(0, len(words), 2): - words[i] = words[i].capitalize() - return "".join(words) - - -# Internal subroutines: -# compile(pat): compile a pattern, caching already compiled patterns -# expand(repl, regs, str): expand \digit escapes in replacement string - - -# Manage a cache of compiled regular expressions. -# -# If the pattern is a string a compiled version of it is returned. If -# the pattern has been used before we return an already compiled -# version from the cache; otherwise we compile it now and save the -# compiled version in the cache, along with the syntax it was compiled -# with. Instead of a string, a compiled regular expression can also -# be passed. - -cache = {} - -def compile(pat): - if type(pat) != type(''): - return pat # Assume it is a compiled regex - key = (pat, regex.get_syntax()) - if key in cache: - prog = cache[key] # Get it from the cache - else: - prog = cache[key] = regex.compile(pat) - return prog - - -def clear_cache(): - global cache - cache = {} - - -# Expand \digit in the replacement. -# Each occurrence of \digit is replaced by the substring of str -# indicated by regs[digit]. To include a literal \ in the -# replacement, double it; other \ escapes are left unchanged (i.e. -# the \ and the following character are both copied). - -def expand(repl, regs, str): - if '\\' not in repl: - return repl - new = '' - i = 0 - ord0 = ord('0') - while i < len(repl): - c = repl[i]; i = i+1 - if c != '\\' or i >= len(repl): - new = new + c - else: - c = repl[i]; i = i+1 - if '0' <= c <= '9': - a, b = regs[ord(c)-ord0] - new = new + str[a:b] - elif c == '\\': - new = new + c - else: - new = new + '\\' + c - return new - - -# Test program, reads sequences "pat repl str" from stdin. -# Optional argument specifies pattern used to split lines. - -def test(): - import sys - if sys.argv[1:]: - delpat = sys.argv[1] - else: - delpat = '[ \t\n]+' - while 1: - if sys.stdin.isatty(): sys.stderr.write('--> ') - line = sys.stdin.readline() - if not line: break - if line[-1] == '\n': line = line[:-1] - fields = split(line, delpat) - if len(fields) != 3: - print 'Sorry, not three fields' - print 'split:', repr(fields) - continue - [pat, repl, str] = split(line, delpat) - print 'sub :', repr(sub(pat, repl, str)) - print 'gsub:', repr(gsub(pat, repl, str)) |