diff options
Diffstat (limited to 'Lib/regsub.py')
-rw-r--r-- | Lib/regsub.py | 200 |
1 files changed, 100 insertions, 100 deletions
diff --git a/Lib/regsub.py b/Lib/regsub.py index a0ca8fe..4a36512 100644 --- a/Lib/regsub.py +++ b/Lib/regsub.py @@ -12,7 +12,7 @@ splitx(str, pat, maxsplit): split string using pattern as delimiter plus import warnings warnings.warn("the regsub module is deprecated; please use re.sub()", - DeprecationWarning) + DeprecationWarning) # Ignore further deprecation warnings about this module warnings.filterwarnings("ignore", "", DeprecationWarning, __name__) @@ -27,12 +27,12 @@ import regex # compiled pattern. def sub(pat, repl, str): - prog = compile(pat) - if prog.search(str) >= 0: - regs = prog.regs - a, b = regs[0] - str = str[:a] + expand(repl, regs, str) + str[b:] - return str + prog = compile(pat) + if prog.search(str) >= 0: + regs = prog.regs + a, b = regs[0] + str = str[:a] + expand(repl, regs, str) + str[b:] + return str # Replace all (non-overlapping) occurrences of pattern pat in string @@ -41,23 +41,23 @@ def sub(pat, repl, str): # a previous match, so e.g. gsub('', '-', 'abc') returns '-a-b-c-'. def gsub(pat, repl, str): - prog = compile(pat) - new = '' - start = 0 - first = 1 - while prog.search(str, start) >= 0: - regs = prog.regs - a, b = regs[0] - if a == b == start and not first: - if start >= len(str) or prog.search(str, start+1) < 0: - break - regs = prog.regs - a, b = regs[0] - new = new + str[start:a] + expand(repl, regs, str) - start = b - first = 0 - new = new + str[start:] - return new + prog = compile(pat) + new = '' + start = 0 + first = 1 + while prog.search(str, start) >= 0: + regs = prog.regs + a, b = regs[0] + if a == b == start and not first: + if start >= len(str) or prog.search(str, start+1) < 0: + break + regs = prog.regs + a, b = regs[0] + new = new + str[start:a] + expand(repl, regs, str) + start = b + first = 0 + new = new + str[start:] + return new # Split string str in fields separated by delimiters matching pattern @@ -66,7 +66,7 @@ def gsub(pat, repl, str): # The optional 3rd argument sets the number of splits that are performed. def split(str, pat, maxsplit = 0): - return intsplit(str, pat, maxsplit, 0) + return intsplit(str, pat, maxsplit, 0) # Split string str in fields separated by delimiters matching pattern # pat. Only non-empty matches for the pattern are considered, so e.g. @@ -76,42 +76,42 @@ def split(str, pat, maxsplit = 0): def splitx(str, pat, maxsplit = 0): - return intsplit(str, pat, maxsplit, 1) - + return intsplit(str, pat, maxsplit, 1) + # Internal function used to implement split() and splitx(). def intsplit(str, pat, maxsplit, retain): - prog = compile(pat) - res = [] - start = next = 0 - splitcount = 0 - while prog.search(str, next) >= 0: - regs = prog.regs - a, b = regs[0] - if a == b: - next = next + 1 - if next >= len(str): - break - else: - res.append(str[start:a]) - if retain: - res.append(str[a:b]) - start = next = b - splitcount = splitcount + 1 - if (maxsplit and (splitcount >= maxsplit)): - break - res.append(str[start:]) - return res + prog = compile(pat) + res = [] + start = next = 0 + splitcount = 0 + while prog.search(str, next) >= 0: + regs = prog.regs + a, b = regs[0] + if a == b: + next = next + 1 + if next >= len(str): + break + else: + res.append(str[start:a]) + if retain: + res.append(str[a:b]) + start = next = b + splitcount = splitcount + 1 + if (maxsplit and (splitcount >= maxsplit)): + break + res.append(str[start:]) + return res # Capitalize words split using a pattern def capwords(str, pat='[^a-zA-Z0-9_]+'): - import string - words = splitx(str, pat) - for i in range(0, len(words), 2): - words[i] = string.capitalize(words[i]) - return string.joinfields(words, "") + import string + words = splitx(str, pat) + for i in range(0, len(words), 2): + words[i] = string.capitalize(words[i]) + return string.joinfields(words, "") # Internal subroutines: @@ -131,19 +131,19 @@ def capwords(str, pat='[^a-zA-Z0-9_]+'): cache = {} def compile(pat): - if type(pat) != type(''): - return pat # Assume it is a compiled regex - key = (pat, regex.get_syntax()) - if cache.has_key(key): - prog = cache[key] # Get it from the cache - else: - prog = cache[key] = regex.compile(pat) - return prog + if type(pat) != type(''): + return pat # Assume it is a compiled regex + key = (pat, regex.get_syntax()) + if cache.has_key(key): + prog = cache[key] # Get it from the cache + else: + prog = cache[key] = regex.compile(pat) + return prog def clear_cache(): - global cache - cache = {} + global cache + cache = {} # Expand \digit in the replacement. @@ -153,46 +153,46 @@ def clear_cache(): # the \ and the following character are both copied). def expand(repl, regs, str): - if '\\' not in repl: - return repl - new = '' - i = 0 - ord0 = ord('0') - while i < len(repl): - c = repl[i]; i = i+1 - if c != '\\' or i >= len(repl): - new = new + c - else: - c = repl[i]; i = i+1 - if '0' <= c <= '9': - a, b = regs[ord(c)-ord0] - new = new + str[a:b] - elif c == '\\': - new = new + c - else: - new = new + '\\' + c - return new + if '\\' not in repl: + return repl + new = '' + i = 0 + ord0 = ord('0') + while i < len(repl): + c = repl[i]; i = i+1 + if c != '\\' or i >= len(repl): + new = new + c + else: + c = repl[i]; i = i+1 + if '0' <= c <= '9': + a, b = regs[ord(c)-ord0] + new = new + str[a:b] + elif c == '\\': + new = new + c + else: + new = new + '\\' + c + return new # Test program, reads sequences "pat repl str" from stdin. # Optional argument specifies pattern used to split lines. def test(): - import sys - if sys.argv[1:]: - delpat = sys.argv[1] - else: - delpat = '[ \t\n]+' - while 1: - if sys.stdin.isatty(): sys.stderr.write('--> ') - line = sys.stdin.readline() - if not line: break - if line[-1] == '\n': line = line[:-1] - fields = split(line, delpat) - if len(fields) != 3: - print 'Sorry, not three fields' - print 'split:', `fields` - continue - [pat, repl, str] = split(line, delpat) - print 'sub :', `sub(pat, repl, str)` - print 'gsub:', `gsub(pat, repl, str)` + import sys + if sys.argv[1:]: + delpat = sys.argv[1] + else: + delpat = '[ \t\n]+' + while 1: + if sys.stdin.isatty(): sys.stderr.write('--> ') + line = sys.stdin.readline() + if not line: break + if line[-1] == '\n': line = line[:-1] + fields = split(line, delpat) + if len(fields) != 3: + print 'Sorry, not three fields' + print 'split:', `fields` + continue + [pat, repl, str] = split(line, delpat) + print 'sub :', `sub(pat, repl, str)` + print 'gsub:', `gsub(pat, repl, str)` |