summary | refs | log | tree | commit | diff | stats
path: root/Lib/regsub.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/regsub.py')
-rw-r--r-- Lib/regsub.py 200
1 files changed, 100 insertions, 100 deletions
diff --git a/Lib/regsub.py b/Lib/regsub.py
index a0ca8fe..4a36512 100644
--- a/Lib/regsub.py
+++ b/Lib/regsub.py
@@ -12,7 +12,7 @@ splitx(str, pat, maxsplit): split string using pattern as delimiter plus
import warnings
warnings.warn("the regsub module is deprecated; please use re.sub()",
- DeprecationWarning)
+ DeprecationWarning)
# Ignore further deprecation warnings about this module
warnings.filterwarnings("ignore", "", DeprecationWarning, __name__)
@@ -27,12 +27,12 @@ import regex
# compiled pattern.
def sub(pat, repl, str):
- prog = compile(pat)
- if prog.search(str) >= 0:
- regs = prog.regs
- a, b = regs[0]
- str = str[:a] + expand(repl, regs, str) + str[b:]
- return str
+ prog = compile(pat)
+ if prog.search(str) >= 0:
+ regs = prog.regs
+ a, b = regs[0]
+ str = str[:a] + expand(repl, regs, str) + str[b:]
+ return str
# Replace all (non-overlapping) occurrences of pattern pat in string
@@ -41,23 +41,23 @@ def sub(pat, repl, str):
# a previous match, so e.g. gsub('', '-', 'abc') returns '-a-b-c-'.
def gsub(pat, repl, str):
- prog = compile(pat)
- new = ''
- start = 0
- first = 1
- while prog.search(str, start) >= 0:
- regs = prog.regs
- a, b = regs[0]
- if a == b == start and not first:
- if start >= len(str) or prog.search(str, start+1) < 0:
- break
- regs = prog.regs
- a, b = regs[0]
- new = new + str[start:a] + expand(repl, regs, str)
- start = b
- first = 0
- new = new + str[start:]
- return new
+ prog = compile(pat)
+ new = ''
+ start = 0
+ first = 1
+ while prog.search(str, start) >= 0:
+ regs = prog.regs
+ a, b = regs[0]
+ if a == b == start and not first:
+ if start >= len(str) or prog.search(str, start+1) < 0:
+ break
+ regs = prog.regs
+ a, b = regs[0]
+ new = new + str[start:a] + expand(repl, regs, str)
+ start = b
+ first = 0
+ new = new + str[start:]
+ return new
# Split string str in fields separated by delimiters matching pattern
@@ -66,7 +66,7 @@ def gsub(pat, repl, str):
# The optional 3rd argument sets the number of splits that are performed.
def split(str, pat, maxsplit = 0):
- return intsplit(str, pat, maxsplit, 0)
+ return intsplit(str, pat, maxsplit, 0)
# Split string str in fields separated by delimiters matching pattern
# pat. Only non-empty matches for the pattern are considered, so e.g.
@@ -76,42 +76,42 @@ def split(str, pat, maxsplit = 0):
def splitx(str, pat, maxsplit = 0):
- return intsplit(str, pat, maxsplit, 1)
-
+ return intsplit(str, pat, maxsplit, 1)
+
# Internal function used to implement split() and splitx().
def intsplit(str, pat, maxsplit, retain):
- prog = compile(pat)
- res = []
- start = next = 0
- splitcount = 0
- while prog.search(str, next) >= 0:
- regs = prog.regs
- a, b = regs[0]
- if a == b:
- next = next + 1
- if next >= len(str):
- break
- else:
- res.append(str[start:a])
- if retain:
- res.append(str[a:b])
- start = next = b
- splitcount = splitcount + 1
- if (maxsplit and (splitcount >= maxsplit)):
- break
- res.append(str[start:])
- return res
+ prog = compile(pat)
+ res = []
+ start = next = 0
+ splitcount = 0
+ while prog.search(str, next) >= 0:
+ regs = prog.regs
+ a, b = regs[0]
+ if a == b:
+ next = next + 1
+ if next >= len(str):
+ break
+ else:
+ res.append(str[start:a])
+ if retain:
+ res.append(str[a:b])
+ start = next = b
+ splitcount = splitcount + 1
+ if (maxsplit and (splitcount >= maxsplit)):
+ break
+ res.append(str[start:])
+ return res
# Capitalize words split using a pattern
def capwords(str, pat='[^a-zA-Z0-9_]+'):
- import string
- words = splitx(str, pat)
- for i in range(0, len(words), 2):
- words[i] = string.capitalize(words[i])
- return string.joinfields(words, "")
+ import string
+ words = splitx(str, pat)
+ for i in range(0, len(words), 2):
+ words[i] = string.capitalize(words[i])
+ return string.joinfields(words, "")
# Internal subroutines:
@@ -131,19 +131,19 @@ def capwords(str, pat='[^a-zA-Z0-9_]+'):
cache = {}
def compile(pat):
- if type(pat) != type(''):
- return pat # Assume it is a compiled regex
- key = (pat, regex.get_syntax())
- if cache.has_key(key):
- prog = cache[key] # Get it from the cache
- else:
- prog = cache[key] = regex.compile(pat)
- return prog
+ if type(pat) != type(''):
+ return pat # Assume it is a compiled regex
+ key = (pat, regex.get_syntax())
+ if cache.has_key(key):
+ prog = cache[key] # Get it from the cache
+ else:
+ prog = cache[key] = regex.compile(pat)
+ return prog
def clear_cache():
- global cache
- cache = {}
+ global cache
+ cache = {}
# Expand \digit in the replacement.
@@ -153,46 +153,46 @@ def clear_cache():
# the \ and the following character are both copied).
def expand(repl, regs, str):
- if '\\' not in repl:
- return repl
- new = ''
- i = 0
- ord0 = ord('0')
- while i < len(repl):
- c = repl[i]; i = i+1
- if c != '\\' or i >= len(repl):
- new = new + c
- else:
- c = repl[i]; i = i+1
- if '0' <= c <= '9':
- a, b = regs[ord(c)-ord0]
- new = new + str[a:b]
- elif c == '\\':
- new = new + c
- else:
- new = new + '\\' + c
- return new
+ if '\\' not in repl:
+ return repl
+ new = ''
+ i = 0
+ ord0 = ord('0')
+ while i < len(repl):
+ c = repl[i]; i = i+1
+ if c != '\\' or i >= len(repl):
+ new = new + c
+ else:
+ c = repl[i]; i = i+1
+ if '0' <= c <= '9':
+ a, b = regs[ord(c)-ord0]
+ new = new + str[a:b]
+ elif c == '\\':
+ new = new + c
+ else:
+ new = new + '\\' + c
+ return new
# Test program, reads sequences "pat repl str" from stdin.
# Optional argument specifies pattern used to split lines.
def test():
- import sys
- if sys.argv[1:]:
- delpat = sys.argv[1]
- else:
- delpat = '[ \t\n]+'
- while 1:
- if sys.stdin.isatty(): sys.stderr.write('--> ')
- line = sys.stdin.readline()
- if not line: break
- if line[-1] == '\n': line = line[:-1]
- fields = split(line, delpat)
- if len(fields) != 3:
- print 'Sorry, not three fields'
- print 'split:', `fields`
- continue
- [pat, repl, str] = split(line, delpat)
- print 'sub :', `sub(pat, repl, str)`
- print 'gsub:', `gsub(pat, repl, str)`
+ import sys
+ if sys.argv[1:]:
+ delpat = sys.argv[1]
+ else:
+ delpat = '[ \t\n]+'
+ while 1:
+ if sys.stdin.isatty(): sys.stderr.write('--> ')
+ line = sys.stdin.readline()
+ if not line: break
+ if line[-1] == '\n': line = line[:-1]
+ fields = split(line, delpat)
+ if len(fields) != 3:
+ print 'Sorry, not three fields'
+ print 'split:', `fields`
+ continue
+ [pat, repl, str] = split(line, delpat)
+ print 'sub :', `sub(pat, repl, str)`
+ print 'gsub:', `gsub(pat, repl, str)`