diff options
author | Barry Warsaw <barry@python.org> | 1999-10-12 19:54:53 (GMT) |
---|---|---|
committer | Barry Warsaw <barry@python.org> | 1999-10-12 19:54:53 (GMT) |
commit | 226ae6ca122f814dabdc40178c7b9656caf729c2 (patch) | |
tree | abaa15aae569a2334c7516b50ea486ec40bfce66 /Lib/string.py | |
parent | 75260275fe3bcc5d177a1b3ff30fd60681809585 (diff) | |
download | cpython-226ae6ca122f814dabdc40178c7b9656caf729c2.zip cpython-226ae6ca122f814dabdc40178c7b9656caf729c2.tar.gz cpython-226ae6ca122f814dabdc40178c7b9656caf729c2.tar.bz2 |
Mainlining the string_methods branch. See branch revision log
messages for specific changes.
Diffstat (limited to 'Lib/string.py')
-rw-r--r-- | Lib/string.py | 705 |
1 file changed, 275 insertions, 430 deletions
diff --git a/Lib/string.py b/Lib/string.py index e449c20..2c3083e 100644 --- a/Lib/string.py +++ b/Lib/string.py @@ -1,8 +1,9 @@ # module 'string' -- A collection of string operations -# Warning: most of the code you see here isn't normally used nowadays. -# At the end of this file most functions are replaced by built-in -# functions imported from built-in module "strop". +# Warning: most of the code you see here isn't normally used nowadays. With +# Python 1.6, many of these functions are implemented as methods on the +# standard string object. They used to be implemented by a built-in module +# called strop, but strop is now obsolete itself. """Common string manipulations. @@ -30,9 +31,6 @@ octdigits = '01234567' # Case conversion helpers _idmap = '' for i in range(256): _idmap = _idmap + chr(i) -_lower = _idmap[:ord('A')] + lowercase + _idmap[ord('Z')+1:] -_upper = _idmap[:ord('a')] + uppercase + _idmap[ord('z')+1:] -_swapcase = _upper[:ord('A')] + lowercase + _upper[ord('Z')+1:] del i # Backward compatible names for exceptions @@ -43,544 +41,391 @@ atol_error = ValueError # convert UPPER CASE letters to lower case def lower(s): - """lower(s) -> string + """lower(s) -> string - Return a copy of the string s converted to lowercase. + Return a copy of the string s converted to lowercase. - """ - res = '' - for c in s: - res = res + _lower[ord(c)] - return res + """ + return s.lower() # Convert lower case letters to UPPER CASE def upper(s): - """upper(s) -> string + """upper(s) -> string - Return a copy of the string s converted to uppercase. + Return a copy of the string s converted to uppercase. - """ - res = '' - for c in s: - res = res + _upper[ord(c)] - return res + """ + return s.upper() # Swap lower case letters and UPPER CASE def swapcase(s): - """swapcase(s) -> string + """swapcase(s) -> string - Return a copy of the string s with upper case characters - converted to lowercase and vice versa. 
+ Return a copy of the string s with upper case characters + converted to lowercase and vice versa. - """ - res = '' - for c in s: - res = res + _swapcase[ord(c)] - return res + """ + return s.swapcase() # Strip leading and trailing tabs and spaces def strip(s): - """strip(s) -> string + """strip(s) -> string - Return a copy of the string s with leading and trailing - whitespace removed. + Return a copy of the string s with leading and trailing + whitespace removed. - """ - i, j = 0, len(s) - while i < j and s[i] in whitespace: i = i+1 - while i < j and s[j-1] in whitespace: j = j-1 - return s[i:j] + """ + return s.strip() # Strip leading tabs and spaces def lstrip(s): - """lstrip(s) -> string + """lstrip(s) -> string - Return a copy of the string s with leading whitespace removed. + Return a copy of the string s with leading whitespace removed. - """ - i, j = 0, len(s) - while i < j and s[i] in whitespace: i = i+1 - return s[i:j] + """ + return s.lstrip() # Strip trailing tabs and spaces def rstrip(s): - """rstrip(s) -> string + """rstrip(s) -> string - Return a copy of the string s with trailing whitespace - removed. + Return a copy of the string s with trailing whitespace + removed. - """ - i, j = 0, len(s) - while i < j and s[j-1] in whitespace: j = j-1 - return s[i:j] + """ + return s.rstrip() # Split a string into a list of space/tab-separated words # NB: split(s) is NOT the same as splitfields(s, ' ')! def split(s, sep=None, maxsplit=0): - """split(str [,sep [,maxsplit]]) -> list of strings - - Return a list of the words in the string s, using sep as the - delimiter string. If maxsplit is nonzero, splits into at most - maxsplit words If sep is not specified, any whitespace string - is a separator. Maxsplit defaults to 0. 
- - (split and splitfields are synonymous) - - """ - if sep is not None: return splitfields(s, sep, maxsplit) - res = [] - i, n = 0, len(s) - if maxsplit <= 0: maxsplit = n - count = 0 - while i < n: - while i < n and s[i] in whitespace: i = i+1 - if i == n: break - if count >= maxsplit: - res.append(s[i:]) - break - j = i - while j < n and s[j] not in whitespace: j = j+1 - count = count + 1 - res.append(s[i:j]) - i = j - return res - -# Split a list into fields separated by a given string -# NB: splitfields(s, ' ') is NOT the same as split(s)! -# splitfields(s, '') returns [s] (in analogy with split() in nawk) -def splitfields(s, sep=None, maxsplit=0): - """splitfields(str [,sep [,maxsplit]]) -> list of strings - - Return a list of the words in the string s, using sep as the - delimiter string. If maxsplit is nonzero, splits into at most - maxsplit words If sep is not specified, any whitespace string - is a separator. Maxsplit defaults to 0. - - (split and splitfields are synonymous) - - """ - if sep is None: return split(s, None, maxsplit) - res = [] - nsep = len(sep) - if nsep == 0: - return [s] - ns = len(s) - if maxsplit <= 0: maxsplit = ns - i = j = 0 - count = 0 - while j+nsep <= ns: - if s[j:j+nsep] == sep: - count = count + 1 - res.append(s[i:j]) - i = j = j + nsep - if count >= maxsplit: break - else: - j = j + 1 - res.append(s[i:]) - return res - -# Join words with spaces between them -def join(words, sep = ' '): - """join(list [,sep]) -> string + """split(str [,sep [,maxsplit]]) -> list of strings - Return a string composed of the words in list, with - intervening occurences of sep. Sep defaults to a single - space. + Return a list of the words in the string s, using sep as the + delimiter string. If maxsplit is nonzero, splits into at most + maxsplit words If sep is not specified, any whitespace string + is a separator. Maxsplit defaults to 0. 
- (joinfields and join are synonymous) + (split and splitfields are synonymous) - """ - return joinfields(words, sep) + """ + return s.split(sep, maxsplit) +splitfields = split # Join fields with optional separator -def joinfields(words, sep = ' '): - """joinfields(list [,sep]) -> string +def join(words, sep = ' '): + """join(list [,sep]) -> string - Return a string composed of the words in list, with - intervening occurences of sep. The default separator is a - single space. + Return a string composed of the words in list, with + intervening occurences of sep. The default separator is a + single space. - (joinfields and join are synonymous) + (joinfields and join are synonymous) - """ - res = '' - for w in words: - res = res + (sep + w) - return res[len(sep):] + """ + return sep.join(words) +joinfields = join -# Find substring, raise exception if not found -def index(s, sub, i = 0, last=None): - """index(s, sub [,start [,end]]) -> int +# for a little bit of speed +_apply = apply - Return the lowest index in s where substring sub is found, - such that sub is contained within s[start,end]. Optional - arguments start and end are interpreted as in slice notation. +# Find substring, raise exception if not found +def index(s, *args): + """index(s, sub [,start [,end]]) -> int - Raise ValueError if not found. + Like find but raises ValueError when the substring is not found. - """ - if last is None: last = len(s) - res = find(s, sub, i, last) - if res < 0: - raise ValueError, 'substring not found in string.index' - return res + """ + return _apply(s.index, args) # Find last substring, raise exception if not found -def rindex(s, sub, i = 0, last=None): - """rindex(s, sub [,start [,end]]) -> int - - Return the highest index in s where substring sub is found, - such that sub is contained within s[start,end]. Optional - arguments start and end are interpreted as in slice notation. 
+def rindex(s, *args): + """rindex(s, sub [,start [,end]]) -> int - Raise ValueError if not found. + Like rfind but raises ValueError when the substring is not found. - """ - if last is None: last = len(s) - res = rfind(s, sub, i, last) - if res < 0: - raise ValueError, 'substring not found in string.index' - return res + """ + return _apply(s.rindex, args) # Count non-overlapping occurrences of substring -def count(s, sub, i = 0, last=None): - """count(s, sub[, start[,end]]) -> int - - Return the number of occurrences of substring sub in string - s[start:end]. Optional arguments start and end are - interpreted as in slice notation. - - """ - Slen = len(s) # cache this value, for speed - if last is None: - last = Slen - elif last < 0: - last = max(0, last + Slen) - elif last > Slen: - last = Slen - if i < 0: i = max(0, i + Slen) - n = len(sub) - m = last + 1 - n - if n == 0: return m-i - r = 0 - while i < m: - if sub == s[i:i+n]: - r = r+1 - i = i+n - else: - i = i+1 - return r +def count(s, *args): + """count(s, sub[, start[,end]]) -> int + + Return the number of occurrences of substring sub in string + s[start:end]. Optional arguments start and end are + interpreted as in slice notation. + + """ + return _apply(s.count, args) # Find substring, return -1 if not found -def find(s, sub, i = 0, last=None): - """find(s, sub [,start [,end]]) -> in - - Return the lowest index in s where substring sub is found, - such that sub is contained within s[start,end]. Optional - arguments start and end are interpreted as in slice notation. - - Return -1 on failure. 
- - """ - Slen = len(s) # cache this value, for speed - if last is None: - last = Slen - elif last < 0: - last = max(0, last + Slen) - elif last > Slen: - last = Slen - if i < 0: i = max(0, i + Slen) - n = len(sub) - m = last + 1 - n - while i < m: - if sub == s[i:i+n]: return i - i = i+1 - return -1 +def find(s, *args): + """find(s, sub [,start [,end]]) -> in + + Return the lowest index in s where substring sub is found, + such that sub is contained within s[start,end]. Optional + arguments start and end are interpreted as in slice notation. + + Return -1 on failure. + + """ + return _apply(s.find, args) # Find last substring, return -1 if not found -def rfind(s, sub, i = 0, last=None): - """rfind(s, sub [,start [,end]]) -> int - - Return the highest index in s where substring sub is found, - such that sub is contained within s[start,end]. Optional - arguments start and end are interpreted as in slice notation. - - Return -1 on failure. - - """ - Slen = len(s) # cache this value, for speed - if last is None: - last = Slen - elif last < 0: - last = max(0, last + Slen) - elif last > Slen: - last = Slen - if i < 0: i = max(0, i + Slen) - n = len(sub) - m = last + 1 - n - r = -1 - while i < m: - if sub == s[i:i+n]: r = i - i = i+1 - return r - -# "Safe" environment for eval() -_safe_env = {"__builtins__": {}} +def rfind(s, *args): + """rfind(s, sub [,start [,end]]) -> int + + Return the highest index in s where substring sub is found, + such that sub is contained within s[start,end]. Optional + arguments start and end are interpreted as in slice notation. + + Return -1 on failure. + + """ + return _apply(s.rfind, args) + +# for a bit of speed +_float = float +_int = int +_long = long +_StringType = type('') # Convert string to float -_re = None -def atof(str): - """atof(s) -> float - - Return the floating point number represented by the string s. 
- - """ - global _re - if _re is None: - # Don't fail if re doesn't exist -- just skip the syntax check - try: - import re - except ImportError: - _re = 0 - else: - _re = re - sign = '' - s = strip(str) - if s and s[0] in '+-': - sign = s[0] - s = s[1:] - if not s: - raise ValueError, 'non-float argument to string.atof' - while s[0] == '0' and len(s) > 1 and s[1] in digits: s = s[1:] - if _re and not _re.match('[0-9]*(\.[0-9]*)?([eE][-+]?[0-9]+)?$', s): - raise ValueError, 'non-float argument to string.atof' - try: - return float(eval(sign + s, _safe_env)) - except SyntaxError: - raise ValueError, 'non-float argument to string.atof' +def atof(s): + """atof(s) -> float + + Return the floating point number represented by the string s. + + """ + if type(s) == _StringType: + return _float(s) + else: + raise TypeError('argument 1: expected string, %s found' % + type(s).__name__) # Convert string to integer -def atoi(str, base=10): - """atoi(s [,base]) -> int - - Return the integer represented by the string s in the given - base, which defaults to 10. The string s must consist of one - or more digits, possibly preceded by a sign. If base is 0, it - is chosen from the leading characters of s, 0 for octal, 0x or - 0X for hexadecimal. If base is 16, a preceding 0x or 0X is - accepted. - - """ - if base != 10: - # We only get here if strop doesn't define atoi() - raise ValueError, "this string.atoi doesn't support base != 10" - sign = '' - s = strip(str) - if s and s[0] in '+-': - sign = s[0] - s = s[1:] - if not s: - raise ValueError, 'non-integer argument to string.atoi' - while s[0] == '0' and len(s) > 1: s = s[1:] - for c in s: - if c not in digits: - raise ValueError, 'non-integer argument to string.atoi' - return eval(sign + s, _safe_env) +def atoi(*args): + """atoi(s [,base]) -> int + + Return the integer represented by the string s in the given + base, which defaults to 10. The string s must consist of one + or more digits, possibly preceded by a sign. 
If base is 0, it + is chosen from the leading characters of s, 0 for octal, 0x or + 0X for hexadecimal. If base is 16, a preceding 0x or 0X is + accepted. + + """ + try: + s = args[0] + except IndexError: + raise TypeError('function requires at least 1 argument: %d given' % + len(args)) + # Don't catch type error resulting from too many arguments to int(). The + # error message isn't compatible but the error type is, and this function + # is complicated enough already. + if type(s) == _StringType: + return _apply(_int, args) + else: + raise TypeError('argument 1: expected string, %s found' % + type(s).__name__) + # Convert string to long integer -def atol(str, base=10): - """atol(s [,base]) -> long - - Return the long integer represented by the string s in the - given base, which defaults to 10. The string s must consist - of one or more digits, possibly preceded by a sign. If base - is 0, it is chosen from the leading characters of s, 0 for - octal, 0x or 0X for hexadecimal. If base is 16, a preceding - 0x or 0X is accepted. A trailing L or l is not accepted, - unless base is 0. - - """ - if base != 10: - # We only get here if strop doesn't define atol() - raise ValueError, "this string.atol doesn't support base != 10" - sign = '' - s = strip(str) - if s and s[0] in '+-': - sign = s[0] - s = s[1:] - if not s: - raise ValueError, 'non-integer argument to string.atol' - while s[0] == '0' and len(s) > 1: s = s[1:] - for c in s: - if c not in digits: - raise ValueError, 'non-integer argument to string.atol' - return eval(sign + s + 'L', _safe_env) +def atol(*args): + """atol(s [,base]) -> long + + Return the long integer represented by the string s in the + given base, which defaults to 10. The string s must consist + of one or more digits, possibly preceded by a sign. If base + is 0, it is chosen from the leading characters of s, 0 for + octal, 0x or 0X for hexadecimal. If base is 16, a preceding + 0x or 0X is accepted. 
A trailing L or l is not accepted, + unless base is 0. + + """ + try: + s = args[0] + except IndexError: + raise TypeError('function requires at least 1 argument: %d given' % + len(args)) + # Don't catch type error resulting from too many arguments to long(). The + # error message isn't compatible but the error type is, and this function + # is complicated enough already. + if type(s) == _StringType: + return _apply(_long, args) + else: + raise TypeError('argument 1: expected string, %s found' % + type(s).__name__) + # Left-justify a string def ljust(s, width): - """ljust(s, width) -> string + """ljust(s, width) -> string - Return a left-justified version of s, in a field of the - specified width, padded with spaces as needed. The string is - never truncated. + Return a left-justified version of s, in a field of the + specified width, padded with spaces as needed. The string is + never truncated. - """ - n = width - len(s) - if n <= 0: return s - return s + ' '*n + """ + n = width - len(s) + if n <= 0: return s + return s + ' '*n # Right-justify a string def rjust(s, width): - """rjust(s, width) -> string + """rjust(s, width) -> string - Return a right-justified version of s, in a field of the - specified width, padded with spaces as needed. The string is - never truncated. + Return a right-justified version of s, in a field of the + specified width, padded with spaces as needed. The string is + never truncated. - """ - n = width - len(s) - if n <= 0: return s - return ' '*n + s + """ + n = width - len(s) + if n <= 0: return s + return ' '*n + s # Center a string def center(s, width): - """center(s, width) -> string + """center(s, width) -> string - Return a center version of s, in a field of the specified - width. padded with spaces as needed. The string is never - truncated. + Return a center version of s, in a field of the specified + width. padded with spaces as needed. The string is never + truncated. 
- """ - n = width - len(s) - if n <= 0: return s - half = n/2 - if n%2 and width%2: - # This ensures that center(center(s, i), j) = center(s, j) - half = half+1 - return ' '*half + s + ' '*(n-half) + """ + n = width - len(s) + if n <= 0: return s + half = n/2 + if n%2 and width%2: + # This ensures that center(center(s, i), j) = center(s, j) + half = half+1 + return ' '*half + s + ' '*(n-half) # Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03' # Decadent feature: the argument may be a string or a number # (Use of this is deprecated; it should be a string as with ljust c.s.) def zfill(x, width): - """zfill(x, width) -> string + """zfill(x, width) -> string - Pad a numeric string x with zeros on the left, to fill a field - of the specified width. The string x is never truncated. + Pad a numeric string x with zeros on the left, to fill a field + of the specified width. The string x is never truncated. - """ - if type(x) == type(''): s = x - else: s = `x` - n = len(s) - if n >= width: return s - sign = '' - if s[:1] in ('-', '+'): - sign, s = s[0], s[1:] - return sign + '0'*(width-n) + s + """ + if type(x) == type(''): s = x + else: s = `x` + n = len(s) + if n >= width: return s + sign = '' + if s[0] in ('-', '+'): + sign, s = s[0], s[1:] + return sign + '0'*(width-n) + s # Expand tabs in a string. # Doesn't take non-printing chars into account, but does understand \n. def expandtabs(s, tabsize=8): - """expandtabs(s [,tabsize]) -> string - - Return a copy of the string s with all tab characters replaced - by the appropriate number of spaces, depending on the current - column, and the tabsize (default 8). 
- - """ - res = line = '' - for c in s: - if c == '\t': - c = ' '*(tabsize - len(line)%tabsize) - line = line + c - if c == '\n': - res = res + line - line = '' - return res + line + """expandtabs(s [,tabsize]) -> string + + Return a copy of the string s with all tab characters replaced + by the appropriate number of spaces, depending on the current + column, and the tabsize (default 8). + + """ + res = line = '' + for c in s: + if c == '\t': + c = ' '*(tabsize - len(line) % tabsize) + line = line + c + if c == '\n': + res = res + line + line = '' + return res + line # Character translation through look-up table. def translate(s, table, deletions=""): - """translate(s,table [,deletechars]) -> string - - Return a copy of the string s, where all characters occurring - in the optional argument deletechars are removed, and the - remaining characters have been mapped through the given - translation table, which must be a string of length 256. - - """ - if type(table) != type('') or len(table) != 256: - raise TypeError, \ - "translation table must be 256 characters long" - res = "" - for c in s: - if c not in deletions: - res = res + table[ord(c)] - return res + """translate(s,table [,deletechars]) -> string + + Return a copy of the string s, where all characters occurring + in the optional argument deletechars are removed, and the + remaining characters have been mapped through the given + translation table, which must be a string of length 256. + + """ + return s.translate(table, deletions) # Capitalize a string, e.g. "aBc dEf" -> "Abc def". def capitalize(s): - """capitalize(s) -> string + """capitalize(s) -> string - Return a copy of the string s with only its first character - capitalized. + Return a copy of the string s with only its first character + capitalized. - """ - return upper(s[:1]) + lower(s[1:]) + """ + return s.capitalize() # Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def". # See also regsub.capwords(). 
def capwords(s, sep=None): - """capwords(s, [sep]) -> string + """capwords(s, [sep]) -> string - Split the argument into words using split, capitalize each - word using capitalize, and join the capitalized words using - join. Note that this replaces runs of whitespace characters by - a single space. + Split the argument into words using split, capitalize each + word using capitalize, and join the capitalized words using + join. Note that this replaces runs of whitespace characters by + a single space. - """ - return join(map(capitalize, split(s, sep)), sep or ' ') + """ + return join(map(capitalize, s.split(sep)), sep or ' ') # Construct a translation string _idmapL = None def maketrans(fromstr, tostr): - """maketrans(frm, to) -> string - - Return a translation table (a string of 256 bytes long) - suitable for use in string.translate. The strings frm and to - must be of the same length. - - """ - if len(fromstr) != len(tostr): - raise ValueError, "maketrans arguments must have same length" - global _idmapL - if not _idmapL: - _idmapL = map(None, _idmap) - L = _idmapL[:] - fromstr = map(ord, fromstr) - for i in range(len(fromstr)): - L[fromstr[i]] = tostr[i] - return joinfields(L, "") + """maketrans(frm, to) -> string + + Return a translation table (a string of 256 bytes long) + suitable for use in string.translate. The strings frm and to + must be of the same length. + + """ + if len(fromstr) != len(tostr): + raise ValueError, "maketrans arguments must have same length" + global _idmapL + if not _idmapL: + _idmapL = map(None, _idmap) + L = _idmapL[:] + fromstr = map(ord, fromstr) + for i in range(len(fromstr)): + L[fromstr[i]] = tostr[i] + return joinfields(L, "") # Substring replacement (global) -def replace(str, old, new, maxsplit=0): - """replace (str, old, new[, maxsplit]) -> string +def replace(s, old, new, maxsplit=0): + """replace (str, old, new[, maxsplit]) -> string - Return a copy of string str with all occurrences of substring - old replaced by new. 
If the optional argument maxsplit is - given, only the first maxsplit occurrences are replaced. + Return a copy of string str with all occurrences of substring + old replaced by new. If the optional argument maxsplit is + given, only the first maxsplit occurrences are replaced. - """ - return joinfields(splitfields(str, old, maxsplit), new) + """ + return s.replace(old, new, maxsplit) +# XXX: transitional +# +# If string objects do not have methods, then we need to use the old string.py +# library, which uses strop for many more things than just the few outlined +# below. +try: + ''.upper +except AttributeError: + from stringold import * + # Try importing optional built-in module "strop" -- if it exists, # it redefines some string operations that are 100-1000 times faster. # It also defines values for whitespace, lowercase and uppercase # that match <ctype.h>'s definitions. try: - from strop import * - letters = lowercase + uppercase + from strop import maketrans, lowercase, uppercase, whitespace + letters = lowercase + uppercase except ImportError: - pass # Use the original, slow versions + pass # Use the original versions |