diff options
author | Barry Warsaw <barry@python.org> | 1999-10-12 19:54:53 (GMT) |
---|---|---|
committer | Barry Warsaw <barry@python.org> | 1999-10-12 19:54:53 (GMT) |
commit | 226ae6ca122f814dabdc40178c7b9656caf729c2 (patch) | |
tree | abaa15aae569a2334c7516b50ea486ec40bfce66 | |
parent | 75260275fe3bcc5d177a1b3ff30fd60681809585 (diff) | |
download | cpython-226ae6ca122f814dabdc40178c7b9656caf729c2.zip cpython-226ae6ca122f814dabdc40178c7b9656caf729c2.tar.gz cpython-226ae6ca122f814dabdc40178c7b9656caf729c2.tar.bz2 |
Mainlining the string_methods branch. See branch revision log
messages for specific changes.
-rw-r--r-- | Include/floatobject.h | 1 | ||||
-rw-r--r-- | Include/intobject.h | 1 | ||||
-rw-r--r-- | Lib/string.py | 705 | ||||
-rw-r--r-- | Lib/stringold.py | 705 | ||||
-rw-r--r-- | Objects/abstract.c | 180 | ||||
-rw-r--r-- | Objects/floatobject.c | 51 | ||||
-rw-r--r-- | Objects/intobject.c | 43 | ||||
-rw-r--r-- | Objects/stringobject.c | 1045 | ||||
-rw-r--r-- | Python/bltinmodule.c | 109 |
9 files changed, 1818 insertions, 1022 deletions
diff --git a/Include/floatobject.h b/Include/floatobject.h index 4491f59..d8fd376 100644 --- a/Include/floatobject.h +++ b/Include/floatobject.h @@ -50,6 +50,7 @@ extern DL_IMPORT(PyTypeObject) PyFloat_Type; #define PyFloat_Check(op) ((op)->ob_type == &PyFloat_Type) +extern DL_IMPORT(PyObject *) PyFloat_FromString Py_PROTO((PyObject*, char**)); extern DL_IMPORT(PyObject *) PyFloat_FromDouble Py_PROTO((double)); extern DL_IMPORT(double) PyFloat_AsDouble Py_PROTO((PyObject *)); diff --git a/Include/intobject.h b/Include/intobject.h index e6eb49d..35be0ef 100644 --- a/Include/intobject.h +++ b/Include/intobject.h @@ -61,6 +61,7 @@ extern DL_IMPORT(PyTypeObject) PyInt_Type; #define PyInt_Check(op) ((op)->ob_type == &PyInt_Type) +extern DL_IMPORT(PyObject *) PyInt_FromString Py_PROTO((char*, char**, int)); extern DL_IMPORT(PyObject *) PyInt_FromLong Py_PROTO((long)); extern DL_IMPORT(long) PyInt_AsLong Py_PROTO((PyObject *)); extern DL_IMPORT(long) PyInt_GetMax Py_PROTO((void)); diff --git a/Lib/string.py b/Lib/string.py index e449c20..2c3083e 100644 --- a/Lib/string.py +++ b/Lib/string.py @@ -1,8 +1,9 @@ # module 'string' -- A collection of string operations -# Warning: most of the code you see here isn't normally used nowadays. -# At the end of this file most functions are replaced by built-in -# functions imported from built-in module "strop". +# Warning: most of the code you see here isn't normally used nowadays. With +# Python 1.6, many of these functions are implemented as methods on the +# standard string object. They used to be implemented by a built-in module +# called strop, but strop is now obsolete itself. """Common string manipulations. @@ -30,9 +31,6 @@ octdigits = '01234567' # Case conversion helpers _idmap = '' for i in range(256): _idmap = _idmap + chr(i) -_lower = _idmap[:ord('A')] + lowercase + _idmap[ord('Z')+1:] -_upper = _idmap[:ord('a')] + uppercase + _idmap[ord('z')+1:] -_swapcase = _upper[:ord('A')] + lowercase + _upper[ord('Z')+1:] del i # Backward compatible names for exceptions @@ -43,544 +41,391 @@ atol_error = ValueError # convert UPPER CASE letters to lower case def lower(s): - """lower(s) -> string + """lower(s) -> string - Return a copy of the string s converted to lowercase. + Return a copy of the string s converted to lowercase. - """ - res = '' - for c in s: - res = res + _lower[ord(c)] - return res + """ + return s.lower() # Convert lower case letters to UPPER CASE def upper(s): - """upper(s) -> string + """upper(s) -> string - Return a copy of the string s converted to uppercase. + Return a copy of the string s converted to uppercase. - """ - res = '' - for c in s: - res = res + _upper[ord(c)] - return res + """ + return s.upper() # Swap lower case letters and UPPER CASE def swapcase(s): - """swapcase(s) -> string + """swapcase(s) -> string - Return a copy of the string s with upper case characters - converted to lowercase and vice versa. + Return a copy of the string s with upper case characters + converted to lowercase and vice versa. - """ - res = '' - for c in s: - res = res + _swapcase[ord(c)] - return res + """ + return s.swapcase() # Strip leading and trailing tabs and spaces def strip(s): - """strip(s) -> string + """strip(s) -> string - Return a copy of the string s with leading and trailing - whitespace removed. + Return a copy of the string s with leading and trailing + whitespace removed. - """ - i, j = 0, len(s) - while i < j and s[i] in whitespace: i = i+1 - while i < j and s[j-1] in whitespace: j = j-1 - return s[i:j] + """ + return s.strip() # Strip leading tabs and spaces def lstrip(s): - """lstrip(s) -> string + """lstrip(s) -> string - Return a copy of the string s with leading whitespace removed. + Return a copy of the string s with leading whitespace removed. - """ - i, j = 0, len(s) - while i < j and s[i] in whitespace: i = i+1 - return s[i:j] + """ + return s.lstrip() # Strip trailing tabs and spaces def rstrip(s): - """rstrip(s) -> string + """rstrip(s) -> string - Return a copy of the string s with trailing whitespace - removed. + Return a copy of the string s with trailing whitespace + removed. - """ - i, j = 0, len(s) - while i < j and s[j-1] in whitespace: j = j-1 - return s[i:j] + """ + return s.rstrip() # Split a string into a list of space/tab-separated words # NB: split(s) is NOT the same as splitfields(s, ' ')! def split(s, sep=None, maxsplit=0): - """split(str [,sep [,maxsplit]]) -> list of strings - - Return a list of the words in the string s, using sep as the - delimiter string. If maxsplit is nonzero, splits into at most - maxsplit words If sep is not specified, any whitespace string - is a separator. Maxsplit defaults to 0. - - (split and splitfields are synonymous) - - """ - if sep is not None: return splitfields(s, sep, maxsplit) - res = [] - i, n = 0, len(s) - if maxsplit <= 0: maxsplit = n - count = 0 - while i < n: - while i < n and s[i] in whitespace: i = i+1 - if i == n: break - if count >= maxsplit: - res.append(s[i:]) - break - j = i - while j < n and s[j] not in whitespace: j = j+1 - count = count + 1 - res.append(s[i:j]) - i = j - return res - -# Split a list into fields separated by a given string -# NB: splitfields(s, ' ') is NOT the same as split(s)! -# splitfields(s, '') returns [s] (in analogy with split() in nawk) -def splitfields(s, sep=None, maxsplit=0): - """splitfields(str [,sep [,maxsplit]]) -> list of strings - - Return a list of the words in the string s, using sep as the - delimiter string. If maxsplit is nonzero, splits into at most - maxsplit words If sep is not specified, any whitespace string - is a separator. Maxsplit defaults to 0. - - (split and splitfields are synonymous) - - """ - if sep is None: return split(s, None, maxsplit) - res = [] - nsep = len(sep) - if nsep == 0: - return [s] - ns = len(s) - if maxsplit <= 0: maxsplit = ns - i = j = 0 - count = 0 - while j+nsep <= ns: - if s[j:j+nsep] == sep: - count = count + 1 - res.append(s[i:j]) - i = j = j + nsep - if count >= maxsplit: break - else: - j = j + 1 - res.append(s[i:]) - return res - -# Join words with spaces between them -def join(words, sep = ' '): - """join(list [,sep]) -> string + """split(str [,sep [,maxsplit]]) -> list of strings - Return a string composed of the words in list, with - intervening occurences of sep. Sep defaults to a single - space. + Return a list of the words in the string s, using sep as the + delimiter string. If maxsplit is nonzero, splits into at most + maxsplit words If sep is not specified, any whitespace string + is a separator. Maxsplit defaults to 0. - (joinfields and join are synonymous) + (split and splitfields are synonymous) - """ - return joinfields(words, sep) + """ + return s.split(sep, maxsplit) +splitfields = split # Join fields with optional separator -def joinfields(words, sep = ' '): - """joinfields(list [,sep]) -> string +def join(words, sep = ' '): + """join(list [,sep]) -> string - Return a string composed of the words in list, with - intervening occurences of sep. The default separator is a - single space. + Return a string composed of the words in list, with + intervening occurences of sep. The default separator is a + single space. - (joinfields and join are synonymous) + (joinfields and join are synonymous) - """ - res = '' - for w in words: - res = res + (sep + w) - return res[len(sep):] + """ + return sep.join(words) +joinfields = join -# Find substring, raise exception if not found -def index(s, sub, i = 0, last=None): - """index(s, sub [,start [,end]]) -> int +# for a little bit of speed +_apply = apply - Return the lowest index in s where substring sub is found, - such that sub is contained within s[start,end]. Optional - arguments start and end are interpreted as in slice notation. +# Find substring, raise exception if not found +def index(s, *args): + """index(s, sub [,start [,end]]) -> int - Raise ValueError if not found. + Like find but raises ValueError when the substring is not found. - """ - if last is None: last = len(s) - res = find(s, sub, i, last) - if res < 0: - raise ValueError, 'substring not found in string.index' - return res + """ + return _apply(s.index, args) # Find last substring, raise exception if not found -def rindex(s, sub, i = 0, last=None): - """rindex(s, sub [,start [,end]]) -> int - - Return the highest index in s where substring sub is found, - such that sub is contained within s[start,end]. Optional - arguments start and end are interpreted as in slice notation. +def rindex(s, *args): + """rindex(s, sub [,start [,end]]) -> int - Raise ValueError if not found. + Like rfind but raises ValueError when the substring is not found. - """ - if last is None: last = len(s) - res = rfind(s, sub, i, last) - if res < 0: - raise ValueError, 'substring not found in string.index' - return res + """ + return _apply(s.rindex, args) # Count non-overlapping occurrences of substring -def count(s, sub, i = 0, last=None): - """count(s, sub[, start[,end]]) -> int - - Return the number of occurrences of substring sub in string - s[start:end]. Optional arguments start and end are - interpreted as in slice notation. - - """ - Slen = len(s) # cache this value, for speed - if last is None: - last = Slen - elif last < 0: - last = max(0, last + Slen) - elif last > Slen: - last = Slen - if i < 0: i = max(0, i + Slen) - n = len(sub) - m = last + 1 - n - if n == 0: return m-i - r = 0 - while i < m: - if sub == s[i:i+n]: - r = r+1 - i = i+n - else: - i = i+1 - return r +def count(s, *args): + """count(s, sub[, start[,end]]) -> int + + Return the number of occurrences of substring sub in string + s[start:end]. Optional arguments start and end are + interpreted as in slice notation. + + """ + return _apply(s.count, args) # Find substring, return -1 if not found -def find(s, sub, i = 0, last=None): - """find(s, sub [,start [,end]]) -> in - - Return the lowest index in s where substring sub is found, - such that sub is contained within s[start,end]. Optional - arguments start and end are interpreted as in slice notation. - - Return -1 on failure. - - """ - Slen = len(s) # cache this value, for speed - if last is None: - last = Slen - elif last < 0: - last = max(0, last + Slen) - elif last > Slen: - last = Slen - if i < 0: i = max(0, i + Slen) - n = len(sub) - m = last + 1 - n - while i < m: - if sub == s[i:i+n]: return i - i = i+1 - return -1 +def find(s, *args): + """find(s, sub [,start [,end]]) -> in + + Return the lowest index in s where substring sub is found, + such that sub is contained within s[start,end]. Optional + arguments start and end are interpreted as in slice notation. + + Return -1 on failure. + + """ + return _apply(s.find, args) # Find last substring, return -1 if not found -def rfind(s, sub, i = 0, last=None): - """rfind(s, sub [,start [,end]]) -> int - - Return the highest index in s where substring sub is found, - such that sub is contained within s[start,end]. Optional - arguments start and end are interpreted as in slice notation. - - Return -1 on failure. - - """ - Slen = len(s) # cache this value, for speed - if last is None: - last = Slen - elif last < 0: - last = max(0, last + Slen) - elif last > Slen: - last = Slen - if i < 0: i = max(0, i + Slen) - n = len(sub) - m = last + 1 - n - r = -1 - while i < m: - if sub == s[i:i+n]: r = i - i = i+1 - return r - -# "Safe" environment for eval() -_safe_env = {"__builtins__": {}} +def rfind(s, *args): + """rfind(s, sub [,start [,end]]) -> int + + Return the highest index in s where substring sub is found, + such that sub is contained within s[start,end]. Optional + arguments start and end are interpreted as in slice notation. + + Return -1 on failure. + + """ + return _apply(s.rfind, args) + +# for a bit of speed +_float = float +_int = int +_long = long +_StringType = type('') # Convert string to float -_re = None -def atof(str): - """atof(s) -> float - - Return the floating point number represented by the string s. - - """ - global _re - if _re is None: - # Don't fail if re doesn't exist -- just skip the syntax check - try: - import re - except ImportError: - _re = 0 - else: - _re = re - sign = '' - s = strip(str) - if s and s[0] in '+-': - sign = s[0] - s = s[1:] - if not s: - raise ValueError, 'non-float argument to string.atof' - while s[0] == '0' and len(s) > 1 and s[1] in digits: s = s[1:] - if _re and not _re.match('[0-9]*(\.[0-9]*)?([eE][-+]?[0-9]+)?$', s): - raise ValueError, 'non-float argument to string.atof' - try: - return float(eval(sign + s, _safe_env)) - except SyntaxError: - raise ValueError, 'non-float argument to string.atof' +def atof(s): + """atof(s) -> float + + Return the floating point number represented by the string s. + + """ + if type(s) == _StringType: + return _float(s) + else: + raise TypeError('argument 1: expected string, %s found' % + type(s).__name__) # Convert string to integer -def atoi(str, base=10): - """atoi(s [,base]) -> int - - Return the integer represented by the string s in the given - base, which defaults to 10. The string s must consist of one - or more digits, possibly preceded by a sign. If base is 0, it - is chosen from the leading characters of s, 0 for octal, 0x or - 0X for hexadecimal. If base is 16, a preceding 0x or 0X is - accepted. - - """ - if base != 10: - # We only get here if strop doesn't define atoi() - raise ValueError, "this string.atoi doesn't support base != 10" - sign = '' - s = strip(str) - if s and s[0] in '+-': - sign = s[0] - s = s[1:] - if not s: - raise ValueError, 'non-integer argument to string.atoi' - while s[0] == '0' and len(s) > 1: s = s[1:] - for c in s: - if c not in digits: - raise ValueError, 'non-integer argument to string.atoi' - return eval(sign + s, _safe_env) +def atoi(*args): + """atoi(s [,base]) -> int + + Return the integer represented by the string s in the given + base, which defaults to 10. The string s must consist of one + or more digits, possibly preceded by a sign. If base is 0, it + is chosen from the leading characters of s, 0 for octal, 0x or + 0X for hexadecimal. If base is 16, a preceding 0x or 0X is + accepted. + + """ + try: + s = args[0] + except IndexError: + raise TypeError('function requires at least 1 argument: %d given' % + len(args)) + # Don't catch type error resulting from too many arguments to int(). The + # error message isn't compatible but the error type is, and this function + # is complicated enough already. + if type(s) == _StringType: + return _apply(_int, args) + else: + raise TypeError('argument 1: expected string, %s found' % + type(s).__name__) + # Convert string to long integer -def atol(str, base=10): - """atol(s [,base]) -> long - - Return the long integer represented by the string s in the - given base, which defaults to 10. The string s must consist - of one or more digits, possibly preceded by a sign. If base - is 0, it is chosen from the leading characters of s, 0 for - octal, 0x or 0X for hexadecimal. If base is 16, a preceding - 0x or 0X is accepted. A trailing L or l is not accepted, - unless base is 0. - - """ - if base != 10: - # We only get here if strop doesn't define atol() - raise ValueError, "this string.atol doesn't support base != 10" - sign = '' - s = strip(str) - if s and s[0] in '+-': - sign = s[0] - s = s[1:] - if not s: - raise ValueError, 'non-integer argument to string.atol' - while s[0] == '0' and len(s) > 1: s = s[1:] - for c in s: - if c not in digits: - raise ValueError, 'non-integer argument to string.atol' - return eval(sign + s + 'L', _safe_env) +def atol(*args): + """atol(s [,base]) -> long + + Return the long integer represented by the string s in the + given base, which defaults to 10. The string s must consist + of one or more digits, possibly preceded by a sign. If base + is 0, it is chosen from the leading characters of s, 0 for + octal, 0x or 0X for hexadecimal. If base is 16, a preceding + 0x or 0X is accepted. A trailing L or l is not accepted, + unless base is 0. + + """ + try: + s = args[0] + except IndexError: + raise TypeError('function requires at least 1 argument: %d given' % + len(args)) + # Don't catch type error resulting from too many arguments to long(). The + # error message isn't compatible but the error type is, and this function + # is complicated enough already. + if type(s) == _StringType: + return _apply(_long, args) + else: + raise TypeError('argument 1: expected string, %s found' % + type(s).__name__) + # Left-justify a string def ljust(s, width): - """ljust(s, width) -> string + """ljust(s, width) -> string - Return a left-justified version of s, in a field of the - specified width, padded with spaces as needed. The string is - never truncated. + Return a left-justified version of s, in a field of the + specified width, padded with spaces as needed. The string is + never truncated. - """ - n = width - len(s) - if n <= 0: return s - return s + ' '*n + """ + n = width - len(s) + if n <= 0: return s + return s + ' '*n # Right-justify a string def rjust(s, width): - """rjust(s, width) -> string + """rjust(s, width) -> string - Return a right-justified version of s, in a field of the - specified width, padded with spaces as needed. The string is - never truncated. + Return a right-justified version of s, in a field of the + specified width, padded with spaces as needed. The string is + never truncated. - """ - n = width - len(s) - if n <= 0: return s - return ' '*n + s + """ + n = width - len(s) + if n <= 0: return s + return ' '*n + s # Center a string def center(s, width): - """center(s, width) -> string + """center(s, width) -> string - Return a center version of s, in a field of the specified - width. padded with spaces as needed. The string is never - truncated. + Return a center version of s, in a field of the specified + width. padded with spaces as needed. The string is never + truncated. - """ - n = width - len(s) - if n <= 0: return s - half = n/2 - if n%2 and width%2: - # This ensures that center(center(s, i), j) = center(s, j) - half = half+1 - return ' '*half + s + ' '*(n-half) + """ + n = width - len(s) + if n <= 0: return s + half = n/2 + if n%2 and width%2: + # This ensures that center(center(s, i), j) = center(s, j) + half = half+1 + return ' '*half + s + ' '*(n-half) # Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03' # Decadent feature: the argument may be a string or a number # (Use of this is deprecated; it should be a string as with ljust c.s.) def zfill(x, width): - """zfill(x, width) -> string + """zfill(x, width) -> string - Pad a numeric string x with zeros on the left, to fill a field - of the specified width. The string x is never truncated. + Pad a numeric string x with zeros on the left, to fill a field + of the specified width. The string x is never truncated. - """ - if type(x) == type(''): s = x - else: s = `x` - n = len(s) - if n >= width: return s - sign = '' - if s[:1] in ('-', '+'): - sign, s = s[0], s[1:] - return sign + '0'*(width-n) + s + """ + if type(x) == type(''): s = x + else: s = `x` + n = len(s) + if n >= width: return s + sign = '' + if s[0] in ('-', '+'): + sign, s = s[0], s[1:] + return sign + '0'*(width-n) + s # Expand tabs in a string. # Doesn't take non-printing chars into account, but does understand \n. def expandtabs(s, tabsize=8): - """expandtabs(s [,tabsize]) -> string - - Return a copy of the string s with all tab characters replaced - by the appropriate number of spaces, depending on the current - column, and the tabsize (default 8). - - """ - res = line = '' - for c in s: - if c == '\t': - c = ' '*(tabsize - len(line)%tabsize) - line = line + c - if c == '\n': - res = res + line - line = '' - return res + line + """expandtabs(s [,tabsize]) -> string + + Return a copy of the string s with all tab characters replaced + by the appropriate number of spaces, depending on the current + column, and the tabsize (default 8). + + """ + res = line = '' + for c in s: + if c == '\t': + c = ' '*(tabsize - len(line) % tabsize) + line = line + c + if c == '\n': + res = res + line + line = '' + return res + line # Character translation through look-up table. def translate(s, table, deletions=""): - """translate(s,table [,deletechars]) -> string - - Return a copy of the string s, where all characters occurring - in the optional argument deletechars are removed, and the - remaining characters have been mapped through the given - translation table, which must be a string of length 256. - - """ - if type(table) != type('') or len(table) != 256: - raise TypeError, \ - "translation table must be 256 characters long" - res = "" - for c in s: - if c not in deletions: - res = res + table[ord(c)] - return res + """translate(s,table [,deletechars]) -> string + + Return a copy of the string s, where all characters occurring + in the optional argument deletechars are removed, and the + remaining characters have been mapped through the given + translation table, which must be a string of length 256. + + """ + return s.translate(table, deletions) # Capitalize a string, e.g. "aBc dEf" -> "Abc def". def capitalize(s): - """capitalize(s) -> string + """capitalize(s) -> string - Return a copy of the string s with only its first character - capitalized. + Return a copy of the string s with only its first character + capitalized. - """ - return upper(s[:1]) + lower(s[1:]) + """ + return s.capitalize() # Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def". # See also regsub.capwords(). def capwords(s, sep=None): - """capwords(s, [sep]) -> string + """capwords(s, [sep]) -> string - Split the argument into words using split, capitalize each - word using capitalize, and join the capitalized words using - join. Note that this replaces runs of whitespace characters by - a single space. + Split the argument into words using split, capitalize each + word using capitalize, and join the capitalized words using + join. Note that this replaces runs of whitespace characters by + a single space. - """ - return join(map(capitalize, split(s, sep)), sep or ' ') + """ + return join(map(capitalize, s.split(sep)), sep or ' ') # Construct a translation string _idmapL = None def maketrans(fromstr, tostr): - """maketrans(frm, to) -> string - - Return a translation table (a string of 256 bytes long) - suitable for use in string.translate. The strings frm and to - must be of the same length. - - """ - if len(fromstr) != len(tostr): - raise ValueError, "maketrans arguments must have same length" - global _idmapL - if not _idmapL: - _idmapL = map(None, _idmap) - L = _idmapL[:] - fromstr = map(ord, fromstr) - for i in range(len(fromstr)): - L[fromstr[i]] = tostr[i] - return joinfields(L, "") + """maketrans(frm, to) -> string + + Return a translation table (a string of 256 bytes long) + suitable for use in string.translate. The strings frm and to + must be of the same length. + + """ + if len(fromstr) != len(tostr): + raise ValueError, "maketrans arguments must have same length" + global _idmapL + if not _idmapL: + _idmapL = map(None, _idmap) + L = _idmapL[:] + fromstr = map(ord, fromstr) + for i in range(len(fromstr)): + L[fromstr[i]] = tostr[i] + return joinfields(L, "") # Substring replacement (global) -def replace(str, old, new, maxsplit=0): - """replace (str, old, new[, maxsplit]) -> string +def replace(s, old, new, maxsplit=0): + """replace (str, old, new[, maxsplit]) -> string - Return a copy of string str with all occurrences of substring - old replaced by new. If the optional argument maxsplit is - given, only the first maxsplit occurrences are replaced. + Return a copy of string str with all occurrences of substring + old replaced by new. If the optional argument maxsplit is + given, only the first maxsplit occurrences are replaced. - """ - return joinfields(splitfields(str, old, maxsplit), new) + """ + return s.replace(old, new, maxsplit) +# XXX: transitional +# +# If string objects do not have methods, then we need to use the old string.py +# library, which uses strop for many more things than just the few outlined +# below. +try: + ''.upper +except AttributeError: + from stringold import * + # Try importing optional built-in module "strop" -- if it exists, # it redefines some string operations that are 100-1000 times faster. # It also defines values for whitespace, lowercase and uppercase # that match <ctype.h>'s definitions. try: - from strop import * - letters = lowercase + uppercase + from strop import maketrans, lowercase, uppercase, whitespace + letters = lowercase + uppercase except ImportError: - pass # Use the original, slow versions + pass # Use the original versions diff --git a/Lib/stringold.py b/Lib/stringold.py index 92158ee..2c3083e 100644 --- a/Lib/stringold.py +++ b/Lib/stringold.py @@ -1,8 +1,9 @@ # module 'string' -- A collection of string operations -# Warning: most of the code you see here isn't normally used nowadays. -# At the end of this file most functions are replaced by built-in -# functions imported from built-in module "strop". +# Warning: most of the code you see here isn't normally used nowadays. With +# Python 1.6, many of these functions are implemented as methods on the +# standard string object. They used to be implemented by a built-in module +# called strop, but strop is now obsolete itself. """Common string manipulations. @@ -30,9 +31,6 @@ octdigits = '01234567' # Case conversion helpers _idmap = '' for i in range(256): _idmap = _idmap + chr(i) -_lower = _idmap[:ord('A')] + lowercase + _idmap[ord('Z')+1:] -_upper = _idmap[:ord('a')] + uppercase + _idmap[ord('z')+1:] -_swapcase = _upper[:ord('A')] + lowercase + _upper[ord('Z')+1:] del i # Backward compatible names for exceptions @@ -43,544 +41,391 @@ atol_error = ValueError # convert UPPER CASE letters to lower case def lower(s): - """lower(s) -> string + """lower(s) -> string - Return a copy of the string s converted to lowercase. + Return a copy of the string s converted to lowercase. - """ - res = '' - for c in s: - res = res + _lower[ord(c)] - return res + """ + return s.lower() # Convert lower case letters to UPPER CASE def upper(s): - """upper(s) -> string + """upper(s) -> string - Return a copy of the string s converted to uppercase. + Return a copy of the string s converted to uppercase. - """ - res = '' - for c in s: - res = res + _upper[ord(c)] - return res + """ + return s.upper() # Swap lower case letters and UPPER CASE def swapcase(s): - """swapcase(s) -> string + """swapcase(s) -> string - Return a copy of the string s with upper case characters - converted to lowercase and vice versa. + Return a copy of the string s with upper case characters + converted to lowercase and vice versa. - """ - res = '' - for c in s: - res = res + _swapcase[ord(c)] - return res + """ + return s.swapcase() # Strip leading and trailing tabs and spaces def strip(s): - """strip(s) -> string + """strip(s) -> string - Return a copy of the string s with leading and trailing - whitespace removed. + Return a copy of the string s with leading and trailing + whitespace removed. - """ - i, j = 0, len(s) - while i < j and s[i] in whitespace: i = i+1 - while i < j and s[j-1] in whitespace: j = j-1 - return s[i:j] + """ + return s.strip() # Strip leading tabs and spaces def lstrip(s): - """lstrip(s) -> string + """lstrip(s) -> string - Return a copy of the string s with leading whitespace removed. + Return a copy of the string s with leading whitespace removed. - """ - i, j = 0, len(s) - while i < j and s[i] in whitespace: i = i+1 - return s[i:j] + """ + return s.lstrip() # Strip trailing tabs and spaces def rstrip(s): - """rstrip(s) -> string + """rstrip(s) -> string - Return a copy of the string s with trailing whitespace - removed. + Return a copy of the string s with trailing whitespace + removed. - """ - i, j = 0, len(s) - while i < j and s[j-1] in whitespace: j = j-1 - return s[i:j] + """ + return s.rstrip() # Split a string into a list of space/tab-separated words # NB: split(s) is NOT the same as splitfields(s, ' ')! def split(s, sep=None, maxsplit=0): - """split(str [,sep [,maxsplit]]) -> list of strings - - Return a list of the words in the string s, using sep as the - delimiter string. If maxsplit is nonzero, splits into at most - maxsplit words If sep is not specified, any whitespace string - is a separator. Maxsplit defaults to 0. - - (split and splitfields are synonymous) - - """ - if sep is not None: return splitfields(s, sep, maxsplit) - res = [] - i, n = 0, len(s) - if maxsplit <= 0: maxsplit = n - count = 0 - while i < n: - while i < n and s[i] in whitespace: i = i+1 - if i == n: break - if count >= maxsplit: - res.append(s[i:]) - break - j = i - while j < n and s[j] not in whitespace: j = j+1 - count = count + 1 - res.append(s[i:j]) - i = j - return res - -# Split a list into fields separated by a given string -# NB: splitfields(s, ' ') is NOT the same as split(s)! -# splitfields(s, '') returns [s] (in analogy with split() in nawk) -def splitfields(s, sep=None, maxsplit=0): - """splitfields(str [,sep [,maxsplit]]) -> list of strings - - Return a list of the words in the string s, using sep as the - delimiter string. If maxsplit is nonzero, splits into at most - maxsplit words If sep is not specified, any whitespace string - is a separator. Maxsplit defaults to 0. - - (split and splitfields are synonymous) - - """ - if sep is None: return split(s, None, maxsplit) - res = [] - nsep = len(sep) - if nsep == 0: - return [s] - ns = len(s) - if maxsplit <= 0: maxsplit = ns - i = j = 0 - count = 0 - while j+nsep <= ns: - if s[j:j+nsep] == sep: - count = count + 1 - res.append(s[i:j]) - i = j = j + nsep - if count >= maxsplit: break - else: - j = j + 1 - res.append(s[i:]) - return res - -# Join words with spaces between them -def join(words, sep = ' '): - """join(list [,sep]) -> string + """split(str [,sep [,maxsplit]]) -> list of strings - Return a string composed of the words in list, with - intervening occurences of sep. Sep defaults to a single - space. + Return a list of the words in the string s, using sep as the + delimiter string. If maxsplit is nonzero, splits into at most + maxsplit words If sep is not specified, any whitespace string + is a separator. Maxsplit defaults to 0. - (joinfields and join are synonymous) + (split and splitfields are synonymous) - """ - return joinfields(words, sep) + """ + return s.split(sep, maxsplit) +splitfields = split # Join fields with optional separator -def joinfields(words, sep = ' '): - """joinfields(list [,sep]) -> string +def join(words, sep = ' '): + """join(list [,sep]) -> string - Return a string composed of the words in list, with - intervening occurences of sep. The default separator is a - single space. + Return a string composed of the words in list, with + intervening occurences of sep. The default separator is a + single space. - (joinfields and join are synonymous) + (joinfields and join are synonymous) - """ - res = '' - for w in words: - res = res + (sep + w) - return res[len(sep):] + """ + return sep.join(words) +joinfields = join -# Find substring, raise exception if not found -def index(s, sub, i = 0, last=None): - """index(s, sub [,start [,end]]) -> int +# for a little bit of speed +_apply = apply - Return the lowest index in s where substring sub is found, - such that sub is contained within s[start,end]. Optional - arguments start and end are interpreted as in slice notation. +# Find substring, raise exception if not found +def index(s, *args): + """index(s, sub [,start [,end]]) -> int - Raise ValueError if not found. + Like find but raises ValueError when the substring is not found. - """ - if last is None: last = len(s) - res = find(s, sub, i, last) - if res < 0: - raise ValueError, 'substring not found in string.index' - return res + """ + return _apply(s.index, args) # Find last substring, raise exception if not found -def rindex(s, sub, i = 0, last=None): - """rindex(s, sub [,start [,end]]) -> int - - Return the highest index in s where substring sub is found, - such that sub is contained within s[start,end]. Optional - arguments start and end are interpreted as in slice notation. +def rindex(s, *args): + """rindex(s, sub [,start [,end]]) -> int - Raise ValueError if not found. + Like rfind but raises ValueError when the substring is not found. - """ - if last is None: last = len(s) - res = rfind(s, sub, i, last) - if res < 0: - raise ValueError, 'substring not found in string.index' - return res + """ + return _apply(s.rindex, args) # Count non-overlapping occurrences of substring -def count(s, sub, i = 0, last=None): - """count(s, sub[, start[,end]]) -> int - - Return the number of occurrences of substring sub in string - s[start:end]. Optional arguments start and end are - interpreted as in slice notation. - - """ - Slen = len(s) # cache this value, for speed - if last is None: - last = Slen - elif last < 0: - last = max(0, last + Slen) - elif last > Slen: - last = Slen - if i < 0: i = max(0, i + Slen) - n = len(sub) - m = last + 1 - n - if n == 0: return m-i - r = 0 - while i < m: - if sub == s[i:i+n]: - r = r+1 - i = i+n - else: - i = i+1 - return r +def count(s, *args): + """count(s, sub[, start[,end]]) -> int + + Return the number of occurrences of substring sub in string + s[start:end]. Optional arguments start and end are + interpreted as in slice notation. + + """ + return _apply(s.count, args) # Find substring, return -1 if not found -def find(s, sub, i = 0, last=None): - """find(s, sub [,start [,end]]) -> in - - Return the lowest index in s where substring sub is found, - such that sub is contained within s[start,end]. Optional - arguments start and end are interpreted as in slice notation. - - Return -1 on failure. - - """ - Slen = len(s) # cache this value, for speed - if last is None: - last = Slen - elif last < 0: - last = max(0, last + Slen) - elif last > Slen: - last = Slen - if i < 0: i = max(0, i + Slen) - n = len(sub) - m = last + 1 - n - while i < m: - if sub == s[i:i+n]: return i - i = i+1 - return -1 +def find(s, *args): + """find(s, sub [,start [,end]]) -> in + + Return the lowest index in s where substring sub is found, + such that sub is contained within s[start,end]. Optional + arguments start and end are interpreted as in slice notation. + + Return -1 on failure. + + """ + return _apply(s.find, args) # Find last substring, return -1 if not found -def rfind(s, sub, i = 0, last=None): - """rfind(s, sub [,start [,end]]) -> int - - Return the highest index in s where substring sub is found, - such that sub is contained within s[start,end]. Optional - arguments start and end are interpreted as in slice notation. - - Return -1 on failure. - - """ - Slen = len(s) # cache this value, for speed - if last is None: - last = Slen - elif last < 0: - last = max(0, last + Slen) - elif last > Slen: - last = Slen - if i < 0: i = max(0, i + Slen) - n = len(sub) - m = last + 1 - n - r = -1 - while i < m: - if sub == s[i:i+n]: r = i - i = i+1 - return r - -# "Safe" environment for eval() -_safe_env = {"__builtins__": {}} +def rfind(s, *args): + """rfind(s, sub [,start [,end]]) -> int + + Return the highest index in s where substring sub is found, + such that sub is contained within s[start,end]. Optional + arguments start and end are interpreted as in slice notation. + + Return -1 on failure. + + """ + return _apply(s.rfind, args) + +# for a bit of speed +_float = float +_int = int +_long = long +_StringType = type('') # Convert string to float -_re = None -def atof(str): - """atof(s) -> float - - Return the floating point number represented by the string s. - - """ - global _re - if _re is None: - # Don't fail if re doesn't exist -- just skip the syntax check - try: - import re - except ImportError: - _re = 0 - else: - _re = re - sign = '' - s = strip(str) - if s and s[0] in '+-': - sign = s[0] - s = s[1:] - if not s: - raise ValueError, 'non-float argument to string.atof' - while s[0] == '0' and len(s) > 1 and s[1] in digits: s = s[1:] - if _re and not _re.match('[0-9]*(\.[0-9]*)?([eE][-+]?[0-9]+)?$', s): - raise ValueError, 'non-float argument to string.atof' - try: - return float(eval(sign + s, _safe_env)) - except SyntaxError: - raise ValueError, 'non-float argument to string.atof' +def atof(s): + """atof(s) -> float + + Return the floating point number represented by the string s. + + """ + if type(s) == _StringType: + return _float(s) + else: + raise TypeError('argument 1: expected string, %s found' % + type(s).__name__) # Convert string to integer -def atoi(str, base=10): - """atoi(s [,base]) -> int - - Return the integer represented by the string s in the given - base, which defaults to 10. The string s must consist of one - or more digits, possibly preceded by a sign. If base is 0, it - is chosen from the leading characters of s, 0 for octal, 0x or - 0X for hexadecimal. If base is 16, a preceding 0x or 0X is - accepted. - - """ - if base != 10: - # We only get here if strop doesn't define atoi() - raise ValueError, "this string.atoi doesn't support base != 10" - sign = '' - s = strip(str) - if s and s[0] in '+-': - sign = s[0] - s = s[1:] - if not s: - raise ValueError, 'non-integer argument to string.atoi' - while s[0] == '0' and len(s) > 1: s = s[1:] - for c in s: - if c not in digits: - raise ValueError, 'non-integer argument to string.atoi' - return eval(sign + s, _safe_env) +def atoi(*args): + """atoi(s [,base]) -> int + + Return the integer represented by the string s in the given + base, which defaults to 10. The string s must consist of one + or more digits, possibly preceded by a sign. If base is 0, it + is chosen from the leading characters of s, 0 for octal, 0x or + 0X for hexadecimal. If base is 16, a preceding 0x or 0X is + accepted. + + """ + try: + s = args[0] + except IndexError: + raise TypeError('function requires at least 1 argument: %d given' % + len(args)) + # Don't catch type error resulting from too many arguments to int(). The + # error message isn't compatible but the error type is, and this function + # is complicated enough already. + if type(s) == _StringType: + return _apply(_int, args) + else: + raise TypeError('argument 1: expected string, %s found' % + type(s).__name__) + # Convert string to long integer -def atol(str, base=10): - """atol(s [,base]) -> long - - Return the long integer represented by the string s in the - given base, which defaults to 10. The string s must consist - of one or more digits, possibly preceded by a sign. If base - is 0, it is chosen from the leading characters of s, 0 for - octal, 0x or 0X for hexadecimal. If base is 16, a preceding - 0x or 0X is accepted. A trailing L or l is not accepted, - unless base is 0. - - """ - if base != 10: - # We only get here if strop doesn't define atol() - raise ValueError, "this string.atol doesn't support base != 10" - sign = '' - s = strip(str) - if s and s[0] in '+-': - sign = s[0] - s = s[1:] - if not s: - raise ValueError, 'non-integer argument to string.atol' - while s[0] == '0' and len(s) > 1: s = s[1:] - for c in s: - if c not in digits: - raise ValueError, 'non-integer argument to string.atol' - return eval(sign + s + 'L', _safe_env) +def atol(*args): + """atol(s [,base]) -> long + + Return the long integer represented by the string s in the + given base, which defaults to 10. The string s must consist + of one or more digits, possibly preceded by a sign. If base + is 0, it is chosen from the leading characters of s, 0 for + octal, 0x or 0X for hexadecimal. If base is 16, a preceding + 0x or 0X is accepted. A trailing L or l is not accepted, + unless base is 0. + + """ + try: + s = args[0] + except IndexError: + raise TypeError('function requires at least 1 argument: %d given' % + len(args)) + # Don't catch type error resulting from too many arguments to long(). The + # error message isn't compatible but the error type is, and this function + # is complicated enough already. + if type(s) == _StringType: + return _apply(_long, args) + else: + raise TypeError('argument 1: expected string, %s found' % + type(s).__name__) + # Left-justify a string def ljust(s, width): - """ljust(s, width) -> string + """ljust(s, width) -> string - Return a left-justified version of s, in a field of the - specified width, padded with spaces as needed. The string is - never truncated. + Return a left-justified version of s, in a field of the + specified width, padded with spaces as needed. The string is + never truncated. - """ - n = width - len(s) - if n <= 0: return s - return s + ' '*n + """ + n = width - len(s) + if n <= 0: return s + return s + ' '*n # Right-justify a string def rjust(s, width): - """rjust(s, width) -> string + """rjust(s, width) -> string - Return a right-justified version of s, in a field of the - specified width, padded with spaces as needed. The string is - never truncated. + Return a right-justified version of s, in a field of the + specified width, padded with spaces as needed. The string is + never truncated. - """ - n = width - len(s) - if n <= 0: return s - return ' '*n + s + """ + n = width - len(s) + if n <= 0: return s + return ' '*n + s # Center a string def center(s, width): - """center(s, width) -> string + """center(s, width) -> string - Return a center version of s, in a field of the specified - width. padded with spaces as needed. The string is never - truncated. + Return a center version of s, in a field of the specified + width. padded with spaces as needed. The string is never + truncated. - """ - n = width - len(s) - if n <= 0: return s - half = n/2 - if n%2 and width%2: - # This ensures that center(center(s, i), j) = center(s, j) - half = half+1 - return ' '*half + s + ' '*(n-half) + """ + n = width - len(s) + if n <= 0: return s + half = n/2 + if n%2 and width%2: + # This ensures that center(center(s, i), j) = center(s, j) + half = half+1 + return ' '*half + s + ' '*(n-half) # Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03' # Decadent feature: the argument may be a string or a number # (Use of this is deprecated; it should be a string as with ljust c.s.) def zfill(x, width): - """zfill(x, width) -> string + """zfill(x, width) -> string - Pad a numeric string x with zeros on the left, to fill a field - of the specified width. The string x is never truncated. + Pad a numeric string x with zeros on the left, to fill a field + of the specified width. The string x is never truncated. - """ - if type(x) == type(''): s = x - else: s = `x` - n = len(s) - if n >= width: return s - sign = '' - if s[0] in ('-', '+'): - sign, s = s[0], s[1:] - return sign + '0'*(width-n) + s + """ + if type(x) == type(''): s = x + else: s = `x` + n = len(s) + if n >= width: return s + sign = '' + if s[0] in ('-', '+'): + sign, s = s[0], s[1:] + return sign + '0'*(width-n) + s # Expand tabs in a string. # Doesn't take non-printing chars into account, but does understand \n. def expandtabs(s, tabsize=8): - """expandtabs(s [,tabsize]) -> string - - Return a copy of the string s with all tab characters replaced - by the appropriate number of spaces, depending on the current - column, and the tabsize (default 8). - - """ - res = line = '' - for c in s: - if c == '\t': - c = ' '*(tabsize - len(line)%tabsize) - line = line + c - if c == '\n': - res = res + line - line = '' - return res + line + """expandtabs(s [,tabsize]) -> string + + Return a copy of the string s with all tab characters replaced + by the appropriate number of spaces, depending on the current + column, and the tabsize (default 8). + + """ + res = line = '' + for c in s: + if c == '\t': + c = ' '*(tabsize - len(line) % tabsize) + line = line + c + if c == '\n': + res = res + line + line = '' + return res + line # Character translation through look-up table. def translate(s, table, deletions=""): - """translate(s,table [,deletechars]) -> string - - Return a copy of the string s, where all characters occurring - in the optional argument deletechars are removed, and the - remaining characters have been mapped through the given - translation table, which must be a string of length 256. - - """ - if type(table) != type('') or len(table) != 256: - raise TypeError, \ - "translation table must be 256 characters long" - res = "" - for c in s: - if c not in deletions: - res = res + table[ord(c)] - return res + """translate(s,table [,deletechars]) -> string + + Return a copy of the string s, where all characters occurring + in the optional argument deletechars are removed, and the + remaining characters have been mapped through the given + translation table, which must be a string of length 256. + + """ + return s.translate(table, deletions) # Capitalize a string, e.g. "aBc dEf" -> "Abc def". def capitalize(s): - """capitalize(s) -> string + """capitalize(s) -> string - Return a copy of the string s with only its first character - capitalized. + Return a copy of the string s with only its first character + capitalized. - """ - return upper(s[:1]) + lower(s[1:]) + """ + return s.capitalize() # Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def". # See also regsub.capwords(). def capwords(s, sep=None): - """capwords(s, [sep]) -> string + """capwords(s, [sep]) -> string - Split the argument into words using split, capitalize each - word using capitalize, and join the capitalized words using - join. Note that this replaces runs of whitespace characters by - a single space. + Split the argument into words using split, capitalize each + word using capitalize, and join the capitalized words using + join. Note that this replaces runs of whitespace characters by + a single space. - """ - return join(map(capitalize, split(s, sep)), sep or ' ') + """ + return join(map(capitalize, s.split(sep)), sep or ' ') # Construct a translation string _idmapL = None def maketrans(fromstr, tostr): - """maketrans(frm, to) -> string - - Return a translation table (a string of 256 bytes long) - suitable for use in string.translate. The strings frm and to - must be of the same length. - - """ - if len(fromstr) != len(tostr): - raise ValueError, "maketrans arguments must have same length" - global _idmapL - if not _idmapL: - _idmapL = map(None, _idmap) - L = _idmapL[:] - fromstr = map(ord, fromstr) - for i in range(len(fromstr)): - L[fromstr[i]] = tostr[i] - return joinfields(L, "") + """maketrans(frm, to) -> string + + Return a translation table (a string of 256 bytes long) + suitable for use in string.translate. The strings frm and to + must be of the same length. + + """ + if len(fromstr) != len(tostr): + raise ValueError, "maketrans arguments must have same length" + global _idmapL + if not _idmapL: + _idmapL = map(None, _idmap) + L = _idmapL[:] + fromstr = map(ord, fromstr) + for i in range(len(fromstr)): + L[fromstr[i]] = tostr[i] + return joinfields(L, "") # Substring replacement (global) -def replace(str, old, new, maxsplit=0): - """replace (str, old, new[, maxsplit]) -> string +def replace(s, old, new, maxsplit=0): + """replace (str, old, new[, maxsplit]) -> string - Return a copy of string str with all occurrences of substring - old replaced by new. If the optional argument maxsplit is - given, only the first maxsplit occurrences are replaced. + Return a copy of string str with all occurrences of substring + old replaced by new. If the optional argument maxsplit is + given, only the first maxsplit occurrences are replaced. - """ - return joinfields(splitfields(str, old, maxsplit), new) + """ + return s.replace(old, new, maxsplit) +# XXX: transitional +# +# If string objects do not have methods, then we need to use the old string.py +# library, which uses strop for many more things than just the few outlined +# below. +try: + ''.upper +except AttributeError: + from stringold import * + # Try importing optional built-in module "strop" -- if it exists, # it redefines some string operations that are 100-1000 times faster. # It also defines values for whitespace, lowercase and uppercase # that match <ctype.h>'s definitions. try: - from strop import * - letters = lowercase + uppercase + from strop import maketrans, lowercase, uppercase, whitespace + letters = lowercase + uppercase except ImportError: - pass # Use the original, slow versions + pass # Use the original versions diff --git a/Objects/abstract.c b/Objects/abstract.c index be986f5..c120769 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -53,123 +53,6 @@ null_error() return NULL; } -/* Copied with modifications from stropmodule.c: atoi, atof, atol */ - -static PyObject * -int_from_string(v) - PyObject *v; -{ - char *s, *end; - long x; - char buffer[256]; /* For errors */ - - s = PyString_AS_STRING(v); - while (*s && isspace(Py_CHARMASK(*s))) - s++; - errno = 0; - x = PyOS_strtol(s, &end, 10); - if (end == s || !isdigit(end[-1])) - goto bad; - while (*end && isspace(Py_CHARMASK(*end))) - end++; - if (*end != '\0') { - bad: - sprintf(buffer, "invalid literal for int(): %.200s", s); - PyErr_SetString(PyExc_ValueError, buffer); - return NULL; - } - else if (end != PyString_AS_STRING(v) + PyString_GET_SIZE(v)) { - PyErr_SetString(PyExc_ValueError, - "null byte in argument for int()"); - return NULL; - } - else if (errno != 0) { - sprintf(buffer, "int() literal too large: %.200s", s); - PyErr_SetString(PyExc_ValueError, buffer); - return NULL; - } - return PyInt_FromLong(x); -} - -static PyObject * -long_from_string(v) - PyObject *v; -{ - char *s, *end; - PyObject *x; - char buffer[256]; /* For errors */ - - s = PyString_AS_STRING(v); - while (*s && isspace(Py_CHARMASK(*s))) - s++; - x = PyLong_FromString(s, &end, 10); - if (x == NULL) { - if (PyErr_ExceptionMatches(PyExc_ValueError)) - goto bad; - return NULL; - } - while (*end && isspace(Py_CHARMASK(*end))) - end++; - if (*end != '\0') { - bad: - sprintf(buffer, "invalid literal for long(): %.200s", s); - PyErr_SetString(PyExc_ValueError, buffer); - Py_XDECREF(x); - return NULL; - } - else if (end != PyString_AS_STRING(v) + PyString_GET_SIZE(v)) { - PyErr_SetString(PyExc_ValueError, - "null byte in argument for long()"); - return NULL; - } - return x; -} - -static PyObject * -float_from_string(v) - PyObject *v; -{ - extern double strtod Py_PROTO((const char *, char **)); - char *s, *last, *end; - double x; - char buffer[256]; /* For errors */ - - s = PyString_AS_STRING(v); - last = s + PyString_GET_SIZE(v); - while (*s && isspace(Py_CHARMASK(*s))) - s++; - if (s[0] == '\0') { - PyErr_SetString(PyExc_ValueError, "empty string for float()"); - return NULL; - } - errno = 0; - PyFPE_START_PROTECT("float_from_string", return 0) - x = strtod(s, &end); - PyFPE_END_PROTECT(x) - /* Believe it or not, Solaris 2.6 can move end *beyond* the null - byte at the end of the string, when the input is inf(inity) */ - if (end > last) - end = last; - while (*end && isspace(Py_CHARMASK(*end))) - end++; - if (*end != '\0') { - sprintf(buffer, "invalid literal for float(): %.200s", s); - PyErr_SetString(PyExc_ValueError, buffer); - return NULL; - } - else if (end != PyString_AS_STRING(v) + PyString_GET_SIZE(v)) { - PyErr_SetString(PyExc_ValueError, - "null byte in argument for float()"); - return NULL; - } - else if (errno != 0) { - sprintf(buffer, "float() literal too large: %.200s", s); - PyErr_SetString(PyExc_ValueError, buffer); - return NULL; - } - return PyFloat_FromDouble(x); -} - /* Operations on any object */ int @@ -713,7 +596,7 @@ PyNumber_Int(o) if (o == NULL) return null_error(); if (PyString_Check(o)) - return int_from_string(o); + return PyInt_FromString(PyString_AS_STRING(o), NULL, 10); m = o->ob_type->tp_as_number; if (m && m->nb_int) return m->nb_int(o); @@ -721,6 +604,61 @@ PyNumber_Int(o) return type_error("object can't be converted to int"); } +/* There are two C API functions for converting a string to a long, + * PyNumber_Long() and PyLong_FromString(). Both are used in builtin_long, + * reachable from Python with the built-in function long(). + * + * The difference is this: PyNumber_Long will raise an exception when the + * string cannot be converted to a long. The most common situation is + * where a float string is passed in; this raises a ValueError. + * PyLong_FromString does not raise an exception; it silently truncates the + * float to an integer. + * + * You can see the different behavior from Python with the following: + * + * long('9.5') + * => ValueError: invalid literal for long(): 9.5 + * + * long('9.5', 10) + * => 9L + * + * The first example ends up calling PyNumber_Long(), while the second one + * calls PyLong_FromString(). + */ +static PyObject * +long_from_string(v) + PyObject *v; +{ + char *s, *end; + PyObject *x; + char buffer[256]; /* For errors */ + + s = PyString_AS_STRING(v); + while (*s && isspace(Py_CHARMASK(*s))) + s++; + x = PyLong_FromString(s, &end, 10); + if (x == NULL) { + if (PyErr_ExceptionMatches(PyExc_ValueError)) + goto bad; + return NULL; + } + while (*end && isspace(Py_CHARMASK(*end))) + end++; + if (*end != '\0') { + bad: + sprintf(buffer, "invalid literal for long(): %.200s", s); + PyErr_SetString(PyExc_ValueError, buffer); + Py_XDECREF(x); + return NULL; + } + else if (end != PyString_AS_STRING(v) + PyString_GET_SIZE(v)) { + PyErr_SetString(PyExc_ValueError, + "null byte in argument for long()"); + return NULL; + } + return x; +} + PyObject * PyNumber_Long(o) PyObject *o; @@ -730,6 +668,10 @@ PyNumber_Long(o) if (o == NULL) return null_error(); if (PyString_Check(o)) + /* need to do extra error checking that PyLong_FromString() + * doesn't do. In particular long('9.5') must raise an + * exception, not truncate the float. + */ return long_from_string(o); m = o->ob_type->tp_as_number; if (m && m->nb_long) @@ -747,7 +689,7 @@ PyNumber_Float(o) if (o == NULL) return null_error(); if (PyString_Check(o)) - return float_from_string(o); + return PyFloat_FromString(o, NULL); m = o->ob_type->tp_as_number; if (m && m->nb_float) return m->nb_float(o); diff --git a/Objects/floatobject.c b/Objects/floatobject.c index ba37309..cb5d9e3 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -149,6 +149,57 @@ PyFloat_FromDouble(fval) return (PyObject *) op; } +PyObject * +PyFloat_FromString(v, pend) + PyObject *v; + char **pend; +{ + extern double strtod Py_PROTO((const char *, char **)); + char *s, *last, *end; + double x; + char buffer[256]; /* For errors */ + + if (!PyString_Check(v)) + return NULL; + s = PyString_AS_STRING(v); + + last = s + PyString_GET_SIZE(v); + while (*s && isspace(Py_CHARMASK(*s))) + s++; + if (s[0] == '\0') { + PyErr_SetString(PyExc_ValueError, "empty string for float()"); + return NULL; + } + errno = 0; + PyFPE_START_PROTECT("PyFloat_FromString", return 0) + x = strtod(s, &end); + PyFPE_END_PROTECT(x) + /* Believe it or not, Solaris 2.6 can move end *beyond* the null + byte at the end of the string, when the input is inf(inity) */ + if (end > last) + end = last; + while (*end && isspace(Py_CHARMASK(*end))) + end++; + if (*end != '\0') { + sprintf(buffer, "invalid literal for float(): %.200s", s); + PyErr_SetString(PyExc_ValueError, buffer); + return NULL; + } + else if (end != PyString_AS_STRING(v) + PyString_GET_SIZE(v)) { + PyErr_SetString(PyExc_ValueError, + "null byte in argument for float()"); + return NULL; + } + else if (errno != 0) { + sprintf(buffer, "float() literal too large: %.200s", s); + PyErr_SetString(PyExc_ValueError, buffer); + return NULL; + } + if (pend) + *pend = end; + return PyFloat_FromDouble(x); +} + static void float_dealloc(op) PyFloatObject *op; diff --git a/Objects/intobject.c b/Objects/intobject.c index f2d77e1..45c2186 100644 --- a/Objects/intobject.c +++ b/Objects/intobject.c @@ -32,6 +32,7 @@ PERFORMANCE OF THIS SOFTWARE. /* Integer object implementation */ #include "Python.h" +#include <ctype.h> #ifdef HAVE_LIMITS_H #include <limits.h> @@ -218,6 +219,48 @@ PyInt_AsLong(op) return val; } +PyObject * +PyInt_FromString(s, pend, base) + char *s; + char **pend; + int base; +{ + char *end; + long x; + char buffer[256]; /* For errors */ + + if ((base != 0 && base < 2) || base > 36) { + PyErr_SetString(PyExc_ValueError, "invalid base for int()"); + return NULL; + } + + while (*s && isspace(Py_CHARMASK(*s))) + s++; + errno = 0; + if (base == 0 && s[0] == '0') + x = (long) PyOS_strtoul(s, &end, base); + else + x = PyOS_strtol(s, &end, base); + if (end == s || !isalnum(end[-1])) + goto bad; + while (*end && isspace(Py_CHARMASK(*end))) + end++; + if (*end != '\0') { + bad: + sprintf(buffer, "invalid literal for int(): %.200s", s); + PyErr_SetString(PyExc_ValueError, buffer); + return NULL; + } + else if (errno != 0) { + sprintf(buffer, "int() literal too large: %.200s", s); + PyErr_SetString(PyExc_ValueError, buffer); + return NULL; + } + if (pend) + *pend = end; + return PyInt_FromLong(x); +} + /* Methods */ /* ARGSUSED */ diff --git a/Objects/stringobject.c b/Objects/stringobject.c index eecb006..264ed9a 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -525,6 +525,1049 @@ static PyBufferProcs string_as_buffer = { (getcharbufferproc)string_buffer_getcharbuf, }; + + +#define LEFTSTRIP 0 +#define RIGHTSTRIP 1 +#define BOTHSTRIP 2 + + +static PyObject * +split_whitespace(s, len, maxsplit) + char *s; + int len; + int maxsplit; +{ + int i = 0, j, err; + int countsplit = 0; + PyObject* item; + PyObject *list = PyList_New(0); + + if (list == NULL) + return NULL; + + while (i < len) { + while (i < len && isspace(Py_CHARMASK(s[i]))) { + i = i+1; + } + j = i; + while (i < len && !isspace(Py_CHARMASK(s[i]))) { + i = i+1; + } + if (j < i) { + item = PyString_FromStringAndSize(s+j, (int)(i-j)); + if (item == NULL) + goto finally; + + err = PyList_Append(list, item); + Py_DECREF(item); + if (err < 0) + goto finally; + + countsplit++; + while (i < len && isspace(Py_CHARMASK(s[i]))) { + i = i+1; + } + if (maxsplit && (countsplit >= maxsplit) && i < len) { + item = PyString_FromStringAndSize( + s+i, (int)(len - i)); + if (item == NULL) + goto finally; + + err = PyList_Append(list, item); + Py_DECREF(item); + if (err < 0) + goto finally; + + i = len; + } + } + } + return list; + finally: + Py_DECREF(list); + return NULL; +} + + +static char split__doc__[] = +"S.split([sep [,maxsplit]]) -> list of strings\n\ +\n\ +Return a list of the words in the string S, using sep as the\n\ +delimiter string. If maxsplit is nonzero, splits into at most\n\ +maxsplit words If sep is not specified, any whitespace string\n\ +is a separator. Maxsplit defaults to 0."; + +static PyObject * +string_split(self, args) + PyStringObject *self; + PyObject *args; +{ + int len = PyString_GET_SIZE(self), n, i, j, err; + int splitcount, maxsplit; + char *s = PyString_AS_STRING(self), *sub; + PyObject *list, *item; + + sub = NULL; + n = 0; + splitcount = 0; + maxsplit = 0; + if (!PyArg_ParseTuple(args, "|z#i", &sub, &n, &maxsplit)) + return NULL; + if (sub == NULL) + return split_whitespace(s, len, maxsplit); + if (n == 0) { + PyErr_SetString(PyExc_ValueError, "empty separator"); + return NULL; + } + + list = PyList_New(0); + if (list == NULL) + return NULL; + + i = j = 0; + while (i+n <= len) { + if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) { + item = PyString_FromStringAndSize(s+j, (int)(i-j)); + if (item == NULL) + goto fail; + err = PyList_Append(list, item); + Py_DECREF(item); + if (err < 0) + goto fail; + i = j = i + n; + splitcount++; + if (maxsplit && (splitcount >= maxsplit)) + break; + } + else + i++; + } + item = PyString_FromStringAndSize(s+j, (int)(len-j)); + if (item == NULL) + goto fail; + err = PyList_Append(list, item); + Py_DECREF(item); + if (err < 0) + goto fail; + + return list; + + fail: + Py_DECREF(list); + return NULL; +} + + +static char join__doc__[] = +"S.join(sequence) -> string\n\ +\n\ +Return a string which is the concatenation of the string representation\n\ +of every element in the sequence. The separator between elements is S."; + +static PyObject * +string_join(self, args) + PyStringObject *self; + PyObject *args; +{ + char *sep = PyString_AS_STRING(self); + int seplen = PyString_GET_SIZE(self); + PyObject *res = NULL; + int reslen = 0; + char *p; + int seqlen = 0; + int sz = 100; + int i, slen; + PyObject *seq; + + if (!PyArg_ParseTuple(args, "O", &seq)) + return NULL; + + seqlen = PySequence_Length(seq); + if (seqlen < 0 && PyErr_Occurred()) + return NULL; + + if (seqlen == 1) { + /* Optimization if there's only one item */ + PyObject *item = PySequence_GetItem(seq, 0); + PyObject *stritem = PyObject_Str(item); + Py_DECREF(item); + return stritem; + } + if (!(res = PyString_FromStringAndSize((char*)NULL, sz))) + return NULL; + p = PyString_AsString(res); + + /* optimize for lists. all others (tuples and arbitrary sequences) + * just use the abstract interface. + */ + if (PyList_Check(seq)) { + for (i = 0; i < seqlen; i++) { + PyObject *item = PyList_GET_ITEM(seq, i); + PyObject *sitem = PyObject_Str(item); + if (!sitem) + goto finally; + slen = PyString_GET_SIZE(sitem); + while (reslen + slen + seplen >= sz) { + if (_PyString_Resize(&res, sz*2)) + goto finally; + sz *= 2; + p = PyString_AsString(res) + reslen; + } + if (i > 0) { + memcpy(p, sep, seplen); + p += seplen; + reslen += seplen; + } + memcpy(p, PyString_AS_STRING(sitem), slen); + p += slen; + reslen += slen; + } + } + else { + for (i = 0; i < seqlen; i++) { + PyObject *item = PySequence_GetItem(seq, i); + PyObject *sitem; + if (!item || !(sitem = PyObject_Str(item))) { + Py_XDECREF(item); + goto finally; + } + slen = PyString_GET_SIZE(sitem); + while (reslen + slen + seplen >= sz) { + if (_PyString_Resize(&res, sz*2)) + goto finally; + sz *= 2; + p = PyString_AsString(res) + reslen; + } + if (i > 0) { + memcpy(p, sep, seplen); + p += seplen; + reslen += seplen; + } + memcpy(p, PyString_AS_STRING(sitem), slen); + p += slen; + reslen += slen; + } + } + if (_PyString_Resize(&res, reslen)) + goto finally; + return res; + + finally: + Py_DECREF(res); + return NULL; +} + + + +static long +string_find_internal(self, args) + PyStringObject *self; + PyObject *args; +{ + char *s = PyString_AS_STRING(self), *sub; + int len = PyString_GET_SIZE(self); + int n, i = 0, last = INT_MAX; + + if (!PyArg_ParseTuple(args, "t#|ii", &sub, &n, &i, &last)) + return -2; + + if (last > len) + last = len; + if (last < 0) + last += len; + if (last < 0) + last = 0; + if (i < 0) + i += len; + if (i < 0) + i = 0; + + if (n == 0 && i <= last) + return (long)i; + + last -= n; + for (; i <= last; ++i) + if (s[i] == sub[0] && + (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0)) + return (long)i; + + return -1; +} + + +static char find__doc__[] = +"S.find(sub [,start [,end]]) -> int\n\ +\n\ +Return the lowest index in S where substring sub is found,\n\ +such that sub is contained within s[start,end]. Optional\n\ +arguments start and end are interpreted as in slice notation.\n\ +\n\ +Return -1 on failure."; + +static PyObject * +string_find(self, args) + PyStringObject *self; + PyObject *args; +{ + long result = string_find_internal(self, args); + if (result == -2) + return NULL; + return PyInt_FromLong(result); +} + + +static char index__doc__[] = +"S.index(sub [,start [,end]]) -> int\n\ +\n\ +Like S.find() but raise ValueError when the substring is not found."; + +static PyObject * +string_index(self, args) + PyStringObject *self; + PyObject *args; +{ + long result = string_find_internal(self, args); + if (result == -2) + return NULL; + if (result == -1) { + PyErr_SetString(PyExc_ValueError, + "substring not found in string.index"); + return NULL; + } + return PyInt_FromLong(result); +} + + +static long +string_rfind_internal(self, args) + PyStringObject *self; + PyObject *args; +{ + char *s = PyString_AS_STRING(self), *sub; + int len = PyString_GET_SIZE(self), n, j; + int i = 0, last = INT_MAX; + + if (!PyArg_ParseTuple(args, "t#|ii", &sub, &n, &i, &last)) + return -2; + + if (last > len) + last = len; + if (last < 0) + last += len; + if (last < 0) + last = 0; + if (i < 0) + i += len; + if (i < 0) + i = 0; + + if (n == 0 && i <= last) + return (long)last; + + for (j = last-n; j >= i; --j) + if (s[j] == sub[0] && + (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0)) + return (long)j; + + return -1; +} + + +static char rfind__doc__[] = +"S.rfind(sub [,start [,end]]) -> int\n\ +\n\ +Return the highest index in S where substring sub is found,\n\ +such that sub is contained within s[start,end]. Optional\n\ +arguments start and end are interpreted as in slice notation.\n\ +\n\ +Return -1 on failure."; + +static PyObject * +string_rfind(self, args) + PyStringObject *self; + PyObject *args; +{ + long result = string_rfind_internal(self, args); + if (result == -2) + return NULL; + return PyInt_FromLong(result); +} + + +static char rindex__doc__[] = +"S.rindex(sub [,start [,end]]) -> int\n\ +\n\ +Like S.rfind() but raise ValueError when the substring is not found."; + +static PyObject * +string_rindex(self, args) + PyStringObject *self; + PyObject *args; +{ + long result = string_rfind_internal(self, args); + if (result == -2) + return NULL; + if (result == -1) { + PyErr_SetString(PyExc_ValueError, + "substring not found in string.rindex"); + return NULL; + } + return PyInt_FromLong(result); +} + + +static PyObject * +do_strip(self, args, striptype) + PyStringObject *self; + PyObject *args; + int striptype; +{ + char *s = PyString_AS_STRING(self); + int len = PyString_GET_SIZE(self), i, j; + PyObject *scobj = NULL; + int count = -1; + + if (!PyArg_ParseTuple(args, "|Oi", scobj, count)) + return NULL; + + i = 0; + if (striptype != RIGHTSTRIP) { + while (i < len && isspace(Py_CHARMASK(s[i]))) { + i++; + } + } + + j = len; + if (striptype != LEFTSTRIP) { + do { + j--; + } while (j >= i && isspace(Py_CHARMASK(s[j]))); + j++; + } + + if (i == 0 && j == len) { + Py_INCREF(self); + return (PyObject*)self; + } + else + return PyString_FromStringAndSize(s+i, j-i); +} + + +static char strip__doc__[] = +"S.strip() -> string\n\ +\n\ +Return a copy of the string S with leading and trailing\n\ +whitespace removed."; + +static PyObject * +string_strip(self, args) + PyStringObject *self; + PyObject *args; +{ + return do_strip(self, args, BOTHSTRIP); +} + + +static char lstrip__doc__[] = +"S.lstrip() -> string\n\ +\n\ +Return a copy of the string S with leading whitespace removed."; + +static PyObject * +string_lstrip(self, args) + PyStringObject *self; + PyObject *args; +{ + return do_strip(self, args, LEFTSTRIP); +} + + +static char rstrip__doc__[] = +"S.rstrip() -> string\n\ +\n\ +Return a copy of the string S with trailing whitespace removed."; + +static PyObject * +string_rstrip(self, args) + PyStringObject *self; + PyObject *args; +{ + return do_strip(self, args, RIGHTSTRIP); +} + + +static char lower__doc__[] = +"S.lower() -> string\n\ +\n\ +Return a copy of the string S converted to lowercase."; + +static PyObject * +string_lower(self, args) + PyStringObject *self; + PyObject *args; +{ + char *s = PyString_AS_STRING(self), *s_new; + int i, n = PyString_GET_SIZE(self); + PyObject *new; + + if (!PyArg_ParseTuple(args, "")) + return NULL; + new = PyString_FromStringAndSize(NULL, n); + if (new == NULL) + return NULL; + s_new = PyString_AsString(new); + for (i = 0; i < n; i++) { + int c = Py_CHARMASK(*s++); + if (isupper(c)) { + *s_new = tolower(c); + } else + *s_new = c; + s_new++; + } + return new; +} + + +static char upper__doc__[] = +"S.upper() -> string\n\ +\n\ +Return a copy of the string S converted to uppercase."; + +static PyObject * +string_upper(self, args) + PyStringObject *self; + PyObject *args; +{ + char *s = PyString_AS_STRING(self), *s_new; + int i, n = PyString_GET_SIZE(self); + PyObject *new; + + if (!PyArg_ParseTuple(args, "")) + return NULL; + new = PyString_FromStringAndSize(NULL, n); + if (new == NULL) + return NULL; + s_new = PyString_AsString(new); + for (i = 0; i < n; i++) { + int c = Py_CHARMASK(*s++); + if (islower(c)) { + *s_new = toupper(c); + } else + *s_new = c; + s_new++; + } + return new; +} + + +static char capitalize__doc__[] = +"S.capitalize() -> string\n\ +\n\ +Return a copy of the string S with only its first character\n\ +capitalized."; + +static PyObject * +string_capitalize(self, args) + PyStringObject *self; + PyObject *args; +{ + char *s = PyString_AS_STRING(self), *s_new; + int i, n = PyString_GET_SIZE(self); + PyObject *new; + + if (!PyArg_ParseTuple(args, "")) + return NULL; + new = PyString_FromStringAndSize(NULL, n); + if (new == NULL) + return NULL; + s_new = PyString_AsString(new); + if (0 < n) { + int c = Py_CHARMASK(*s++); + if (islower(c)) + *s_new = toupper(c); + else + *s_new = c; + s_new++; + } + for (i = 1; i < n; i++) { + int c = Py_CHARMASK(*s++); + if (isupper(c)) + *s_new = tolower(c); + else + *s_new = c; + s_new++; + } + return new; +} + + +static char count__doc__[] = +"S.count(sub[, start[, end]]) -> int\n\ +\n\ +Return the number of occurrences of substring sub in string\n\ +S[start:end]. Optional arguments start and end are\n\ +interpreted as in slice notation."; + +static PyObject * +string_count(self, args) + PyStringObject *self; + PyObject *args; +{ + char *s = PyString_AS_STRING(self), *sub; + int len = PyString_GET_SIZE(self), n; + int i = 0, last = INT_MAX; + int m, r; + + if (!PyArg_ParseTuple(args, "t#|ii", &sub, &n, &i, &last)) + return NULL; + if (last > len) + last = len; + if (last < 0) + last += len; + if (last < 0) + last = 0; + if (i < 0) + i += len; + if (i < 0) + i = 0; + m = last + 1 - n; + if (n == 0) + return PyInt_FromLong((long) (m-i)); + + r = 0; + while (i < m) { + if (!memcmp(s+i, sub, n)) { + r++; + i += n; + } else { + i++; + } + } + return PyInt_FromLong((long) r); +} + + +static char swapcase__doc__[] = +"S.swapcase() -> string\n\ +\n\ +Return a copy of the string S with upper case characters\n\ +converted to lowercase and vice versa."; + +static PyObject * +string_swapcase(self, args) + PyStringObject *self; + PyObject *args; +{ + char *s = PyString_AS_STRING(self), *s_new; + int i, n = PyString_GET_SIZE(self); + PyObject *new; + + if (!PyArg_ParseTuple(args, "")) + return NULL; + new = PyString_FromStringAndSize(NULL, n); + if (new == NULL) + return NULL; + s_new = PyString_AsString(new); + for (i = 0; i < n; i++) { + int c = Py_CHARMASK(*s++); + if (islower(c)) { + *s_new = toupper(c); + } + else if (isupper(c)) { + *s_new = tolower(c); + } + else + *s_new = c; + s_new++; + } + return new; +} + + +static char translate__doc__[] = +"S.translate(table [,deletechars]) -> string\n\ +\n\ +Return a copy of the string S, where all characters occurring\n\ +in the optional argument deletechars are removed, and the\n\ +remaining characters have been mapped through the given\n\ +translation table, which must be a string of length 256."; + +static PyObject * +string_translate(self, args) + PyStringObject *self; + PyObject *args; +{ + register char *input, *table, *output; + register int i, c, changed = 0; + PyObject *input_obj = (PyObject*)self; + char *table1, *output_start, *del_table=NULL; + int inlen, tablen, dellen = 0; + PyObject *result; + int trans_table[256]; + + if (!PyArg_ParseTuple(args, "t#|t#", + &table1, &tablen, &del_table, &dellen)) + return NULL; + if (tablen != 256) { + PyErr_SetString(PyExc_ValueError, + "translation table must be 256 characters long"); + return NULL; + } + + table = table1; + inlen = PyString_Size(input_obj); + result = PyString_FromStringAndSize((char *)NULL, inlen); + if (result == NULL) + return NULL; + output_start = output = PyString_AsString(result); + input = PyString_AsString(input_obj); + + if (dellen == 0) { + /* If no deletions are required, use faster code */ + for (i = inlen; --i >= 0; ) { + c = Py_CHARMASK(*input++); + if (Py_CHARMASK((*output++ = table[c])) != c) + changed = 1; + } + if (changed) + return result; + Py_DECREF(result); + Py_INCREF(input_obj); + return input_obj; + } + + for (i = 0; i < 256; i++) + trans_table[i] = Py_CHARMASK(table[i]); + + for (i = 0; i < dellen; i++) + trans_table[(int) Py_CHARMASK(del_table[i])] = -1; + + for (i = inlen; --i >= 0; ) { + c = Py_CHARMASK(*input++); + if (trans_table[c] != -1) + if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c) + continue; + changed = 1; + } + if (!changed) { + Py_DECREF(result); + Py_INCREF(input_obj); + return input_obj; + } + /* Fix the size of the resulting string */ + if (inlen > 0 &&_PyString_Resize(&result, output-output_start)) + return NULL; + return result; +} + + +/* What follows is used for implementing replace(). Perry Stoll. */ + +/* + mymemfind + + strstr replacement for arbitrary blocks of memory. + + Locates the first occurance in the memory pointed to by MEM of the + contents of memory pointed to by PAT. Returns the index into MEM if + found, or -1 if not found. If len of PAT is greater than length of + MEM, the function returns -1. +*/ +static int +mymemfind(mem, len, pat, pat_len) + char *mem; + int len; + char *pat; + int pat_len; +{ + register int ii; + + /* pattern can not occur in the last pat_len-1 chars */ + len -= pat_len; + + for (ii = 0; ii <= len; ii++) { + if (mem[ii] == pat[0] && + (pat_len == 1 || + memcmp(&mem[ii+1], &pat[1], pat_len-1) == 0)) { + return ii; + } + } + return -1; +} + +/* + mymemcnt + + Return the number of distinct times PAT is found in MEM. + meaning mem=1111 and pat==11 returns 2. + mem=11111 and pat==11 also return 2. + */ +static int +mymemcnt(mem, len, pat, pat_len) + char *mem; + int len; + char *pat; + int pat_len; +{ + register int offset = 0; + int nfound = 0; + + while (len >= 0) { + offset = mymemfind(mem, len, pat, pat_len); + if (offset == -1) + break; + mem += offset + pat_len; + len -= offset + pat_len; + nfound++; + } + return nfound; +} + +/* + mymemreplace + + Return a string in which all occurences of PAT in memory STR are + replaced with SUB. + + If length of PAT is less than length of STR or there are no occurences + of PAT in STR, then the original string is returned. Otherwise, a new + string is allocated here and returned. + + on return, out_len is: + the length of output string, or + -1 if the input string is returned, or + unchanged if an error occurs (no memory). + + return value is: + the new string allocated locally, or + NULL if an error occurred. +*/ +static char * +mymemreplace(str, len, pat, pat_len, sub, sub_len, count, out_len) + char *str; + int len; /* input string */ + char *pat; + int pat_len; /* pattern string to find */ + char *sub; + int sub_len; /* substitution string */ + int count; /* number of replacements, 0 == all */ + int *out_len; + +{ + char *out_s; + char *new_s; + int nfound, offset, new_len; + + if (len == 0 || pat_len > len) + goto return_same; + + /* find length of output string */ + nfound = mymemcnt(str, len, pat, pat_len); + if (count > 0) + nfound = nfound > count ? count : nfound; + if (nfound == 0) + goto return_same; + new_len = len + nfound*(sub_len - pat_len); + + new_s = (char *)malloc(new_len); + if (new_s == NULL) return NULL; + + *out_len = new_len; + out_s = new_s; + + while (len > 0) { + /* find index of next instance of pattern */ + offset = mymemfind(str, len, pat, pat_len); + /* if not found, break out of loop */ + if (offset == -1) break; + + /* copy non matching part of input string */ + memcpy(new_s, str, offset); /* copy part of str before pat */ + str += offset + pat_len; /* move str past pattern */ + len -= offset + pat_len; /* reduce length of str remaining */ + + /* copy substitute into the output string */ + new_s += offset; /* move new_s to dest for sub string */ + memcpy(new_s, sub, sub_len); /* copy substring into new_s */ + new_s += sub_len; /* offset new_s past sub string */ + + /* break when we've done count replacements */ + if (--count == 0) break; + } + /* copy any remaining values into output string */ + if (len > 0) + memcpy(new_s, str, len); + return out_s; + + return_same: + *out_len = -1; + return str; +} + + +static char replace__doc__[] = +"S.replace (old, new[, maxsplit]) -> string\n\ +\n\ +Return a copy of string S with all occurrences of substring\n\ +old replaced by new. If the optional argument maxsplit is\n\ +given, only the first maxsplit occurrences are replaced."; + +static PyObject * +string_replace(self, args) + PyStringObject *self; + PyObject *args; +{ + char *str = PyString_AS_STRING(self), *pat,*sub,*new_s; + int len = PyString_GET_SIZE(self), pat_len,sub_len,out_len; + int count = 0; + PyObject *new; + + if (!PyArg_ParseTuple(args, "t#t#|i", + &pat, &pat_len, &sub, &sub_len, &count)) + return NULL; + if (pat_len <= 0) { + PyErr_SetString(PyExc_ValueError, "empty pattern string"); + return NULL; + } + new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len); + if (new_s == NULL) { + PyErr_NoMemory(); + return NULL; + } + if (out_len == -1) { + /* we're returning another reference to self */ + new = (PyObject*)self; + Py_INCREF(new); + } + else { + new = PyString_FromStringAndSize(new_s, out_len); + free(new_s); + } + return new; +} + + +static char startswith__doc__[] = +"S.startswith(prefix[, start[, end]]) -> int\n\ +\n\ +Return 1 if S starts with the specified prefix, otherwise return 0. With\n\ +optional start, test S beginning at that position. With optional end, stop\n\ +comparing S at that position."; + +static PyObject * +string_startswith(self, args) + PyStringObject *self; + PyObject *args; +{ + char* str = PyString_AS_STRING(self); + int len = PyString_GET_SIZE(self); + char* prefix; + int plen; + int start = 0; + int end = -1; + + if (!PyArg_ParseTuple(args, "t#|ii", &prefix, &plen, &start, &end)) + return NULL; + + /* adopt Java semantics for index out of range. it is legal for + * offset to be == plen, but this only returns true if prefix is + * the empty string. + */ + if (start < 0 || start+plen > len) + return PyInt_FromLong(0); + + if (!memcmp(str+start, prefix, plen)) { + /* did the match end after the specified end? */ + if (end < 0) + return PyInt_FromLong(1); + else if (end - start < plen) + return PyInt_FromLong(0); + else + return PyInt_FromLong(1); + } + else return PyInt_FromLong(0); +} + + +static char endswith__doc__[] = +"S.endswith(suffix[, start[, end]]) -> int\n\ +\n\ +Return 1 if S ends with the specified suffix, otherwise return 0. With\n\ +optional start, test S beginning at that position. With optional end, stop\n\ +comparing S at that position."; + +static PyObject * +string_endswith(self, args) + PyStringObject *self; + PyObject *args; +{ + char* str = PyString_AS_STRING(self); + int len = PyString_GET_SIZE(self); + char* suffix; + int plen; + int start = 0; + int end = -1; + int lower, upper; + + if (!PyArg_ParseTuple(args, "t#|ii", &suffix, &plen, &start, &end)) + return NULL; + + if (start < 0 || start > len || plen > len) + return PyInt_FromLong(0); + + upper = (end >= 0 && end <= len) ? end : len; + lower = (upper - plen) > start ? (upper - plen) : start; + + if (upper-lower >= plen && !memcmp(str+lower, suffix, plen)) + return PyInt_FromLong(1); + else return PyInt_FromLong(0); +} + + + +static PyMethodDef +string_methods[] = { + /* counterparts of the obsolete stropmodule functions */ + {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__}, + {"count", (PyCFunction)string_count, 1, count__doc__}, + {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__}, + {"find", (PyCFunction)string_find, 1, find__doc__}, + {"index", (PyCFunction)string_index, 1, index__doc__}, + {"join", (PyCFunction)string_join, 1, join__doc__}, + {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__}, + {"lower", (PyCFunction)string_lower, 1, lower__doc__}, + /* maketrans */ + {"replace", (PyCFunction)string_replace, 1, replace__doc__}, + {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__}, + {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__}, + {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__}, + {"split", (PyCFunction)string_split, 1, split__doc__}, + {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__}, + {"strip", (PyCFunction)string_strip, 1, strip__doc__}, + {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__}, + {"translate", (PyCFunction)string_translate, 1, strip__doc__}, + {"upper", (PyCFunction)string_upper, 1, upper__doc__}, + /* TBD */ +/* {"ljust" (PyCFunction)string_ljust, 1, ljust__doc__}, */ +/* {"rjust" (PyCFunction)string_rjust, 1, rjust__doc__}, */ +/* {"center" (PyCFunction)string_center, 1, center__doc__}, */ +/* {"zfill" (PyCFunction)string_zfill, 1, zfill__doc__}, */ +/* {"expandtabs" (PyCFunction)string_expandtabs, 1, ljust__doc__}, */ +/* {"capwords" (PyCFunction)string_capwords, 1, capwords__doc__}, */ + {NULL, NULL} /* sentinel */ +}; + +static PyObject * +string_getattr(s, name) + PyStringObject *s; + char *name; +{ + return Py_FindMethod(string_methods, (PyObject*)s, name); +} + + PyTypeObject PyString_Type = { PyObject_HEAD_INIT(&PyType_Type) 0, @@ -533,7 +1576,7 @@ PyTypeObject PyString_Type = { sizeof(char), (destructor)string_dealloc, /*tp_dealloc*/ (printfunc)string_print, /*tp_print*/ - 0, /*tp_getattr*/ + (getattrfunc)string_getattr, /*tp_getattr*/ 0, /*tp_setattr*/ (cmpfunc)string_compare, /*tp_compare*/ (reprfunc)string_repr, /*tp_repr*/ diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 9bb8784..c220d841 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -812,24 +812,6 @@ globals and locals. If only globals is given, locals defaults to it."; static PyObject * -builtin_float(self, args) - PyObject *self; - PyObject *args; -{ - PyObject *v; - - if (!PyArg_ParseTuple(args, "O:float", &v)) - return NULL; - return PyNumber_Float(v); -} - -static char float_doc[] = -"float(x) -> floating point number\n\ -\n\ -Convert a string or number to a floating point number, if possible."; - - -static PyObject * builtin_getattr(self, args) PyObject *self; PyObject *args; @@ -1251,17 +1233,79 @@ builtin_int(self, args) PyObject *args; { PyObject *v; + int base = -909; /* unlikely! */ - if (!PyArg_ParseTuple(args, "O:int", &v)) + if (!PyArg_ParseTuple(args, "O|i:int", &v, &base)) return NULL; - return PyNumber_Int(v); + if (base == -909) + return PyNumber_Int(v); + else if (!PyString_Check(v)) { + PyErr_SetString(PyExc_TypeError, + "can't convert non-string with explicit base"); + return NULL; + } + return PyInt_FromString(PyString_AS_STRING(v), NULL, base); } static char int_doc[] = -"int(x) -> integer\n\ +"int(x[, base]) -> integer\n\ +\n\ +Convert a string or number to an integer, if possible. A floating point\n\ +argument will be truncated towards zero (this does not include a string\n\ +representation of a floating point number!) When converting a string, use\n\ +the optional base. It is an error to supply a base when converting a\n\ +non-string."; + + +static PyObject * +builtin_long(self, args) + PyObject *self; + PyObject *args; +{ + PyObject *v; + int base = -909; /* unlikely! */ + + if (!PyArg_ParseTuple(args, "O|i:long", &v, &base)) + return NULL; + if (base == -909) + return PyNumber_Long(v); + else if (!PyString_Check(v)) { + PyErr_SetString(PyExc_TypeError, + "can't convert non-string with explicit base"); + return NULL; + } + return PyLong_FromString(PyString_AS_STRING(v), NULL, base); +} + +static char long_doc[] = +"long(x) -> long integer\n\ +long(x, base) -> long integer\n\ +\n\ +Convert a string or number to a long integer, if possible. A floating\n\ +point argument will be truncated towards zero (this does not include a\n\ +string representation of a floating point number!) When converting a\n\ +string, use the given base. It is an error to supply a base when\n\ +converting a non-string."; + + +static PyObject * +builtin_float(self, args) + PyObject *self; + PyObject *args; +{ + PyObject *v; + + if (!PyArg_ParseTuple(args, "O:float", &v)) + return NULL; + if (PyString_Check(v)) + return PyFloat_FromString(v, NULL); + return PyNumber_Float(v); +} + +static char float_doc[] = +"float(x) -> floating point number\n\ \n\ -Convert a string or number to an integer, if possible.\n\ -A floating point argument will be truncated towards zero."; +Convert a string or number to a floating point number, if possible."; static PyObject * @@ -1352,25 +1396,6 @@ Return the dictionary containing the current scope's local variables."; static PyObject * -builtin_long(self, args) - PyObject *self; - PyObject *args; -{ - PyObject *v; - - if (!PyArg_ParseTuple(args, "O:long", &v)) - return NULL; - return PyNumber_Long(v); -} - -static char long_doc[] = -"long(x) -> long integer\n\ -\n\ -Convert a string or number to a long integer, if possible.\n\ -A floating point argument will be truncated towards zero."; - - -static PyObject * min_max(args, sign) PyObject *args; int sign; |