diff options
author | Gregory P. Smith <greg@mad-scientist.com> | 2007-10-16 06:31:30 (GMT) |
---|---|---|
committer | Gregory P. Smith <greg@mad-scientist.com> | 2007-10-16 06:31:30 (GMT) |
commit | 60d241f135f10312f5a638846659d7e471f6cac9 (patch) | |
tree | 620abe8a5e99620cec51a3476ea96c2459956e70 /Objects/stringlib | |
parent | 3d2fd7f923c35fe61c0f26f2ae150d73a42859b9 (diff) | |
download | cpython-60d241f135f10312f5a638846659d7e471f6cac9.zip cpython-60d241f135f10312f5a638846659d7e471f6cac9.tar.gz cpython-60d241f135f10312f5a638846659d7e471f6cac9.tar.bz2 |
For PEP3137: Adds missing methods to the mutable PyBytes object (soon
to be called a buffer). Shares code with stringobject when possible.
Adds unit tests with common code that should be usable to test the PEP's
mutable buffer() and immutable bytes() types.
http://bugs.python.org/issue1261
Diffstat (limited to 'Objects/stringlib')
-rw-r--r-- | Objects/stringlib/README.txt | 9 | ||||
-rw-r--r-- | Objects/stringlib/ctype.h | 110 | ||||
-rw-r--r-- | Objects/stringlib/find.h | 2 | ||||
-rw-r--r-- | Objects/stringlib/transmogrify.h | 362 | ||||
-rw-r--r-- | Objects/stringlib/unicodedefs.h | 2 |
5 files changed, 484 insertions, 1 deletions
diff --git a/Objects/stringlib/README.txt b/Objects/stringlib/README.txt index 82a8774..aec3441 100644 --- a/Objects/stringlib/README.txt +++ b/Objects/stringlib/README.txt @@ -32,3 +32,12 @@ STRINGLIB_CHAR* STRINGLIB_STR(PyObject*) returns the pointer to the character data for the given string object (which must be of the right type) + +int STRINGLIB_CHECK_EXACT(PyObject *) + + returns true if the object is an instance of our type, not a subclass. + +STRINGLIB_MUTABLE + + Must be 0 or 1 to tell the cpp macros in stringlib code if the object + being operated on is mutable or not. diff --git a/Objects/stringlib/ctype.h b/Objects/stringlib/ctype.h new file mode 100644 index 0000000..8951276 --- /dev/null +++ b/Objects/stringlib/ctype.h @@ -0,0 +1,110 @@ +/* NOTE: this API is -ONLY- for use with single byte character strings. */ +/* Do not use it with Unicode. */ + +#include "bytes_methods.h" + +static PyObject* +stringlib_isspace(PyObject *self) +{ + return _Py_bytes_isspace(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} + +static PyObject* +stringlib_isalpha(PyObject *self) +{ + return _Py_bytes_isalpha(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} + +static PyObject* +stringlib_isalnum(PyObject *self) +{ + return _Py_bytes_isalnum(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} + +static PyObject* +stringlib_isdigit(PyObject *self) +{ + return _Py_bytes_isdigit(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} + +static PyObject* +stringlib_islower(PyObject *self) +{ + return _Py_bytes_islower(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} + +static PyObject* +stringlib_isupper(PyObject *self) +{ + return _Py_bytes_isupper(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} + +static PyObject* +stringlib_istitle(PyObject *self) +{ + return _Py_bytes_istitle(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} + + +/* functions that return a new object partially translated by ctype funcs: */ + +static PyObject* +stringlib_lower(PyObject *self) +{ + PyObject* newobj; + newobj = 
STRINGLIB_NEW(NULL, STRINGLIB_LEN(self)); + if (!newobj) + return NULL; + _Py_bytes_lower(STRINGLIB_STR(newobj), STRINGLIB_STR(self), + STRINGLIB_LEN(self)); + return newobj; +} + +static PyObject* +stringlib_upper(PyObject *self) +{ + PyObject* newobj; + newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self)); + if (!newobj) + return NULL; + _Py_bytes_upper(STRINGLIB_STR(newobj), STRINGLIB_STR(self), + STRINGLIB_LEN(self)); + return newobj; +} + +static PyObject* +stringlib_title(PyObject *self) +{ + PyObject* newobj; + newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self)); + if (!newobj) + return NULL; + _Py_bytes_title(STRINGLIB_STR(newobj), STRINGLIB_STR(self), + STRINGLIB_LEN(self)); + return newobj; +} + +static PyObject* +stringlib_capitalize(PyObject *self) +{ + PyObject* newobj; + newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self)); + if (!newobj) + return NULL; + _Py_bytes_capitalize(STRINGLIB_STR(newobj), STRINGLIB_STR(self), + STRINGLIB_LEN(self)); + return newobj; +} + +static PyObject* +stringlib_swapcase(PyObject *self) +{ + PyObject* newobj; + newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self)); + if (!newobj) + return NULL; + _Py_bytes_swapcase(STRINGLIB_STR(newobj), STRINGLIB_STR(self), + STRINGLIB_LEN(self)); + return newobj; +} + diff --git a/Objects/stringlib/find.h b/Objects/stringlib/find.h index 4cdbb09..3b924b6 100644 --- a/Objects/stringlib/find.h +++ b/Objects/stringlib/find.h @@ -90,7 +90,7 @@ stringlib_rfind_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len, return stringlib_rfind(str + start, end - start, sub, sub_len, start); } -#ifdef STRINGLIB_STR +#ifdef STRINGLIB_WANT_CONTAINS_OBJ Py_LOCAL_INLINE(int) stringlib_contains_obj(PyObject* str, PyObject* sub) diff --git a/Objects/stringlib/transmogrify.h b/Objects/stringlib/transmogrify.h new file mode 100644 index 0000000..1ee8e75 --- /dev/null +++ b/Objects/stringlib/transmogrify.h @@ -0,0 +1,362 @@ +/* NOTE: this API is -ONLY- for use with single byte character strings. 
*/ +/* Do not use it with Unicode. */ + +#include "bytes_methods.h" + +#ifndef STRINGLIB_MUTABLE +#warning "STRINGLIB_MUTABLE not defined before #include, assuming 0" +#define STRINGLIB_MUTABLE 0 +#endif + +/* the more complicated methods. parts of these should be pulled out into the + shared code in bytes_methods.c to cut down on duplicate code bloat. */ + +PyDoc_STRVAR(expandtabs__doc__, +"B.expandtabs([tabsize]) -> modified copy of B\n\ +\n\ +Return a copy of B where all tab characters are expanded using spaces.\n\ +If tabsize is not given, a tab size of 8 characters is assumed."); + +static PyObject* +stringlib_expandtabs(PyObject *self, PyObject *args) +{ + const char *e, *p; + char *q; + Py_ssize_t i, j, old_j; + PyObject *u; + int tabsize = 8; + + if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize)) + return NULL; + + /* First pass: determine size of output string */ + i = j = old_j = 0; + e = STRINGLIB_STR(self) + STRINGLIB_LEN(self); + for (p = STRINGLIB_STR(self); p < e; p++) + if (*p == '\t') { + if (tabsize > 0) { + j += tabsize - (j % tabsize); + /* XXX: this depends on a signed integer overflow to < 0 */ + /* C compilers, including gcc, do -NOT- guarantee this. */ + if (old_j > j) { + PyErr_SetString(PyExc_OverflowError, + "result is too long"); + return NULL; + } + old_j = j; + } + } + else { + j++; + if (*p == '\n' || *p == '\r') { + i += j; + old_j = j = 0; + /* XXX: this depends on a signed integer overflow to < 0 */ + /* C compilers, including gcc, do -NOT- guarantee this. */ + if (i < 0) { + PyErr_SetString(PyExc_OverflowError, + "result is too long"); + return NULL; + } + } + } + + if ((i + j) < 0) { + /* XXX: this depends on a signed integer overflow to < 0 */ + /* C compilers, including gcc, do -NOT- guarantee this. 
*/ + PyErr_SetString(PyExc_OverflowError, "result is too long"); + return NULL; + } + + /* Second pass: create output string and fill it */ + u = STRINGLIB_NEW(NULL, i + j); + if (!u) + return NULL; + + j = 0; + q = STRINGLIB_STR(u); + + for (p = STRINGLIB_STR(self); p < e; p++) + if (*p == '\t') { + if (tabsize > 0) { + i = tabsize - (j % tabsize); + j += i; + while (i--) + *q++ = ' '; + } + } + else { + j++; + *q++ = *p; + if (*p == '\n' || *p == '\r') + j = 0; + } + + return u; +} + +Py_LOCAL_INLINE(PyObject *) +pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill) +{ + PyObject *u; + + if (left < 0) + left = 0; + if (right < 0) + right = 0; + + if (left == 0 && right == 0 && STRINGLIB_CHECK_EXACT(self)) { +#if STRINGLIB_MUTABLE + /* We're defined as returning a copy; If the object is mutable + * that means we must make an identical copy. */ + return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +#else + Py_INCREF(self); + return (PyObject *)self; +#endif /* STRINGLIB_MUTABLE */ + } + + u = STRINGLIB_NEW(NULL, + left + STRINGLIB_LEN(self) + right); + if (u) { + if (left) + memset(STRINGLIB_STR(u), fill, left); + Py_MEMCPY(STRINGLIB_STR(u) + left, + STRINGLIB_STR(self), + STRINGLIB_LEN(self)); + if (right) + memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self), + fill, right); + } + + return u; +} + +PyDoc_STRVAR(ljust__doc__, +"B.ljust(width[, fillchar]) -> modified copy of B\n" +"\n" +"Return B left justified in a string of length width. Padding is\n" +"done using the specified fill character (default is a space)."); + +static PyObject * +stringlib_ljust(PyObject *self, PyObject *args) +{ + Py_ssize_t width; + char fillchar = ' '; + + if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar)) + return NULL; + + if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { +#if STRINGLIB_MUTABLE + /* We're defined as returning a copy; If the object is mutable + * that means we must make an identical copy. 
*/ + return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +#else + Py_INCREF(self); + return (PyObject*) self; +#endif + } + + return pad(self, 0, width - STRINGLIB_LEN(self), fillchar); +} + + +PyDoc_STRVAR(rjust__doc__, +"B.rjust(width[, fillchar]) -> modified copy of B\n" +"\n" +"Return B right justified in a string of length width. Padding is\n" +"done using the specified fill character (default is a space)"); + +static PyObject * +stringlib_rjust(PyObject *self, PyObject *args) +{ + Py_ssize_t width; + char fillchar = ' '; + + if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar)) + return NULL; + + if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { +#if STRINGLIB_MUTABLE + /* We're defined as returning a copy; If the object is mutable + * that means we must make an identical copy. */ + return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +#else + Py_INCREF(self); + return (PyObject*) self; +#endif + } + + return pad(self, width - STRINGLIB_LEN(self), 0, fillchar); +} + + +PyDoc_STRVAR(center__doc__, +"B.center(width[, fillchar]) -> modified copy of B\n" +"\n" +"Return B centered in a string of length width. Padding is\n" +"done using the specified fill character (default is a space)"); + +static PyObject * +stringlib_center(PyObject *self, PyObject *args) +{ + Py_ssize_t marg, left; + Py_ssize_t width; + char fillchar = ' '; + + if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar)) + return NULL; + + if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { +#if STRINGLIB_MUTABLE + /* We're defined as returning a copy; If the object is mutable + * that means we must make an identical copy. 
*/ + return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +#else + Py_INCREF(self); + return (PyObject*) self; +#endif + } + + marg = width - STRINGLIB_LEN(self); + left = marg / 2 + (marg & width & 1); + + return pad(self, left, marg - left, fillchar); +} + +PyDoc_STRVAR(zfill__doc__, +"B.zfill(width) -> modified copy of B\n" +"\n" +"Pad a numeric string B with zeros on the left, to fill a field\n" +"of the specified width. B is never truncated."); + +static PyObject * +stringlib_zfill(PyObject *self, PyObject *args) +{ + Py_ssize_t fill; + PyObject *s; + char *p; + Py_ssize_t width; + + if (!PyArg_ParseTuple(args, "n:zfill", &width)) + return NULL; + + if (STRINGLIB_LEN(self) >= width) { + if (STRINGLIB_CHECK_EXACT(self)) { +#if STRINGLIB_MUTABLE + /* We're defined as returning a copy; If the object is mutable + * that means we must make an identical copy. */ + return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +#else + Py_INCREF(self); + return (PyObject*) self; +#endif + } + else + return STRINGLIB_NEW( + STRINGLIB_STR(self), + STRINGLIB_LEN(self) + ); + } + + fill = width - STRINGLIB_LEN(self); + + s = pad(self, fill, 0, '0'); + + if (s == NULL) + return NULL; + + p = STRINGLIB_STR(s); + if (p[fill] == '+' || p[fill] == '-') { + /* move sign to beginning of string */ + p[0] = p[fill]; + p[fill] = '0'; + } + + return (PyObject*) s; +} + + +#define _STRINGLIB_SPLIT_APPEND(data, left, right) \ + str = STRINGLIB_NEW((data) + (left), \ + (right) - (left)); \ + if (str == NULL) \ + goto onError; \ + if (PyList_Append(list, str)) { \ + Py_DECREF(str); \ + goto onError; \ + } \ + else \ + Py_DECREF(str); + +PyDoc_STRVAR(splitlines__doc__, +"B.splitlines([keepends]) -> list of lines\n\ +\n\ +Return a list of the lines in B, breaking at line boundaries.\n\ +Line breaks are not included in the resulting list unless keepends\n\ +is given and true."); + +static PyObject* +stringlib_splitlines(PyObject *self, PyObject *args) +{ + register Py_ssize_t 
i; + register Py_ssize_t j; + Py_ssize_t len; + int keepends = 0; + PyObject *list; + PyObject *str; + char *data; + + if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends)) + return NULL; + + data = STRINGLIB_STR(self); + len = STRINGLIB_LEN(self); + + /* This does not use the preallocated list because splitlines is + usually run with hundreds of newlines. The overhead of + switching between PyList_SET_ITEM and append causes about a + 2-3% slowdown for that common case. A smarter implementation + could move the if check out, so the SET_ITEMs are done first + and the appends only done when the prealloc buffer is full. + That's too much work for little gain.*/ + + list = PyList_New(0); + if (!list) + goto onError; + + for (i = j = 0; i < len; ) { + Py_ssize_t eol; + + /* Find a line and append it */ + while (i < len && data[i] != '\n' && data[i] != '\r') + i++; + + /* Skip the line break reading CRLF as one line break */ + eol = i; + if (i < len) { + if (data[i] == '\r' && i + 1 < len && + data[i+1] == '\n') + i += 2; + else + i++; + if (keepends) + eol = i; + } + _STRINGLIB_SPLIT_APPEND(data, j, eol); + j = i; + } + if (j < len) { + _STRINGLIB_SPLIT_APPEND(data, j, len); + } + + return list; + + onError: + Py_XDECREF(list); + return NULL; +} + +#undef _STRINGLIB_SPLIT_APPEND + diff --git a/Objects/stringlib/unicodedefs.h b/Objects/stringlib/unicodedefs.h index 25c1d4f..fa6140f 100644 --- a/Objects/stringlib/unicodedefs.h +++ b/Objects/stringlib/unicodedefs.h @@ -22,6 +22,8 @@ #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_TOSTR PyObject_Unicode +#define STRINGLIB_WANT_CONTAINS_OBJ 1 + /* STRINGLIB_CMP was defined as: Py_LOCAL_INLINE(int) |