diff options
Diffstat (limited to 'Objects/stringlib/transmogrify.h')
-rw-r--r-- | Objects/stringlib/transmogrify.h | 362 |
1 files changed, 362 insertions, 0 deletions
diff --git a/Objects/stringlib/transmogrify.h b/Objects/stringlib/transmogrify.h new file mode 100644 index 0000000..1ee8e75 --- /dev/null +++ b/Objects/stringlib/transmogrify.h @@ -0,0 +1,362 @@ +/* NOTE: this API is -ONLY- for use with single byte character strings. */ +/* Do not use it with Unicode. */ + +#include "bytes_methods.h" + +#ifndef STRINGLIB_MUTABLE +#warning "STRINGLIB_MUTABLE not defined before #include, assuming 0" +#define STRINGLIB_MUTABLE 0 +#endif + +/* the more complicated methods. parts of these should be pulled out into the + shared code in bytes_methods.c to cut down on duplicate code bloat. */ + +PyDoc_STRVAR(expandtabs__doc__, +"B.expandtabs([tabsize]) -> modified copy of B\n\ +\n\ +Return a copy of B where all tab characters are expanded using spaces.\n\ +If tabsize is not given, a tab size of 8 characters is assumed."); + +static PyObject* +stringlib_expandtabs(PyObject *self, PyObject *args) +{ + const char *e, *p; + char *q; + Py_ssize_t i, j, old_j; + PyObject *u; + int tabsize = 8; + + if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize)) + return NULL; + + /* First pass: determine size of output string */ + i = j = old_j = 0; + e = STRINGLIB_STR(self) + STRINGLIB_LEN(self); + for (p = STRINGLIB_STR(self); p < e; p++) + if (*p == '\t') { + if (tabsize > 0) { + j += tabsize - (j % tabsize); + /* XXX: this depends on a signed integer overflow to < 0 */ + /* C compilers, including gcc, do -NOT- guarantee this. */ + if (old_j > j) { + PyErr_SetString(PyExc_OverflowError, + "result is too long"); + return NULL; + } + old_j = j; + } + } + else { + j++; + if (*p == '\n' || *p == '\r') { + i += j; + old_j = j = 0; + /* XXX: this depends on a signed integer overflow to < 0 */ + /* C compilers, including gcc, do -NOT- guarantee this. */ + if (i < 0) { + PyErr_SetString(PyExc_OverflowError, + "result is too long"); + return NULL; + } + } + } + + if ((i + j) < 0) { + /* XXX: this depends on a signed integer overflow to < 0 */ + /* C compilers, including gcc, do -NOT- guarantee this. */ + PyErr_SetString(PyExc_OverflowError, "result is too long"); + return NULL; + } + + /* Second pass: create output string and fill it */ + u = STRINGLIB_NEW(NULL, i + j); + if (!u) + return NULL; + + j = 0; + q = STRINGLIB_STR(u); + + for (p = STRINGLIB_STR(self); p < e; p++) + if (*p == '\t') { + if (tabsize > 0) { + i = tabsize - (j % tabsize); + j += i; + while (i--) + *q++ = ' '; + } + } + else { + j++; + *q++ = *p; + if (*p == '\n' || *p == '\r') + j = 0; + } + + return u; +} + +Py_LOCAL_INLINE(PyObject *) +pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill) +{ + PyObject *u; + + if (left < 0) + left = 0; + if (right < 0) + right = 0; + + if (left == 0 && right == 0 && STRINGLIB_CHECK_EXACT(self)) { +#if STRINGLIB_MUTABLE + /* We're defined as returning a copy; If the object is mutable + * that means we must make an identical copy. */ + return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +#else + Py_INCREF(self); + return (PyObject *)self; +#endif /* STRINGLIB_MUTABLE */ + } + + u = STRINGLIB_NEW(NULL, + left + STRINGLIB_LEN(self) + right); + if (u) { + if (left) + memset(STRINGLIB_STR(u), fill, left); + Py_MEMCPY(STRINGLIB_STR(u) + left, + STRINGLIB_STR(self), + STRINGLIB_LEN(self)); + if (right) + memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self), + fill, right); + } + + return u; +} + +PyDoc_STRVAR(ljust__doc__, +"B.ljust(width[, fillchar]) -> modified copy of B\n" +"\n" +"Return B left justified in a string of length width. Padding is\n" +"done using the specified fill character (default is a space)."); + +static PyObject * +stringlib_ljust(PyObject *self, PyObject *args) +{ + Py_ssize_t width; + char fillchar = ' '; + + if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar)) + return NULL; + + if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { +#if STRINGLIB_MUTABLE + /* We're defined as returning a copy; If the object is mutable + * that means we must make an identical copy. */ + return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +#else + Py_INCREF(self); + return (PyObject*) self; +#endif + } + + return pad(self, 0, width - STRINGLIB_LEN(self), fillchar); +} + + +PyDoc_STRVAR(rjust__doc__, +"B.rjust(width[, fillchar]) -> modified copy of B\n" +"\n" +"Return B right justified in a string of length width. Padding is\n" +"done using the specified fill character (default is a space)"); + +static PyObject * +stringlib_rjust(PyObject *self, PyObject *args) +{ + Py_ssize_t width; + char fillchar = ' '; + + if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar)) + return NULL; + + if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { +#if STRINGLIB_MUTABLE + /* We're defined as returning a copy; If the object is mutable + * that means we must make an identical copy. */ + return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +#else + Py_INCREF(self); + return (PyObject*) self; +#endif + } + + return pad(self, width - STRINGLIB_LEN(self), 0, fillchar); +} + + +PyDoc_STRVAR(center__doc__, +"B.center(width[, fillchar]) -> modified copy of B\n" +"\n" +"Return B centered in a string of length width. Padding is\n" +"done using the specified fill character (default is a space)"); + +static PyObject * +stringlib_center(PyObject *self, PyObject *args) +{ + Py_ssize_t marg, left; + Py_ssize_t width; + char fillchar = ' '; + + if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar)) + return NULL; + + if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { +#if STRINGLIB_MUTABLE + /* We're defined as returning a copy; If the object is mutable + * that means we must make an identical copy. */ + return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +#else + Py_INCREF(self); + return (PyObject*) self; +#endif + } + + marg = width - STRINGLIB_LEN(self); + left = marg / 2 + (marg & width & 1); + + return pad(self, left, marg - left, fillchar); +} + +PyDoc_STRVAR(zfill__doc__, +"B.zfill(width) -> modified copy of B\n" +"\n" +"Pad a numeric string B with zeros on the left, to fill a field\n" +"of the specified width. B is never truncated."); + +static PyObject * +stringlib_zfill(PyObject *self, PyObject *args) +{ + Py_ssize_t fill; + PyObject *s; + char *p; + Py_ssize_t width; + + if (!PyArg_ParseTuple(args, "n:zfill", &width)) + return NULL; + + if (STRINGLIB_LEN(self) >= width) { + if (STRINGLIB_CHECK_EXACT(self)) { +#if STRINGLIB_MUTABLE + /* We're defined as returning a copy; If the object is mutable + * that means we must make an identical copy. */ + return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +#else + Py_INCREF(self); + return (PyObject*) self; +#endif + } + else + return STRINGLIB_NEW( + STRINGLIB_STR(self), + STRINGLIB_LEN(self) + ); + } + + fill = width - STRINGLIB_LEN(self); + + s = pad(self, fill, 0, '0'); + + if (s == NULL) + return NULL; + + p = STRINGLIB_STR(s); + if (p[fill] == '+' || p[fill] == '-') { + /* move sign to beginning of string */ + p[0] = p[fill]; + p[fill] = '0'; + } + + return (PyObject*) s; +} + + +#define _STRINGLIB_SPLIT_APPEND(data, left, right) \ + str = STRINGLIB_NEW((data) + (left), \ + (right) - (left)); \ + if (str == NULL) \ + goto onError; \ + if (PyList_Append(list, str)) { \ + Py_DECREF(str); \ + goto onError; \ + } \ + else \ + Py_DECREF(str); + +PyDoc_STRVAR(splitlines__doc__, +"B.splitlines([keepends]) -> list of lines\n\ +\n\ +Return a list of the lines in B, breaking at line boundaries.\n\ +Line breaks are not included in the resulting list unless keepends\n\ +is given and true."); + +static PyObject* +stringlib_splitlines(PyObject *self, PyObject *args) +{ + register Py_ssize_t i; + register Py_ssize_t j; + Py_ssize_t len; + int keepends = 0; + PyObject *list; + PyObject *str; + char *data; + + if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends)) + return NULL; + + data = STRINGLIB_STR(self); + len = STRINGLIB_LEN(self); + + /* This does not use the preallocated list because splitlines is + usually run with hundreds of newlines. The overhead of + switching between PyList_SET_ITEM and append causes about a + 2-3% slowdown for that common case. A smarter implementation + could move the if check out, so the SET_ITEMs are done first + and the appends only done when the prealloc buffer is full. + That's too much work for little gain.*/ + + list = PyList_New(0); + if (!list) + goto onError; + + for (i = j = 0; i < len; ) { + Py_ssize_t eol; + + /* Find a line and append it */ + while (i < len && data[i] != '\n' && data[i] != '\r') + i++; + + /* Skip the line break reading CRLF as one line break */ + eol = i; + if (i < len) { + if (data[i] == '\r' && i + 1 < len && + data[i+1] == '\n') + i += 2; + else + i++; + if (keepends) + eol = i; + } + _STRINGLIB_SPLIT_APPEND(data, j, eol); + j = i; + } + if (j < len) { + _STRINGLIB_SPLIT_APPEND(data, j, len); + } + + return list; + + onError: + Py_XDECREF(list); + return NULL; +} + +#undef _STRINGLIB_SPLIT_APPEND + |