summaryrefslogtreecommitdiffstats
path: root/Objects/stringlib
diff options
context:
space:
mode:
authorGregory P. Smith <greg@mad-scientist.com>2007-10-16 06:31:30 (GMT)
committerGregory P. Smith <greg@mad-scientist.com>2007-10-16 06:31:30 (GMT)
commit60d241f135f10312f5a638846659d7e471f6cac9 (patch)
tree620abe8a5e99620cec51a3476ea96c2459956e70 /Objects/stringlib
parent3d2fd7f923c35fe61c0f26f2ae150d73a42859b9 (diff)
downloadcpython-60d241f135f10312f5a638846659d7e471f6cac9.zip
cpython-60d241f135f10312f5a638846659d7e471f6cac9.tar.gz
cpython-60d241f135f10312f5a638846659d7e471f6cac9.tar.bz2
For PEP 3137: Adds missing methods to the mutable PyBytes object (soon
to be called a buffer). Shares code with stringobject when possible. Adds unit tests with common code that should be usable to test the PEP's mutable buffer() and immutable bytes() types. http://bugs.python.org/issue1261
Diffstat (limited to 'Objects/stringlib')
-rw-r--r--Objects/stringlib/README.txt9
-rw-r--r--Objects/stringlib/ctype.h110
-rw-r--r--Objects/stringlib/find.h2
-rw-r--r--Objects/stringlib/transmogrify.h362
-rw-r--r--Objects/stringlib/unicodedefs.h2
5 files changed, 484 insertions, 1 deletions
diff --git a/Objects/stringlib/README.txt b/Objects/stringlib/README.txt
index 82a8774..aec3441 100644
--- a/Objects/stringlib/README.txt
+++ b/Objects/stringlib/README.txt
@@ -32,3 +32,12 @@ STRINGLIB_CHAR* STRINGLIB_STR(PyObject*)
returns the pointer to the character data for the given string
object (which must be of the right type)
+
+int STRINGLIB_CHECK_EXACT(PyObject *)
+
+ returns true if the object is an instance of our type, not a subclass.
+
+STRINGLIB_MUTABLE
+
+ Must be 0 or 1. Tells the C preprocessor conditionals in the stringlib
+ code whether the object being operated on is mutable.
diff --git a/Objects/stringlib/ctype.h b/Objects/stringlib/ctype.h
new file mode 100644
index 0000000..8951276
--- /dev/null
+++ b/Objects/stringlib/ctype.h
@@ -0,0 +1,110 @@
+/* NOTE: this API is -ONLY- for use with single byte character strings. */
+/* Do not use it with Unicode. */
+
+#include "bytes_methods.h"
+
+/* Hand self's data pointer and length (STRINGLIB_STR / STRINGLIB_LEN) to
+ * _Py_bytes_isspace() and return its result unchanged. */
+static PyObject*
+stringlib_isspace(PyObject *self)
+{
+    char *data = STRINGLIB_STR(self);
+    Py_ssize_t size = STRINGLIB_LEN(self);
+
+    return _Py_bytes_isspace(data, size);
+}
+
+/* Hand self's data pointer and length (STRINGLIB_STR / STRINGLIB_LEN) to
+ * _Py_bytes_isalpha() and return its result unchanged. */
+static PyObject*
+stringlib_isalpha(PyObject *self)
+{
+    char *data = STRINGLIB_STR(self);
+    Py_ssize_t size = STRINGLIB_LEN(self);
+
+    return _Py_bytes_isalpha(data, size);
+}
+
+/* Hand self's data pointer and length (STRINGLIB_STR / STRINGLIB_LEN) to
+ * _Py_bytes_isalnum() and return its result unchanged. */
+static PyObject*
+stringlib_isalnum(PyObject *self)
+{
+    char *data = STRINGLIB_STR(self);
+    Py_ssize_t size = STRINGLIB_LEN(self);
+
+    return _Py_bytes_isalnum(data, size);
+}
+
+/* Hand self's data pointer and length (STRINGLIB_STR / STRINGLIB_LEN) to
+ * _Py_bytes_isdigit() and return its result unchanged. */
+static PyObject*
+stringlib_isdigit(PyObject *self)
+{
+    char *data = STRINGLIB_STR(self);
+    Py_ssize_t size = STRINGLIB_LEN(self);
+
+    return _Py_bytes_isdigit(data, size);
+}
+
+/* Hand self's data pointer and length (STRINGLIB_STR / STRINGLIB_LEN) to
+ * _Py_bytes_islower() and return its result unchanged. */
+static PyObject*
+stringlib_islower(PyObject *self)
+{
+    char *data = STRINGLIB_STR(self);
+    Py_ssize_t size = STRINGLIB_LEN(self);
+
+    return _Py_bytes_islower(data, size);
+}
+
+/* Hand self's data pointer and length (STRINGLIB_STR / STRINGLIB_LEN) to
+ * _Py_bytes_isupper() and return its result unchanged. */
+static PyObject*
+stringlib_isupper(PyObject *self)
+{
+    char *data = STRINGLIB_STR(self);
+    Py_ssize_t size = STRINGLIB_LEN(self);
+
+    return _Py_bytes_isupper(data, size);
+}
+
+/* Hand self's data pointer and length (STRINGLIB_STR / STRINGLIB_LEN) to
+ * _Py_bytes_istitle() and return its result unchanged. */
+static PyObject*
+stringlib_istitle(PyObject *self)
+{
+    char *data = STRINGLIB_STR(self);
+    Py_ssize_t size = STRINGLIB_LEN(self);
+
+    return _Py_bytes_istitle(data, size);
+}
+
+
+/* functions that return a new object partially translated by ctype funcs: */
+
+/* Allocate a new object of self's length with STRINGLIB_NEW and let
+ * _Py_bytes_lower() fill it from self's data.  Returns the new object,
+ * or NULL when the allocation fails. */
+static PyObject*
+stringlib_lower(PyObject *self)
+{
+    Py_ssize_t size = STRINGLIB_LEN(self);
+    PyObject *result = STRINGLIB_NEW(NULL, size);
+
+    if (result != NULL)
+        _Py_bytes_lower(STRINGLIB_STR(result), STRINGLIB_STR(self), size);
+    return result;
+}
+
+/* Allocate a new object of self's length with STRINGLIB_NEW and let
+ * _Py_bytes_upper() fill it from self's data.  Returns the new object,
+ * or NULL when the allocation fails. */
+static PyObject*
+stringlib_upper(PyObject *self)
+{
+    Py_ssize_t size = STRINGLIB_LEN(self);
+    PyObject *result = STRINGLIB_NEW(NULL, size);
+
+    if (result != NULL)
+        _Py_bytes_upper(STRINGLIB_STR(result), STRINGLIB_STR(self), size);
+    return result;
+}
+
+/* Allocate a new object of self's length with STRINGLIB_NEW and let
+ * _Py_bytes_title() fill it from self's data.  Returns the new object,
+ * or NULL when the allocation fails. */
+static PyObject*
+stringlib_title(PyObject *self)
+{
+    Py_ssize_t size = STRINGLIB_LEN(self);
+    PyObject *result = STRINGLIB_NEW(NULL, size);
+
+    if (result != NULL)
+        _Py_bytes_title(STRINGLIB_STR(result), STRINGLIB_STR(self), size);
+    return result;
+}
+
+/* Allocate a new object of self's length with STRINGLIB_NEW and let
+ * _Py_bytes_capitalize() fill it from self's data.  Returns the new
+ * object, or NULL when the allocation fails. */
+static PyObject*
+stringlib_capitalize(PyObject *self)
+{
+    Py_ssize_t size = STRINGLIB_LEN(self);
+    PyObject *result = STRINGLIB_NEW(NULL, size);
+
+    if (result != NULL)
+        _Py_bytes_capitalize(STRINGLIB_STR(result), STRINGLIB_STR(self),
+                             size);
+    return result;
+}
+
+/* Allocate a new object of self's length with STRINGLIB_NEW and let
+ * _Py_bytes_swapcase() fill it from self's data.  Returns the new object,
+ * or NULL when the allocation fails. */
+static PyObject*
+stringlib_swapcase(PyObject *self)
+{
+    Py_ssize_t size = STRINGLIB_LEN(self);
+    PyObject *result = STRINGLIB_NEW(NULL, size);
+
+    if (result != NULL)
+        _Py_bytes_swapcase(STRINGLIB_STR(result), STRINGLIB_STR(self),
+                           size);
+    return result;
+}
+
diff --git a/Objects/stringlib/find.h b/Objects/stringlib/find.h
index 4cdbb09..3b924b6 100644
--- a/Objects/stringlib/find.h
+++ b/Objects/stringlib/find.h
@@ -90,7 +90,7 @@ stringlib_rfind_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
return stringlib_rfind(str + start, end - start, sub, sub_len, start);
}
-#ifdef STRINGLIB_STR
+#ifdef STRINGLIB_WANT_CONTAINS_OBJ
Py_LOCAL_INLINE(int)
stringlib_contains_obj(PyObject* str, PyObject* sub)
diff --git a/Objects/stringlib/transmogrify.h b/Objects/stringlib/transmogrify.h
new file mode 100644
index 0000000..1ee8e75
--- /dev/null
+++ b/Objects/stringlib/transmogrify.h
@@ -0,0 +1,362 @@
+/* NOTE: this API is -ONLY- for use with single byte character strings. */
+/* Do not use it with Unicode. */
+
+#include "bytes_methods.h"
+
+#ifndef STRINGLIB_MUTABLE
+#warning "STRINGLIB_MUTABLE not defined before #include, assuming 0"
+#define STRINGLIB_MUTABLE 0
+#endif
+
+/* the more complicated methods. parts of these should be pulled out into the
+ shared code in bytes_methods.c to cut down on duplicate code bloat. */
+
+PyDoc_STRVAR(expandtabs__doc__,
+"B.expandtabs([tabsize]) -> modified copy of B\n\
+\n\
+Return a copy of B where all tab characters are expanded using spaces.\n\
+If tabsize is not given, a tab size of 8 characters is assumed.");
+
+/* Method implementation for expandtabs.
+ *
+ * args: optional int tabsize (default 8).  With tabsize <= 0, tab bytes
+ * contribute nothing to the output (they are dropped).
+ *
+ * Works in two passes over self's data: the first computes the output
+ * length, the second allocates the result with STRINGLIB_NEW and fills it.
+ * The column counter restarts after every '\n' or '\r'.
+ *
+ * Returns the new object.  Returns NULL if argument parsing or the
+ * allocation fails, or with OverflowError set when the expanded length
+ * would not fit in a Py_ssize_t. */
+static PyObject*
+stringlib_expandtabs(PyObject *self, PyObject *args)
+{
+    const char *e, *p;
+    char *q;
+    Py_ssize_t i, j, incr;
+    PyObject *u;
+    int tabsize = 8;
+
+    if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
+        return NULL;
+
+    /* First pass: determine size of output string.
+       i is the total length of the lines completed so far, j is the
+       length of the current line.  Every addition is range-checked
+       *before* it is performed: signed overflow is undefined behaviour
+       in C, so testing for a wrapped (negative) result afterwards is
+       not reliable. */
+    i = j = 0;
+    e = STRINGLIB_STR(self) + STRINGLIB_LEN(self);
+    for (p = STRINGLIB_STR(self); p < e; p++) {
+        if (*p == '\t') {
+            if (tabsize > 0) {
+                /* 1 <= incr <= tabsize, so the subtraction from
+                   PY_SSIZE_T_MAX below cannot itself overflow. */
+                incr = tabsize - (j % tabsize);
+                if (j > PY_SSIZE_T_MAX - incr)
+                    goto overflow;
+                j += incr;
+            }
+        }
+        else {
+            if (j == PY_SSIZE_T_MAX)
+                goto overflow;
+            j++;
+            if (*p == '\n' || *p == '\r') {
+                if (i > PY_SSIZE_T_MAX - j)
+                    goto overflow;
+                i += j;
+                j = 0;
+            }
+        }
+    }
+
+    if (i > PY_SSIZE_T_MAX - j)
+        goto overflow;
+
+    /* Second pass: create output string and fill it */
+    u = STRINGLIB_NEW(NULL, i + j);
+    if (!u)
+        return NULL;
+
+    j = 0;
+    q = STRINGLIB_STR(u);
+
+    for (p = STRINGLIB_STR(self); p < e; p++) {
+        if (*p == '\t') {
+            if (tabsize > 0) {
+                /* pad with spaces up to the next tab stop */
+                incr = tabsize - (j % tabsize);
+                j += incr;
+                while (incr--)
+                    *q++ = ' ';
+            }
+        }
+        else {
+            j++;
+            *q++ = *p;
+            if (*p == '\n' || *p == '\r')
+                j = 0;
+        }
+    }
+
+    return u;
+
+  overflow:
+    PyErr_SetString(PyExc_OverflowError, "result is too long");
+    return NULL;
+}
+
+/* Build an object made of `left` copies of fill, then self's data, then
+ * `right` copies of fill.  Negative counts are treated as zero.
+ *
+ * When no padding is requested and self is exactly our type
+ * (STRINGLIB_CHECK_EXACT), the result is a fresh copy for mutable types
+ * and self itself (with a new reference) for immutable ones.
+ *
+ * Returns NULL when STRINGLIB_NEW fails. */
+Py_LOCAL_INLINE(PyObject *)
+pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
+{
+    PyObject *result;
+    char *dest;
+    Py_ssize_t size = STRINGLIB_LEN(self);
+
+    left = (left < 0) ? 0 : left;
+    right = (right < 0) ? 0 : right;
+
+    if (left == 0 && right == 0 && STRINGLIB_CHECK_EXACT(self)) {
+#if STRINGLIB_MUTABLE
+        /* A mutable object must never be handed back as its own
+         * "copy": produce an identical, independent one. */
+        return STRINGLIB_NEW(STRINGLIB_STR(self), size);
+#else
+        Py_INCREF(self);
+        return (PyObject *)self;
+#endif /* STRINGLIB_MUTABLE */
+    }
+
+    result = STRINGLIB_NEW(NULL, left + size + right);
+    if (result == NULL)
+        return NULL;
+
+    /* layout: [left fill][self data][right fill] */
+    dest = STRINGLIB_STR(result);
+    if (left)
+        memset(dest, fill, left);
+    Py_MEMCPY(dest + left, STRINGLIB_STR(self), size);
+    if (right)
+        memset(dest + left + size, fill, right);
+
+    return result;
+}
+
+PyDoc_STRVAR(ljust__doc__,
+"B.ljust(width[, fillchar]) -> modified copy of B\n"
+"\n"
+"Return B left justified in a string of length width. Padding is\n"
+"done using the specified fill character (default is a space).");
+
+/* Method implementation for ljust.
+ *
+ * args: width (Py_ssize_t) and an optional single fill character
+ * (default ' ').  Returns self's data followed by enough fill bytes to
+ * reach width, built by pad().  Returns NULL if argument parsing fails
+ * or pad() fails. */
+static PyObject *
+stringlib_ljust(PyObject *self, PyObject *args)
+{
+    Py_ssize_t width;
+    char fillchar = ' ';
+
+    if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
+        return NULL;
+
+    /* Already wide enough: ask pad() for zero padding.  With no padding
+     * pad() returns a copy, or self with a new reference when self is an
+     * exact immutable instance.  Returning here for every type also
+     * keeps "width - length" from being evaluated when width is hugely
+     * negative, which would be signed overflow. */
+    if (STRINGLIB_LEN(self) >= width)
+        return pad(self, 0, 0, fillchar);
+
+    return pad(self, 0, width - STRINGLIB_LEN(self), fillchar);
+}
+
+
+PyDoc_STRVAR(rjust__doc__,
+"B.rjust(width[, fillchar]) -> modified copy of B\n"
+"\n"
+"Return B right justified in a string of length width. Padding is\n"
+"done using the specified fill character (default is a space)");
+
+/* Method implementation for rjust.
+ *
+ * args: width (Py_ssize_t) and an optional single fill character
+ * (default ' ').  Returns enough fill bytes to reach width followed by
+ * self's data, built by pad().  Returns NULL if argument parsing fails
+ * or pad() fails. */
+static PyObject *
+stringlib_rjust(PyObject *self, PyObject *args)
+{
+    Py_ssize_t width;
+    char fillchar = ' ';
+
+    if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
+        return NULL;
+
+    /* Already wide enough: ask pad() for zero padding.  With no padding
+     * pad() returns a copy, or self with a new reference when self is an
+     * exact immutable instance.  Returning here for every type also
+     * keeps "width - length" from being evaluated when width is hugely
+     * negative, which would be signed overflow. */
+    if (STRINGLIB_LEN(self) >= width)
+        return pad(self, 0, 0, fillchar);
+
+    return pad(self, width - STRINGLIB_LEN(self), 0, fillchar);
+}
+
+
+PyDoc_STRVAR(center__doc__,
+"B.center(width[, fillchar]) -> modified copy of B\n"
+"\n"
+"Return B centered in a string of length width. Padding is\n"
+"done using the specified fill character (default is a space)");
+
+/* Method implementation for center.
+ *
+ * args: width (Py_ssize_t) and an optional single fill character
+ * (default ' ').  Returns self's data with fill bytes on both sides so
+ * the total length is width, built by pad().  Returns NULL if argument
+ * parsing fails or pad() fails. */
+static PyObject *
+stringlib_center(PyObject *self, PyObject *args)
+{
+    Py_ssize_t marg, left;
+    Py_ssize_t width;
+    char fillchar = ' ';
+
+    if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
+        return NULL;
+
+    /* Already wide enough: ask pad() for zero padding, whatever the
+     * exact type of self.  The margin arithmetic below must only ever
+     * see a positive margin: with marg == -1 and an odd width (e.g.
+     * length 2, width 1) the rounding term would make left == 1 and a
+     * stray fill byte would be prepended; a hugely negative width would
+     * additionally overflow the subtraction. */
+    if (STRINGLIB_LEN(self) >= width)
+        return pad(self, 0, 0, fillchar);
+
+    marg = width - STRINGLIB_LEN(self);
+    /* when the margin is odd, the extra fill byte goes on the left only
+     * if width is odd as well */
+    left = marg / 2 + (marg & width & 1);
+
+    return pad(self, left, marg - left, fillchar);
+}
+
+PyDoc_STRVAR(zfill__doc__,
+"B.zfill(width) -> modified copy of B\n"
+"\n"
+"Pad a numeric string B with zeros on the left, to fill a field\n"
+"of the specified width. B is never truncated.");
+
+/* Method implementation for zfill.
+ *
+ * args: width (Py_ssize_t).  When self is shorter than width, returns
+ * self's data left-padded with '0' bytes via pad(); a leading '+' or '-'
+ * is moved in front of the zeros.  Otherwise returns a copy of self, or
+ * self itself (new reference) for an exact instance of an immutable
+ * type.  Returns NULL if argument parsing or an allocation fails. */
+static PyObject *
+stringlib_zfill(PyObject *self, PyObject *args)
+{
+    Py_ssize_t width, size, fill;
+    PyObject *padded;
+    char *buf;
+
+    if (!PyArg_ParseTuple(args, "n:zfill", &width))
+        return NULL;
+
+    size = STRINGLIB_LEN(self);
+    if (size >= width) {
+#if !STRINGLIB_MUTABLE
+        /* Only an exact immutable instance may be returned as-is. */
+        if (STRINGLIB_CHECK_EXACT(self)) {
+            Py_INCREF(self);
+            return (PyObject*) self;
+        }
+#endif
+        /* Mutable objects and subclass instances get an identical copy. */
+        return STRINGLIB_NEW(STRINGLIB_STR(self), size);
+    }
+
+    fill = width - size;
+    padded = pad(self, fill, 0, '0');
+    if (padded == NULL)
+        return NULL;
+
+    /* buf[fill] is the first byte copied from self.
+     * NOTE(review): when self is empty this index is one past the fill
+     * bytes; presumably STRINGLIB_NEW provides a trailing byte there --
+     * confirm. */
+    buf = STRINGLIB_STR(padded);
+    if (buf[fill] == '+' || buf[fill] == '-') {
+        /* move sign to beginning of string */
+        buf[0] = buf[fill];
+        buf[fill] = '0';
+    }
+
+    return (PyObject*) padded;
+}
+
+
+#define _STRINGLIB_SPLIT_APPEND(data, left, right) \
+ str = STRINGLIB_NEW((data) + (left), \
+ (right) - (left)); \
+ if (str == NULL) \
+ goto onError; \
+ if (PyList_Append(list, str)) { \
+ Py_DECREF(str); \
+ goto onError; \
+ } \
+ else \
+ Py_DECREF(str);
+
+PyDoc_STRVAR(splitlines__doc__,
+"B.splitlines([keepends]) -> list of lines\n\
+\n\
+Return a list of the lines in B, breaking at line boundaries.\n\
+Line breaks are not included in the resulting list unless keepends\n\
+is given and true.");
+
+static PyObject*
+stringlib_splitlines(PyObject *self, PyObject *args)
+{
+ register Py_ssize_t i;
+ register Py_ssize_t j;
+ Py_ssize_t len;
+ int keepends = 0;
+ PyObject *list;
+ PyObject *str;
+ char *data;
+
+ if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
+ return NULL;
+
+ data = STRINGLIB_STR(self);
+ len = STRINGLIB_LEN(self);
+
+ /* This does not use the preallocated list because splitlines is
+ usually run with hundreds of newlines. The overhead of
+ switching between PyList_SET_ITEM and append causes about a
+ 2-3% slowdown for that common case. A smarter implementation
+ could move the if check out, so the SET_ITEMs are done first
+ and the appends only done when the prealloc buffer is full.
+ That's too much work for little gain.*/
+
+ list = PyList_New(0);
+ if (!list)
+ goto onError;
+
+ for (i = j = 0; i < len; ) {
+ Py_ssize_t eol;
+
+ /* Find a line and append it */
+ while (i < len && data[i] != '\n' && data[i] != '\r')
+ i++;
+
+ /* Skip the line break reading CRLF as one line break */
+ eol = i;
+ if (i < len) {
+ if (data[i] == '\r' && i + 1 < len &&
+ data[i+1] == '\n')
+ i += 2;
+ else
+ i++;
+ if (keepends)
+ eol = i;
+ }
+ _STRINGLIB_SPLIT_APPEND(data, j, eol);
+ j = i;
+ }
+ if (j < len) {
+ _STRINGLIB_SPLIT_APPEND(data, j, len);
+ }
+
+ return list;
+
+ onError:
+ Py_XDECREF(list);
+ return NULL;
+}
+
+#undef _STRINGLIB_SPLIT_APPEND
+
diff --git a/Objects/stringlib/unicodedefs.h b/Objects/stringlib/unicodedefs.h
index 25c1d4f..fa6140f 100644
--- a/Objects/stringlib/unicodedefs.h
+++ b/Objects/stringlib/unicodedefs.h
@@ -22,6 +22,8 @@
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_TOSTR PyObject_Unicode
+#define STRINGLIB_WANT_CONTAINS_OBJ 1
+
/* STRINGLIB_CMP was defined as:
Py_LOCAL_INLINE(int)