For PEP3137: Adds missing methods to the mutable PyBytes object (soon

to be called a buffer). Shares code with stringobject when possible. Adds unit tests with common code that should be usable to test the PEPs mutable buffer() and immutable bytes() types. http://bugs.python.org/issue1261
author: Gregory P. Smith <greg@mad-scientist.com> 2007-10-16 06:31:30 (GMT)
committer: Gregory P. Smith <greg@mad-scientist.com> 2007-10-16 06:31:30 (GMT)
commit: 60d241f135f10312f5a638846659d7e471f6cac9 (patch)
tree: 620abe8a5e99620cec51a3476ea96c2459956e70 /Objects/stringlib
parent: 3d2fd7f923c35fe61c0f26f2ae150d73a42859b9 (diff)
download: cpython-60d241f135f10312f5a638846659d7e471f6cac9.zip
cpython-60d241f135f10312f5a638846659d7e471f6cac9.tar.gz
cpython-60d241f135f10312f5a638846659d7e471f6cac9.tar.bz2
5 files changed, 484 insertions, 1 deletions
diff --git a/Objects/stringlib/README.txt b/Objects/stringlib/README.txt
index 82a8774..aec3441 100644
--- a/Objects/stringlib/README.txt
+++ b/Objects/stringlib/README.txt
@@ -32,3 +32,12 @@ STRINGLIB_CHAR* STRINGLIB_STR(PyObject*)
 
     returns the pointer to the character data for the given string
     object (which must be of the right type)
+
+int STRINGLIB_CHECK_EXACT(PyObject *)
+
+    returns true if the object is an instance of our type, not a subclass.
+
+STRINGLIB_MUTABLE
+
+    Must be 0 or 1 to tell the cpp macros in stringlib code if the object
+    being operated on is mutable or not.
diff --git a/Objects/stringlib/ctype.h b/Objects/stringlib/ctype.h
new file mode 100644
index 0000000..8951276
--- /dev/null
+++ b/Objects/stringlib/ctype.h
@@ -0,0 +1,110 @@
+/* NOTE: this API is -ONLY- for use with single byte character strings. */
+/* Do not use it with Unicode. */
+
+#include "bytes_methods.h"
+
+static PyObject*
+stringlib_isspace(PyObject *self)
+{
+    return _Py_bytes_isspace(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+}
+
+static PyObject*
+stringlib_isalpha(PyObject *self)
+{
+    return _Py_bytes_isalpha(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+}
+
+static PyObject*
+stringlib_isalnum(PyObject *self)
+{
+    return _Py_bytes_isalnum(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+}
+
+static PyObject*
+stringlib_isdigit(PyObject *self)
+{
+    return _Py_bytes_isdigit(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+}
+
+static PyObject*
+stringlib_islower(PyObject *self)
+{
+    return _Py_bytes_islower(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+}
+
+static PyObject*
+stringlib_isupper(PyObject *self)
+{
+    return _Py_bytes_isupper(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+}
+
+static PyObject*
+stringlib_istitle(PyObject *self)
+{
+    return _Py_bytes_istitle(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+}
+
+
+/* functions that return a new object partially translated by ctype funcs: */
+
+static PyObject*
+stringlib_lower(PyObject *self)
+{
+    PyObject* newobj;
+    newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));
+    if (!newobj)
+            return NULL;
+    _Py_bytes_lower(STRINGLIB_STR(newobj), STRINGLIB_STR(self),
+                 STRINGLIB_LEN(self));
+    return newobj;
+}
+
+static PyObject*
+stringlib_upper(PyObject *self)
+{
+    PyObject* newobj;
+    newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));
+    if (!newobj)
+            return NULL;
+    _Py_bytes_upper(STRINGLIB_STR(newobj), STRINGLIB_STR(self),
+                 STRINGLIB_LEN(self));
+    return newobj;
+}
+
+static PyObject*
+stringlib_title(PyObject *self)
+{
+    PyObject* newobj;
+    newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));
+    if (!newobj)
+            return NULL;
+    _Py_bytes_title(STRINGLIB_STR(newobj), STRINGLIB_STR(self),
+                 STRINGLIB_LEN(self));
+    return newobj;
+}
+
+static PyObject*
+stringlib_capitalize(PyObject *self)
+{
+    PyObject* newobj;
+    newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));
+    if (!newobj)
+            return NULL;
+    _Py_bytes_capitalize(STRINGLIB_STR(newobj), STRINGLIB_STR(self),
+                      STRINGLIB_LEN(self));
+    return newobj;
+}
+
+static PyObject*
+stringlib_swapcase(PyObject *self)
+{
+    PyObject* newobj;
+    newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));
+    if (!newobj)
+            return NULL;
+    _Py_bytes_swapcase(STRINGLIB_STR(newobj), STRINGLIB_STR(self),
+                    STRINGLIB_LEN(self));
+    return newobj;
+}
+
diff --git a/Objects/stringlib/find.h b/Objects/stringlib/find.h
index 4cdbb09..3b924b6 100644
--- a/Objects/stringlib/find.h
+++ b/Objects/stringlib/find.h
@@ -90,7 +90,7 @@ stringlib_rfind_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
     return stringlib_rfind(str + start, end - start, sub, sub_len, start);
 }
 
-#ifdef STRINGLIB_STR
+#ifdef STRINGLIB_WANT_CONTAINS_OBJ
 
 Py_LOCAL_INLINE(int)
 stringlib_contains_obj(PyObject* str, PyObject* sub)
diff --git a/Objects/stringlib/transmogrify.h b/Objects/stringlib/transmogrify.h
new file mode 100644
index 0000000..1ee8e75
--- /dev/null
+++ b/Objects/stringlib/transmogrify.h
@@ -0,0 +1,362 @@
+/* NOTE: this API is -ONLY- for use with single byte character strings. */
+/* Do not use it with Unicode. */
+
+#include "bytes_methods.h"
+
+#ifndef STRINGLIB_MUTABLE
+#warning "STRINGLIB_MUTABLE not defined before #include, assuming 0"
+#define STRINGLIB_MUTABLE 0
+#endif
+
+/* the more complicated methods.  parts of these should be pulled out into the
+   shared code in bytes_methods.c to cut down on duplicate code bloat.  */
+
+PyDoc_STRVAR(expandtabs__doc__,
+"B.expandtabs([tabsize]) -> modified copy of B\n\
+\n\
+Return a copy of B where all tab characters are expanded using spaces.\n\
+If tabsize is not given, a tab size of 8 characters is assumed.");
+
+static PyObject*
+stringlib_expandtabs(PyObject *self, PyObject *args)
+{
+    const char *e, *p;
+    char *q;
+    Py_ssize_t i, j, old_j;
+    PyObject *u;
+    int tabsize = 8;
+
+    if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
+	return NULL;
+
+    /* First pass: determine size of output string */
+    i = j = old_j = 0;
+    e = STRINGLIB_STR(self) + STRINGLIB_LEN(self);
+    for (p = STRINGLIB_STR(self); p < e; p++)
+        if (*p == '\t') {
+	    if (tabsize > 0) {
+		j += tabsize - (j % tabsize);
+                /* XXX: this depends on a signed integer overflow to < 0 */
+                /* C compilers, including gcc, do -NOT- guarantee this. */
+		if (old_j > j) {
+		    PyErr_SetString(PyExc_OverflowError,
+				    "result is too long");
+		    return NULL;
+		}
+		old_j = j;
+            }
+	}
+        else {
+            j++;
+            if (*p == '\n' || *p == '\r') {
+                i += j;
+                old_j = j = 0;
+                /* XXX: this depends on a signed integer overflow to < 0 */
+                /* C compilers, including gcc, do -NOT- guarantee this. */
+                if (i < 0) {
+                    PyErr_SetString(PyExc_OverflowError,
+                                    "result is too long");
+                    return NULL;
+                }
+            }
+        }
+
+    if ((i + j) < 0) {
+        /* XXX: this depends on a signed integer overflow to < 0 */
+        /* C compilers, including gcc, do -NOT- guarantee this. */
+        PyErr_SetString(PyExc_OverflowError, "result is too long");
+        return NULL;
+    }
+
+    /* Second pass: create output string and fill it */
+    u = STRINGLIB_NEW(NULL, i + j);
+    if (!u)
+        return NULL;
+
+    j = 0;
+    q = STRINGLIB_STR(u);
+
+    for (p = STRINGLIB_STR(self); p < e; p++)
+        if (*p == '\t') {
+	    if (tabsize > 0) {
+		i = tabsize - (j % tabsize);
+		j += i;
+		while (i--)
+		    *q++ = ' ';
+	    }
+	}
+	else {
+            j++;
+	    *q++ = *p;
+            if (*p == '\n' || *p == '\r')
+                j = 0;
+        }
+
+    return u;
+}
+
+Py_LOCAL_INLINE(PyObject *)
+pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
+{
+    PyObject *u;
+
+    if (left < 0)
+        left = 0;
+    if (right < 0)
+        right = 0;
+
+    if (left == 0 && right == 0 && STRINGLIB_CHECK_EXACT(self)) {
+#if STRINGLIB_MUTABLE
+        /* We're defined as returning a copy;  If the object is mutable
+         * that means we must make an identical copy. */
+        return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+#else
+        Py_INCREF(self);
+        return (PyObject *)self;
+#endif /* STRINGLIB_MUTABLE */
+    }
+
+    u = STRINGLIB_NEW(NULL,
+				   left + STRINGLIB_LEN(self) + right);
+    if (u) {
+        if (left)
+            memset(STRINGLIB_STR(u), fill, left);
+        Py_MEMCPY(STRINGLIB_STR(u) + left,
+	       STRINGLIB_STR(self),
+	       STRINGLIB_LEN(self));
+        if (right)
+            memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self),
+		   fill, right);
+    }
+
+    return u;
+}
+
+PyDoc_STRVAR(ljust__doc__,
+"B.ljust(width[, fillchar]) -> modified copy of B\n"
+"\n"
+"Return B left justified in a string of length width. Padding is\n"
+"done using the specified fill character (default is a space).");
+
+static PyObject *
+stringlib_ljust(PyObject *self, PyObject *args)
+{
+    Py_ssize_t width;
+    char fillchar = ' ';
+
+    if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
+        return NULL;
+
+    if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) {
+#if STRINGLIB_MUTABLE
+        /* We're defined as returning a copy;  If the object is mutable
+         * that means we must make an identical copy. */
+        return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+#else
+        Py_INCREF(self);
+        return (PyObject*) self;
+#endif
+    }
+
+    return pad(self, 0, width - STRINGLIB_LEN(self), fillchar);
+}
+
+
+PyDoc_STRVAR(rjust__doc__,
+"B.rjust(width[, fillchar]) -> modified copy of B\n"
+"\n"
+"Return B right justified in a string of length width. Padding is\n"
+"done using the specified fill character (default is a space)");
+
+static PyObject *
+stringlib_rjust(PyObject *self, PyObject *args)
+{
+    Py_ssize_t width;
+    char fillchar = ' ';
+
+    if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
+        return NULL;
+
+    if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) {
+#if STRINGLIB_MUTABLE
+        /* We're defined as returning a copy;  If the object is mutable
+         * that means we must make an identical copy. */
+        return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+#else
+        Py_INCREF(self);
+        return (PyObject*) self;
+#endif
+    }
+
+    return pad(self, width - STRINGLIB_LEN(self), 0, fillchar);
+}
+
+
+PyDoc_STRVAR(center__doc__,
+"B.center(width[, fillchar]) -> modified copy of B\n"
+"\n"
+"Return B centered in a string of length width. Padding is\n"
+"done using the specified fill character (default is a space)");
+
+static PyObject *
+stringlib_center(PyObject *self, PyObject *args)
+{
+    Py_ssize_t marg, left;
+    Py_ssize_t width;
+    char fillchar = ' ';
+
+    if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
+        return NULL;
+
+    if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) {
+#if STRINGLIB_MUTABLE
+        /* We're defined as returning a copy;  If the object is mutable
+         * that means we must make an identical copy. */
+        return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+#else
+        Py_INCREF(self);
+        return (PyObject*) self;
+#endif
+    }
+
+    marg = width - STRINGLIB_LEN(self);
+    left = marg / 2 + (marg & width & 1);
+
+    return pad(self, left, marg - left, fillchar);
+}
+
+PyDoc_STRVAR(zfill__doc__,
+"B.zfill(width) -> modified copy of B\n"
+"\n"
+"Pad a numeric string B with zeros on the left, to fill a field\n"
+"of the specified width.  B is never truncated.");
+
+static PyObject *
+stringlib_zfill(PyObject *self, PyObject *args)
+{
+    Py_ssize_t fill;
+    PyObject *s;
+    char *p;
+    Py_ssize_t width;
+
+    if (!PyArg_ParseTuple(args, "n:zfill", &width))
+        return NULL;
+
+    if (STRINGLIB_LEN(self) >= width) {
+        if (STRINGLIB_CHECK_EXACT(self)) {
+#if STRINGLIB_MUTABLE
+            /* We're defined as returning a copy;  If the object is mutable
+             * that means we must make an identical copy. */
+            return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+#else
+            Py_INCREF(self);
+            return (PyObject*) self;
+#endif
+        }
+        else
+            return STRINGLIB_NEW(
+                STRINGLIB_STR(self),
+                STRINGLIB_LEN(self)
+            );
+    }
+
+    fill = width - STRINGLIB_LEN(self);
+
+    s = pad(self, fill, 0, '0');
+
+    if (s == NULL)
+        return NULL;
+
+    p = STRINGLIB_STR(s);
+    if (p[fill] == '+' || p[fill] == '-') {
+        /* move sign to beginning of string */
+        p[0] = p[fill];
+        p[fill] = '0';
+    }
+
+    return (PyObject*) s;
+}
+
+
+#define _STRINGLIB_SPLIT_APPEND(data, left, right)		\
+	str = STRINGLIB_NEW((data) + (left),	                \
+					 (right) - (left));	\
+	if (str == NULL)					\
+		goto onError;					\
+	if (PyList_Append(list, str)) {				\
+		Py_DECREF(str);					\
+		goto onError;					\
+	}							\
+	else							\
+		Py_DECREF(str);
+
+PyDoc_STRVAR(splitlines__doc__,
+"B.splitlines([keepends]) -> list of lines\n\
+\n\
+Return a list of the lines in B, breaking at line boundaries.\n\
+Line breaks are not included in the resulting list unless keepends\n\
+is given and true.");
+
+static PyObject*
+stringlib_splitlines(PyObject *self, PyObject *args)
+{
+    register Py_ssize_t i;
+    register Py_ssize_t j;
+    Py_ssize_t len;
+    int keepends = 0;
+    PyObject *list;
+    PyObject *str;
+    char *data;
+
+    if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
+        return NULL;
+
+    data = STRINGLIB_STR(self);
+    len = STRINGLIB_LEN(self);
+
+    /* This does not use the preallocated list because splitlines is
+       usually run with hundreds of newlines.  The overhead of
+       switching between PyList_SET_ITEM and append causes about a
+       2-3% slowdown for that common case.  A smarter implementation
+       could move the if check out, so the SET_ITEMs are done first
+       and the appends only done when the prealloc buffer is full.
+       That's too much work for little gain.*/
+
+    list = PyList_New(0);
+    if (!list)
+        goto onError;
+
+    for (i = j = 0; i < len; ) {
+	Py_ssize_t eol;
+
+	/* Find a line and append it */
+	while (i < len && data[i] != '\n' && data[i] != '\r')
+	    i++;
+
+	/* Skip the line break reading CRLF as one line break */
+	eol = i;
+	if (i < len) {
+	    if (data[i] == '\r' && i + 1 < len &&
+		data[i+1] == '\n')
+		i += 2;
+	    else
+		i++;
+	    if (keepends)
+		eol = i;
+	}
+	_STRINGLIB_SPLIT_APPEND(data, j, eol);
+	j = i;
+    }
+    if (j < len) {
+	_STRINGLIB_SPLIT_APPEND(data, j, len);
+    }
+
+    return list;
+
+ onError:
+    Py_XDECREF(list);
+    return NULL;
+}
+
+#undef _STRINGLIB_SPLIT_APPEND
+
diff --git a/Objects/stringlib/unicodedefs.h b/Objects/stringlib/unicodedefs.h
index 25c1d4f..fa6140f 100644
--- a/Objects/stringlib/unicodedefs.h
+++ b/Objects/stringlib/unicodedefs.h
@@ -22,6 +22,8 @@
 #define STRINGLIB_CHECK          PyUnicode_Check
 #define STRINGLIB_TOSTR          PyObject_Unicode
 
+#define STRINGLIB_WANT_CONTAINS_OBJ 1
+
 /* STRINGLIB_CMP was defined as:
 
 Py_LOCAL_INLINE(int)
author	Gregory P. Smith <greg@mad-scientist.com>	2007-10-16 06:31:30 (GMT)
committer	Gregory P. Smith <greg@mad-scientist.com>	2007-10-16 06:31:30 (GMT)
commit	60d241f135f10312f5a638846659d7e471f6cac9 (patch)
tree	620abe8a5e99620cec51a3476ea96c2459956e70 /Objects/stringlib
parent	3d2fd7f923c35fe61c0f26f2ae150d73a42859b9 (diff)
download	cpython-60d241f135f10312f5a638846659d7e471f6cac9.zip cpython-60d241f135f10312f5a638846659d7e471f6cac9.tar.gz cpython-60d241f135f10312f5a638846659d7e471f6cac9.tar.bz2