Merging the py3k-pep3137 branch back into the py3k branch.

No detailed change log; just check out the change log for the py3k-pep3137 branch. The most obvious changes: - str8 renamed to bytes (PyString at the C level); - bytes renamed to buffer (PyBytes at the C level); - PyString and PyUnicode are no longer compatible. I.e. we now have an immutable bytes type and a mutable bytes type. The behavior of PyString was modified quite a bit, to make it more bytes-like. Some changes are still on the to-do list.
author: Guido van Rossum <guido@python.org> 2007-11-06 21:34:58 (GMT)
committer: Guido van Rossum <guido@python.org> 2007-11-06 21:34:58 (GMT)
commit: 98297ee7815939b124156e438b22bd652d67b5db (patch)
tree: a9d239ebd87c73af2571ab48003984c4e18e27e5 /Objects/bytesobject.c
parent: a19f80c6df2df5e8a5d0cff37131097835ef971e (diff)
download: cpython-98297ee7815939b124156e438b22bd652d67b5db.zip
cpython-98297ee7815939b124156e438b22bd652d67b5db.tar.gz
cpython-98297ee7815939b124156e438b22bd652d67b5db.tar.bz2
1 files changed, 223 insertions, 173 deletions
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 3f2dbc2..b28cacf 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -1,7 +1,5 @@
 /* Bytes object implementation */
 
-/* XXX TO DO: optimizations */
-
 #define PY_SSIZE_T_CLEAN
 #include "Python.h"
 #include "structmember.h"
@@ -214,26 +212,21 @@ PyBytes_Concat(PyObject *a, PyObject *b)
 {
     Py_ssize_t size;
     Py_buffer va, vb;
-    PyBytesObject *result;
+    PyBytesObject *result = NULL;
 
     va.len = -1;
     vb.len = -1;
     if (_getbuffer(a, &va) < 0  ||
         _getbuffer(b, &vb) < 0) {
-            if (va.len != -1)
-                    PyObject_ReleaseBuffer(a, &va);
-            if (vb.len != -1)
-                    PyObject_ReleaseBuffer(b, &vb);
             PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
                          Py_Type(a)->tp_name, Py_Type(b)->tp_name);
-            return NULL;
+            goto done;
     }
 
     size = va.len + vb.len;
     if (size < 0) {
-            PyObject_ReleaseBuffer(a, &va);
-            PyObject_ReleaseBuffer(b, &vb);
             return PyErr_NoMemory();
+            goto done;
     }
 
     result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
@@ -242,8 +235,11 @@ PyBytes_Concat(PyObject *a, PyObject *b)
         memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
     }
 
-    PyObject_ReleaseBuffer(a, &va);
-    PyObject_ReleaseBuffer(b, &vb);
+  done:
+    if (va.len != -1)
+        PyObject_ReleaseBuffer(a, &va);
+    if (vb.len != -1)
+        PyObject_ReleaseBuffer(b, &vb);
     return (PyObject *)result;
 }
 
@@ -256,12 +252,6 @@ bytes_length(PyBytesObject *self)
 }
 
 static PyObject *
-bytes_concat(PyBytesObject *self, PyObject *other)
-{
-    return PyBytes_Concat((PyObject *)self, other);
-}
-
-static PyObject *
 bytes_iconcat(PyBytesObject *self, PyObject *other)
 {
     Py_ssize_t mysize;
@@ -351,51 +341,13 @@ bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
     return (PyObject *)self;
 }
 
-static int
-bytes_substring(PyBytesObject *self, PyBytesObject *other)
-{
-    Py_ssize_t i;
-
-    if (Py_Size(other) == 1) {
-        return memchr(self->ob_bytes, other->ob_bytes[0],
-                      Py_Size(self)) != NULL;
-    }
-    if (Py_Size(other) == 0)
-        return 1; /* Edge case */
-    for (i = 0; i + Py_Size(other) <= Py_Size(self); i++) {
-        /* XXX Yeah, yeah, lots of optimizations possible... */
-        if (memcmp(self->ob_bytes + i, other->ob_bytes, Py_Size(other)) == 0)
-            return 1;
-    }
-    return 0;
-}
-
-static int
-bytes_contains(PyBytesObject *self, PyObject *value)
-{
-    Py_ssize_t ival;
-
-    if (PyBytes_Check(value))
-        return bytes_substring(self, (PyBytesObject *)value);
-
-    ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
-    if (ival == -1 && PyErr_Occurred())
-        return -1;
-    if (ival < 0 || ival >= 256) {
-        PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
-        return -1;
-    }
-
-    return memchr(self->ob_bytes, ival, Py_Size(self)) != NULL;
-}
-
 static PyObject *
 bytes_getitem(PyBytesObject *self, Py_ssize_t i)
 {
     if (i < 0)
         i += Py_Size(self);
     if (i < 0 || i >= Py_Size(self)) {
-        PyErr_SetString(PyExc_IndexError, "bytes index out of range");
+        PyErr_SetString(PyExc_IndexError, "buffer index out of range");
         return NULL;
     }
     return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
@@ -414,7 +366,7 @@ bytes_subscript(PyBytesObject *self, PyObject *item)
             i += PyBytes_GET_SIZE(self);
 
         if (i < 0 || i >= Py_Size(self)) {
-            PyErr_SetString(PyExc_IndexError, "bytes index out of range");
+            PyErr_SetString(PyExc_IndexError, "buffer index out of range");
             return NULL;
         }
         return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
@@ -451,7 +403,7 @@ bytes_subscript(PyBytesObject *self, PyObject *item)
         }
     }
     else {
-        PyErr_SetString(PyExc_TypeError, "bytes indices must be integers");
+        PyErr_SetString(PyExc_TypeError, "buffer indices must be integers");
         return NULL;
     }
 }
@@ -551,7 +503,7 @@ bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
         i += Py_Size(self);
 
     if (i < 0 || i >= Py_Size(self)) {
-        PyErr_SetString(PyExc_IndexError, "bytes index out of range");
+        PyErr_SetString(PyExc_IndexError, "buffer index out of range");
         return -1;
     }
 
@@ -587,7 +539,7 @@ bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
             i += PyBytes_GET_SIZE(self);
 
         if (i < 0 || i >= Py_Size(self)) {
-            PyErr_SetString(PyExc_IndexError, "bytes index out of range");
+            PyErr_SetString(PyExc_IndexError, "buffer index out of range");
             return -1;
         }
 
@@ -619,7 +571,7 @@ bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
         }
     }
     else {
-        PyErr_SetString(PyExc_TypeError, "bytes indices must be integer");
+        PyErr_SetString(PyExc_TypeError, "buffer indices must be integer");
         return -1;
     }
 
@@ -772,13 +724,7 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
         encoded = PyCodec_Encode(arg, encoding, errors);
         if (encoded == NULL)
             return -1;
-        if (!PyBytes_Check(encoded) && !PyString_Check(encoded)) {
-            PyErr_Format(PyExc_TypeError,
-                "encoder did not return a str8 or bytes object (type=%.400s)",
-                Py_Type(encoded)->tp_name);
-            Py_DECREF(encoded);
-            return -1;
-        }
+        assert(PyString_Check(encoded));
         new = bytes_iconcat(self, encoded);
         Py_DECREF(encoded);
         if (new == NULL)
@@ -889,11 +835,15 @@ static PyObject *
 bytes_repr(PyBytesObject *self)
 {
     static const char *hexdigits = "0123456789abcdef";
-    size_t newsize = 3 + 4 * Py_Size(self);
+    const char *quote_prefix = "buffer(b";
+    const char *quote_postfix = ")";
+    Py_ssize_t length = Py_Size(self);
+    /* 9 prefix + 2 postfix */
+    size_t newsize = 11 + 4 * length;
     PyObject *v;
-    if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(self)) {
+    if (newsize > PY_SSIZE_T_MAX || newsize / 4 - 2 != length) {
         PyErr_SetString(PyExc_OverflowError,
-            "bytes object is too large to make repr");
+            "buffer object is too large to make repr");
         return NULL;
     }
     v = PyUnicode_FromUnicode(NULL, newsize);
@@ -904,17 +854,36 @@ bytes_repr(PyBytesObject *self)
         register Py_ssize_t i;
         register Py_UNICODE c;
         register Py_UNICODE *p;
-        int quote = '\'';
+        int quote;
+
+        /* Figure out which quote to use; single is preferred */
+        quote = '\'';
+        {
+            char *test, *start;
+            start = PyBytes_AS_STRING(self);
+            for (test = start; test < start+length; ++test) {
+                if (*test == '"') {
+                    quote = '\''; /* back to single */
+                    goto decided;
+                }
+                else if (*test == '\'')
+                    quote = '"';
+            }
+          decided:
+            ;
+        }
 
         p = PyUnicode_AS_UNICODE(v);
-        *p++ = 'b';
+        while (*quote_prefix)
+            *p++ = *quote_prefix++;
         *p++ = quote;
-        for (i = 0; i < Py_Size(self); i++) {
+
+        for (i = 0; i < length; i++) {
             /* There's at least enough room for a hex escape
                and a closing quote. */
             assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
             c = self->ob_bytes[i];
-            if (c == quote || c == '\\')
+            if (c == '\'' || c == '\\')
                 *p++ = '\\', *p++ = c;
             else if (c == '\t')
                 *p++ = '\\', *p++ = 't';
@@ -935,6 +904,9 @@ bytes_repr(PyBytesObject *self)
         }
         assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
         *p++ = quote;
+        while (*quote_postfix) {
+           *p++ = *quote_postfix++;
+        }
         *p = '\0';
         if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
             Py_DECREF(v);
@@ -945,9 +917,14 @@ bytes_repr(PyBytesObject *self)
 }
 
 static PyObject *
-bytes_str(PyBytesObject *self)
+bytes_str(PyObject *op)
 {
-    return PyString_FromStringAndSize(self->ob_bytes, Py_Size(self));
+	if (Py_BytesWarningFlag) {
+		if (PyErr_WarnEx(PyExc_BytesWarning,
+				 "str() on a buffer instance", 1))
+			return NULL;
+	}
+	return bytes_repr((PyBytesObject*)op);
 }
 
 static PyObject *
@@ -964,6 +941,12 @@ bytes_richcompare(PyObject *self, PyObject *other, int op)
        error, even if the comparison is for equality. */
     if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
         PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
+        if (Py_BytesWarningFlag && op == Py_EQ) {
+            if (PyErr_WarnEx(PyExc_BytesWarning,
+                            "Comparsion between buffer and string", 1))
+                return NULL;
+        }
+
         Py_INCREF(Py_NotImplemented);
         return Py_NotImplemented;
     }
@@ -1112,7 +1095,7 @@ bytes_find(PyBytesObject *self, PyObject *args)
 }
 
 PyDoc_STRVAR(count__doc__,
-"B.count(sub[, start[, end]]) -> int\n\
+"B.count(sub [,start [,end]]) -> int\n\
 \n\
 Return the number of non-overlapping occurrences of subsection sub in\n\
 bytes B[start:end].  Optional arguments start and end are interpreted\n\
@@ -1203,6 +1186,30 @@ bytes_rindex(PyBytesObject *self, PyObject *args)
 }
 
 
+static int
+bytes_contains(PyObject *self, PyObject *arg)
+{
+    Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
+    if (ival == -1 && PyErr_Occurred()) {
+        Py_buffer varg;
+        int pos;
+        PyErr_Clear();
+        if (_getbuffer(arg, &varg) < 0)
+            return -1;
+        pos = stringlib_find(PyBytes_AS_STRING(self), Py_Size(self),
+                             varg.buf, varg.len, 0);
+        PyObject_ReleaseBuffer(arg, &varg);
+        return pos >= 0;
+    }
+    if (ival < 0 || ival >= 256) {
+        PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
+        return -1;
+    }
+
+    return memchr(PyBytes_AS_STRING(self), ival, Py_Size(self)) != NULL;
+}
+
+
 /* Matches the end (direction >= 0) or start (direction < 0) of self
  * against substr, using the start and end arguments. Returns
  * -1 on error, 0 if not found and 1 if found.
@@ -1247,7 +1254,7 @@ done:
 
 
 PyDoc_STRVAR(startswith__doc__,
-"B.startswith(prefix[, start[, end]]) -> bool\n\
+"B.startswith(prefix [,start [,end]]) -> bool\n\
 \n\
 Return True if B starts with the specified prefix, False otherwise.\n\
 With optional start, test B beginning at that position.\n\
@@ -1287,7 +1294,7 @@ bytes_startswith(PyBytesObject *self, PyObject *args)
 }
 
 PyDoc_STRVAR(endswith__doc__,
-"B.endswith(suffix[, start[, end]]) -> bool\n\
+"B.endswith(suffix [,start [,end]]) -> bool\n\
 \n\
 Return True if B ends with the specified suffix, False otherwise.\n\
 With optional start, test B beginning at that position.\n\
@@ -1328,12 +1335,12 @@ bytes_endswith(PyBytesObject *self, PyObject *args)
 
 
 PyDoc_STRVAR(translate__doc__,
-"B.translate(table [,deletechars]) -> bytes\n\
+"B.translate(table[, deletechars]) -> buffer\n\
 \n\
-Return a copy of the bytes B, where all characters occurring\n\
-in the optional argument deletechars are removed, and the\n\
-remaining characters have been mapped through the given\n\
-translation table, which must be a bytes of length 256.");
+Return a copy of B, where all characters occurring in the\n\
+optional argument deletechars are removed, and the remaining\n\
+characters have been mapped through the given translation\n\
+table, which must be a bytes object of length 256.");
 
 static PyObject *
 bytes_translate(PyBytesObject *self, PyObject *args)
@@ -2026,9 +2033,9 @@ replace(PyBytesObject *self,
 
 
 PyDoc_STRVAR(replace__doc__,
-"B.replace (old, new[, count]) -> bytes\n\
+"B.replace(old, new[, count]) -> bytes\n\
 \n\
-Return a copy of bytes B with all occurrences of subsection\n\
+Return a copy of B with all occurrences of subsection\n\
 old replaced by new.  If the optional argument count is\n\
 given, only the first count occurrences are replaced.");
 
@@ -2149,23 +2156,23 @@ split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
         return NULL;
 
     for (i = j = 0; i < len; ) {
-	/* find a token */
-	while (i < len && ISSPACE(s[i]))
-	    i++;
-	j = i;
-	while (i < len && !ISSPACE(s[i]))
-	    i++;
-	if (j < i) {
-	    if (maxcount-- <= 0)
-		break;
-	    SPLIT_ADD(s, j, i);
-	    while (i < len && ISSPACE(s[i]))
-		i++;
-	    j = i;
-	}
+        /* find a token */
+        while (i < len && ISSPACE(s[i]))
+            i++;
+        j = i;
+        while (i < len && !ISSPACE(s[i]))
+            i++;
+        if (j < i) {
+            if (maxcount-- <= 0)
+                break;
+            SPLIT_ADD(s, j, i);
+            while (i < len && ISSPACE(s[i]))
+                i++;
+            j = i;
+        }
     }
     if (j < len) {
-	SPLIT_ADD(s, j, len);
+        SPLIT_ADD(s, j, len);
     }
     FIX_PREALLOC_SIZE(list);
     return list;
@@ -2176,10 +2183,10 @@ split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
 }
 
 PyDoc_STRVAR(split__doc__,
-"B.split([sep [, maxsplit]]) -> list of bytes\n\
+"B.split([sep[, maxsplit]]) -> list of buffer\n\
 \n\
-Return a list of the bytes in the string B, using sep as the delimiter.\n\
-If sep is not given, B is split on ASCII whitespace charcters\n\
+Return a list of the sections in B, using sep as the delimiter.\n\
+If sep is not given, B is split on ASCII whitespace characters\n\
 (space, tab, return, newline, formfeed, vertical tab).\n\
 If maxsplit is given, at most maxsplit splits are done.");
 
@@ -2255,12 +2262,37 @@ bytes_split(PyBytesObject *self, PyObject *args)
     return NULL;
 }
 
+/* stringlib's partition shares nullbytes in some cases.
+   undo this, we don't want the nullbytes to be shared. */
+static PyObject *
+make_nullbytes_unique(PyObject *result)
+{
+    if (result != NULL) {
+        int i;
+        assert(PyTuple_Check(result));
+        assert(PyTuple_GET_SIZE(result) == 3);
+        for (i = 0; i < 3; i++) {
+            if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) {
+                PyObject *new = PyBytes_FromStringAndSize(NULL, 0);
+                if (new == NULL) {
+                    Py_DECREF(result);
+                    result = NULL;
+                    break;
+                }
+                Py_DECREF(nullbytes);
+                PyTuple_SET_ITEM(result, i, new);
+            }
+        }
+    }
+    return result;
+}
+
 PyDoc_STRVAR(partition__doc__,
 "B.partition(sep) -> (head, sep, tail)\n\
 \n\
 Searches for the separator sep in B, and returns the part before it,\n\
 the separator itself, and the part after it.  If the separator is not\n\
-found, returns B and two empty bytes.");
+found, returns B and two empty buffer.");
 
 static PyObject *
 bytes_partition(PyBytesObject *self, PyObject *sep_obj)
@@ -2279,15 +2311,16 @@ bytes_partition(PyBytesObject *self, PyObject *sep_obj)
             );
 
     Py_DECREF(bytesep);
-    return result;
+    return make_nullbytes_unique(result);
 }
 
 PyDoc_STRVAR(rpartition__doc__,
 "B.rpartition(sep) -> (tail, sep, head)\n\
 \n\
-Searches for the separator sep in B, starting at the end of B, and returns\n\
-the part before it, the separator itself, and the part after it.  If the\n\
-separator is not found, returns two empty bytes and B.");
+Searches for the separator sep in B, starting at the end of B,\n\
+and returns the part before it, the separator itself, and the\n\
+part after it.  If the separator is not found, returns two empty\n\
+buffer objects and B.");
 
 static PyObject *
 bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
@@ -2306,7 +2339,7 @@ bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
             );
 
     Py_DECREF(bytesep);
-    return result;
+    return make_nullbytes_unique(result);
 }
 
 Py_LOCAL_INLINE(PyObject *)
@@ -2354,23 +2387,23 @@ rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
         return NULL;
 
     for (i = j = len - 1; i >= 0; ) {
-	/* find a token */
-	while (i >= 0 && Py_UNICODE_ISSPACE(s[i]))
-	    i--;
-	j = i;
-	while (i >= 0 && !Py_UNICODE_ISSPACE(s[i]))
-	    i--;
-	if (j > i) {
-	    if (maxcount-- <= 0)
-		break;
-	    SPLIT_ADD(s, i + 1, j + 1);
-	    while (i >= 0 && Py_UNICODE_ISSPACE(s[i]))
-		i--;
-	    j = i;
-	}
+        /* find a token */
+        while (i >= 0 && Py_UNICODE_ISSPACE(s[i]))
+            i--;
+        j = i;
+        while (i >= 0 && !Py_UNICODE_ISSPACE(s[i]))
+            i--;
+        if (j > i) {
+            if (maxcount-- <= 0)
+                break;
+            SPLIT_ADD(s, i + 1, j + 1);
+            while (i >= 0 && Py_UNICODE_ISSPACE(s[i]))
+                i--;
+            j = i;
+        }
     }
     if (j >= 0) {
-	SPLIT_ADD(s, 0, j + 1);
+        SPLIT_ADD(s, 0, j + 1);
     }
     FIX_PREALLOC_SIZE(list);
     if (PyList_Reverse(list) < 0)
@@ -2384,10 +2417,10 @@ rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
 }
 
 PyDoc_STRVAR(rsplit__doc__,
-"B.rsplit(sep [,maxsplit]) -> list of bytes\n\
+"B.rsplit(sep[, maxsplit]) -> list of buffer\n\
 \n\
-Return a list of the sections in the byte B, using sep as the delimiter,\n\
-starting at the end of the bytes and working to the front.\n\
+Return a list of the sections in B, using sep as the delimiter,\n\
+starting at the end of B and working to the front.\n\
 If sep is not given, B is split on ASCII whitespace characters\n\
 (space, tab, return, newline, formfeed, vertical tab).\n\
 If maxsplit is given, at most maxsplit splits are done.");
@@ -2458,7 +2491,7 @@ PyDoc_STRVAR(extend__doc__,
 "B.extend(iterable int) -> None\n\
 \n\
 Append all the elements from the iterator or sequence to the\n\
-end of the bytes.");
+end of B.");
 static PyObject *
 bytes_extend(PyBytesObject *self, PyObject *arg)
 {
@@ -2475,7 +2508,7 @@ bytes_extend(PyBytesObject *self, PyObject *arg)
 PyDoc_STRVAR(reverse__doc__,
 "B.reverse() -> None\n\
 \n\
-Reverse the order of the values in bytes in place.");
+Reverse the order of the values in B in place.");
 static PyObject *
 bytes_reverse(PyBytesObject *self, PyObject *unused)
 {
@@ -2497,7 +2530,7 @@ bytes_reverse(PyBytesObject *self, PyObject *unused)
 PyDoc_STRVAR(insert__doc__,
 "B.insert(index, int) -> None\n\
 \n\
-Insert a single item into the bytes before the given index.");
+Insert a single item into the buffer before the given index.");
 static PyObject *
 bytes_insert(PyBytesObject *self, PyObject *args)
 {
@@ -2536,7 +2569,7 @@ bytes_insert(PyBytesObject *self, PyObject *args)
 PyDoc_STRVAR(append__doc__,
 "B.append(int) -> None\n\
 \n\
-Append a single item to the end of the bytes.");
+Append a single item to the end of B.");
 static PyObject *
 bytes_append(PyBytesObject *self, PyObject *arg)
 {
@@ -2561,7 +2594,7 @@ bytes_append(PyBytesObject *self, PyObject *arg)
 PyDoc_STRVAR(pop__doc__,
 "B.pop([index]) -> int\n\
 \n\
-Remove and return a single item from the bytes. If no index\n\
+Remove and return a single item from B. If no index\n\
 argument is give, will pop the last value.");
 static PyObject *
 bytes_pop(PyBytesObject *self, PyObject *args)
@@ -2595,7 +2628,7 @@ bytes_pop(PyBytesObject *self, PyObject *args)
 PyDoc_STRVAR(remove__doc__,
 "B.remove(int) -> None\n\
 \n\
-Remove the first occurance of a value in bytes");
+Remove the first occurance of a value in B.");
 static PyObject *
 bytes_remove(PyBytesObject *self, PyObject *arg)
 {
@@ -2644,7 +2677,7 @@ rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
 }
 
 PyDoc_STRVAR(strip__doc__,
-"B.strip([bytes]) -> bytes\n\
+"B.strip([bytes]) -> buffer\n\
 \n\
 Strip leading and trailing bytes contained in the argument.\n\
 If the argument is omitted, strip ASCII whitespace.");
@@ -2662,10 +2695,10 @@ bytes_strip(PyBytesObject *self, PyObject *args)
         argsize = 6;
     }
     else {
-	    if (_getbuffer(arg, &varg) < 0)
-		    return NULL;
-	    argptr = varg.buf;
-	    argsize = varg.len;
+        if (_getbuffer(arg, &varg) < 0)
+            return NULL;
+        argptr = varg.buf;
+        argsize = varg.len;
     }
     myptr = self->ob_bytes;
     mysize = Py_Size(self);
@@ -2675,12 +2708,12 @@ bytes_strip(PyBytesObject *self, PyObject *args)
     else
         right = rstrip_helper(myptr, mysize, argptr, argsize);
     if (arg != Py_None)
-	    PyObject_ReleaseBuffer(arg, &varg);
+        PyObject_ReleaseBuffer(arg, &varg);
     return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
 }
 
 PyDoc_STRVAR(lstrip__doc__,
-"B.lstrip([bytes]) -> bytes\n\
+"B.lstrip([bytes]) -> buffer\n\
 \n\
 Strip leading bytes contained in the argument.\n\
 If the argument is omitted, strip leading ASCII whitespace.");
@@ -2698,22 +2731,22 @@ bytes_lstrip(PyBytesObject *self, PyObject *args)
         argsize = 6;
     }
     else {
-	    if (_getbuffer(arg, &varg) < 0)
-		    return NULL;
-	    argptr = varg.buf;
-	    argsize = varg.len;
+        if (_getbuffer(arg, &varg) < 0)
+            return NULL;
+        argptr = varg.buf;
+        argsize = varg.len;
     }
     myptr = self->ob_bytes;
     mysize = Py_Size(self);
     left = lstrip_helper(myptr, mysize, argptr, argsize);
     right = mysize;
     if (arg != Py_None)
-	    PyObject_ReleaseBuffer(arg, &varg);
+        PyObject_ReleaseBuffer(arg, &varg);
     return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
 }
 
 PyDoc_STRVAR(rstrip__doc__,
-"B.rstrip([bytes]) -> bytes\n\
+"B.rstrip([bytes]) -> buffer\n\
 \n\
 Strip trailing bytes contained in the argument.\n\
 If the argument is omitted, strip trailing ASCII whitespace.");
@@ -2731,27 +2764,27 @@ bytes_rstrip(PyBytesObject *self, PyObject *args)
         argsize = 6;
     }
     else {
-	    if (_getbuffer(arg, &varg) < 0)
-		    return NULL;
-	    argptr = varg.buf;
-	    argsize = varg.len;
+        if (_getbuffer(arg, &varg) < 0)
+            return NULL;
+        argptr = varg.buf;
+        argsize = varg.len;
     }
     myptr = self->ob_bytes;
     mysize = Py_Size(self);
     left = 0;
     right = rstrip_helper(myptr, mysize, argptr, argsize);
     if (arg != Py_None)
-	    PyObject_ReleaseBuffer(arg, &varg);
+        PyObject_ReleaseBuffer(arg, &varg);
     return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
 }
 
 PyDoc_STRVAR(decode_doc,
-"B.decode([encoding[,errors]]) -> unicode obect.\n\
+"B.decode([encoding[, errors]]) -> unicode object.\n\
 \n\
 Decodes B using the codec registered for encoding. encoding defaults\n\
 to the default encoding. errors may be given to set a different error\n\
-handling scheme. Default is 'strict' meaning that encoding errors raise\n\
-a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
+handling scheme.  Default is 'strict' meaning that encoding errors raise\n\
+a UnicodeDecodeError.  Other possible values are 'ignore' and 'replace'\n\
 as well as any other name registerd with codecs.register_error that is\n\
 able to handle UnicodeDecodeErrors.");
 
@@ -2782,8 +2815,7 @@ bytes_alloc(PyBytesObject *self)
 PyDoc_STRVAR(join_doc,
 "B.join(iterable_of_bytes) -> bytes\n\
 \n\
-Concatenates any number of bytes objects, with B in between each pair.\n\
-Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
+Concatenates any number of buffer objects, with B in between each pair.");
 
 static PyObject *
 bytes_join(PyBytesObject *self, PyObject *it)
@@ -2804,9 +2836,10 @@ bytes_join(PyBytesObject *self, PyObject *it)
     items = PySequence_Fast_ITEMS(seq);
 
     /* Compute the total size, and check that they are all bytes */
+    /* XXX Shouldn't we use _getbuffer() on these items instead? */
     for (i = 0; i < n; i++) {
         PyObject *obj = items[i];
-        if (!PyBytes_Check(obj)) {
+        if (!PyBytes_Check(obj) && !PyString_Check(obj)) {
             PyErr_Format(PyExc_TypeError,
                          "can only join an iterable of bytes "
                          "(item %ld has type '%.100s')",
@@ -2816,7 +2849,7 @@ bytes_join(PyBytesObject *self, PyObject *it)
         }
         if (i > 0)
             totalsize += mysize;
-        totalsize += PyBytes_GET_SIZE(obj);
+        totalsize += Py_Size(obj);
         if (totalsize < 0) {
             PyErr_NoMemory();
             goto error;
@@ -2830,12 +2863,17 @@ bytes_join(PyBytesObject *self, PyObject *it)
     dest = PyBytes_AS_STRING(result);
     for (i = 0; i < n; i++) {
         PyObject *obj = items[i];
-        Py_ssize_t size = PyBytes_GET_SIZE(obj);
-        if (i > 0) {
+        Py_ssize_t size = Py_Size(obj);
+        char *buf;
+        if (PyBytes_Check(obj))
+           buf = PyBytes_AS_STRING(obj);
+        else
+           buf = PyString_AS_STRING(obj);
+        if (i) {
             memcpy(dest, self->ob_bytes, mysize);
             dest += mysize;
         }
-        memcpy(dest, PyBytes_AS_STRING(obj), size);
+        memcpy(dest, buf, size);
         dest += size;
     }
 
@@ -2850,11 +2888,11 @@ bytes_join(PyBytesObject *self, PyObject *it)
 }
 
 PyDoc_STRVAR(fromhex_doc,
-"bytes.fromhex(string) -> bytes\n\
+"buffer.fromhex(string) -> buffer\n\
 \n\
-Create a bytes object from a string of hexadecimal numbers.\n\
-Spaces between two numbers are accepted. Example:\n\
-bytes.fromhex('10 1112') -> b'\\x10\\x11\\x12'.");
+Create a buffer object from a string of hexadecimal numbers.\n\
+Spaces between two numbers are accepted.\n\
+Example: buffer.fromhex('B9 01EF') -> buffer(b'\\xb9\\x01\\xef').");
 
 static int
 hex_digit_to_int(Py_UNICODE c)
@@ -2940,7 +2978,7 @@ bytes_reduce(PyBytesObject *self)
 
 static PySequenceMethods bytes_as_sequence = {
     (lenfunc)bytes_length,              /* sq_length */
-    (binaryfunc)bytes_concat,           /* sq_concat */
+    (binaryfunc)PyBytes_Concat,         /* sq_concat */
     (ssizeargfunc)bytes_repeat,         /* sq_repeat */
     (ssizeargfunc)bytes_getitem,        /* sq_item */
     0,                                  /* sq_slice */
@@ -3027,15 +3065,27 @@ bytes_methods[] = {
 };
 
 PyDoc_STRVAR(bytes_doc,
-"bytes([iterable]) -> new array of bytes.\n\
+"buffer(iterable_of_ints) -> buffer.\n\
+buffer(string, encoding[, errors]) -> buffer.\n\
+buffer(bytes_or_buffer) -> mutable copy of bytes_or_buffer.\n\
+buffer(memory_view) -> buffer.\n\
+\n\
+Construct an mutable buffer object from:\n\
+  - an iterable yielding integers in range(256)\n\
+  - a text string encoded using the specified encoding\n\
+  - a bytes or a buffer object\n\
+  - any object implementing the buffer API.\n\
 \n\
-If an argument is given it must be an iterable yielding ints in range(256).");
+buffer(int) -> buffer.\n\
+\n\
+Construct a zero-initialized buffer of the given length.");
+
 
 static PyObject *bytes_iter(PyObject *seq);
 
 PyTypeObject PyBytes_Type = {
     PyVarObject_HEAD_INIT(&PyType_Type, 0)
-    "bytes",
+    "buffer",
     sizeof(PyBytesObject),
     0,
     (destructor)bytes_dealloc,          /* tp_dealloc */
@@ -3049,7 +3099,7 @@ PyTypeObject PyBytes_Type = {
     &bytes_as_mapping,                  /* tp_as_mapping */
     0,                                  /* tp_hash */
     0,                                  /* tp_call */
-    (reprfunc)bytes_str,                /* tp_str */
+    bytes_str,                          /* tp_str */
     PyObject_GenericGetAttr,            /* tp_getattro */
     0,                                  /* tp_setattro */
     &bytes_as_buffer,                   /* tp_as_buffer */
author	Guido van Rossum <guido@python.org>	2007-11-06 21:34:58 (GMT)
committer	Guido van Rossum <guido@python.org>	2007-11-06 21:34:58 (GMT)
commit	98297ee7815939b124156e438b22bd652d67b5db (patch)
tree	a9d239ebd87c73af2571ab48003984c4e18e27e5 /Objects/bytesobject.c
parent	a19f80c6df2df5e8a5d0cff37131097835ef971e (diff)
download	cpython-98297ee7815939b124156e438b22bd652d67b5db.zip cpython-98297ee7815939b124156e438b22bd652d67b5db.tar.gz cpython-98297ee7815939b124156e438b22bd652d67b5db.tar.bz2