Merging the py3k-pep3137 branch back into the py3k branch.

No detailed change log; just check out the change log for the py3k-pep3137 branch. The most obvious changes: - str8 renamed to bytes (PyString at the C level); - bytes renamed to buffer (PyBytes at the C level); - PyString and PyUnicode are no longer compatible. I.e. we now have an immutable bytes type and a mutable bytes type. The behavior of PyString was modified quite a bit, to make it more bytes-like. Some changes are still on the to-do list.
author: Guido van Rossum <guido@python.org> 2007-11-06 21:34:58 (GMT)
committer: Guido van Rossum <guido@python.org> 2007-11-06 21:34:58 (GMT)
commit: 98297ee7815939b124156e438b22bd652d67b5db (patch)
tree: a9d239ebd87c73af2571ab48003984c4e18e27e5 /Objects/stringobject.c
parent: a19f80c6df2df5e8a5d0cff37131097835ef971e (diff)
download: cpython-98297ee7815939b124156e438b22bd652d67b5db.zip
cpython-98297ee7815939b124156e438b22bd652d67b5db.tar.gz
cpython-98297ee7815939b124156e438b22bd652d67b5db.tar.bz2
1 files changed, 347 insertions, 1257 deletions
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index 3dd1051..8761477 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -1,11 +1,32 @@
 /* String object implementation */
 
+/* XXX This is now called 'bytes' as far as the user is concerned.
+   Many docstrings and error messages need to be cleaned up. */
+
 #define PY_SSIZE_T_CLEAN
 
 #include "Python.h"
 
 #include "bytes_methods.h"
 
+static Py_ssize_t
+_getbuffer(PyObject *obj, Py_buffer *view)
+{
+    PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer;
+
+    if (buffer == NULL || buffer->bf_getbuffer == NULL)
+    {
+        PyErr_Format(PyExc_TypeError,
+                     "Type %.100s doesn't support the buffer API",
+                     Py_Type(obj)->tp_name);
+        return -1;
+    }
+
+    if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
+            return -1;
+    return view->len;
+}
+
 #ifdef COUNT_ALLOCS
 int null_strings, one_strings;
 #endif
@@ -13,16 +34,6 @@ int null_strings, one_strings;
 static PyStringObject *characters[UCHAR_MAX + 1];
 static PyStringObject *nullstring;
 
-/* This dictionary holds all interned strings.  Note that references to
-   strings in this dictionary are *not* counted in the string's ob_refcnt.
-   When the interned string reaches a refcnt of 0 the string deallocation
-   function will delete the reference from this dictionary.
-
-   Another way to look at this is that to say that the actual reference
-   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
-*/
-static PyObject *interned;
-
 /*
    For both PyString_FromString() and PyString_FromStringAndSize(), the
    parameter `size' denotes number of characters to allocate, not counting any
@@ -77,21 +88,14 @@ PyString_FromStringAndSize(const char *str, Py_ssize_t size)
 		return PyErr_NoMemory();
 	PyObject_INIT_VAR(op, &PyString_Type, size);
 	op->ob_shash = -1;
-	op->ob_sstate = SSTATE_NOT_INTERNED;
 	if (str != NULL)
 		Py_MEMCPY(op->ob_sval, str, size);
 	op->ob_sval[size] = '\0';
 	/* share short strings */
 	if (size == 0) {
-		PyObject *t = (PyObject *)op;
-		PyString_InternInPlace(&t);
-		op = (PyStringObject *)t;
 		nullstring = op;
 		Py_INCREF(op);
 	} else if (size == 1 && str != NULL) {
-		PyObject *t = (PyObject *)op;
-		PyString_InternInPlace(&t);
-		op = (PyStringObject *)t;
 		characters[*str & UCHAR_MAX] = op;
 		Py_INCREF(op);
 	}
@@ -132,19 +136,12 @@ PyString_FromString(const char *str)
 		return PyErr_NoMemory();
 	PyObject_INIT_VAR(op, &PyString_Type, size);
 	op->ob_shash = -1;
-	op->ob_sstate = SSTATE_NOT_INTERNED;
 	Py_MEMCPY(op->ob_sval, str, size+1);
 	/* share short strings */
 	if (size == 0) {
-		PyObject *t = (PyObject *)op;
-		PyString_InternInPlace(&t);
-		op = (PyStringObject *)t;
 		nullstring = op;
 		Py_INCREF(op);
 	} else if (size == 1) {
-		PyObject *t = (PyObject *)op;
-		PyString_InternInPlace(&t);
-		op = (PyStringObject *)t;
 		characters[*str & UCHAR_MAX] = op;
 		Py_INCREF(op);
 	}
@@ -351,174 +348,9 @@ PyString_FromFormat(const char *format, ...)
 	return ret;
 }
 
-
-PyObject *PyString_Decode(const char *s,
-			  Py_ssize_t size,
-			  const char *encoding,
-			  const char *errors)
-{
-    PyObject *v, *str;
-
-    str = PyString_FromStringAndSize(s, size);
-    if (str == NULL)
-	return NULL;
-    v = PyString_AsDecodedString(str, encoding, errors);
-    Py_DECREF(str);
-    return v;
-}
-
-PyObject *PyString_AsDecodedObject(PyObject *str,
-				   const char *encoding,
-				   const char *errors)
-{
-    PyObject *v;
-
-    if (!PyString_Check(str)) {
-        PyErr_BadArgument();
-        goto onError;
-    }
-
-    if (encoding == NULL) {
-	encoding = PyUnicode_GetDefaultEncoding();
-    }
-
-    /* Decode via the codec registry */
-    v = PyCodec_Decode(str, encoding, errors);
-    if (v == NULL)
-        goto onError;
-
-    return v;
-
- onError:
-    return NULL;
-}
-
-PyObject *PyString_AsDecodedString(PyObject *str,
-				   const char *encoding,
-				   const char *errors)
-{
-    PyObject *v;
-
-    v = PyString_AsDecodedObject(str, encoding, errors);
-    if (v == NULL)
-        goto onError;
-
-    /* Convert Unicode to a string using the default encoding */
-    if (PyUnicode_Check(v)) {
-	PyObject *temp = v;
-	v = PyUnicode_AsEncodedString(v, NULL, NULL);
-	Py_DECREF(temp);
-	if (v == NULL)
-	    goto onError;
-    }
-    if (!PyString_Check(v)) {
-        PyErr_Format(PyExc_TypeError,
-                     "decoder did not return a string object (type=%.400s)",
-                     Py_Type(v)->tp_name);
-        Py_DECREF(v);
-        goto onError;
-    }
-
-    return v;
-
- onError:
-    return NULL;
-}
-
-PyObject *PyString_Encode(const char *s,
-			  Py_ssize_t size,
-			  const char *encoding,
-			  const char *errors)
-{
-    PyObject *v, *str;
-
-    str = PyString_FromStringAndSize(s, size);
-    if (str == NULL)
-	return NULL;
-    v = PyString_AsEncodedString(str, encoding, errors);
-    Py_DECREF(str);
-    return v;
-}
-
-PyObject *PyString_AsEncodedObject(PyObject *str,
-				   const char *encoding,
-				   const char *errors)
-{
-    PyObject *v;
-
-    if (!PyString_Check(str)) {
-        PyErr_BadArgument();
-        goto onError;
-    }
-
-    if (encoding == NULL) {
-	encoding = PyUnicode_GetDefaultEncoding();
-    }
-
-    /* Encode via the codec registry */
-    v = PyCodec_Encode(str, encoding, errors);
-    if (v == NULL)
-        goto onError;
-
-    return v;
-
- onError:
-    return NULL;
-}
-
-PyObject *PyString_AsEncodedString(PyObject *str,
-				   const char *encoding,
-				   const char *errors)
-{
-    PyObject *v;
-
-    v = PyString_AsEncodedObject(str, encoding, errors);
-    if (v == NULL)
-        goto onError;
-
-    /* Convert Unicode to a string using the default encoding */
-    if (PyUnicode_Check(v)) {
-	PyObject *temp = v;
-	v = PyUnicode_AsEncodedString(v, NULL, NULL);
-	Py_DECREF(temp);
-	if (v == NULL)
-	    goto onError;
-    }
-    if (!PyString_Check(v)) {
-        PyErr_Format(PyExc_TypeError,
-                     "encoder did not return a string object (type=%.400s)",
-                     Py_Type(v)->tp_name);
-        Py_DECREF(v);
-        goto onError;
-    }
-
-    return v;
-
- onError:
-    return NULL;
-}
-
 static void
 string_dealloc(PyObject *op)
 {
-	switch (PyString_CHECK_INTERNED(op)) {
-		case SSTATE_NOT_INTERNED:
-			break;
-
-		case SSTATE_INTERNED_MORTAL:
-			/* revive dead object temporarily for DelItem */
-			Py_Refcnt(op) = 3;
-			if (PyDict_DelItem(interned, op) != 0)
-				Py_FatalError(
-					"deletion of interned string failed");
-			break;
-
-		case SSTATE_INTERNED_IMMORTAL:
-			Py_FatalError("Immortal interned string died.");
-
-		default:
-			Py_FatalError("Inconsistent interned string state.");
-	}
 	Py_Type(op)->tp_free(op);
 }
 
@@ -577,7 +409,7 @@ PyObject *PyString_DecodeEscape(const char *s,
 			continue;
 		}
 		s++;
-                if (s==end) {
+		if (s==end) {
 			PyErr_SetString(PyExc_ValueError,
 					"Trailing \\ in string");
 			goto failed;
@@ -639,8 +471,8 @@ PyObject *PyString_DecodeEscape(const char *s,
 				/* do nothing */;
 			else {
 				PyErr_Format(PyExc_ValueError,
-					     "decoding error; "
-					     "unknown error handling code: %.400s",
+					     "decoding error; unknown "
+					     "error handling code: %.400s",
 					     errors);
 				goto failed;
 			}
@@ -665,8 +497,8 @@ PyObject *PyString_DecodeEscape(const char *s,
 static Py_ssize_t
 string_getsize(register PyObject *op)
 {
-    	char *s;
-    	Py_ssize_t len;
+	char *s;
+	Py_ssize_t len;
 	if (PyString_AsStringAndSize(op, &s, &len))
 		return -1;
 	return len;
@@ -675,8 +507,8 @@ string_getsize(register PyObject *op)
 static /*const*/ char *
 string_getbuffer(register PyObject *op)
 {
-    	char *s;
-    	Py_ssize_t len;
+	char *s;
+	Py_ssize_t len;
 	if (PyString_AsStringAndSize(op, &s, &len))
 		return NULL;
 	return s;
@@ -753,7 +585,7 @@ PyString_AsStringAndSize(register PyObject *obj,
 #define STRINGLIB_LEN PyString_GET_SIZE
 #define STRINGLIB_NEW PyString_FromStringAndSize
 #define STRINGLIB_STR PyString_AS_STRING
-#define STRINGLIB_WANT_CONTAINS_OBJ 1
+/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */
 
 #define STRINGLIB_EMPTY nullstring
 #define STRINGLIB_CHECK_EXACT PyString_CheckExact
@@ -773,12 +605,12 @@ PyString_Repr(PyObject *obj, int smartquotes)
 {
 	static const char *hexdigits = "0123456789abcdef";
 	register PyStringObject* op = (PyStringObject*) obj;
-	Py_ssize_t length = PyString_GET_SIZE(op);
-	size_t newsize = 3 + 4 * Py_Size(op);
+	Py_ssize_t length = Py_Size(op);
+	size_t newsize = 3 + 4 * length;
 	PyObject *v;
-	if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(op)) {
+	if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
 		PyErr_SetString(PyExc_OverflowError,
-			"string is too large to make repr");
+			"bytes object is too large to make repr");
 	}
 	v = PyUnicode_FromUnicode(NULL, newsize);
 	if (v == NULL) {
@@ -790,14 +622,14 @@ PyString_Repr(PyObject *obj, int smartquotes)
 		register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
 		int quote;
 
-		/* figure out which quote to use; single is preferred */
+		/* Figure out which quote to use; single is preferred */
 		quote = '\'';
 		if (smartquotes) {
 			char *test, *start;
 			start = PyString_AS_STRING(op);
 			for (test = start; test < start+length; ++test) {
 				if (*test == '"') {
-					quote = '\''; /* switch back to single quote */
+					quote = '\''; /* back to single */
 					goto decided;
 				}
 				else if (*test == '\'')
@@ -807,8 +639,8 @@ PyString_Repr(PyObject *obj, int smartquotes)
 			;
 		}
 
-		*p++ = 's', *p++ = quote;
-		for (i = 0; i < Py_Size(op); i++) {
+		*p++ = 'b', *p++ = quote;
+		for (i = 0; i < length; i++) {
 			/* There's at least enough room for a hex escape
 			   and a closing quote. */
 			assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
@@ -848,18 +680,14 @@ string_repr(PyObject *op)
 }
 
 static PyObject *
-string_str(PyObject *s)
+string_str(PyObject *op)
 {
-	assert(PyString_Check(s));
-	if (PyString_CheckExact(s)) {
-		Py_INCREF(s);
-		return s;
-	}
-	else {
-		/* Subtype -- return genuine string with the same value. */
-		PyStringObject *t = (PyStringObject *) s;
-		return PyString_FromStringAndSize(t->ob_sval, Py_Size(t));
+	if (Py_BytesWarningFlag) {
+		if (PyErr_WarnEx(PyExc_BytesWarning,
+				 "str() on a bytes instance", 1))
+			return NULL;
 	}
+	return string_repr(op);
 }
 
 static Py_ssize_t
@@ -868,51 +696,53 @@ string_length(PyStringObject *a)
 	return Py_Size(a);
 }
 
+/* This is also used by PyString_Concat() */
 static PyObject *
-string_concat(register PyStringObject *a, register PyObject *bb)
+string_concat(PyObject *a, PyObject *b)
 {
-	register Py_ssize_t size;
-	register PyStringObject *op;
-	if (!PyString_Check(bb)) {
-		if (PyUnicode_Check(bb))
-		    return PyUnicode_Concat((PyObject *)a, bb);
-                if (PyBytes_Check(bb))
-			return PyBytes_Concat((PyObject *)a, bb);
-		PyErr_Format(PyExc_TypeError,
-			     "cannot concatenate 'str8' and '%.200s' objects",
-			     Py_Type(bb)->tp_name);
-		return NULL;
+	Py_ssize_t size;
+	Py_buffer va, vb;
+	PyObject *result = NULL;
+
+	va.len = -1;
+	vb.len = -1;
+	if (_getbuffer(a, &va) < 0  ||
+	    _getbuffer(b, &vb) < 0) {
+		PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
+			     Py_Type(a)->tp_name, Py_Type(b)->tp_name);
+		goto done;
 	}
-#define b ((PyStringObject *)bb)
-	/* Optimize cases with empty left or right operand */
-	if ((Py_Size(a) == 0 || Py_Size(b) == 0) &&
-	    PyString_CheckExact(a) && PyString_CheckExact(b)) {
-		if (Py_Size(a) == 0) {
-			Py_INCREF(bb);
-			return bb;
-		}
-		Py_INCREF(a);
-		return (PyObject *)a;
+
+	/* Optimize end cases */
+	if (va.len == 0 && PyString_CheckExact(b)) {
+		result = b;
+		Py_INCREF(result);
+		goto done;
+	}
+	if (vb.len == 0 && PyString_CheckExact(a)) {
+		result = a;
+		Py_INCREF(result);
+		goto done;
 	}
-	size = Py_Size(a) + Py_Size(b);
+
+	size = va.len + vb.len;
 	if (size < 0) {
-		PyErr_SetString(PyExc_OverflowError,
-				"strings are too large to concat");
-		return NULL;
+		PyErr_NoMemory();
+		goto done;
 	}
 
-	/* Inline PyObject_NewVar */
-	op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
-	if (op == NULL)
-		return PyErr_NoMemory();
-	PyObject_INIT_VAR(op, &PyString_Type, size);
-	op->ob_shash = -1;
-	op->ob_sstate = SSTATE_NOT_INTERNED;
-	Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
-	Py_MEMCPY(op->ob_sval + Py_Size(a), b->ob_sval, Py_Size(b));
-	op->ob_sval[size] = '\0';
-	return (PyObject *) op;
-#undef b
+	result = PyString_FromStringAndSize(NULL, size);
+	if (result != NULL) {
+		memcpy(PyString_AS_STRING(result), va.buf, va.len);
+		memcpy(PyString_AS_STRING(result) + va.len, vb.buf, vb.len);
+	}
+
+  done:
+	if (va.len != -1)
+		PyObject_ReleaseBuffer(a, &va);
+	if (vb.len != -1)
+		PyObject_ReleaseBuffer(b, &vb);
+	return result;
 }
 
 static PyObject *
@@ -950,7 +780,6 @@ string_repeat(register PyStringObject *a, register Py_ssize_t n)
 		return PyErr_NoMemory();
 	PyObject_INIT_VAR(op, &PyString_Type, size);
 	op->ob_shash = -1;
-	op->ob_sstate = SSTATE_NOT_INTERNED;
 	op->ob_sval[size] = '\0';
 	if (Py_Size(a) == 1 && n > 0) {
 		memset(op->ob_sval, a->ob_sval[0] , n);
@@ -970,20 +799,36 @@ string_repeat(register PyStringObject *a, register Py_ssize_t n)
 }
 
 static int
-string_contains(PyObject *str_obj, PyObject *sub_obj)
+string_contains(PyObject *self, PyObject *arg)
+{
+    Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
+    if (ival == -1 && PyErr_Occurred()) {
+        Py_buffer varg;
+        int pos;
+        PyErr_Clear();
+        if (_getbuffer(arg, &varg) < 0)
+            return -1;
+        pos = stringlib_find(PyString_AS_STRING(self), Py_Size(self),
+                             varg.buf, varg.len, 0);
+        PyObject_ReleaseBuffer(arg, &varg);
+        return pos >= 0;
+    }
+    if (ival < 0 || ival >= 256) {
+        PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
+        return -1;
+    }
+
+    return memchr(PyString_AS_STRING(self), ival, Py_Size(self)) != NULL;
+}
+
+static PyObject *
+string_item(PyStringObject *a, register Py_ssize_t i)
 {
-	if (!PyString_CheckExact(sub_obj)) {
-		if (PyUnicode_Check(sub_obj))
-			return PyUnicode_Contains(str_obj, sub_obj);
-		if (!PyString_Check(sub_obj)) {
-			PyErr_Format(PyExc_TypeError,
-			    "'in <string>' requires string as left operand, "
-			    "not %.200s", Py_Type(sub_obj)->tp_name);
-			return -1;
-		}
+	if (i < 0 || i >= Py_Size(a)) {
+		PyErr_SetString(PyExc_IndexError, "string index out of range");
+		return NULL;
 	}
-
-	return stringlib_contains_obj(str_obj, sub_obj);
+	return PyInt_FromLong((unsigned char)a->ob_sval[i]);
 }
 
 static PyObject*
@@ -996,6 +841,15 @@ string_richcompare(PyStringObject *a, PyStringObject *b, int op)
 
 	/* Make sure both arguments are strings. */
 	if (!(PyString_Check(a) && PyString_Check(b))) {
+		if (Py_BytesWarningFlag && (op == Py_EQ) &&
+		    (PyObject_IsInstance((PyObject*)a,
+					 (PyObject*)&PyUnicode_Type) ||
+		    PyObject_IsInstance((PyObject*)b,
+					 (PyObject*)&PyUnicode_Type))) {
+			if (PyErr_WarnEx(PyExc_BytesWarning,
+				    "Comparsion between bytes and string", 1))
+				return NULL;
+		}
 		result = Py_NotImplemented;
 		goto out;
 	}
@@ -1053,9 +907,9 @@ _PyString_Eq(PyObject *o1, PyObject *o2)
 {
 	PyStringObject *a = (PyStringObject*) o1;
 	PyStringObject *b = (PyStringObject*) o2;
-        return Py_Size(a) == Py_Size(b)
-          && *a->ob_sval == *b->ob_sval
-          && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
+	return Py_Size(a) == Py_Size(b)
+		&& *a->ob_sval == *b->ob_sval
+		&& memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
 }
 
 static long
@@ -1088,12 +942,12 @@ string_subscript(PyStringObject* self, PyObject* item)
 			return NULL;
 		if (i < 0)
 			i += PyString_GET_SIZE(self);
-                if (i < 0 || i >= PyString_GET_SIZE(self)) {
+		if (i < 0 || i >= PyString_GET_SIZE(self)) {
 			PyErr_SetString(PyExc_IndexError,
 					"string index out of range");
 			return NULL;
-                }
-                return PyInt_FromLong((unsigned char)self->ob_sval[i]);
+		}
+		return PyInt_FromLong((unsigned char)self->ob_sval[i]);
 	}
 	else if (PySlice_Check(item)) {
 		Py_ssize_t start, stop, step, slicelength, cur, i;
@@ -1149,14 +1003,15 @@ string_subscript(PyStringObject* self, PyObject* item)
 static int
 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
 {
-        return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_Size(self), 0, flags);
+	return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_Size(self),
+				 0, flags);
 }
 
 static PySequenceMethods string_as_sequence = {
 	(lenfunc)string_length, /*sq_length*/
 	(binaryfunc)string_concat, /*sq_concat*/
 	(ssizeargfunc)string_repeat, /*sq_repeat*/
-	0,		/*sq_item*/
+	(ssizeargfunc)string_item, /*sq_item*/
 	0,		/*sq_slice*/
 	0,		/*sq_ass_item*/
 	0,		/*sq_ass_slice*/
@@ -1171,7 +1026,7 @@ static PyMappingMethods string_as_mapping = {
 
 static PyBufferProcs string_as_buffer = {
 	(getbufferproc)string_buffer_getbuffer,
-        NULL,
+	NULL,
 };
 
 
@@ -1297,12 +1152,12 @@ split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
 }
 
 PyDoc_STRVAR(split__doc__,
-"S.split([sep [,maxsplit]]) -> list of strings\n\
+"B.split([sep[, maxsplit]]) -> list of bytes\n\
 \n\
-Return a list of the words in the string S, using sep as the\n\
-delimiter string.  If maxsplit is given, at most maxsplit\n\
-splits are done. If sep is not specified or is None, any\n\
-whitespace string is a separator.");
+Return a list of the sections in B, using sep as the delimiter.\n\
+If sep is not given, B is split on ASCII whitespace characters\n\
+(space, tab, return, newline, formfeed, vertical tab).\n\
+If maxsplit is given, at most maxsplit splits are done.");
 
 static PyObject *
 string_split(PyStringObject *self, PyObject *args)
@@ -1310,6 +1165,7 @@ string_split(PyStringObject *self, PyObject *args)
 	Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
 	Py_ssize_t maxsplit = -1, count=0;
 	const char *s = PyString_AS_STRING(self), *sub;
+	Py_buffer vsub;
 	PyObject *list, *str, *subobj = Py_None;
 #ifdef USE_FAST
 	Py_ssize_t pos;
@@ -1321,25 +1177,27 @@ string_split(PyStringObject *self, PyObject *args)
 		maxsplit = PY_SSIZE_T_MAX;
 	if (subobj == Py_None)
 		return split_whitespace(s, len, maxsplit);
-	if (PyString_Check(subobj)) {
-		sub = PyString_AS_STRING(subobj);
-		n = PyString_GET_SIZE(subobj);
-	}
-	else if (PyUnicode_Check(subobj))
-		return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
-	else if (PyObject_AsCharBuffer(subobj, &sub, &n))
+	if (_getbuffer(subobj, &vsub) < 0)
 		return NULL;
+	sub = vsub.buf;
+	n = vsub.len;
 
 	if (n == 0) {
 		PyErr_SetString(PyExc_ValueError, "empty separator");
+		PyObject_ReleaseBuffer(subobj, &vsub);
 		return NULL;
 	}
-	else if (n == 1)
-		return split_char(s, len, sub[0], maxsplit);
+	else if (n == 1) {
+		char ch = sub[0];
+		PyObject_ReleaseBuffer(subobj, &vsub);
+		return split_char(s, len, ch, maxsplit);
+	}
 
 	list = PyList_New(PREALLOC_SIZE(maxsplit));
-	if (list == NULL)
+	if (list == NULL) {
+		PyObject_ReleaseBuffer(subobj, &vsub);
 		return NULL;
+	}
 
 #ifdef USE_FAST
 	i = j = 0;
@@ -1365,19 +1223,21 @@ string_split(PyStringObject *self, PyObject *args)
 #endif
 	SPLIT_ADD(s, i, len);
 	FIX_PREALLOC_SIZE(list);
+	PyObject_ReleaseBuffer(subobj, &vsub);
 	return list;
 
  onError:
 	Py_DECREF(list);
+	PyObject_ReleaseBuffer(subobj, &vsub);
 	return NULL;
 }
 
 PyDoc_STRVAR(partition__doc__,
-"S.partition(sep) -> (head, sep, tail)\n\
+"B.partition(sep) -> (head, sep, tail)\n\
 \n\
-Searches for the separator sep in S, and returns the part before it,\n\
+Searches for the separator sep in B, and returns the part before it,\n\
 the separator itself, and the part after it.  If the separator is not\n\
-found, returns S and two empty strings.");
+found, returns B and two empty bytes objects.");
 
 static PyObject *
 string_partition(PyStringObject *self, PyObject *sep_obj)
@@ -1402,11 +1262,12 @@ string_partition(PyStringObject *self, PyObject *sep_obj)
 }
 
 PyDoc_STRVAR(rpartition__doc__,
-"S.rpartition(sep) -> (tail, sep, head)\n\
+"B.rpartition(sep) -> (tail, sep, head)\n\
 \n\
-Searches for the separator sep in S, starting at the end of S, and returns\n\
-the part before it, the separator itself, and the part after it.  If the\n\
-separator is not found, returns two empty strings and S.");
+Searches for the separator sep in B, starting at the end of B,\n\
+and returns the part before it, the separator itself, and the\n\
+part after it.  If the separator is not found, returns two empty\n\
+bytes objects and B.");
 
 static PyObject *
 string_rpartition(PyStringObject *self, PyObject *sep_obj)
@@ -1450,8 +1311,8 @@ rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
 		SPLIT_ADD(s, i + 1, j + 1);
 	}
 	if (i >= 0) {
-		/* Only occurs when maxsplit was reached */
-		/* Skip any remaining whitespace and copy to beginning of string */
+		/* Only occurs when maxsplit was reached.  Skip any remaining
+		   whitespace and copy to beginning of string. */
 		RSKIP_SPACE(s, i);
 		if (i >= 0)
 			SPLIT_ADD(s, 0, i + 1);
@@ -1500,13 +1361,14 @@ rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
 }
 
 PyDoc_STRVAR(rsplit__doc__,
-"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
+"B.rsplit([sep[, maxsplit]]) -> list of strings\n\
 \n\
-Return a list of the words in the string S, using sep as the\n\
-delimiter string, starting at the end of the string and working\n\
-to the front.  If maxsplit is given, at most maxsplit splits are\n\
-done. If sep is not specified or is None, any whitespace string\n\
-is a separator.");
+Return a list of the sections in B, using sep as the delimiter,\n\
+starting at the end of B and working to the front.\n\
+If sep is not given, B is split on ASCII whitespace characters\n\
+(space, tab, return, newline, formfeed, vertical tab).\n\
+If maxsplit is given, at most maxsplit splits are done.");
+
 
 static PyObject *
 string_rsplit(PyStringObject *self, PyObject *args)
@@ -1514,6 +1376,7 @@ string_rsplit(PyStringObject *self, PyObject *args)
 	Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
 	Py_ssize_t maxsplit = -1, count=0;
 	const char *s = PyString_AS_STRING(self), *sub;
+	Py_buffer vsub;
 	PyObject *list, *str, *subobj = Py_None;
 
 	if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
@@ -1522,25 +1385,27 @@ string_rsplit(PyStringObject *self, PyObject *args)
 		maxsplit = PY_SSIZE_T_MAX;
 	if (subobj == Py_None)
 		return rsplit_whitespace(s, len, maxsplit);
-	if (PyString_Check(subobj)) {
-		sub = PyString_AS_STRING(subobj);
-		n = PyString_GET_SIZE(subobj);
-	}
-	else if (PyUnicode_Check(subobj))
-		return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
-	else if (PyObject_AsCharBuffer(subobj, &sub, &n))
+	if (_getbuffer(subobj, &vsub) < 0)
 		return NULL;
+	sub = vsub.buf;
+	n = vsub.len;
 
 	if (n == 0) {
 		PyErr_SetString(PyExc_ValueError, "empty separator");
+		PyObject_ReleaseBuffer(subobj, &vsub);
 		return NULL;
 	}
-	else if (n == 1)
-		return rsplit_char(s, len, sub[0], maxsplit);
+	else if (n == 1) {
+		char ch = sub[0];
+		PyObject_ReleaseBuffer(subobj, &vsub);
+		return rsplit_char(s, len, ch, maxsplit);
+	}
 
 	list = PyList_New(PREALLOC_SIZE(maxsplit));
-	if (list == NULL)
+	if (list == NULL) {
+		PyObject_ReleaseBuffer(subobj, &vsub);
 		return NULL;
+	}
 
 	j = len;
 	i = j - n;
@@ -1559,10 +1424,12 @@ string_rsplit(PyStringObject *self, PyObject *args)
 	FIX_PREALLOC_SIZE(list);
 	if (PyList_Reverse(list) < 0)
 		goto onError;
+	PyObject_ReleaseBuffer(subobj, &vsub);
 	return list;
 
 onError:
 	Py_DECREF(list);
+	PyObject_ReleaseBuffer(subobj, &vsub);
 	return NULL;
 }
 
@@ -1572,13 +1439,13 @@ onError:
 
 
 PyDoc_STRVAR(join__doc__,
-"S.join(sequence) -> string\n\
+"B.join(iterable_of_bytes) -> bytes\n\
 \n\
-Return a string which is the concatenation of the strings in the\n\
-sequence.  The separator between elements is S.");
+Concatenates any number of bytes objects, with B in between each pair.\n\
+Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
 
 static PyObject *
-string_join(PyStringObject *self, PyObject *orig)
+string_join(PyObject *self, PyObject *orig)
 {
 	char *sep = PyString_AS_STRING(self);
 	const Py_ssize_t seplen = PyString_GET_SIZE(self);
@@ -1601,7 +1468,7 @@ string_join(PyStringObject *self, PyObject *orig)
 	}
 	if (seqlen == 1) {
 		item = PySequence_Fast_GET_ITEM(seq, 0);
-		if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
+		if (PyString_CheckExact(item)) {
 			Py_INCREF(item);
 			Py_DECREF(seq);
 			return item;
@@ -1611,37 +1478,26 @@ string_join(PyStringObject *self, PyObject *orig)
 	/* There are at least two things to join, or else we have a subclass
 	 * of the builtin types in the sequence.
 	 * Do a pre-pass to figure out the total amount of space we'll
-	 * need (sz), see whether any argument is absurd, and defer to
-	 * the Unicode join if appropriate.
+	 * need (sz), and see whether all argument are bytes.
 	 */
+	/* XXX Shouldn't we use _getbuffer() on these items instead? */
 	for (i = 0; i < seqlen; i++) {
 		const size_t old_sz = sz;
 		item = PySequence_Fast_GET_ITEM(seq, i);
-		if (!PyString_Check(item)){
-			if (PyUnicode_Check(item)) {
-				/* Defer to Unicode join.
-				 * CAUTION:  There's no gurantee that the
-				 * original sequence can be iterated over
-				 * again, so we must pass seq here.
-				 */
-				PyObject *result;
-				result = PyUnicode_Join((PyObject *)self, seq);
-				Py_DECREF(seq);
-				return result;
-			}
+		if (!PyString_Check(item) && !PyBytes_Check(item)) {
 			PyErr_Format(PyExc_TypeError,
-				     "sequence item %zd: expected string,"
+				     "sequence item %zd: expected bytes,"
 				     " %.80s found",
 				     i, Py_Type(item)->tp_name);
 			Py_DECREF(seq);
 			return NULL;
 		}
-		sz += PyString_GET_SIZE(item);
+		sz += Py_Size(item);
 		if (i != 0)
 			sz += seplen;
 		if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
 			PyErr_SetString(PyExc_OverflowError,
-				"join() result is too long for a Python string");
+			    "join() result is too long for a Python string");
 			Py_DECREF(seq);
 			return NULL;
 		}
@@ -1655,17 +1511,24 @@ string_join(PyStringObject *self, PyObject *orig)
 	}
 
 	/* Catenate everything. */
+	/* I'm not worried about a PyBytes item growing because there's
+	   nowhere in this function where we release the GIL. */
 	p = PyString_AS_STRING(res);
 	for (i = 0; i < seqlen; ++i) {
 		size_t n;
-		item = PySequence_Fast_GET_ITEM(seq, i);
-		n = PyString_GET_SIZE(item);
-		Py_MEMCPY(p, PyString_AS_STRING(item), n);
-		p += n;
-		if (i < seqlen - 1) {
+                char *q;
+		if (i) {
 			Py_MEMCPY(p, sep, seplen);
 			p += seplen;
 		}
+		item = PySequence_Fast_GET_ITEM(seq, i);
+		n = Py_Size(item);
+                if (PyString_Check(item))
+			q = PyString_AS_STRING(item);
+		else
+			q = PyBytes_AS_STRING(item);
+		Py_MEMCPY(p, q, n);
+		p += n;
 	}
 
 	Py_DECREF(seq);
@@ -1677,7 +1540,7 @@ _PyString_Join(PyObject *sep, PyObject *x)
 {
 	assert(sep != NULL && PyString_Check(sep));
 	assert(x != NULL);
-	return string_join((PyStringObject *)sep, x);
+	return string_join(sep, x);
 }
 
 Py_LOCAL_INLINE(void)
@@ -1730,7 +1593,7 @@ string_find_internal(PyStringObject *self, PyObject *args, int dir)
 
 
 PyDoc_STRVAR(find__doc__,
-"S.find(sub [,start [,end]]) -> int\n\
+"B.find(sub [,start [,end]]) -> int\n\
 \n\
 Return the lowest index in S where substring sub is found,\n\
 such that sub is contained within s[start:end].  Optional\n\
@@ -1749,9 +1612,9 @@ string_find(PyStringObject *self, PyObject *args)
 
 
 PyDoc_STRVAR(index__doc__,
-"S.index(sub [,start [,end]]) -> int\n\
+"B.index(sub [,start [,end]]) -> int\n\
 \n\
-Like S.find() but raise ValueError when the substring is not found.");
+Like B.find() but raise ValueError when the substring is not found.");
 
 static PyObject *
 string_index(PyStringObject *self, PyObject *args)
@@ -1769,9 +1632,9 @@ string_index(PyStringObject *self, PyObject *args)
 
 
 PyDoc_STRVAR(rfind__doc__,
-"S.rfind(sub [,start [,end]]) -> int\n\
+"B.rfind(sub [,start [,end]]) -> int\n\
 \n\
-Return the highest index in S where substring sub is found,\n\
+Return the highest index in B where substring sub is found,\n\
 such that sub is contained within s[start:end].  Optional\n\
 arguments start and end are interpreted as in slice notation.\n\
 \n\
@@ -1788,9 +1651,9 @@ string_rfind(PyStringObject *self, PyObject *args)
 
 
 PyDoc_STRVAR(rindex__doc__,
-"S.rindex(sub [,start [,end]]) -> int\n\
+"B.rindex(sub [,start [,end]]) -> int\n\
 \n\
-Like S.rfind() but raise ValueError when the substring is not found.");
+Like B.rfind() but raise ValueError when the substring is not found.");
 
 static PyObject *
 string_rindex(PyStringObject *self, PyObject *args)
@@ -1810,12 +1673,18 @@ string_rindex(PyStringObject *self, PyObject *args)
 Py_LOCAL_INLINE(PyObject *)
 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
 {
+	Py_buffer vsep;
 	char *s = PyString_AS_STRING(self);
 	Py_ssize_t len = PyString_GET_SIZE(self);
-	char *sep = PyString_AS_STRING(sepobj);
-	Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
+	char *sep;
+	Py_ssize_t seplen;
 	Py_ssize_t i, j;
 
+	if (_getbuffer(sepobj, &vsep) < 0)
+		return NULL;
+	sep = vsep.buf;
+	seplen = vsep.len;
+
 	i = 0;
 	if (striptype != RIGHTSTRIP) {
 		while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
@@ -1831,6 +1700,8 @@ do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
 		j++;
 	}
 
+	PyObject_ReleaseBuffer(sepobj, &vsep);
+
 	if (i == 0 && j == len && PyString_CheckExact(self)) {
 		Py_INCREF(self);
 		return (PyObject*)self;
@@ -1879,36 +1750,17 @@ do_argstrip(PyStringObject *self, int striptype, PyObject *args)
 		return NULL;
 
 	if (sep != NULL && sep != Py_None) {
-		if (PyString_Check(sep))
-			return do_xstrip(self, striptype, sep);
-		else if (PyUnicode_Check(sep)) {
-			PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
-			PyObject *res;
-			if (uniself==NULL)
-				return NULL;
-			res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
-				striptype, sep);
-			Py_DECREF(uniself);
-			return res;
-		}
-		PyErr_Format(PyExc_TypeError,
-			     "%s arg must be None or string",
-			     STRIPNAME(striptype));
-		return NULL;
+		return do_xstrip(self, striptype, sep);
 	}
-
 	return do_strip(self, striptype);
 }
 
 
 PyDoc_STRVAR(strip__doc__,
-"S.strip([chars]) -> string\n\
+"B.strip([bytes]) -> bytes\n\
 \n\
-Return a copy of the string S with leading and trailing\n\
-whitespace removed.\n\
-If chars is given and not None, remove characters in chars instead.\n\
-If chars is unicode, S will be converted to unicode before stripping");
-
+Strip leading and trailing bytes contained in the argument.\n\
+If the argument is omitted, strip trailing ASCII whitespace.");
 static PyObject *
 string_strip(PyStringObject *self, PyObject *args)
 {
@@ -1920,12 +1772,10 @@ string_strip(PyStringObject *self, PyObject *args)
 
 
 PyDoc_STRVAR(lstrip__doc__,
-"S.lstrip([chars]) -> string\n\
+"B.lstrip([bytes]) -> bytes\n\
 \n\
-Return a copy of the string S with leading whitespace removed.\n\
-If chars is given and not None, remove characters in chars instead.\n\
-If chars is unicode, S will be converted to unicode before stripping");
-
+Strip leading bytes contained in the argument.\n\
+If the argument is omitted, strip leading ASCII whitespace.");
 static PyObject *
 string_lstrip(PyStringObject *self, PyObject *args)
 {
@@ -1937,12 +1787,10 @@ string_lstrip(PyStringObject *self, PyObject *args)
 
 
 PyDoc_STRVAR(rstrip__doc__,
-"S.rstrip([chars]) -> string\n\
+"B.rstrip([bytes]) -> bytes\n\
 \n\
-Return a copy of the string S with trailing whitespace removed.\n\
-If chars is given and not None, remove characters in chars instead.\n\
-If chars is unicode, S will be converted to unicode before stripping");
-
+Strip trailing bytes contained in the argument.\n\
+If the argument is omitted, strip trailing ASCII whitespace.");
 static PyObject *
 string_rstrip(PyStringObject *self, PyObject *args)
 {
@@ -1954,7 +1802,7 @@ string_rstrip(PyStringObject *self, PyObject *args)
 
 
 PyDoc_STRVAR(count__doc__,
-"S.count(sub[, start[, end]]) -> int\n\
+"B.count(sub [,start [,end]]) -> int\n\
 \n\
 Return the number of non-overlapping occurrences of substring sub in\n\
 string S[start:end].  Optional arguments start and end are interpreted\n\
@@ -1996,12 +1844,12 @@ string_count(PyStringObject *self, PyObject *args)
 
 
 PyDoc_STRVAR(translate__doc__,
-"S.translate(table [,deletechars]) -> string\n\
+"B.translate(table[, deletechars]) -> bytes\n\
 \n\
-Return a copy of the string S, where all characters occurring\n\
-in the optional argument deletechars are removed, and the\n\
-remaining characters have been mapped through the given\n\
-translation table, which must be a string of length 256.");
+Return a copy of B, where all characters occurring in the\n\
+optional argument deletechars are removed, and the remaining\n\
+characters have been mapped through the given translation\n\
+table, which must be a bytes object of length 256.");
 
 static PyObject *
 string_translate(PyStringObject *self, PyObject *args)
@@ -2187,7 +2035,7 @@ findstring(const char *target, Py_ssize_t target_len,
 				return end;
 	} else {
 		for (; start <= end; start++)
-			if (Py_STRING_MATCH(target, start, pattern, pattern_len))
+			if (Py_STRING_MATCH(target, start,pattern,pattern_len))
 				return start;
 	}
 	return -1;
@@ -2225,14 +2073,15 @@ countstring(const char *target, Py_ssize_t target_len,
 	end -= pattern_len;
 	if (direction < 0) {
 		for (; (end >= start); end--)
-			if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
+			if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
 				count++;
 				if (--maxcount <= 0) break;
 				end -= pattern_len-1;
 			}
 	} else {
 		for (; (start <= end); start++)
-			if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
+			if (Py_STRING_MATCH(target, start,
+					    pattern, pattern_len)) {
 				count++;
 				if (--maxcount <= 0)
 					break;
@@ -2522,12 +2371,14 @@ replace_single_character(PyStringObject *self,
 	/*   result_len = self_len + count * (to_len-1)  */
 	product = count * (to_len-1);
 	if (product / (to_len-1) != count) {
-		PyErr_SetString(PyExc_OverflowError, "replace string is too long");
+		PyErr_SetString(PyExc_OverflowError,
+				"replace string is too long");
 		return NULL;
 	}
 	result_len = self_len + product;
 	if (result_len < 0) {
-		PyErr_SetString(PyExc_OverflowError, "replace string is too long");
+		PyErr_SetString(PyExc_OverflowError,
+				"replace string is too long");
 		return NULL;
 	}
 
@@ -2590,12 +2441,14 @@ replace_substring(PyStringObject *self,
 	/*    result_len = self_len + count * (to_len-from_len) */
 	product = count * (to_len-from_len);
 	if (product / (to_len-from_len) != count) {
-		PyErr_SetString(PyExc_OverflowError, "replace string is too long");
+		PyErr_SetString(PyExc_OverflowError,
+				"replace string is too long");
 		return NULL;
 	}
 	result_len = self_len + product;
 	if (result_len < 0) {
-		PyErr_SetString(PyExc_OverflowError, "replace string is too long");
+		PyErr_SetString(PyExc_OverflowError,
+				"replace string is too long");
 		return NULL;
 	}
 
@@ -2675,7 +2528,8 @@ replace(PyStringObject *self,
 			return replace_delete_single_character(
 				self, from_s[0], maxcount);
 		} else {
-			return replace_delete_substring(self, from_s, from_len, maxcount);
+			return replace_delete_substring(self, from_s,
+							from_len, maxcount);
 		}
 	}
 
@@ -2690,7 +2544,8 @@ replace(PyStringObject *self,
 				maxcount);
 		} else {
 			return replace_substring_in_place(
-				self, from_s, from_len, to_s, to_len, maxcount);
+				self, from_s, from_len, to_s, to_len,
+				maxcount);
 		}
 	}
 
@@ -2700,14 +2555,15 @@ replace(PyStringObject *self,
 						to_s, to_len, maxcount);
 	} else {
 		/* len('from')>=2, len('to')>=1 */
-		return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
+		return replace_substring(self, from_s, from_len, to_s, to_len,
+					 maxcount);
 	}
 }
 
 PyDoc_STRVAR(replace__doc__,
-"S.replace (old, new[, count]) -> string\n\
+"B.replace(old, new[, count]) -> bytes\n\
 \n\
-Return a copy of string S with all occurrences of substring\n\
+Return a copy of B with all occurrences of subsection\n\
 old replaced by new.  If the optional argument count is\n\
 given, only the first count occurrences are replaced.");
 
@@ -2794,11 +2650,11 @@ _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
 
 
 PyDoc_STRVAR(startswith__doc__,
-"S.startswith(prefix[, start[, end]]) -> bool\n\
+"B.startswith(prefix [,start [,end]]) -> bool\n\
 \n\
-Return True if S starts with the specified prefix, False otherwise.\n\
-With optional start, test S beginning at that position.\n\
-With optional end, stop comparing S at that position.\n\
+Return True if B starts with the specified prefix, False otherwise.\n\
+With optional start, test B beginning at that position.\n\
+With optional end, stop comparing B at that position.\n\
 prefix can also be a tuple of strings to try.");
 
 static PyObject *
@@ -2835,11 +2691,11 @@ string_startswith(PyStringObject *self, PyObject *args)
 
 
 PyDoc_STRVAR(endswith__doc__,
-"S.endswith(suffix[, start[, end]]) -> bool\n\
+"B.endswith(suffix [,start [,end]]) -> bool\n\
 \n\
-Return True if S ends with the specified suffix, False otherwise.\n\
-With optional start, test S beginning at that position.\n\
-With optional end, stop comparing S at that position.\n\
+Return True if B ends with the specified suffix, False otherwise.\n\
+With optional start, test B beginning at that position.\n\
+With optional end, stop comparing B at that position.\n\
 suffix can also be a tuple of strings to try.");
 
 static PyObject *
@@ -2876,63 +2732,50 @@ string_endswith(PyStringObject *self, PyObject *args)
 
 
 PyDoc_STRVAR(decode__doc__,
-"S.decode([encoding[,errors]]) -> object\n\
+"B.decode([encoding[, errors]]) -> object\n\
 \n\
 Decodes S using the codec registered for encoding. encoding defaults\n\
 to the default encoding. errors may be given to set a different error\n\
-handling scheme. Default is 'strict' meaning that encoding errors raise\n\
-a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
+handling scheme.  Default is 'strict' meaning that encoding errors raise\n\
+a UnicodeDecodeError.  Other possible values are 'ignore' and 'replace'\n\
 as well as any other name registerd with codecs.register_error that is\n\
 able to handle UnicodeDecodeErrors.");
 
 static PyObject *
-string_decode(PyStringObject *self, PyObject *args)
+string_decode(PyObject *self, PyObject *args)
 {
-    char *encoding = NULL;
-    char *errors = NULL;
-    PyObject *v;
-
-    if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
-        return NULL;
-    v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
-    if (v == NULL)
-        goto onError;
-    if (!PyString_Check(v) && !PyUnicode_Check(v)) {
-        PyErr_Format(PyExc_TypeError,
-                     "decoder did not return a string/unicode object "
-                     "(type=%.400s)",
-                     Py_Type(v)->tp_name);
-        Py_DECREF(v);
-        return NULL;
-    }
-    return v;
+	const char *encoding = NULL;
+	const char *errors = NULL;
 
- onError:
-    return NULL;
+	if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
+		return NULL;
+	if (encoding == NULL)
+		encoding = PyUnicode_GetDefaultEncoding();
+	return PyCodec_Decode(self, encoding, errors);
 }
 
 
 PyDoc_STRVAR(fromhex_doc,
-"str8.fromhex(string) -> str8\n\
+"bytes.fromhex(string) -> bytes\n\
 \n\
-Create a str8 object from a string of hexadecimal numbers.\n\
-Spaces between two numbers are accepted. Example:\n\
-str8.fromhex('10 1112') -> s'\\x10\\x11\\x12'.");
+Create a bytes object from a string of hexadecimal numbers.\n\
+Spaces between two numbers are accepted.\n\
+Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
 
 static int
 hex_digit_to_int(Py_UNICODE c)
 {
-    if (c >= 128)
-        return -1;
-    if (ISDIGIT(c))
-        return c - '0';
-    else {
-        if (ISUPPER(c))
-            c = TOLOWER(c);
-        if (c >= 'a' && c <= 'f')
-            return c - 'a' + 10;
-    }
-    return -1;
+	if (c >= 128)
+		return -1;
+	if (ISDIGIT(c))
+		return c - '0';
+	else {
+		if (ISUPPER(c))
+			c = TOLOWER(c);
+		if (c >= 'a' && c <= 'f')
+			return c - 'a' + 10;
+	}
+	return -1;
 }
 
 static PyObject *
@@ -2975,7 +2818,7 @@ string_fromhex(PyObject *cls, PyObject *args)
 	return newstring;
 
   error:
-	Py_DECREF(newstring);
+	Py_XDECREF(newstring);
 	return NULL;
 }
 
@@ -3058,11 +2901,11 @@ string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 	const char *errors = NULL;
 	PyObject *new = NULL;
 	Py_ssize_t i, size;
-	static char *kwlist[] = {"object", "encoding", "errors", 0};
+	static char *kwlist[] = {"source", "encoding", "errors", 0};
 
 	if (type != &PyString_Type)
 		return str_subtype_new(type, args, kwds);
-	if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str8", kwlist, &x,
+	if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
 					 &encoding, &errors))
 		return NULL;
 	if (x == NULL) {
@@ -3085,34 +2928,37 @@ string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 		new = PyCodec_Encode(x, encoding, errors);
 		if (new == NULL)
 			return NULL;
-		/* XXX(gb): must accept bytes here since codecs output bytes
-		   at the moment */
-		if (PyBytes_Check(new)) {
-			PyObject *str;
-			str = PyString_FromString(PyBytes_AsString(new));
-			Py_DECREF(new);
-			if (!str)
-				return NULL;
-			return str;
-		}
-		if (!PyString_Check(new)) {
-			PyErr_Format(PyExc_TypeError,
-				     "encoder did not return a str8 "
-				     "object (type=%.400s)",
-				     Py_Type(new)->tp_name);
-			Py_DECREF(new);
-			return NULL;
-		}
+		assert(PyString_Check(new));
 		return new;
 	}
 
 	/* If it's not unicode, there can't be encoding or errors */
 	if (encoding != NULL || errors != NULL) {
 		PyErr_SetString(PyExc_TypeError,
-				"encoding or errors without a string argument");
+			"encoding or errors without a string argument");
 		return NULL;
 	}
 
+	/* Is it an int? */
+	size = PyNumber_AsSsize_t(x, PyExc_ValueError);
+	if (size == -1 && PyErr_Occurred()) {
+		PyErr_Clear();
+	}
+	else {
+		if (size < 0) {
+			PyErr_SetString(PyExc_ValueError, "negative count");
+			return NULL;
+		}
+		new = PyString_FromStringAndSize(NULL, size);
+		if (new == NULL) {
+			return NULL;
+		}
+		if (size > 0) {
+			memset(((PyStringObject*)new)->ob_sval, 0, size);
+		}
+		return new;
+	}
+
 	/* Use the modern buffer interface */
 	if (PyObject_CheckBuffer(x)) {
 		Py_buffer view;
@@ -3133,8 +2979,10 @@ string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 		return NULL;
 	}
 
-	/* For the iterator version, create a string object and resize as needed. */
-	/* XXX(gb): is 64 a good value? also, optimize this if length is known */
+	/* For iterator version, create a string object and resize as needed */
+	/* XXX(gb): is 64 a good value? also, optimize if length is known */
+	/* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
+	   input being a truly long iterator. */
 	size = 64;
 	new = PyString_FromStringAndSize(NULL, size);
 	if (new == NULL)
@@ -3158,9 +3006,9 @@ string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 		item = iternext(it);
 		if (item == NULL) {
 			if (PyErr_Occurred()) {
-				if (!PyErr_ExceptionMatches(PyExc_StopIteration))
-					goto error;
-				PyErr_Clear();
+			    if (!PyErr_ExceptionMatches(PyExc_StopIteration))
+				    goto error;
+			    PyErr_Clear();
 			}
 			break;
 		}
@@ -3193,7 +3041,7 @@ string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 	return new;
 
   error:
-	/* Error handling when it != NULL */
+	/* Error handling when new != NULL */
 	Py_XDECREF(it);
 	Py_DECREF(new);
 	return NULL;
@@ -3213,43 +3061,32 @@ str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 	n = PyString_GET_SIZE(tmp);
 	pnew = type->tp_alloc(type, n);
 	if (pnew != NULL) {
-		Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
+		Py_MEMCPY(PyString_AS_STRING(pnew),
+			  PyString_AS_STRING(tmp), n+1);
 		((PyStringObject *)pnew)->ob_shash =
 			((PyStringObject *)tmp)->ob_shash;
-		((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
 	}
 	Py_DECREF(tmp);
 	return pnew;
 }
 
-static PyObject *
-string_mod(PyObject *v, PyObject *w)
-{
-	if (!PyString_Check(v)) {
-		Py_INCREF(Py_NotImplemented);
-		return Py_NotImplemented;
-	}
-	return PyString_Format(v, w);
-}
-
-static PyNumberMethods string_as_number = {
-	0,			/*nb_add*/
-	0,			/*nb_subtract*/
-	0,			/*nb_multiply*/
-	string_mod,		/*nb_remainder*/
-};
-
 PyDoc_STRVAR(string_doc,
-"str(object) -> string\n\
+"bytes(iterable_of_ints) -> bytes.\n\
+bytes(string, encoding[, errors]) -> bytes\n\
+bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer.\n\
+bytes(memory_view) -> bytes.\n\
 \n\
-Return a nice string representation of the object.\n\
-If the argument is a string, the return value is the same object.");
+Construct an immutable array of bytes from:\n\
+  - an iterable yielding integers in range(256)\n\
+  - a text string encoded using the specified encoding\n\
+  - a bytes or a buffer object\n\
+  - any object implementing the buffer API.");
 
 static PyObject *str_iter(PyObject *seq);
 
 PyTypeObject PyString_Type = {
 	PyVarObject_HEAD_INIT(&PyType_Type, 0)
-	"str8",
+	"bytes",
 	sizeof(PyStringObject),
 	sizeof(char),
  	string_dealloc, 			/* tp_dealloc */
@@ -3257,8 +3094,8 @@ PyTypeObject PyString_Type = {
 	0,					/* tp_getattr */
 	0,					/* tp_setattr */
 	0,					/* tp_compare */
-	string_repr, 				/* tp_repr */
-	&string_as_number,			/* tp_as_number */
+	(reprfunc)string_repr, 			/* tp_repr */
+	0,					/* tp_as_number */
 	&string_as_sequence,			/* tp_as_sequence */
 	&string_as_mapping,			/* tp_as_mapping */
 	(hashfunc)string_hash, 			/* tp_hash */
@@ -3294,14 +3131,15 @@ void
 PyString_Concat(register PyObject **pv, register PyObject *w)
 {
 	register PyObject *v;
+	assert(pv != NULL);
 	if (*pv == NULL)
 		return;
-	if (w == NULL || !PyString_Check(*pv)) {
+	if (w == NULL) {
 		Py_DECREF(*pv);
 		*pv = NULL;
 		return;
 	}
-	v = string_concat((PyStringObject *) *pv, w);
+	v = string_concat(*pv, w);
 	Py_DECREF(*pv);
 	*pv = v;
 }
@@ -3334,8 +3172,7 @@ _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
 	register PyObject *v;
 	register PyStringObject *sv;
 	v = *pv;
-	if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0 ||
-	    PyString_CHECK_INTERNED(v)) {
+	if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0) {
 		*pv = 0;
 		Py_DECREF(v);
 		PyErr_BadInternalCall();
@@ -3359,85 +3196,6 @@ _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
 	return 0;
 }
 
-/* Helpers for formatstring */
-
-Py_LOCAL_INLINE(PyObject *)
-getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
-{
-	Py_ssize_t argidx = *p_argidx;
-	if (argidx < arglen) {
-		(*p_argidx)++;
-		if (arglen < 0)
-			return args;
-		else
-			return PyTuple_GetItem(args, argidx);
-	}
-	PyErr_SetString(PyExc_TypeError,
-			"not enough arguments for format string");
-	return NULL;
-}
-
-/* Format codes
- * F_LJUST	'-'
- * F_SIGN	'+'
- * F_BLANK	' '
- * F_ALT	'#'
- * F_ZERO	'0'
- */
-#define F_LJUST (1<<0)
-#define F_SIGN	(1<<1)
-#define F_BLANK (1<<2)
-#define F_ALT	(1<<3)
-#define F_ZERO	(1<<4)
-
-Py_LOCAL_INLINE(int)
-formatfloat(char *buf, size_t buflen, int flags,
-            int prec, int type, PyObject *v)
-{
-	/* fmt = '%#.' + `prec` + `type`
-	   worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
-	char fmt[20];
-	double x;
-	x = PyFloat_AsDouble(v);
-	if (x == -1.0 && PyErr_Occurred()) {
-		PyErr_Format(PyExc_TypeError, "float argument required, "
-			     "not %.200s", Py_Type(v)->tp_name);
-		return -1;
-	}
-	if (prec < 0)
-		prec = 6;
-	if (type == 'f' && fabs(x)/1e25 >= 1e25)
-		type = 'g';
-	/* Worst case length calc to ensure no buffer overrun:
-
-	   'g' formats:
-	     fmt = %#.<prec>g
-	     buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
-	        for any double rep.)
-	     len = 1 + prec + 1 + 2 + 5 = 9 + prec
-
-	   'f' formats:
-	     buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
-	     len = 1 + 50 + 1 + prec = 52 + prec
-
-	   If prec=0 the effective precision is 1 (the leading digit is
-	   always given), therefore increase the length by one.
-
-	*/
-	if (((type == 'g' || type == 'G') &&
-              buflen <= (size_t)10 + (size_t)prec) ||
-	    (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
-		PyErr_SetString(PyExc_OverflowError,
-			"formatted float is too long (precision too large?)");
-		return -1;
-	}
-	PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
-		      (flags&F_ALT) ? "#" : "",
-		      prec, type);
-        PyOS_ascii_formatd(buf, buflen, fmt, x);
-	return (int)strlen(buf);
-}
-
 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
  * Python's regular ints.
@@ -3516,7 +3274,8 @@ _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
 	}
 	llen = PyString_Size(result);
 	if (llen > INT_MAX) {
-		PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
+		PyErr_SetString(PyExc_ValueError,
+				"string too large in _PyString_FormatLong");
 		return NULL;
 	}
 	len = (int)llen;
@@ -3534,7 +3293,7 @@ _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
 	    (type == 'o' || type == 'x' || type == 'X'))) {
 		assert(buf[sign] == '0');
 		assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
-                       buf[sign+1] == 'o');
+		       buf[sign+1] == 'o');
 		numnondigits -= 2;
 		buf += 2;
 		len -= 2;
@@ -3580,623 +3339,6 @@ _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
 	return result;
 }
 
-Py_LOCAL_INLINE(int)
-formatint(char *buf, size_t buflen, int flags,
-          int prec, int type, PyObject *v)
-{
-	/* fmt = '%#.' + `prec` + 'l' + `type`
-	   worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
-	   + 1 + 1 = 24 */
-	char fmt[64];	/* plenty big enough! */
-	char *sign;
-	long x;
-
-	x = PyInt_AsLong(v);
-	if (x == -1 && PyErr_Occurred()) {
-		PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
-			     Py_Type(v)->tp_name);
-		return -1;
-	}
-	if (x < 0 && type == 'u') {
-		type = 'd';
-	}
-	if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
-		sign = "-";
-	else
-		sign = "";
-	if (prec < 0)
-		prec = 1;
-
-	if ((flags & F_ALT) &&
-	    (type == 'x' || type == 'X' || type == 'o')) {
-		/* When converting under %#o, %#x or %#X, there are a number
-		 * of issues that cause pain:
-		 * - for %#o, we want a different base marker than C
-		 * - when 0 is being converted, the C standard leaves off
-		 *   the '0x' or '0X', which is inconsistent with other
-		 *   %#x/%#X conversions and inconsistent with Python's
-		 *   hex() function
-		 * - there are platforms that violate the standard and
-		 *   convert 0 with the '0x' or '0X'
-		 *   (Metrowerks, Compaq Tru64)
-		 * - there are platforms that give '0x' when converting
-		 *   under %#X, but convert 0 in accordance with the
-		 *   standard (OS/2 EMX)
-		 *
-		 * We can achieve the desired consistency by inserting our
-		 * own '0x' or '0X' prefix, and substituting %x/%X in place
-		 * of %#x/%#X.
-		 *
-		 * Note that this is the same approach as used in
-		 * formatint() in unicodeobject.c
-		 */
-		PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
-			      sign, type, prec, type);
-	}
-	else {
-		PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
-			      sign, (flags&F_ALT) ? "#" : "",
-			      prec, type);
-	}
-
-	/* buf = '+'/'-'/'' + '0o'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
-	 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
-	 */
-	if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
-		PyErr_SetString(PyExc_OverflowError,
-		    "formatted integer is too long (precision too large?)");
-		return -1;
-	}
-	if (sign[0])
-		PyOS_snprintf(buf, buflen, fmt, -x);
-	else
-		PyOS_snprintf(buf, buflen, fmt, x);
-	return (int)strlen(buf);
-}
-
-Py_LOCAL_INLINE(int)
-formatchar(char *buf, size_t buflen, PyObject *v)
-{
-	/* presume that the buffer is at least 2 characters long */
-	if (PyString_Check(v)) {
-		if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
-			return -1;
-	}
-	else {
-		if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
-			return -1;
-	}
-	buf[1] = '\0';
-	return 1;
-}
-
-/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
-
-   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
-   chars are formatted. XXX This is a magic number. Each formatting
-   routine does bounds checking to ensure no overflow, but a better
-   solution may be to malloc a buffer of appropriate size for each
-   format. For now, the current solution is sufficient.
-*/
-#define FORMATBUFLEN (size_t)120
-
-PyObject *
-PyString_Format(PyObject *format, PyObject *args)
-{
-	char *fmt, *res;
-	Py_ssize_t arglen, argidx;
-	Py_ssize_t reslen, rescnt, fmtcnt;
-	int args_owned = 0;
-	PyObject *result, *orig_args;
-	PyObject *v, *w;
-	PyObject *dict = NULL;
-	if (format == NULL || !PyString_Check(format) || args == NULL) {
-		PyErr_BadInternalCall();
-		return NULL;
-	}
-	orig_args = args;
-	fmt = PyString_AS_STRING(format);
-	fmtcnt = PyString_GET_SIZE(format);
-	reslen = rescnt = fmtcnt + 100;
-	result = PyString_FromStringAndSize((char *)NULL, reslen);
-	if (result == NULL)
-		return NULL;
-	res = PyString_AsString(result);
-	if (PyTuple_Check(args)) {
-		arglen = PyTuple_GET_SIZE(args);
-		argidx = 0;
-	}
-	else {
-		arglen = -1;
-		argidx = -2;
-	}
-	if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) &&
-	    !PyString_Check(args) && !PyUnicode_Check(args))
-		dict = args;
-	while (--fmtcnt >= 0) {
-		if (*fmt != '%') {
-			if (--rescnt < 0) {
-				rescnt = fmtcnt + 100;
-				reslen += rescnt;
-				if (_PyString_Resize(&result, reslen) < 0)
-					return NULL;
-				res = PyString_AS_STRING(result)
-					+ reslen - rescnt;
-				--rescnt;
-			}
-			*res++ = *fmt++;
-		}
-		else {
-			/* Got a format specifier */
-			int flags = 0;
-			Py_ssize_t width = -1;
-			int prec = -1;
-			int c = '\0';
-			int fill;
-			PyObject *v = NULL;
-			PyObject *temp = NULL;
-			char *pbuf;
-			int sign;
-			Py_ssize_t len;
-			char formatbuf[FORMATBUFLEN];
-			     /* For format{float,int,char}() */
-			char *fmt_start = fmt;
-			Py_ssize_t argidx_start = argidx;
-
-			fmt++;
-			if (*fmt == '(') {
-				char *keystart;
-				Py_ssize_t keylen;
-				PyObject *key;
-				int pcount = 1;
-
-				if (dict == NULL) {
-					PyErr_SetString(PyExc_TypeError,
-						 "format requires a mapping");
-					goto error;
-				}
-				++fmt;
-				--fmtcnt;
-				keystart = fmt;
-				/* Skip over balanced parentheses */
-				while (pcount > 0 && --fmtcnt >= 0) {
-					if (*fmt == ')')
-						--pcount;
-					else if (*fmt == '(')
-						++pcount;
-					fmt++;
-				}
-				keylen = fmt - keystart - 1;
-				if (fmtcnt < 0 || pcount > 0) {
-					PyErr_SetString(PyExc_ValueError,
-						   "incomplete format key");
-					goto error;
-				}
-				key = PyString_FromStringAndSize(keystart,
-								 keylen);
-				if (key == NULL)
-					goto error;
-				if (args_owned) {
-					Py_DECREF(args);
-					args_owned = 0;
-				}
-				args = PyObject_GetItem(dict, key);
-				Py_DECREF(key);
-				if (args == NULL) {
-					goto error;
-				}
-				args_owned = 1;
-				arglen = -1;
-				argidx = -2;
-			}
-			while (--fmtcnt >= 0) {
-				switch (c = *fmt++) {
-				case '-': flags |= F_LJUST; continue;
-				case '+': flags |= F_SIGN; continue;
-				case ' ': flags |= F_BLANK; continue;
-				case '#': flags |= F_ALT; continue;
-				case '0': flags |= F_ZERO; continue;
-				}
-				break;
-			}
-			if (c == '*') {
-				v = getnextarg(args, arglen, &argidx);
-				if (v == NULL)
-					goto error;
-				if (!PyInt_Check(v)) {
-					PyErr_SetString(PyExc_TypeError,
-							"* wants int");
-					goto error;
-				}
-				width = PyInt_AsLong(v);
-				if (width == -1 && PyErr_Occurred())
-					goto error;
-				if (width < 0) {
-					flags |= F_LJUST;
-					width = -width;
-				}
-				if (--fmtcnt >= 0)
-					c = *fmt++;
-			}
-			else if (c >= 0 && ISDIGIT(c)) {
-				width = c - '0';
-				while (--fmtcnt >= 0) {
-					c = Py_CHARMASK(*fmt++);
-					if (!ISDIGIT(c))
-						break;
-					if ((width*10) / 10 != width) {
-						PyErr_SetString(
-							PyExc_ValueError,
-							"width too big");
-						goto error;
-					}
-					width = width*10 + (c - '0');
-				}
-			}
-			if (c == '.') {
-				prec = 0;
-				if (--fmtcnt >= 0)
-					c = *fmt++;
-				if (c == '*') {
-					v = getnextarg(args, arglen, &argidx);
-					if (v == NULL)
-						goto error;
-					if (!PyInt_Check(v)) {
-						PyErr_SetString(
-							PyExc_TypeError,
-							"* wants int");
-						goto error;
-					}
-					prec = PyInt_AsLong(v);
-					if (prec == -1 && PyErr_Occurred())
-						goto error;
-					if (prec < 0)
-						prec = 0;
-					if (--fmtcnt >= 0)
-						c = *fmt++;
-				}
-				else if (c >= 0 && ISDIGIT(c)) {
-					prec = c - '0';
-					while (--fmtcnt >= 0) {
-						c = Py_CHARMASK(*fmt++);
-						if (!ISDIGIT(c))
-							break;
-						if ((prec*10) / 10 != prec) {
-							PyErr_SetString(
-							    PyExc_ValueError,
-							    "prec too big");
-							goto error;
-						}
-						prec = prec*10 + (c - '0');
-					}
-				}
-			} /* prec */
-			if (fmtcnt >= 0) {
-				if (c == 'h' || c == 'l' || c == 'L') {
-					if (--fmtcnt >= 0)
-						c = *fmt++;
-				}
-			}
-			if (fmtcnt < 0) {
-				PyErr_SetString(PyExc_ValueError,
-						"incomplete format");
-				goto error;
-			}
-			if (c != '%') {
-				v = getnextarg(args, arglen, &argidx);
-				if (v == NULL)
-					goto error;
-			}
-			sign = 0;
-			fill = ' ';
-			switch (c) {
-			case '%':
-				pbuf = "%";
-				len = 1;
-				break;
-			case 's':
-				if (PyUnicode_Check(v)) {
-					fmt = fmt_start;
-					argidx = argidx_start;
-					goto unicode;
-				}
-				temp = _PyObject_Str(v);
-				if (temp != NULL && PyUnicode_Check(temp)) {
-					Py_DECREF(temp);
-					fmt = fmt_start;
-					argidx = argidx_start;
-					goto unicode;
-				}
-				/* Fall through */
-			case 'r':
-				if (c == 'r')
-					temp = PyObject_ReprStr8(v);
-				if (temp == NULL)
-					goto error;
-				if (!PyString_Check(temp)) {
-					PyErr_SetString(PyExc_TypeError,
-					  "%s argument has non-string str()/repr()");
-					Py_DECREF(temp);
-					goto error;
-				}
-				pbuf = PyString_AS_STRING(temp);
-				len = PyString_GET_SIZE(temp);
-				if (prec >= 0 && len > prec)
-					len = prec;
-				break;
-			case 'i':
-			case 'd':
-			case 'u':
-			case 'o':
-			case 'x':
-			case 'X':
-				if (c == 'i')
-					c = 'd';
-				if (PyLong_Check(v)) {
-					int ilen;
-					temp = _PyString_FormatLong(v, flags,
-						prec, c, &pbuf, &ilen);
-					len = ilen;
-					if (!temp)
-						goto error;
-					sign = 1;
-				}
-				else {
-					pbuf = formatbuf;
-					len = formatint(pbuf,
-							sizeof(formatbuf),
-							flags, prec, c, v);
-					if (len < 0)
-						goto error;
-					sign = 1;
-				}
-				if (flags & F_ZERO)
-					fill = '0';
-				break;
-			case 'e':
-			case 'E':
-			case 'f':
-			case 'F':
-			case 'g':
-			case 'G':
-				if (c == 'F')
-					c = 'f';
-				pbuf = formatbuf;
-				len = formatfloat(pbuf, sizeof(formatbuf),
-						  flags, prec, c, v);
-				if (len < 0)
-					goto error;
-				sign = 1;
-				if (flags & F_ZERO)
-					fill = '0';
-				break;
-			case 'c':
-				if (PyUnicode_Check(v)) {
-					fmt = fmt_start;
-					argidx = argidx_start;
-					goto unicode;
-				}
-				pbuf = formatbuf;
-				len = formatchar(pbuf, sizeof(formatbuf), v);
-				if (len < 0)
-					goto error;
-				break;
-			default:
-				PyErr_Format(PyExc_ValueError,
-				  "unsupported format character '%c' (0x%x) "
-				  "at index %zd",
-				  c, c,
-				  (Py_ssize_t)(fmt - 1 -
-					       PyString_AsString(format)));
-				goto error;
-			}
-			if (sign) {
-				if (*pbuf == '-' || *pbuf == '+') {
-					sign = *pbuf++;
-					len--;
-				}
-				else if (flags & F_SIGN)
-					sign = '+';
-				else if (flags & F_BLANK)
-					sign = ' ';
-				else
-					sign = 0;
-			}
-			if (width < len)
-				width = len;
-			if (rescnt - (sign != 0) < width) {
-				reslen -= rescnt;
-				rescnt = width + fmtcnt + 100;
-				reslen += rescnt;
-				if (reslen < 0) {
-					Py_DECREF(result);
-					Py_XDECREF(temp);
-					return PyErr_NoMemory();
-				}
-				if (_PyString_Resize(&result, reslen) < 0) {
-					Py_XDECREF(temp);
-					return NULL;
-				}
-				res = PyString_AS_STRING(result)
-					+ reslen - rescnt;
-			}
-			if (sign) {
-				if (fill != ' ')
-					*res++ = sign;
-				rescnt--;
-				if (width > len)
-					width--;
-			}
-			if ((flags & F_ALT) &&
-			    (c == 'x' || c == 'X' || c == 'o')) {
-				assert(pbuf[0] == '0');
-				assert(pbuf[1] == c);
-				if (fill != ' ') {
-					*res++ = *pbuf++;
-					*res++ = *pbuf++;
-				}
-				rescnt -= 2;
-				width -= 2;
-				if (width < 0)
-					width = 0;
-				len -= 2;
-			}
-			if (width > len && !(flags & F_LJUST)) {
-				do {
-					--rescnt;
-					*res++ = fill;
-				} while (--width > len);
-			}
-			if (fill == ' ') {
-				if (sign)
-					*res++ = sign;
-				if ((flags & F_ALT) &&
-				    (c == 'x' || c == 'X' || c == 'o')) {
-					assert(pbuf[0] == '0');
-					assert(pbuf[1] == c);
-					*res++ = *pbuf++;
-					*res++ = *pbuf++;
-				}
-			}
-			Py_MEMCPY(res, pbuf, len);
-			res += len;
-			rescnt -= len;
-			while (--width >= len) {
-				--rescnt;
-				*res++ = ' ';
-			}
-                        if (dict && (argidx < arglen) && c != '%') {
-                                PyErr_SetString(PyExc_TypeError,
-                                           "not all arguments converted during string formatting");
-                                Py_XDECREF(temp);
-                                goto error;
-                        }
-			Py_XDECREF(temp);
-		} /* '%' */
-	} /* until end */
-	if (argidx < arglen && !dict) {
-		PyErr_SetString(PyExc_TypeError,
-				"not all arguments converted during string formatting");
-		goto error;
-	}
-	if (args_owned) {
-		Py_DECREF(args);
-	}
-	_PyString_Resize(&result, reslen - rescnt);
-	return result;
-
- unicode:
-	if (args_owned) {
-		Py_DECREF(args);
-		args_owned = 0;
-	}
-	/* Fiddle args right (remove the first argidx arguments) */
-	if (PyTuple_Check(orig_args) && argidx > 0) {
-		PyObject *v;
-		Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
-		v = PyTuple_New(n);
-		if (v == NULL)
-			goto error;
-		while (--n >= 0) {
-			PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
-			Py_INCREF(w);
-			PyTuple_SET_ITEM(v, n, w);
-		}
-		args = v;
-	} else {
-		Py_INCREF(orig_args);
-		args = orig_args;
-	}
-	args_owned = 1;
-	/* Take what we have of the result and let the Unicode formatting
-	   function format the rest of the input. */
-	rescnt = res - PyString_AS_STRING(result);
-	if (_PyString_Resize(&result, rescnt))
-		goto error;
-	fmtcnt = PyString_GET_SIZE(format) - \
-		 (fmt - PyString_AS_STRING(format));
-	format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
-	if (format == NULL)
-		goto error;
-	v = PyUnicode_Format(format, args);
-	Py_DECREF(format);
-	if (v == NULL)
-		goto error;
-	/* Paste what we have (result) to what the Unicode formatting
-	   function returned (v) and return the result (or error) */
-	w = PyUnicode_Concat(result, v);
-	Py_DECREF(result);
-	Py_DECREF(v);
-	Py_DECREF(args);
-	return w;
-
- error:
-	Py_DECREF(result);
-	if (args_owned) {
-		Py_DECREF(args);
-	}
-	return NULL;
-}
-
-void
-PyString_InternInPlace(PyObject **p)
-{
-	register PyStringObject *s = (PyStringObject *)(*p);
-	PyObject *t;
-	if (s == NULL || !PyString_Check(s))
-		Py_FatalError("PyString_InternInPlace: strings only please!");
-	/* If it's a string subclass, we don't really know what putting
-	   it in the interned dict might do. */
-	if (!PyString_CheckExact(s))
-		return;
-	if (PyString_CHECK_INTERNED(s))
-		return;
-	if (interned == NULL) {
-		interned = PyDict_New();
-		if (interned == NULL) {
-			PyErr_Clear(); /* Don't leave an exception */
-			return;
-		}
-	}
-	t = PyDict_GetItem(interned, (PyObject *)s);
-	if (t) {
-		Py_INCREF(t);
-		Py_DECREF(*p);
-		*p = t;
-		return;
-	}
-
-	if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
-		PyErr_Clear();
-		return;
-	}
-	/* The two references in interned are not counted by refcnt.
-	   The string deallocator will take care of this */
-	Py_Refcnt(s) -= 2;
-	PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
-}
-
-void
-PyString_InternImmortal(PyObject **p)
-{
-	PyString_InternInPlace(p);
-	if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
-		PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
-		Py_INCREF(*p);
-	}
-}
-
-
-PyObject *
-PyString_InternFromString(const char *cp)
-{
-	PyObject *s = PyString_FromString(cp);
-	if (s == NULL)
-		return NULL;
-	PyString_InternInPlace(&s);
-	return s;
-}
-
 void
 PyString_Fini(void)
 {
@@ -4209,58 +3351,6 @@ PyString_Fini(void)
 	nullstring = NULL;
 }
 
-void _Py_ReleaseInternedStrings(void)
-{
-	PyObject *keys;
-	PyStringObject *s;
-	Py_ssize_t i, n;
-	Py_ssize_t immortal_size = 0, mortal_size = 0;
-
-	if (interned == NULL || !PyDict_Check(interned))
-		return;
-	keys = PyDict_Keys(interned);
-	if (keys == NULL || !PyList_Check(keys)) {
-		PyErr_Clear();
-		return;
-	}
-
-	/* Since _Py_ReleaseInternedStrings() is intended to help a leak
-	   detector, interned strings are not forcibly deallocated; rather, we
-	   give them their stolen references back, and then clear and DECREF
-	   the interned dict. */
-
-	n = PyList_GET_SIZE(keys);
-	fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
-		n);
-	for (i = 0; i < n; i++) {
-		s = (PyStringObject *) PyList_GET_ITEM(keys, i);
-		switch (s->ob_sstate) {
-		case SSTATE_NOT_INTERNED:
-			/* XXX Shouldn't happen */
-			break;
-		case SSTATE_INTERNED_IMMORTAL:
-			Py_Refcnt(s) += 1;
-			immortal_size += Py_Size(s);
-			break;
-		case SSTATE_INTERNED_MORTAL:
-			Py_Refcnt(s) += 2;
-			mortal_size += Py_Size(s);
-			break;
-		default:
-			Py_FatalError("Inconsistent interned string state.");
-		}
-		s->ob_sstate = SSTATE_NOT_INTERNED;
-	}
-	fprintf(stderr, "total size of all interned strings: "
-			"%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
-			"mortal/immortal\n", mortal_size, immortal_size);
-	Py_DECREF(keys);
-	PyDict_Clear(interned);
-	Py_DECREF(interned);
-	interned = NULL;
-}
-
-
 /*********************** Str Iterator ****************************/
 
 typedef struct {
author	Guido van Rossum <guido@python.org>	2007-11-06 21:34:58 (GMT)
committer	Guido van Rossum <guido@python.org>	2007-11-06 21:34:58 (GMT)
commit	98297ee7815939b124156e438b22bd652d67b5db (patch)
tree	a9d239ebd87c73af2571ab48003984c4e18e27e5 /Objects/stringobject.c
parent	a19f80c6df2df5e8a5d0cff37131097835ef971e (diff)
download	cpython-98297ee7815939b124156e438b22bd652d67b5db.zip cpython-98297ee7815939b124156e438b22bd652d67b5db.tar.gz cpython-98297ee7815939b124156e438b22bd652d67b5db.tar.bz2