3 files changed, 76 insertions, 72 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 41feae2..368a212 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -454,14 +454,12 @@ extern DL_IMPORT(int) PyUnicode_Resize(
 
    Coercion is done in the following way:
 
-   1. Unicode objects are passed back as-is with incremented
-      refcount.
-
-   2. String and other char buffer compatible objects are decoded
+   1. String and other char buffer compatible objects are decoded
       under the assumptions that they contain data using the current
       default encoding. Decoding is done in "strict" mode.
 
-   3. All other objects raise an exception.
+   2. All other objects (including Unicode objects) raise an
+      exception.
 
    The API returns NULL in case of an error. The caller is responsible
    for decref'ing the returned objects.
@@ -474,12 +472,13 @@ extern DL_IMPORT(PyObject*) PyUnicode_FromEncodedObject(
     const char *errors          /* error handling */
     );
 
-/* Shortcut for PyUnicode_FromEncodedObject(obj, NULL, "strict");
-   which results in using the default encoding as basis for 
-   decoding the object.
-
-   Coerces obj to an Unicode object and return a reference with
+/* Coerce obj to a Unicode object and return a reference with
    *incremented* refcount.
+   
+   Unicode objects are passed back as-is (subclasses are converted to
+   true Unicode objects), all other objects are delegated to
+   PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
+   using the default encoding as basis for decoding the object.
 
    The API returns NULL in case of an error. The caller is responsible
    for decref'ing the returned objects.
diff --git a/Objects/object.c b/Objects/object.c
index af0c0bb..aa5f87c 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -296,39 +296,50 @@ PyObject_Unicode(PyObject *v)
 	
 	if (v == NULL)
 		res = PyString_FromString("<NULL>");
-	else if (PyUnicode_Check(v)) {
+	else if (PyUnicode_CheckExact(v)) {
 		Py_INCREF(v);
 		return v;
 	}
-	else if (PyString_Check(v)) {
+	else if (PyUnicode_Check(v)) {
+		/* Subtype: return a true Unicode object with the same data.
+		   (Chain stays else-if so a NULL v is never dereferenced.) */
+		return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(v),
+					     PyUnicode_GET_SIZE(v));
+	}
+	else if (PyString_Check(v)) {
 		Py_INCREF(v);
 	    	res = v;
     	}
-	else if (v->ob_type->tp_str != NULL)
-		res = (*v->ob_type->tp_str)(v);
 	else {
 		PyObject *func;
-		static PyObject *strstr;
-		if (strstr == NULL) {
-			strstr= PyString_InternFromString("__str__");
-			if (strstr == NULL)
+		static PyObject *unicodestr;
+		/* XXX As soon as we have a tp_unicode slot, we should
+		       check this before trying the __unicode__
+		       method. */
+		if (unicodestr == NULL) {
+			unicodestr= PyString_InternFromString(
+						       "__unicode__");
+			if (unicodestr == NULL)
 				return NULL;
 		}
-		if (!PyInstance_Check(v) ||
-		    (func = PyObject_GetAttr(v, strstr)) == NULL) {
-			PyErr_Clear();
-			res = PyObject_Repr(v);
-		}
-		else {
+		func = PyObject_GetAttr(v, unicodestr);
+		if (func != NULL) {
 		    	res = PyEval_CallObject(func, (PyObject *)NULL);
 			Py_DECREF(func);
 		}
+		else {
+			PyErr_Clear();
+			if (v->ob_type->tp_str != NULL)
+				res = (*v->ob_type->tp_str)(v);
+			else
+				res = PyObject_Repr(v);
+		}
 	}
 	if (res == NULL)
 		return NULL;
 	if (!PyUnicode_Check(res)) {
-		PyObject* str;
-		str = PyUnicode_FromObject(res);
+		PyObject *str;
+		str = PyUnicode_FromEncodedObject(res, NULL, "strict");
 		Py_DECREF(res);
 		if (str)
 			res = str;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index a252587..a29c75b 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -395,6 +395,18 @@ int PyUnicode_AsWideChar(PyUnicodeObject *unicode,
 
 PyObject *PyUnicode_FromObject(register PyObject *obj)
 {
+    /* Coerce obj to Unicode; returns a new reference or NULL on error.
+       NULL falls through so PyUnicode_FromEncodedObject() reports it.
+       XXX Perhaps alias this API to PyObject_Unicode() instead ?! */
+    if (obj != NULL && PyUnicode_Check(obj)) {
+	if (PyUnicode_CheckExact(obj)) {
+	    Py_INCREF(obj);
+	    return obj;
+	}
+	/* Unicode subtype: return a true Unicode object, same data. */
+	return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj),
+				     PyUnicode_GET_SIZE(obj));
+    }
     return PyUnicode_FromEncodedObject(obj, NULL, "strict");
 }
 
@@ -406,69 +418,49 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
     int len;
     int owned = 0;
     PyObject *v;
-    int reclevel;
     
     if (obj == NULL) {
 	PyErr_BadInternalCall();
 	return NULL;
     }
 
-    /* Coerce object */
-    for (reclevel = 0; reclevel < 2; reclevel++) {
+#if 0
+    /* For b/w compatibility we also accept Unicode objects provided
+       that no encoding is given and then redirect to PyObject_Unicode() 
+       which then applies the additional logic for Unicode subclasses.
+
+       NOTE: This API should really only be used for objects which
+             represent *encoded* Unicode !
 
+    */
 	if (PyUnicode_Check(obj)) {
 	    if (encoding) {
 		PyErr_SetString(PyExc_TypeError,
 				"decoding Unicode is not supported");
-		goto onError;
-	    }
-	    if (PyUnicode_CheckExact(obj)) {
-		Py_INCREF(obj);
-		v = obj;
+	    return NULL;
 	    }
-	    else {
-		/* For a subclass of unicode, return a true unicode object
-		   with the same string value. */
-		v = PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj),
-					  PyUnicode_GET_SIZE(obj));
+	return PyObject_Unicode(obj);
 	    }
-	    goto done;
+#else
+    if (PyUnicode_Check(obj)) {
+	PyErr_SetString(PyExc_TypeError,
+			"decoding Unicode is not supported");
+	return NULL;
 	}
-	else if (PyString_Check(obj)) {
+#endif
+
+    /* Coerce object */
+    if (PyString_Check(obj)) {
 	    s = PyString_AS_STRING(obj);
 	    len = PyString_GET_SIZE(obj);
-	    break;
-	}
-	else {
-	    PyObject *w;
-
-	    /* Try char buffer interface */
-            if (PyObject_AsCharBuffer(obj, &s, &len))
-		PyErr_Clear();
-	    else
-		break;
-    
-	    /* Mimic the behaviour of str(object) if everything else
-    	       fails (see PyObject_Str()); this also covers instances
-    	       which implement __str__. */
-	    if (obj->ob_type->tp_str == NULL)
-		w = PyObject_Repr(obj);
-	    else
-		w = (*obj->ob_type->tp_str)(obj);
-	    if (w == NULL)
-		goto onError;
-	    if (owned) {
-		Py_DECREF(obj);
 	    }
-	    obj = w;
-	    owned = 1;
-	}
-    }
-
-    if (s == NULL) {
+    else if (PyObject_AsCharBuffer(obj, &s, &len)) {
+	/* Overwrite the error message with something more useful in
+	   case of a TypeError. */
+	if (PyErr_ExceptionMatches(PyExc_TypeError))
 	PyErr_Format(PyExc_TypeError,
-		     "coercing to Unicode: __str__ recursion limit exceeded "
-		     "(last type: %.80s)",
+			 "coercing to Unicode: need string or buffer, "
+			 "%.80s found",
 		     obj->ob_type->tp_name);
 	goto onError;
     }
@@ -481,7 +473,6 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
     else 
 	v = PyUnicode_Decode(s, len, encoding, errors);
 
- done:
     if (owned) {
 	Py_DECREF(obj);
     }
@@ -5653,6 +5644,9 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 	    return NULL;
 	if (x == NULL)
 		return (PyObject *)_PyUnicode_New(0);
+	if (encoding == NULL && errors == NULL)
+	    return PyObject_Unicode(x);
+	else
 	return PyUnicode_FromEncodedObject(x, encoding, errors);
 }