Implement the changes proposed in patch #413333.

unicode(obj) now works just like str(obj): if it finds that the object is neither a string nor a buffer, it tries __str__/tp_str on the object.
author: Marc-André Lemburg <mal@egenix.com> 2001-09-20 12:53:16 (GMT)
committer: Marc-André Lemburg <mal@egenix.com> 2001-09-20 12:53:16 (GMT)
commit: 6871f6ac57baca19facf5b49846b101c60ef3334 (patch)
tree: 033ba35ab876fc889c2691704e3d7e651ae57fa5
parent: c60e6f777114f43c64f1b83f9ad2b6e4efd220e7 (diff)
download: cpython-6871f6ac57baca19facf5b49846b101c60ef3334.zip
cpython-6871f6ac57baca19facf5b49846b101c60ef3334.tar.gz
cpython-6871f6ac57baca19facf5b49846b101c60ef3334.tar.bz2
2 files changed, 60 insertions, 42 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index d57328d..d508bef 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -429,6 +429,7 @@ verify(unicode('hello','utf-8') == u'hello')
 verify(unicode('hello','utf8') == u'hello')
 verify(unicode('hello','latin-1') == u'hello')
 
+# Compatibility to str():
 class String:
     x = ''
     def __str__(self):
@@ -444,6 +445,10 @@ o.x = u'abc'
 verify(unicode(o) == u'abc')
 verify(str(o) == 'abc')
 
+for obj in (123, 123.45, 123L):
+    verify(unicode(obj) == unicode(str(obj)))
+
+# Error handling
 try:
     u'Andr\202 x'.encode('ascii')
     u'Andr\202 x'.encode('ascii','strict')
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 50f2f5c..896e80f 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -398,10 +398,11 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
 				      const char *encoding,
 				      const char *errors)
 {
-    const char *s;
+    const char *s = NULL;
     int len;
     int owned = 0;
     PyObject *v;
+    int reclevel;
     
     if (obj == NULL) {
 	PyErr_BadInternalCall();
@@ -409,53 +410,65 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
     }
 
     /* Coerce object */
-    if (PyInstance_Check(obj)) {
-	PyObject *func;
-	func = PyObject_GetAttrString(obj, "__str__");
-	if (func == NULL) {
-	    PyErr_SetString(PyExc_TypeError,
-		  "coercing to Unicode: instance doesn't define __str__");
-	    return NULL;
+    for (reclevel = 0; reclevel < 2; reclevel++) {
+
+	if (PyUnicode_Check(obj)) {
+	    if (encoding) {
+		PyErr_SetString(PyExc_TypeError,
+				"decoding Unicode is not supported");
+		goto onError;
+	    }
+	    if (PyUnicode_CheckExact(obj)) {
+		Py_INCREF(obj);
+		v = obj;
+	    }
+	    else {
+		/* For a subclass of unicode, return a true unicode object
+		   with the same string value. */
+		v = PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj),
+					  PyUnicode_GET_SIZE(obj));
+	    }
+	    goto done;
 	}
-	obj = PyEval_CallObject(func, NULL);
-	Py_DECREF(func);
-	if (obj == NULL)
-	    return NULL;
-	owned = 1;
-    }
-    if (PyUnicode_Check(obj)) {
-	if (encoding) {
-            PyErr_SetString(PyExc_TypeError,
-			    "decoding Unicode is not supported");
-            return NULL;
+	else if (PyString_Check(obj)) {
+	    s = PyString_AS_STRING(obj);
+	    len = PyString_GET_SIZE(obj);
+	    break;
 	}
-        if (PyUnicode_CheckExact(obj)) {
-	    Py_INCREF(obj);
-            v = obj;
+	else {
+	    PyObject *w;
+
+	    /* Try char buffer interface */
+            if (PyObject_AsCharBuffer(obj, &s, &len))
+		PyErr_Clear();
+	    else
+		break;
+    
+	    /* Mimic the behaviour of str(object) if everything else
+    	       fails (see PyObject_Str()); this also covers instances
+    	       which implement __str__. */
+	    if (obj->ob_type->tp_str == NULL)
+		w = PyObject_Repr(obj);
+	    else
+		w = (*obj->ob_type->tp_str)(obj);
+	    if (w == NULL)
+		goto onError;
+	    if (owned) {
+		Py_DECREF(obj);
+	    }
+	    obj = w;
+	    owned = 1;
 	}
-        else {
-            /* For a subclass of unicode, return a true unicode object
-               with the same string value. */
-            v = PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj),
-                                      PyUnicode_GET_SIZE(obj));
-        }
-	goto done;
     }
-    else if (PyString_Check(obj)) {
-	s = PyString_AS_STRING(obj);
-	len = PyString_GET_SIZE(obj);
-    }
-    else if (PyObject_AsCharBuffer(obj, &s, &len)) {
-	/* Overwrite the error message with something more useful in
-	   case of a TypeError. */
-	if (PyErr_ExceptionMatches(PyExc_TypeError))
-	    PyErr_Format(PyExc_TypeError,
-			 "coercing to Unicode: need string or buffer, "
-			 "%.80s found",
-			 obj->ob_type->tp_name);
+
+    if (s == NULL) {
+	PyErr_Format(PyExc_TypeError,
+		     "coercing to Unicode: __str__ recursion limit exceeded "
+		     "(last type: %.80s)",
+		     obj->ob_type->tp_name);
 	goto onError;
     }
-
+    
     /* Convert to Unicode */
     if (len == 0) {
 	Py_INCREF(unicode_empty);
author	Marc-André Lemburg <mal@egenix.com>	2001-09-20 12:53:16 (GMT)
committer	Marc-André Lemburg <mal@egenix.com>	2001-09-20 12:53:16 (GMT)
commit	6871f6ac57baca19facf5b49846b101c60ef3334 (patch)
tree	033ba35ab876fc889c2691704e3d7e651ae57fa5
parent	c60e6f777114f43c64f1b83f9ad2b6e4efd220e7 (diff)
download	cpython-6871f6ac57baca19facf5b49846b101c60ef3334.zip cpython-6871f6ac57baca19facf5b49846b101c60ef3334.tar.gz cpython-6871f6ac57baca19facf5b49846b101c60ef3334.tar.bz2