summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Marc-André Lemburg <mal@egenix.com> 2001-09-20 12:53:16 (GMT)
committer: Marc-André Lemburg <mal@egenix.com> 2001-09-20 12:53:16 (GMT)
commit: 6871f6ac57baca19facf5b49846b101c60ef3334 (patch)
tree: 033ba35ab876fc889c2691704e3d7e651ae57fa5
parent: c60e6f777114f43c64f1b83f9ad2b6e4efd220e7 (diff)
download: cpython-6871f6ac57baca19facf5b49846b101c60ef3334.zip
download: cpython-6871f6ac57baca19facf5b49846b101c60ef3334.tar.gz
download: cpython-6871f6ac57baca19facf5b49846b101c60ef3334.tar.bz2
Implement the changes proposed in patch #413333.
unicode(obj) now works just like str(obj): if it finds that the object is not a string or buffer, it tries __str__/tp_str on the object.
-rw-r--r-- Lib/test/test_unicode.py | 5
-rw-r--r-- Objects/unicodeobject.c | 97
2 files changed, 60 insertions, 42 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index d57328d..d508bef 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -429,6 +429,7 @@ verify(unicode('hello','utf-8') == u'hello')
verify(unicode('hello','utf8') == u'hello')
verify(unicode('hello','latin-1') == u'hello')
+# Compatibility to str():
class String:
x = ''
def __str__(self):
@@ -444,6 +445,10 @@ o.x = u'abc'
verify(unicode(o) == u'abc')
verify(str(o) == 'abc')
+for obj in (123, 123.45, 123L):
+ verify(unicode(obj) == unicode(str(obj)))
+
+# Error handling
try:
u'Andr\202 x'.encode('ascii')
u'Andr\202 x'.encode('ascii','strict')
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 50f2f5c..896e80f 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -398,10 +398,11 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
const char *encoding,
const char *errors)
{
- const char *s;
+ const char *s = NULL;
int len;
int owned = 0;
PyObject *v;
+ int reclevel;
if (obj == NULL) {
PyErr_BadInternalCall();
@@ -409,53 +410,65 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
}
/* Coerce object */
- if (PyInstance_Check(obj)) {
- PyObject *func;
- func = PyObject_GetAttrString(obj, "__str__");
- if (func == NULL) {
- PyErr_SetString(PyExc_TypeError,
- "coercing to Unicode: instance doesn't define __str__");
- return NULL;
+ for (reclevel = 0; reclevel < 2; reclevel++) {
+
+ if (PyUnicode_Check(obj)) {
+ if (encoding) {
+ PyErr_SetString(PyExc_TypeError,
+ "decoding Unicode is not supported");
+ goto onError;
+ }
+ if (PyUnicode_CheckExact(obj)) {
+ Py_INCREF(obj);
+ v = obj;
+ }
+ else {
+ /* For a subclass of unicode, return a true unicode object
+ with the same string value. */
+ v = PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj),
+ PyUnicode_GET_SIZE(obj));
+ }
+ goto done;
}
- obj = PyEval_CallObject(func, NULL);
- Py_DECREF(func);
- if (obj == NULL)
- return NULL;
- owned = 1;
- }
- if (PyUnicode_Check(obj)) {
- if (encoding) {
- PyErr_SetString(PyExc_TypeError,
- "decoding Unicode is not supported");
- return NULL;
+ else if (PyString_Check(obj)) {
+ s = PyString_AS_STRING(obj);
+ len = PyString_GET_SIZE(obj);
+ break;
}
- if (PyUnicode_CheckExact(obj)) {
- Py_INCREF(obj);
- v = obj;
+ else {
+ PyObject *w;
+
+ /* Try char buffer interface */
+ if (PyObject_AsCharBuffer(obj, &s, &len))
+ PyErr_Clear();
+ else
+ break;
+
+ /* Mimic the behaviour of str(object) if everything else
+ fails (see PyObject_Str()); this also covers instances
+ which implement __str__. */
+ if (obj->ob_type->tp_str == NULL)
+ w = PyObject_Repr(obj);
+ else
+ w = (*obj->ob_type->tp_str)(obj);
+ if (w == NULL)
+ goto onError;
+ if (owned) {
+ Py_DECREF(obj);
+ }
+ obj = w;
+ owned = 1;
}
- else {
- /* For a subclass of unicode, return a true unicode object
- with the same string value. */
- v = PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj),
- PyUnicode_GET_SIZE(obj));
- }
- goto done;
}
- else if (PyString_Check(obj)) {
- s = PyString_AS_STRING(obj);
- len = PyString_GET_SIZE(obj);
- }
- else if (PyObject_AsCharBuffer(obj, &s, &len)) {
- /* Overwrite the error message with something more useful in
- case of a TypeError. */
- if (PyErr_ExceptionMatches(PyExc_TypeError))
- PyErr_Format(PyExc_TypeError,
- "coercing to Unicode: need string or buffer, "
- "%.80s found",
- obj->ob_type->tp_name);
+
+ if (s == NULL) {
+ PyErr_Format(PyExc_TypeError,
+ "coercing to Unicode: __str__ recursion limit exceeded "
+ "(last type: %.80s)",
+ obj->ob_type->tp_name);
goto onError;
}
-
+
/* Convert to Unicode */
if (len == 0) {
Py_INCREF(unicode_empty);