diff options
author | Guido van Rossum <guido@python.org> | 2001-10-19 02:01:31 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2001-10-19 02:01:31 (GMT) |
commit | b8c65bc27ffc61c659180c351d3cc283abd1be45 (patch) | |
tree | 45f9f5c11d6ea41fa7e78aaab2b9531ec1f9cf92 /Objects/object.c | |
parent | 93505a2f2b468bed3ec2ba013f07ee063cf618e8 (diff) | |
download | cpython-b8c65bc27ffc61c659180c351d3cc283abd1be45.zip cpython-b8c65bc27ffc61c659180c351d3cc283abd1be45.tar.gz cpython-b8c65bc27ffc61c659180c351d3cc283abd1be45.tar.bz2 |
SF patch #470578: Fixes to synchronize unicode() and str()
This patch implements what we have discussed on python-dev late in
September: str(obj) and unicode(obj) should behave similarly, while
the old behaviour is retained for unicode(obj, encoding, errors).
The patch also adds a new feature with which objects can provide
unicode(obj) with input data: the __unicode__ method. Currently no
new tp_unicode slot is implemented; this is left as an option for the
future.
Note that PyUnicode_FromEncodedObject() no longer accepts Unicode
objects as input. The API name already suggests that Unicode
objects do not belong in the list of acceptable objects and the
functionality was only needed because
PyUnicode_FromEncodedObject() was being used directly by
unicode(). The latter was changed in the discussed way:
* unicode(obj) calls PyObject_Unicode()
* unicode(obj, encoding, errors) calls PyUnicode_FromEncodedObject()
One thing left open to discussion is whether to leave the
PyUnicode_FromObject() API as a thin API extension on top of
PyUnicode_FromEncodedObject() or to turn it into a (macro) alias
for PyObject_Unicode() and deprecate it. Doing so would have some
surprising consequences though, e.g. u"abc" + 123 would turn out
as u"abc123"...
[Marc-Andre didn't have time to check this in before the deadline. I
hope this is OK, Marc-Andre! You can still make changes and commit
them on the trunk after the branch has been made, but then please mail
Barry a context diff if you want the change to be merged into the
2.2b1 release branch. GvR]
Diffstat (limited to 'Objects/object.c')
-rw-r--r-- | Objects/object.c | 43 |
1 files changed, 27 insertions, 16 deletions
diff --git a/Objects/object.c b/Objects/object.c index af0c0bb..aa5f87c 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -296,39 +296,50 @@ PyObject_Unicode(PyObject *v) if (v == NULL) res = PyString_FromString("<NULL>"); - else if (PyUnicode_Check(v)) { + if (PyUnicode_CheckExact(v)) { Py_INCREF(v); return v; } - else if (PyString_Check(v)) { + if (PyUnicode_Check(v)) { + /* For a Unicode subtype that's not a Unicode object, + return a true Unicode object with the same data. */ + return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(v), + PyUnicode_GET_SIZE(v)); + } + if (PyString_Check(v)) { Py_INCREF(v); res = v; } - else if (v->ob_type->tp_str != NULL) - res = (*v->ob_type->tp_str)(v); else { PyObject *func; - static PyObject *strstr; - if (strstr == NULL) { - strstr= PyString_InternFromString("__str__"); - if (strstr == NULL) + static PyObject *unicodestr; + /* XXX As soon as we have a tp_unicode slot, we should + check this before trying the __unicode__ + method. */ + if (unicodestr == NULL) { + unicodestr= PyString_InternFromString( + "__unicode__"); + if (unicodestr == NULL) return NULL; } - if (!PyInstance_Check(v) || - (func = PyObject_GetAttr(v, strstr)) == NULL) { - PyErr_Clear(); - res = PyObject_Repr(v); - } - else { + func = PyObject_GetAttr(v, unicodestr); + if (func != NULL) { res = PyEval_CallObject(func, (PyObject *)NULL); Py_DECREF(func); } + else { + PyErr_Clear(); + if (v->ob_type->tp_str != NULL) + res = (*v->ob_type->tp_str)(v); + else + res = PyObject_Repr(v); + } } if (res == NULL) return NULL; if (!PyUnicode_Check(res)) { - PyObject* str; - str = PyUnicode_FromObject(res); + PyObject *str; + str = PyUnicode_FromEncodedObject(res, NULL, "strict"); Py_DECREF(res); if (str) res = str; |