diff options
-rw-r--r-- | Include/unicodeobject.h | 19 | ||||
-rw-r--r-- | Objects/object.c | 43 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 86 |
3 files changed, 76 insertions, 72 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 41feae2..368a212 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -454,14 +454,12 @@ extern DL_IMPORT(int) PyUnicode_Resize( Coercion is done in the following way: - 1. Unicode objects are passed back as-is with incremented - refcount. - - 2. String and other char buffer compatible objects are decoded + 1. String and other char buffer compatible objects are decoded under the assumptions that they contain data using the current default encoding. Decoding is done in "strict" mode. - 3. All other objects raise an exception. + 2. All other objects (including Unicode objects) raise an + exception. The API returns NULL in case of an error. The caller is responsible for decref'ing the returned objects. @@ -474,12 +472,13 @@ extern DL_IMPORT(PyObject*) PyUnicode_FromEncodedObject( const char *errors /* error handling */ ); -/* Shortcut for PyUnicode_FromEncodedObject(obj, NULL, "strict"); - which results in using the default encoding as basis for - decoding the object. - - Coerces obj to an Unicode object and return a reference with +/* Coerce obj to an Unicode object and return a reference with *incremented* refcount. + + Unicode objects are passed back as-is (subclasses are converted to + true Unicode objects), all other objects are delegated to + PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in + using the default encoding as basis for decoding the object. The API returns NULL in case of an error. The caller is responsible for decref'ing the returned objects. diff --git a/Objects/object.c b/Objects/object.c index af0c0bb..aa5f87c 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -296,39 +296,50 @@ PyObject_Unicode(PyObject *v) if (v == NULL) res = PyString_FromString("<NULL>"); - else if (PyUnicode_Check(v)) { + if (PyUnicode_CheckExact(v)) { Py_INCREF(v); return v; } - else if (PyString_Check(v)) { + if (PyUnicode_Check(v)) { + /* For a Unicode subtype that's not a Unicode object, + return a true Unicode object with the same data. */ + return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(v), + PyUnicode_GET_SIZE(v)); + } + if (PyString_Check(v)) { Py_INCREF(v); res = v; } - else if (v->ob_type->tp_str != NULL) - res = (*v->ob_type->tp_str)(v); else { PyObject *func; - static PyObject *strstr; - if (strstr == NULL) { - strstr= PyString_InternFromString("__str__"); - if (strstr == NULL) + static PyObject *unicodestr; + /* XXX As soon as we have a tp_unicode slot, we should + check this before trying the __unicode__ + method. */ + if (unicodestr == NULL) { + unicodestr= PyString_InternFromString( + "__unicode__"); + if (unicodestr == NULL) return NULL; } - if (!PyInstance_Check(v) || - (func = PyObject_GetAttr(v, strstr)) == NULL) { - PyErr_Clear(); - res = PyObject_Repr(v); - } - else { + func = PyObject_GetAttr(v, unicodestr); + if (func != NULL) { res = PyEval_CallObject(func, (PyObject *)NULL); Py_DECREF(func); } + else { + PyErr_Clear(); + if (v->ob_type->tp_str != NULL) + res = (*v->ob_type->tp_str)(v); + else + res = PyObject_Repr(v); + } } if (res == NULL) return NULL; if (!PyUnicode_Check(res)) { - PyObject* str; - str = PyUnicode_FromObject(res); + PyObject *str; + str = PyUnicode_FromEncodedObject(res, NULL, "strict"); Py_DECREF(res); if (str) res = str; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index a252587..a29c75b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -395,6 +395,18 @@ int PyUnicode_AsWideChar(PyUnicodeObject *unicode, PyObject *PyUnicode_FromObject(register PyObject *obj) { + /* XXX Perhaps we should make this API an alias of + PyObject_Unicode() instead ?! */ + if (PyUnicode_CheckExact(obj)) { + Py_INCREF(obj); + return obj; + } + if (PyUnicode_Check(obj)) { + /* For a Unicode subtype that's not a Unicode object, + return a true Unicode object with the same data. */ + return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj), + PyUnicode_GET_SIZE(obj)); + } return PyUnicode_FromEncodedObject(obj, NULL, "strict"); } @@ -406,69 +418,49 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj, int len; int owned = 0; PyObject *v; - int reclevel; if (obj == NULL) { PyErr_BadInternalCall(); return NULL; } - /* Coerce object */ - for (reclevel = 0; reclevel < 2; reclevel++) { +#if 0 + /* For b/w compatibility we also accept Unicode objects provided + that no encodings is given and then redirect to PyObject_Unicode() + which then applies the additional logic for Unicode subclasses. + + NOTE: This API should really only be used for object which + represent *encoded* Unicode ! + */ if (PyUnicode_Check(obj)) { if (encoding) { PyErr_SetString(PyExc_TypeError, "decoding Unicode is not supported"); - goto onError; - } - if (PyUnicode_CheckExact(obj)) { - Py_INCREF(obj); - v = obj; + return NULL; } - else { - /* For a subclass of unicode, return a true unicode object - with the same string value. */ - v = PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj), - PyUnicode_GET_SIZE(obj)); + return PyObject_Unicode(obj); } - goto done; +#else + if (PyUnicode_Check(obj)) { + PyErr_SetString(PyExc_TypeError, + "decoding Unicode is not supported"); + return NULL; } - else if (PyString_Check(obj)) { +#endif + + /* Coerce object */ + if (PyString_Check(obj)) { s = PyString_AS_STRING(obj); len = PyString_GET_SIZE(obj); - break; - } - else { - PyObject *w; - - /* Try char buffer interface */ - if (PyObject_AsCharBuffer(obj, &s, &len)) - PyErr_Clear(); - else - break; - - /* Mimic the behaviour of str(object) if everything else - fails (see PyObject_Str()); this also covers instances - which implement __str__. */ - if (obj->ob_type->tp_str == NULL) - w = PyObject_Repr(obj); - else - w = (*obj->ob_type->tp_str)(obj); - if (w == NULL) - goto onError; - if (owned) { - Py_DECREF(obj); } - obj = w; - owned = 1; - } - } - - if (s == NULL) { + else if (PyObject_AsCharBuffer(obj, &s, &len)) { + /* Overwrite the error message with something more useful in + case of a TypeError. */ + if (PyErr_ExceptionMatches(PyExc_TypeError)) PyErr_Format(PyExc_TypeError, - "coercing to Unicode: __str__ recursion limit exceeded " - "(last type: %.80s)", + "coercing to Unicode: need string or buffer, " + "%.80s found", obj->ob_type->tp_name); goto onError; } @@ -481,7 +473,6 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj, else v = PyUnicode_Decode(s, len, encoding, errors); - done: if (owned) { Py_DECREF(obj); } @@ -5653,6 +5644,9 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; if (x == NULL) return (PyObject *)_PyUnicode_New(0); + if (encoding == NULL && errors == NULL) + return PyObject_Unicode(x); + else return PyUnicode_FromEncodedObject(x, encoding, errors); } |