summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Include/unicodeobject.h19
-rw-r--r--Objects/object.c43
-rw-r--r--Objects/unicodeobject.c86
3 files changed, 76 insertions, 72 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 41feae2..368a212 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -454,14 +454,12 @@ extern DL_IMPORT(int) PyUnicode_Resize(
Coercion is done in the following way:
- 1. Unicode objects are passed back as-is with incremented
- refcount.
-
- 2. String and other char buffer compatible objects are decoded
+ 1. String and other char buffer compatible objects are decoded
under the assumptions that they contain data using the current
default encoding. Decoding is done in "strict" mode.
- 3. All other objects raise an exception.
+ 2. All other objects (including Unicode objects) raise an
+ exception.
The API returns NULL in case of an error. The caller is responsible
for decref'ing the returned objects.
@@ -474,12 +472,13 @@ extern DL_IMPORT(PyObject*) PyUnicode_FromEncodedObject(
const char *errors /* error handling */
);
-/* Shortcut for PyUnicode_FromEncodedObject(obj, NULL, "strict");
- which results in using the default encoding as basis for
- decoding the object.
-
- Coerces obj to an Unicode object and return a reference with
+/* Coerce obj to a Unicode object and return a reference with
*incremented* refcount.
+
+ Unicode objects are passed back as-is (subclasses are converted to
+ true Unicode objects), all other objects are delegated to
+ PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
+ using the default encoding as basis for decoding the object.
The API returns NULL in case of an error. The caller is responsible
for decref'ing the returned objects.
diff --git a/Objects/object.c b/Objects/object.c
index af0c0bb..aa5f87c 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -296,39 +296,50 @@ PyObject_Unicode(PyObject *v)
if (v == NULL)
res = PyString_FromString("<NULL>");
- else if (PyUnicode_Check(v)) {
+ if (PyUnicode_CheckExact(v)) {
Py_INCREF(v);
return v;
}
- else if (PyString_Check(v)) {
+ if (PyUnicode_Check(v)) {
+ /* For a Unicode subtype that's not a Unicode object,
+ return a true Unicode object with the same data. */
+ return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(v),
+ PyUnicode_GET_SIZE(v));
+ }
+ if (PyString_Check(v)) {
Py_INCREF(v);
res = v;
}
- else if (v->ob_type->tp_str != NULL)
- res = (*v->ob_type->tp_str)(v);
else {
PyObject *func;
- static PyObject *strstr;
- if (strstr == NULL) {
- strstr= PyString_InternFromString("__str__");
- if (strstr == NULL)
+ static PyObject *unicodestr;
+ /* XXX As soon as we have a tp_unicode slot, we should
+ check this before trying the __unicode__
+ method. */
+ if (unicodestr == NULL) {
+ unicodestr= PyString_InternFromString(
+ "__unicode__");
+ if (unicodestr == NULL)
return NULL;
}
- if (!PyInstance_Check(v) ||
- (func = PyObject_GetAttr(v, strstr)) == NULL) {
- PyErr_Clear();
- res = PyObject_Repr(v);
- }
- else {
+ func = PyObject_GetAttr(v, unicodestr);
+ if (func != NULL) {
res = PyEval_CallObject(func, (PyObject *)NULL);
Py_DECREF(func);
}
+ else {
+ PyErr_Clear();
+ if (v->ob_type->tp_str != NULL)
+ res = (*v->ob_type->tp_str)(v);
+ else
+ res = PyObject_Repr(v);
+ }
}
if (res == NULL)
return NULL;
if (!PyUnicode_Check(res)) {
- PyObject* str;
- str = PyUnicode_FromObject(res);
+ PyObject *str;
+ str = PyUnicode_FromEncodedObject(res, NULL, "strict");
Py_DECREF(res);
if (str)
res = str;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index a252587..a29c75b 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -395,6 +395,18 @@ int PyUnicode_AsWideChar(PyUnicodeObject *unicode,
PyObject *PyUnicode_FromObject(register PyObject *obj)
{
+ /* XXX Perhaps we should make this API an alias of
+ PyObject_Unicode() instead ?! */
+ if (PyUnicode_CheckExact(obj)) {
+ Py_INCREF(obj);
+ return obj;
+ }
+ if (PyUnicode_Check(obj)) {
+ /* For a Unicode subtype that's not a Unicode object,
+ return a true Unicode object with the same data. */
+ return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj),
+ PyUnicode_GET_SIZE(obj));
+ }
return PyUnicode_FromEncodedObject(obj, NULL, "strict");
}
@@ -406,69 +418,49 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
int len;
int owned = 0;
PyObject *v;
- int reclevel;
if (obj == NULL) {
PyErr_BadInternalCall();
return NULL;
}
- /* Coerce object */
- for (reclevel = 0; reclevel < 2; reclevel++) {
+#if 0
+ /* For b/w compatibility we also accept Unicode objects provided
+ that no encoding is given and then redirect to PyObject_Unicode()
+ which then applies the additional logic for Unicode subclasses.
+
+ NOTE: This API should really only be used for objects which
+ represent *encoded* Unicode !
+ */
if (PyUnicode_Check(obj)) {
if (encoding) {
PyErr_SetString(PyExc_TypeError,
"decoding Unicode is not supported");
- goto onError;
- }
- if (PyUnicode_CheckExact(obj)) {
- Py_INCREF(obj);
- v = obj;
+ return NULL;
}
- else {
- /* For a subclass of unicode, return a true unicode object
- with the same string value. */
- v = PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj),
- PyUnicode_GET_SIZE(obj));
+ return PyObject_Unicode(obj);
}
- goto done;
+#else
+ if (PyUnicode_Check(obj)) {
+ PyErr_SetString(PyExc_TypeError,
+ "decoding Unicode is not supported");
+ return NULL;
}
- else if (PyString_Check(obj)) {
+#endif
+
+ /* Coerce object */
+ if (PyString_Check(obj)) {
s = PyString_AS_STRING(obj);
len = PyString_GET_SIZE(obj);
- break;
- }
- else {
- PyObject *w;
-
- /* Try char buffer interface */
- if (PyObject_AsCharBuffer(obj, &s, &len))
- PyErr_Clear();
- else
- break;
-
- /* Mimic the behaviour of str(object) if everything else
- fails (see PyObject_Str()); this also covers instances
- which implement __str__. */
- if (obj->ob_type->tp_str == NULL)
- w = PyObject_Repr(obj);
- else
- w = (*obj->ob_type->tp_str)(obj);
- if (w == NULL)
- goto onError;
- if (owned) {
- Py_DECREF(obj);
}
- obj = w;
- owned = 1;
- }
- }
-
- if (s == NULL) {
+ else if (PyObject_AsCharBuffer(obj, &s, &len)) {
+ /* Overwrite the error message with something more useful in
+ case of a TypeError. */
+ if (PyErr_ExceptionMatches(PyExc_TypeError))
PyErr_Format(PyExc_TypeError,
- "coercing to Unicode: __str__ recursion limit exceeded "
- "(last type: %.80s)",
+ "coercing to Unicode: need string or buffer, "
+ "%.80s found",
obj->ob_type->tp_name);
goto onError;
}
@@ -481,7 +473,6 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
else
v = PyUnicode_Decode(s, len, encoding, errors);
- done:
if (owned) {
Py_DECREF(obj);
}
@@ -5653,6 +5644,9 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return NULL;
if (x == NULL)
return (PyObject *)_PyUnicode_New(0);
+ if (encoding == NULL && errors == NULL)
+ return PyObject_Unicode(x);
+ else
return PyUnicode_FromEncodedObject(x, encoding, errors);
}