From ad7c98e264bbc9c84e911417c8770f6e95ffb794 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lemburg?= Date: Wed, 17 Jan 2001 17:09:53 +0000 Subject: This patch adds a new builtin unistr() which behaves like str() except that it always returns Unicode objects. A new C API PyObject_Unicode() is also provided. This closes patch #101664. Written by Marc-Andre Lemburg. Copyright assigned to Guido van Rossum. --- Doc/api/api.tex | 20 +++++++++++++------ Doc/lib/libfuncs.tex | 6 ++++++ Include/abstract.h | 12 +++++++++++ Include/object.h | 1 + Lib/test/output/test_builtin | 1 + Lib/test/test_b2.py | 11 +++++++++++ Misc/NEWS | 8 ++++++++ Objects/object.c | 47 ++++++++++++++++++++++++++++++++++++++++++++ Objects/unicodeobject.c | 1 + Python/bltinmodule.c | 18 +++++++++++++++++ 10 files changed, 119 insertions(+), 6 deletions(-) diff --git a/Doc/api/api.tex b/Doc/api/api.tex index e81edab..da26364 100644 --- a/Doc/api/api.tex +++ b/Doc/api/api.tex @@ -4,7 +4,7 @@ \input{boilerplate} -\makeindex % tell \index to actually write the .idx file +\makeindex % tell \index to actually write the .idx file \begin{document} @@ -1476,6 +1476,14 @@ by the \keyword{print} statement. \end{cfuncdesc} +\begin{cfuncdesc}{PyObject*}{PyObject_Unicode}{PyObject *o} +Compute a Unicode string representation of object \var{o}. Returns the +Unicode string representation on success, \NULL{} on failure. This is +the equivalent of the Python expression \samp{unistr(\var{o})}. +Called by the \function{unistr()}\bifuncindex{unistr} built-in function. +\end{cfuncdesc} + + \begin{cfuncdesc}{int}{PyCallable_Check}{PyObject *o} Determine if the object \var{o} is callable. Return \code{1} if the object is callable and \code{0} otherwise. @@ -3780,14 +3788,14 @@ Returns true if its argument is a \ctype{PyCObject}. 
\end{cfuncdesc} \begin{cfuncdesc}{PyObject*}{PyCObject_FromVoidPtr}{void* cobj, - void (*destr)(void *)} + void (*destr)(void *)} Creates a \ctype{PyCObject} from the \code{void *}\var{cobj}. The \var{destr} function will be called when the object is reclaimed, unless it is \NULL. \end{cfuncdesc} \begin{cfuncdesc}{PyObject*}{PyCObject_FromVoidPtrAndDesc}{void* cobj, - void* desc, void (*destr)(void *, void *) } + void* desc, void (*destr)(void *, void *) } Creates a \ctype{PyCObject} from the \ctype{void *}\var{cobj}. The \var{destr} function will be called when the object is reclaimed. The \var{desc} argument can be used to pass extra callback data for the @@ -4661,11 +4669,11 @@ implementing new object types in C. \end{cfuncdesc} \begin{cfuncdesc}{PyObject*}{PyObject_Init}{PyObject *op, - PyTypeObject *type} + PyTypeObject *type} \end{cfuncdesc} \begin{cfuncdesc}{PyVarObject*}{PyObject_InitVar}{PyVarObject *op, - PyTypeObject *type, int size} + PyTypeObject *type, int size} \end{cfuncdesc} \begin{cfuncdesc}{\var{TYPE}*}{PyObject_New}{TYPE, PyTypeObject *type} @@ -4909,6 +4917,6 @@ The function cannot fail. \chapter{Reporting Bugs} \input{reportingbugs} -\input{api.ind} % Index -- must be last +\input{api.ind} % Index -- must be last \end{document} diff --git a/Doc/lib/libfuncs.tex b/Doc/lib/libfuncs.tex index 2f8dcf9..6b8d64c 100644 --- a/Doc/lib/libfuncs.tex +++ b/Doc/lib/libfuncs.tex @@ -694,6 +694,12 @@ to decode UTF-8 in strict mode, meaning that encoding errors raise \versionadded{2.0} \end{funcdesc} +\begin{funcdesc}{unistr}{object} +Return a Unicode string containing a nicely printable representation of an +object. For Unicode, this returns the Unicode string itself. For +all other objects, it tries to convert \code{str(\var{object})} to Unicode. +\end{funcdesc} + \begin{funcdesc}{vars}{\optional{object}} Without arguments, return a dictionary corresponding to the current local symbol table. 
With a module, class or class instance object as diff --git a/Include/abstract.h b/Include/abstract.h index 9c18bbd..2c0e735 100644 --- a/Include/abstract.h +++ b/Include/abstract.h @@ -271,6 +271,18 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/ */ + /* Implemented elsewhere: + + PyObject *PyObject_Unicode(PyObject *o); + + Compute the unicode representation of object, o. Returns the + unicode representation on success, NULL on failure. This is + the equivalent of the Python expression: unistr(o). + + Called by the unistr() built-in function. + + */ + DL_IMPORT(int) PyCallable_Check(PyObject *o); /* diff --git a/Include/object.h b/Include/object.h index 0ebe805..515ce12 100644 --- a/Include/object.h +++ b/Include/object.h @@ -266,6 +266,7 @@ extern DL_IMPORT(PyTypeObject) PyType_Type; /* The type of type objects */ extern DL_IMPORT(int) PyObject_Print(PyObject *, FILE *, int); extern DL_IMPORT(PyObject *) PyObject_Repr(PyObject *); extern DL_IMPORT(PyObject *) PyObject_Str(PyObject *); +extern DL_IMPORT(PyObject *) PyObject_Unicode(PyObject *); extern DL_IMPORT(int) PyObject_Compare(PyObject *, PyObject *); extern DL_IMPORT(PyObject *) PyObject_RichCompare(PyObject *, PyObject *, int); extern DL_IMPORT(int) PyObject_RichCompareBool(PyObject *, PyObject *, int); diff --git a/Lib/test/output/test_builtin b/Lib/test/output/test_builtin index 1c3b69c..29c6f86 100644 --- a/Lib/test/output/test_builtin +++ b/Lib/test/output/test_builtin @@ -45,6 +45,7 @@ repr round setattr str +unistr tuple type vars diff --git a/Lib/test/test_b2.py b/Lib/test/test_b2.py index 5546d8a..9871652 100644 --- a/Lib/test/test_b2.py +++ b/Lib/test/test_b2.py @@ -214,6 +214,17 @@ if str(()) != '()': raise TestFailed, 'str(())' if str([]) != '[]': raise TestFailed, 'str([])' if str({}) != '{}': raise TestFailed, 'str({})' +print 'unistr' +if unistr('') <> u'': raise TestFailed, 'unistr(\'\')' +if unistr('a') <> u'a': raise TestFailed, 'unistr(\'a\')' +if 
unistr(u'') <> u'': raise TestFailed, 'unistr(u\'\')' +if unistr(u'a') <> u'a': raise TestFailed, 'unistr(u\'a\')' +if unistr(0) <> u'0': raise TestFailed, 'unistr(0)' +if unistr(0L) <> u'0': raise TestFailed, 'unistr(0L)' +if unistr(()) <> u'()': raise TestFailed, 'unistr(())' +if unistr([]) <> u'[]': raise TestFailed, 'unistr([])' +if unistr({}) <> u'{}': raise TestFailed, 'unistr({})' + print 'tuple' if tuple(()) != (): raise TestFailed, 'tuple(())' if tuple((0, 1, 2, 3)) != (0, 1, 2, 3): raise TestFailed, 'tuple((0, 1, 2, 3))' diff --git a/Misc/NEWS b/Misc/NEWS index 15f4513..27355ed 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -3,6 +3,14 @@ What's New in Python 2.1 alpha 1? Core language, builtins, and interpreter +- There is a new Unicode companion to the builtin str() function + called unistr(). Like str(), it calls either the tp_str slot of + objects or the "__str__" method and converts the returned value + to a Unicode object (if necessary). + + unistr() is complemented by a new PyObject_Unicode() C API + which behaves in the same way. + - The comparison operators support "rich comparison overloading" (PEP 207). C extension types can provide a rich comparison function in the new tp_richcompare slot in the type object. 
The cmp() function diff --git a/Objects/object.c b/Objects/object.c index 3cad241..20950c1 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -568,6 +568,61 @@ get_inprogress_dict(void) return inprogress; } +/* Compute a Unicode representation of v. Returns a new reference, or NULL + on failure. A Unicode v is returned unchanged and a string v is converted + directly; any other object goes through its tp_str slot or, lacking one, + an instance's __str__ method or PyObject_Repr(), and a non-Unicode result + is converted with PyUnicode_FromObject(). A NULL v is treated as "". */ +PyObject * +PyObject_Unicode(PyObject *v) +{ + PyObject *res; + + if (v == NULL) + res = PyString_FromString(""); + else if (PyUnicode_Check(v)) { + Py_INCREF(v); + return v; + } + else if (PyString_Check(v)) { + /* res is DECREF'ed after the conversion below, so own a reference */ + Py_INCREF(v); + res = v; + } + else if (v->ob_type->tp_str != NULL) + res = (*v->ob_type->tp_str)(v); + else { + PyObject *func; + static PyObject *strstr; + if (strstr == NULL) { + strstr= PyString_InternFromString("__str__"); + if (strstr == NULL) + return NULL; + } + if (!PyInstance_Check(v) || + (func = PyObject_GetAttr(v, strstr)) == NULL) { + PyErr_Clear(); + res = PyObject_Repr(v); + } + else { + res = PyEval_CallObject(func, (PyObject *)NULL); + Py_DECREF(func); + } + } + if (res == NULL) + return NULL; + if (!PyUnicode_Check(res)) { + PyObject* str; + str = PyUnicode_FromObject(res); + Py_DECREF(res); + if (str) + res = str; + else + return NULL; + } + return res; +} + static PyObject * make_pair(PyObject *v, PyObject *w) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index a3678d5..c1f3d54 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -413,6 +413,7 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj, } else v = PyUnicode_Decode(s, len, encoding, errors); + done: if (owned) { Py_DECREF(obj); diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 683eec0..3acd0e2 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -1928,6 +1928,23 @@ If the argument is a string, the return value is the same object."; static PyObject * +builtin_unistr(PyObject *self, PyObject *args) +{ + PyObject *v; + + if (!PyArg_ParseTuple(args, "O:unistr", &v)) + return NULL; + return PyObject_Unicode(v); +} + +static char unistr_doc[] = +"unistr(object) -> unicode\n\ +\n\ +Return a nice unicode representation of the 
object.\n\ +If the argument is a unicode, the return value is the same object."; + + +static PyObject * builtin_tuple(PyObject *self, PyObject *args) { PyObject *v; @@ -2242,6 +2259,7 @@ static PyMethodDef builtin_methods[] = { {"type", builtin_type, 1, type_doc}, {"unicode", builtin_unicode, 1, unicode_doc}, {"unichr", builtin_unichr, 1, unichr_doc}, + {"unistr", builtin_unistr, 1, unistr_doc}, {"vars", builtin_vars, 1, vars_doc}, {"xrange", builtin_xrange, 1, xrange_doc}, {"zip", builtin_zip, 1, zip_doc}, -- cgit v0.12