diff options
author | Marc-André Lemburg <mal@egenix.com> | 2006-08-14 10:55:19 (GMT) |
---|---|---|
committer | Marc-André Lemburg <mal@egenix.com> | 2006-08-14 10:55:19 (GMT) |
commit | 040f76b79c0ce86dc33b9c525fbcd84b2254e559 (patch) | |
tree | e907d6c112d52b1a92d7b98c63023ca338c9a188 | |
parent | e6dd31c50be76a5b57917226e16bdaa6ca20a28f (diff) | |
download | cpython-040f76b79c0ce86dc33b9c525fbcd84b2254e559.zip cpython-040f76b79c0ce86dc33b9c525fbcd84b2254e559.tar.gz cpython-040f76b79c0ce86dc33b9c525fbcd84b2254e559.tar.bz2 |
Slightly revised version of patch #1538956:
Replace UnicodeDecodeErrors raised during == and !=
compares of Unicode and other objects with a new
UnicodeWarning.
All other comparisons continue to raise exceptions.
Exceptions other than UnicodeDecodeErrors are also left
untouched.
-rw-r--r-- | Doc/api/concrete.tex | 25 | ||||
-rw-r--r-- | Doc/api/exceptions.tex | 9 | ||||
-rw-r--r-- | Doc/lib/libexcs.tex | 5 | ||||
-rw-r--r-- | Doc/lib/libwarnings.tex | 3 | ||||
-rw-r--r-- | Include/pyerrors.h | 1 | ||||
-rw-r--r-- | Include/unicodeobject.h | 24 | ||||
-rw-r--r-- | Lib/test/exception_hierarchy.txt | 1 | ||||
-rw-r--r-- | Misc/NEWS | 26 | ||||
-rw-r--r-- | Objects/exceptions.c | 10 | ||||
-rw-r--r-- | Objects/object.c | 17 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 87 |
11 files changed, 171 insertions, 37 deletions
diff --git a/Doc/api/concrete.tex b/Doc/api/concrete.tex index 4c7487c..cd9d8d5 100644 --- a/Doc/api/concrete.tex +++ b/Doc/api/concrete.tex @@ -1560,6 +1560,31 @@ They all return \NULL{} or \code{-1} if an exception occurs. greater than, respectively. \end{cfuncdesc} +\begin{cfuncdesc}{int}{PyUnicode_RichCompare}{PyObject *left, + PyObject *right, + int op} + +% This entry could use some polishing - my TeX is too +% rusty these days... (MAL) + + Rich compare two strings and return one of the following: +\begin{verbatim} + - NULL in case an exception was raised + - Py_True or Py_False for successfuly comparisons + - Py_NotImplemented in case the type combination is unknown +\end{verbatim} + + Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in + case the conversion of the arguments to Unicode fails with a + UnicodeDecodeError. + + Possible values for \var{op}: +\begin{verbatim} + Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE +\end{verbatim} + +\end{cfuncdesc} + \begin{cfuncdesc}{PyObject*}{PyUnicode_Format}{PyObject *format, PyObject *args} Return a new string object from \var{format} and \var{args}; this diff --git a/Doc/api/exceptions.tex b/Doc/api/exceptions.tex index cb75d50..057c1da 100644 --- a/Doc/api/exceptions.tex +++ b/Doc/api/exceptions.tex @@ -288,10 +288,11 @@ for each thread. names are \samp{PyExc_} followed by the Python exception name. These have the type \ctype{PyObject*}; they are all class objects. Their names are \cdata{PyExc_Warning}, \cdata{PyExc_UserWarning}, - \cdata{PyExc_DeprecationWarning}, \cdata{PyExc_SyntaxWarning}, - \cdata{PyExc_RuntimeWarning}, and \cdata{PyExc_FutureWarning}. - \cdata{PyExc_Warning} is a subclass of \cdata{PyExc_Exception}; the - other warning categories are subclasses of \cdata{PyExc_Warning}. + \cdata{PyExc_UnicodeWarning}, \cdata{PyExc_DeprecationWarning}, + \cdata{PyExc_SyntaxWarning}, \cdata{PyExc_RuntimeWarning}, and + \cdata{PyExc_FutureWarning}. \cdata{PyExc_Warning} is a subclass of + \cdata{PyExc_Exception}; the other warning categories are subclasses + of \cdata{PyExc_Warning}. For information about warning control, see the documentation for the \module{warnings} module and the \programopt{-W} option in the diff --git a/Doc/lib/libexcs.tex b/Doc/lib/libexcs.tex index bef8bf1..6d2a3c5 100644 --- a/Doc/lib/libexcs.tex +++ b/Doc/lib/libexcs.tex @@ -456,6 +456,11 @@ Base class for warnings about probable mistakes in module imports. \versionadded{2.5} \end{excdesc} +\begin{excdesc}{UnicodeWarning} +Base class for warnings related to Unicode. +\versionadded{2.5} +\end{excdesc} + The class hierarchy for built-in exceptions is: \verbatiminput{../../Lib/test/exception_hierarchy.txt} diff --git a/Doc/lib/libwarnings.tex b/Doc/lib/libwarnings.tex index 08c0340..a37a9f5 100644 --- a/Doc/lib/libwarnings.tex +++ b/Doc/lib/libwarnings.tex @@ -76,6 +76,9 @@ features that will be deprecated in the future (ignored by default).} \lineii{ImportWarning}{Base category for warnings triggered during the process of importing a module (ignored by default).} + +\lineii{UnicodeWarning}{Base category for warnings related to Unicode.} + \end{tableii} While these are technically built-in exceptions, they are documented diff --git a/Include/pyerrors.h b/Include/pyerrors.h index ae1d990..9532e32 100644 --- a/Include/pyerrors.h +++ b/Include/pyerrors.h @@ -173,6 +173,7 @@ PyAPI_DATA(PyObject *) PyExc_SyntaxWarning; PyAPI_DATA(PyObject *) PyExc_RuntimeWarning; PyAPI_DATA(PyObject *) PyExc_FutureWarning; PyAPI_DATA(PyObject *) PyExc_ImportWarning; +PyAPI_DATA(PyObject *) PyExc_UnicodeWarning; /* Convenience functions */ diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index c7e07a8..33aa185 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -189,6 +189,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_RSplit PyUnicodeUCS2_RSplit # define PyUnicode_Replace PyUnicodeUCS2_Replace # define PyUnicode_Resize PyUnicodeUCS2_Resize +# define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare # define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding # define PyUnicode_Split PyUnicodeUCS2_Split # define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines @@ -266,6 +267,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_RSplit PyUnicodeUCS4_RSplit # define PyUnicode_Replace PyUnicodeUCS4_Replace # define PyUnicode_Resize PyUnicodeUCS4_Resize +# define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare # define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding # define PyUnicode_Split PyUnicodeUCS4_Split # define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines @@ -1139,6 +1141,28 @@ PyAPI_FUNC(int) PyUnicode_Compare( PyObject *right /* Right string */ ); +/* Rich compare two strings and return one of the following: + + - NULL in case an exception was raised + - Py_True or Py_False for successfuly comparisons + - Py_NotImplemented in case the type combination is unknown + + Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in + case the conversion of the arguments to Unicode fails with a + UnicodeDecodeError. + + Possible values for op: + + Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE + +*/ + +PyAPI_FUNC(PyObject *) PyUnicode_RichCompare( + PyObject *left, /* Left string */ + PyObject *right, /* Right string */ + int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */ + ); + /* Apply a argument tuple or dictionary to a format string and return the resulting Unicode string. */ diff --git a/Lib/test/exception_hierarchy.txt b/Lib/test/exception_hierarchy.txt index 58131d7..a03f7bb 100644 --- a/Lib/test/exception_hierarchy.txt +++ b/Lib/test/exception_hierarchy.txt @@ -45,3 +45,4 @@ BaseException +-- UserWarning +-- FutureWarning +-- ImportWarning + +-- UnicodeWarning @@ -12,18 +12,18 @@ What's New in Python 2.5 release candidate 1? Core and builtins ----------------- -- Fix segfault when doing string formatting on subclasses of long. - -- Fix bug related to __len__ functions using values > 2**32 on 64-bit machines - with new-style classes. - -- Fix bug related to __len__ functions returning negative values with - classic classes. - -- Patch #1538606, Fix __index__() clipping. There were some problems - discovered with the API and how integers that didn't fit into Py_ssize_t - were handled. This patch attempts to provide enough alternatives - to effectively use __index__. +- Unicode objects will no longer raise an exception when being + compared equal or unequal to a string and causing a + UnicodeDecodeError exception, e.g. as result of a decoding failure. + + Instead, the equal (==) and unequal (!=) comparison operators will + now issue a UnicodeWarning and interpret the two objects as + unequal. The UnicodeWarning can be filtered as desired using + the warning framework, e.g. silenced completely, turned into an + exception, logged, etc. + + Note that compare operators other than equal and unequal will still + raise UnicodeDecodeError exceptions as they've always done. - Bug #1536021: __hash__ may now return long int; the final hash value is obtained by invoking hash on the long int. @@ -99,6 +99,8 @@ Build C API ----- +- New API for Unicode rich comparisons: PyUnicode_RichCompare() + - Bug #1069160. Internal correctness changes were made to ``PyThreadState_SetAsyncExc()``. A test case was added, and the documentation was changed to state that the return value diff --git a/Objects/exceptions.c b/Objects/exceptions.c index be9627c..c3ead69 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -1948,6 +1948,14 @@ SimpleExtendsException(PyExc_Warning, ImportWarning, "Base class for warnings about probable mistakes in module imports"); +/* + * UnicodeWarning extends Warning + */ +SimpleExtendsException(PyExc_Warning, UnicodeWarning, + "Base class for warnings about Unicode related problems, mostly\n" + "related to conversion problems."); + + /* Pre-computed MemoryError instance. Best to create this as early as * possible and not wait until a MemoryError is actually raised! */ @@ -2048,6 +2056,7 @@ _PyExc_Init(void) PRE_INIT(RuntimeWarning) PRE_INIT(FutureWarning) PRE_INIT(ImportWarning) + PRE_INIT(UnicodeWarning) m = Py_InitModule4("exceptions", functions, exceptions_doc, (PyObject *)NULL, PYTHON_API_VERSION); @@ -2113,6 +2122,7 @@ _PyExc_Init(void) POST_INIT(RuntimeWarning) POST_INIT(FutureWarning) POST_INIT(ImportWarning) + POST_INIT(UnicodeWarning) PyExc_MemoryErrorInst = BaseException_new(&_PyExc_MemoryError, NULL, NULL); if (!PyExc_MemoryErrorInst) diff --git a/Objects/object.c b/Objects/object.c index 73c8941..b0672f3 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -731,23 +731,6 @@ default_3way_compare(PyObject *v, PyObject *w) return (vv < ww) ? -1 : (vv > ww) ? 1 : 0; } -#ifdef Py_USING_UNICODE - /* Special case for Unicode */ - if (PyUnicode_Check(v) || PyUnicode_Check(w)) { - c = PyUnicode_Compare(v, w); - if (!PyErr_Occurred()) - return c; - /* TypeErrors are ignored: if Unicode coercion fails due - to one of the arguments not having the right type, we - continue as defined by the coercion protocol (see - above). Luckily, decoding errors are reported as - ValueErrors and are not masked by this technique. */ - if (!PyErr_ExceptionMatches(PyExc_TypeError)) - return -2; - PyErr_Clear(); - } -#endif - /* None is smaller than anything */ if (v == Py_None) return -1; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ababda1..f4e3755 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5405,6 +5405,82 @@ onError: return -1; } +PyObject *PyUnicode_RichCompare(PyObject *left, + PyObject *right, + int op) +{ + int result; + + result = PyUnicode_Compare(left, right); + if (result == -1 && PyErr_Occurred()) + goto onError; + + /* Convert the return value to a Boolean */ + switch (op) { + case Py_EQ: + result = (result == 0); + break; + case Py_NE: + result = (result != 0); + break; + case Py_LE: + result = (result <= 0); + break; + case Py_GE: + result = (result >= 0); + break; + case Py_LT: + result = (result == -1); + break; + case Py_GT: + result = (result == 1); + break; + } + return PyBool_FromLong(result); + + onError: + + /* Standard case + + Type errors mean that PyUnicode_FromObject() could not convert + one of the arguments (usually the right hand side) to Unicode, + ie. we can't handle the comparison request. However, it is + possible that the other object knows a comparison method, which + is why we return Py_NotImplemented to give the other object a + chance. + + */ + if (PyErr_ExceptionMatches(PyExc_TypeError)) { + PyErr_Clear(); + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; + } + if (op != Py_EQ && op != Py_NE) + return NULL; + + /* Equality comparison. + + This is a special case: we silence any PyExc_UnicodeDecodeError + and instead turn it into a PyErr_UnicodeWarning. + + */ + if (!PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) + return NULL; + PyErr_Clear(); + if (PyErr_Warn(PyExc_UnicodeWarning, + (op == Py_EQ) ? + "Unicode equal comparison " + "failed to convert both arguments to Unicode - " + "interpreting them as being unequal" : + "Unicode unequal comparison " + "failed to convert both arguments to Unicode - " + "interpreting them as being unequal" + ) < 0) + return NULL; + result = (op == Py_NE); + return PyBool_FromLong(result); +} + int PyUnicode_Contains(PyObject *container, PyObject *element) { @@ -6985,11 +7061,14 @@ static PySequenceMethods unicode_as_sequence = { PyUnicode_Contains, /* sq_contains */ }; +#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX) + static PyObject* unicode_subscript(PyUnicodeObject* self, PyObject* item) { - if (PyIndex_Check(item)) { - Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); + PyNumberMethods *nb = item->ob_type->tp_as_number; + if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) { + Py_ssize_t i = nb->nb_index(item); if (i == -1 && PyErr_Occurred()) return NULL; if (i < 0) @@ -7859,7 +7938,7 @@ PyTypeObject PyUnicode_Type = { 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ - (cmpfunc) unicode_compare, /* tp_compare */ + 0, /* tp_compare */ unicode_repr, /* tp_repr */ &unicode_as_number, /* tp_as_number */ &unicode_as_sequence, /* tp_as_sequence */ @@ -7875,7 +7954,7 @@ PyTypeObject PyUnicode_Type = { unicode_doc, /* tp_doc */ 0, /* tp_traverse */ 0, /* tp_clear */ - 0, /* tp_richcompare */ + PyUnicode_RichCompare, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ |