summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarc-André Lemburg <mal@egenix.com>2006-08-14 10:55:19 (GMT)
committerMarc-André Lemburg <mal@egenix.com>2006-08-14 10:55:19 (GMT)
commit040f76b79c0ce86dc33b9c525fbcd84b2254e559 (patch)
treee907d6c112d52b1a92d7b98c63023ca338c9a188
parente6dd31c50be76a5b57917226e16bdaa6ca20a28f (diff)
downloadcpython-040f76b79c0ce86dc33b9c525fbcd84b2254e559.zip
cpython-040f76b79c0ce86dc33b9c525fbcd84b2254e559.tar.gz
cpython-040f76b79c0ce86dc33b9c525fbcd84b2254e559.tar.bz2
Slightly revised version of patch #1538956:
Replace UnicodeDecodeErrors raised during == and != compares of Unicode and other objects with a new UnicodeWarning. All other comparisons continue to raise exceptions. Exceptions other than UnicodeDecodeErrors are also left untouched.
-rw-r--r--Doc/api/concrete.tex25
-rw-r--r--Doc/api/exceptions.tex9
-rw-r--r--Doc/lib/libexcs.tex5
-rw-r--r--Doc/lib/libwarnings.tex3
-rw-r--r--Include/pyerrors.h1
-rw-r--r--Include/unicodeobject.h24
-rw-r--r--Lib/test/exception_hierarchy.txt1
-rw-r--r--Misc/NEWS26
-rw-r--r--Objects/exceptions.c10
-rw-r--r--Objects/object.c17
-rw-r--r--Objects/unicodeobject.c87
11 files changed, 171 insertions, 37 deletions
diff --git a/Doc/api/concrete.tex b/Doc/api/concrete.tex
index 4c7487c..cd9d8d5 100644
--- a/Doc/api/concrete.tex
+++ b/Doc/api/concrete.tex
@@ -1560,6 +1560,31 @@ They all return \NULL{} or \code{-1} if an exception occurs.
greater than, respectively.
\end{cfuncdesc}
+\begin{cfuncdesc}{int}{PyUnicode_RichCompare}{PyObject *left,
+ PyObject *right,
+ int op}
+
+% This entry could use some polishing - my TeX is too
+% rusty these days... (MAL)
+
+ Rich compare two strings and return one of the following:
+\begin{verbatim}
+ - NULL in case an exception was raised
+ - Py_True or Py_False for successful comparisons
+ - Py_NotImplemented in case the type combination is unknown
+\end{verbatim}
+
+ Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
+ case the conversion of the arguments to Unicode fails with a
+ UnicodeDecodeError.
+
+ Possible values for \var{op}:
+\begin{verbatim}
+ Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
+\end{verbatim}
+
+\end{cfuncdesc}
+
\begin{cfuncdesc}{PyObject*}{PyUnicode_Format}{PyObject *format,
PyObject *args}
Return a new string object from \var{format} and \var{args}; this
diff --git a/Doc/api/exceptions.tex b/Doc/api/exceptions.tex
index cb75d50..057c1da 100644
--- a/Doc/api/exceptions.tex
+++ b/Doc/api/exceptions.tex
@@ -288,10 +288,11 @@ for each thread.
names are \samp{PyExc_} followed by the Python exception name.
These have the type \ctype{PyObject*}; they are all class objects.
Their names are \cdata{PyExc_Warning}, \cdata{PyExc_UserWarning},
- \cdata{PyExc_DeprecationWarning}, \cdata{PyExc_SyntaxWarning},
- \cdata{PyExc_RuntimeWarning}, and \cdata{PyExc_FutureWarning}.
- \cdata{PyExc_Warning} is a subclass of \cdata{PyExc_Exception}; the
- other warning categories are subclasses of \cdata{PyExc_Warning}.
+ \cdata{PyExc_UnicodeWarning}, \cdata{PyExc_DeprecationWarning},
+ \cdata{PyExc_SyntaxWarning}, \cdata{PyExc_RuntimeWarning}, and
+ \cdata{PyExc_FutureWarning}. \cdata{PyExc_Warning} is a subclass of
+ \cdata{PyExc_Exception}; the other warning categories are subclasses
+ of \cdata{PyExc_Warning}.
For information about warning control, see the documentation for the
\module{warnings} module and the \programopt{-W} option in the
diff --git a/Doc/lib/libexcs.tex b/Doc/lib/libexcs.tex
index bef8bf1..6d2a3c5 100644
--- a/Doc/lib/libexcs.tex
+++ b/Doc/lib/libexcs.tex
@@ -456,6 +456,11 @@ Base class for warnings about probable mistakes in module imports.
\versionadded{2.5}
\end{excdesc}
+\begin{excdesc}{UnicodeWarning}
+Base class for warnings related to Unicode.
+\versionadded{2.5}
+\end{excdesc}
+
The class hierarchy for built-in exceptions is:
\verbatiminput{../../Lib/test/exception_hierarchy.txt}
diff --git a/Doc/lib/libwarnings.tex b/Doc/lib/libwarnings.tex
index 08c0340..a37a9f5 100644
--- a/Doc/lib/libwarnings.tex
+++ b/Doc/lib/libwarnings.tex
@@ -76,6 +76,9 @@ features that will be deprecated in the future (ignored by default).}
\lineii{ImportWarning}{Base category for warnings triggered during the
process of importing a module (ignored by default).}
+
+\lineii{UnicodeWarning}{Base category for warnings related to Unicode.}
+
\end{tableii}
While these are technically built-in exceptions, they are documented
diff --git a/Include/pyerrors.h b/Include/pyerrors.h
index ae1d990..9532e32 100644
--- a/Include/pyerrors.h
+++ b/Include/pyerrors.h
@@ -173,6 +173,7 @@ PyAPI_DATA(PyObject *) PyExc_SyntaxWarning;
PyAPI_DATA(PyObject *) PyExc_RuntimeWarning;
PyAPI_DATA(PyObject *) PyExc_FutureWarning;
PyAPI_DATA(PyObject *) PyExc_ImportWarning;
+PyAPI_DATA(PyObject *) PyExc_UnicodeWarning;
/* Convenience functions */
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index c7e07a8..33aa185 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -189,6 +189,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_RSplit PyUnicodeUCS2_RSplit
# define PyUnicode_Replace PyUnicodeUCS2_Replace
# define PyUnicode_Resize PyUnicodeUCS2_Resize
+# define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare
# define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding
# define PyUnicode_Split PyUnicodeUCS2_Split
# define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
@@ -266,6 +267,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_RSplit PyUnicodeUCS4_RSplit
# define PyUnicode_Replace PyUnicodeUCS4_Replace
# define PyUnicode_Resize PyUnicodeUCS4_Resize
+# define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare
# define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding
# define PyUnicode_Split PyUnicodeUCS4_Split
# define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
@@ -1139,6 +1141,28 @@ PyAPI_FUNC(int) PyUnicode_Compare(
PyObject *right /* Right string */
);
+/* Rich compare two strings and return one of the following:
+
+ - NULL in case an exception was raised
+ - Py_True or Py_False for successful comparisons
+ - Py_NotImplemented in case the type combination is unknown
+
+ Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
+ case the conversion of the arguments to Unicode fails with a
+ UnicodeDecodeError.
+
+ Possible values for op:
+
+ Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
+
+*/
+
+PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
+ PyObject *left, /* Left string */
+ PyObject *right, /* Right string */
+ int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
+ );
+
/* Apply a argument tuple or dictionary to a format string and return
the resulting Unicode string. */
diff --git a/Lib/test/exception_hierarchy.txt b/Lib/test/exception_hierarchy.txt
index 58131d7..a03f7bb 100644
--- a/Lib/test/exception_hierarchy.txt
+++ b/Lib/test/exception_hierarchy.txt
@@ -45,3 +45,4 @@ BaseException
+-- UserWarning
+-- FutureWarning
+-- ImportWarning
+ +-- UnicodeWarning
diff --git a/Misc/NEWS b/Misc/NEWS
index 5894c16..981c17b 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,18 +12,18 @@ What's New in Python 2.5 release candidate 1?
Core and builtins
-----------------
-- Fix segfault when doing string formatting on subclasses of long.
-
-- Fix bug related to __len__ functions using values > 2**32 on 64-bit machines
- with new-style classes.
-
-- Fix bug related to __len__ functions returning negative values with
- classic classes.
-
-- Patch #1538606, Fix __index__() clipping. There were some problems
- discovered with the API and how integers that didn't fit into Py_ssize_t
- were handled. This patch attempts to provide enough alternatives
- to effectively use __index__.
+- Unicode objects will no longer raise an exception when an equal or
+ unequal comparison with a string triggers a UnicodeDecodeError,
+ e.g. because the string cannot be decoded to Unicode.
+
+ Instead, the equal (==) and unequal (!=) comparison operators will
+ now issue a UnicodeWarning and interpret the two objects as
+ unequal. The UnicodeWarning can be filtered as desired using
+ the warning framework, e.g. silenced completely, turned into an
+ exception, logged, etc.
+
+ Note that compare operators other than equal and unequal will still
+ raise UnicodeDecodeError exceptions as they've always done.
- Bug #1536021: __hash__ may now return long int; the final hash
value is obtained by invoking hash on the long int.
@@ -99,6 +99,8 @@ Build
C API
-----
+- New API for Unicode rich comparisons: PyUnicode_RichCompare()
+
- Bug #1069160. Internal correctness changes were made to
``PyThreadState_SetAsyncExc()``. A test case was added, and
the documentation was changed to state that the return value
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index be9627c..c3ead69 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -1948,6 +1948,14 @@ SimpleExtendsException(PyExc_Warning, ImportWarning,
"Base class for warnings about probable mistakes in module imports");
+/*
+ * UnicodeWarning extends Warning
+ */
+SimpleExtendsException(PyExc_Warning, UnicodeWarning,
+ "Base class for warnings about Unicode related problems, mostly\n"
+ "related to conversion problems.");
+
+
/* Pre-computed MemoryError instance. Best to create this as early as
* possible and not wait until a MemoryError is actually raised!
*/
@@ -2048,6 +2056,7 @@ _PyExc_Init(void)
PRE_INIT(RuntimeWarning)
PRE_INIT(FutureWarning)
PRE_INIT(ImportWarning)
+ PRE_INIT(UnicodeWarning)
m = Py_InitModule4("exceptions", functions, exceptions_doc,
(PyObject *)NULL, PYTHON_API_VERSION);
@@ -2113,6 +2122,7 @@ _PyExc_Init(void)
POST_INIT(RuntimeWarning)
POST_INIT(FutureWarning)
POST_INIT(ImportWarning)
+ POST_INIT(UnicodeWarning)
PyExc_MemoryErrorInst = BaseException_new(&_PyExc_MemoryError, NULL, NULL);
if (!PyExc_MemoryErrorInst)
diff --git a/Objects/object.c b/Objects/object.c
index 73c8941..b0672f3 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -731,23 +731,6 @@ default_3way_compare(PyObject *v, PyObject *w)
return (vv < ww) ? -1 : (vv > ww) ? 1 : 0;
}
-#ifdef Py_USING_UNICODE
- /* Special case for Unicode */
- if (PyUnicode_Check(v) || PyUnicode_Check(w)) {
- c = PyUnicode_Compare(v, w);
- if (!PyErr_Occurred())
- return c;
- /* TypeErrors are ignored: if Unicode coercion fails due
- to one of the arguments not having the right type, we
- continue as defined by the coercion protocol (see
- above). Luckily, decoding errors are reported as
- ValueErrors and are not masked by this technique. */
- if (!PyErr_ExceptionMatches(PyExc_TypeError))
- return -2;
- PyErr_Clear();
- }
-#endif
-
/* None is smaller than anything */
if (v == Py_None)
return -1;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index ababda1..f4e3755 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5405,6 +5405,82 @@ onError:
return -1;
}
+PyObject *PyUnicode_RichCompare(PyObject *left,
+ PyObject *right,
+ int op)
+{
+ int result;
+ /* Rich comparison of left and right for the operator op.
+
+ Runs PyUnicode_Compare() and maps its integer result onto a
+ Python bool according to op.  If that call fails:
+
+ - TypeError: the error is cleared and Py_NotImplemented is
+   returned (after Py_INCREF).
+ - UnicodeDecodeError while op is Py_EQ or Py_NE: the error is
+   cleared, a UnicodeWarning is issued instead and the operands
+   are reported as unequal.
+ - anything else: NULL is returned with the exception left set.
+
+ */
+ result = PyUnicode_Compare(left, right);
+ if (result == -1 && PyErr_Occurred())
+ goto onError; /* -1 alone is ambiguous: it is also the "less than" result tested below */
+
+ /* Convert the return value to a Boolean.
+
+ NOTE(review): the Py_LT and Py_GT cases test for exactly -1 and 1,
+ so they presumably rely on PyUnicode_Compare() returning only
+ -1, 0 or 1 - confirm.  There is no default case: for any other op
+ value the raw compare result is handed to PyBool_FromLong().
+
+ */
+ switch (op) {
+ case Py_EQ:
+ result = (result == 0);
+ break;
+ case Py_NE:
+ result = (result != 0);
+ break;
+ case Py_LE:
+ result = (result <= 0);
+ break;
+ case Py_GE:
+ result = (result >= 0);
+ break;
+ case Py_LT:
+ result = (result == -1);
+ break;
+ case Py_GT:
+ result = (result == 1);
+ break;
+ }
+ return PyBool_FromLong(result);
+
+ onError:
+
+ /* Standard case
+
+ Type errors mean that PyUnicode_FromObject() could not convert
+ one of the arguments (usually the right hand side) to Unicode,
+ ie. we can't handle the comparison request. However, it is
+ possible that the other object knows a comparison method, which
+ is why we return Py_NotImplemented to give the other object a
+ chance.
+
+ */
+ if (PyErr_ExceptionMatches(PyExc_TypeError)) {
+ PyErr_Clear();
+ Py_INCREF(Py_NotImplemented);
+ return Py_NotImplemented;
+ }
+ if (op != Py_EQ && op != Py_NE)
+ return NULL; /* ordering comparisons: the pending exception is left set for the caller */
+
+ /* Equality comparison.
+
+ This is a special case: we silence any PyExc_UnicodeDecodeError
+ and instead turn it into a PyExc_UnicodeWarning.
+
+ */
+ if (!PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))
+ return NULL; /* any other exception type is passed through unchanged */
+ PyErr_Clear();
+ if (PyErr_Warn(PyExc_UnicodeWarning,
+ (op == Py_EQ) ?
+ "Unicode equal comparison "
+ "failed to convert both arguments to Unicode - "
+ "interpreting them as being unequal" :
+ "Unicode unequal comparison "
+ "failed to convert both arguments to Unicode - "
+ "interpreting them as being unequal"
+ ) < 0)
+ return NULL; /* the warning call itself returned a negative value */
+ result = (op == Py_NE); /* operands count as unequal: == yields False, != yields True */
+ return PyBool_FromLong(result);
+}
+
int PyUnicode_Contains(PyObject *container,
PyObject *element)
{
@@ -6985,11 +7061,14 @@ static PySequenceMethods unicode_as_sequence = {
PyUnicode_Contains, /* sq_contains */
};
+#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX) /* tests the Py_TPFLAGS_HAVE_INDEX feature flag on the type of o */
+
static PyObject*
unicode_subscript(PyUnicodeObject* self, PyObject* item)
{
- if (PyIndex_Check(item)) {
- Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
+ PyNumberMethods *nb = item->ob_type->tp_as_number;
+ if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
+ Py_ssize_t i = nb->nb_index(item);
if (i == -1 && PyErr_Occurred())
return NULL;
if (i < 0)
@@ -7859,7 +7938,7 @@ PyTypeObject PyUnicode_Type = {
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
- (cmpfunc) unicode_compare, /* tp_compare */
+ 0, /* tp_compare */
unicode_repr, /* tp_repr */
&unicode_as_number, /* tp_as_number */
&unicode_as_sequence, /* tp_as_sequence */
@@ -7875,7 +7954,7 @@ PyTypeObject PyUnicode_Type = {
unicode_doc, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
- 0, /* tp_richcompare */
+ PyUnicode_RichCompare, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */