Slightly revised version of patch #1538956:

Replace UnicodeDecodeErrors raised during == and != comparisons between Unicode objects and other objects with a new UnicodeWarning. All other comparisons continue to raise exceptions. Exceptions other than UnicodeDecodeError are also left untouched.
author: Marc-André Lemburg <mal@egenix.com> 2006-08-14 10:55:19 (GMT)
committer: Marc-André Lemburg <mal@egenix.com> 2006-08-14 10:55:19 (GMT)
commit: 040f76b79c0ce86dc33b9c525fbcd84b2254e559 (patch)
tree: e907d6c112d52b1a92d7b98c63023ca338c9a188 /Objects
parent: e6dd31c50be76a5b57917226e16bdaa6ca20a28f (diff)
download: cpython-040f76b79c0ce86dc33b9c525fbcd84b2254e559.zip
cpython-040f76b79c0ce86dc33b9c525fbcd84b2254e559.tar.gz
cpython-040f76b79c0ce86dc33b9c525fbcd84b2254e559.tar.bz2
3 files changed, 93 insertions, 21 deletions
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index be9627c..c3ead69 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -1948,6 +1948,14 @@ SimpleExtendsException(PyExc_Warning, ImportWarning,
           "Base class for warnings about probable mistakes in module imports");
 
 
+/*
+ *    UnicodeWarning extends Warning
+ */
+SimpleExtendsException(PyExc_Warning, UnicodeWarning,
+    "Base class for warnings about Unicode related problems, mostly\n"
+    "related to conversion problems.");
+
+
 /* Pre-computed MemoryError instance.  Best to create this as early as
  * possible and not wait until a MemoryError is actually raised!
  */
@@ -2048,6 +2056,7 @@ _PyExc_Init(void)
     PRE_INIT(RuntimeWarning)
     PRE_INIT(FutureWarning)
     PRE_INIT(ImportWarning)
+    PRE_INIT(UnicodeWarning)
 
     m = Py_InitModule4("exceptions", functions, exceptions_doc,
         (PyObject *)NULL, PYTHON_API_VERSION);
@@ -2113,6 +2122,7 @@ _PyExc_Init(void)
     POST_INIT(RuntimeWarning)
     POST_INIT(FutureWarning)
     POST_INIT(ImportWarning)
+    POST_INIT(UnicodeWarning)
 
     PyExc_MemoryErrorInst = BaseException_new(&_PyExc_MemoryError, NULL, NULL);
     if (!PyExc_MemoryErrorInst)
diff --git a/Objects/object.c b/Objects/object.c
index 73c8941..b0672f3 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -731,23 +731,6 @@ default_3way_compare(PyObject *v, PyObject *w)
 		return (vv < ww) ? -1 : (vv > ww) ? 1 : 0;
 	}
 
-#ifdef Py_USING_UNICODE
-	/* Special case for Unicode */
-	if (PyUnicode_Check(v) || PyUnicode_Check(w)) {
-		c = PyUnicode_Compare(v, w);
-		if (!PyErr_Occurred())
-			return c;
-		/* TypeErrors are ignored: if Unicode coercion fails due
-		   to one of the arguments not having the right type, we
-		   continue as defined by the coercion protocol (see
-		   above).  Luckily, decoding errors are reported as
-		   ValueErrors and are not masked by this technique. */
-		if (!PyErr_ExceptionMatches(PyExc_TypeError))
-			return -2;
-		PyErr_Clear();
-	}
-#endif
-
 	/* None is smaller than anything */
 	if (v == Py_None)
 		return -1;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index ababda1..f4e3755 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5405,6 +5405,82 @@ onError:
     return -1;
 }
 
+PyObject *PyUnicode_RichCompare(PyObject *left,
+                                PyObject *right,
+                                int op)
+{
+    int result;
+
+    result = PyUnicode_Compare(left, right);
+    if (result == -1 && PyErr_Occurred())
+        goto onError;
+
+    /* Convert the three-way compare result to a Boolean for the given op */
+    switch (op) {
+    case Py_EQ:
+        result = (result == 0);
+        break;
+    case Py_NE:
+        result = (result != 0);
+        break;
+    case Py_LE:
+        result = (result <= 0);
+        break;
+    case Py_GE:
+        result = (result >= 0);
+        break;
+    case Py_LT:
+        result = (result == -1);
+        break;
+    case Py_GT:
+        result = (result == 1);
+        break;
+    }
+    return PyBool_FromLong(result);
+
+ onError:
+
+    /* Standard case
+
+       Type errors mean that PyUnicode_FromObject() could not convert
+       one of the arguments (usually the right hand side) to Unicode,
+       i.e. we can't handle the comparison request. However, it is
+       possible that the other object knows a comparison method, which
+       is why we return Py_NotImplemented to give the other object a
+       chance.
+
+    */
+    if (PyErr_ExceptionMatches(PyExc_TypeError)) {
+        PyErr_Clear();
+        Py_INCREF(Py_NotImplemented);
+        return Py_NotImplemented;
+    }
+    if (op != Py_EQ && op != Py_NE)
+        return NULL;
+
+    /* Equality comparison (ordering comparisons have already returned
+       NULL above, leaving the exception set).
+       This is a special case: we silence a PyExc_UnicodeDecodeError
+       and instead issue a PyExc_UnicodeWarning, treating the operands
+       as unequal.  Any other exception is propagated unchanged.
+    */
+    if (!PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))
+        return NULL;
+    PyErr_Clear();
+    if (PyErr_Warn(PyExc_UnicodeWarning, 
+                   (op == Py_EQ) ? 
+                   "Unicode equal comparison "
+                   "failed to convert both arguments to Unicode - "
+                   "interpreting them as being unequal" :
+                   "Unicode unequal comparison "
+                   "failed to convert both arguments to Unicode - "
+                   "interpreting them as being unequal"
+                   ) < 0)
+        return NULL;
+    result = (op == Py_NE);
+    return PyBool_FromLong(result);
+}
+
 int PyUnicode_Contains(PyObject *container,
 		       PyObject *element)
 {
@@ -6985,11 +7061,14 @@ static PySequenceMethods unicode_as_sequence = {
     PyUnicode_Contains, 		/* sq_contains */
 };
 
+#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
+
 static PyObject*
 unicode_subscript(PyUnicodeObject* self, PyObject* item)
 {
-    if (PyIndex_Check(item)) {
-        Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
+    PyNumberMethods *nb = item->ob_type->tp_as_number;
+    if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
+        Py_ssize_t i = nb->nb_index(item);
         if (i == -1 && PyErr_Occurred())
             return NULL;
         if (i < 0)
@@ -7859,7 +7938,7 @@ PyTypeObject PyUnicode_Type = {
     0, 					/* tp_print */
     0,				 	/* tp_getattr */
     0, 					/* tp_setattr */
-    (cmpfunc) unicode_compare, 		/* tp_compare */
+    0, 					/* tp_compare */
     unicode_repr, 			/* tp_repr */
     &unicode_as_number, 		/* tp_as_number */
     &unicode_as_sequence, 		/* tp_as_sequence */
@@ -7875,7 +7954,7 @@ PyTypeObject PyUnicode_Type = {
     unicode_doc,			/* tp_doc */
     0,					/* tp_traverse */
     0,					/* tp_clear */
-    0,					/* tp_richcompare */
+    PyUnicode_RichCompare,		/* tp_richcompare */
     0,					/* tp_weaklistoffset */
     0,					/* tp_iter */
     0,					/* tp_iternext */
author	Marc-André Lemburg <mal@egenix.com>	2006-08-14 10:55:19 (GMT)
committer	Marc-André Lemburg <mal@egenix.com>	2006-08-14 10:55:19 (GMT)
commit	040f76b79c0ce86dc33b9c525fbcd84b2254e559 (patch)
tree	e907d6c112d52b1a92d7b98c63023ca338c9a188 /Objects
parent	e6dd31c50be76a5b57917226e16bdaa6ca20a28f (diff)
download	cpython-040f76b79c0ce86dc33b9c525fbcd84b2254e559.zip cpython-040f76b79c0ce86dc33b9c525fbcd84b2254e559.tar.gz cpython-040f76b79c0ce86dc33b9c525fbcd84b2254e559.tar.bz2