SF bug #513866: Float/long comparison anomaly.

When an integer is compared to a float now, the int isn't coerced to float. This avoids spurious overflow exceptions and insane results. This should compute correct results, without raising spurious exceptions, in all cases now -- although I expect that what happens when an int/long is compared to a NaN is still a platform accident. Note that we had potential problems here even with "short" ints, on boxes where sizeof(long)==8. There's #ifdef'ed code here to handle that, but I can't test it as intended. I tested it by changing the #ifdef to trigger on my 32-bit box instead. I suppose this is a bugfix candidate, but I won't backport it. It's long-winded (for speed) and messy (because the problem is messy). Note that this also depends on a previous 2.4 patch that introduced _Py_SwappedOp[] as an extern.
author: Tim Peters <tim.peters@gmail.com> 2004-09-23 08:06:40 (GMT)
committer: Tim Peters <tim.peters@gmail.com> 2004-09-23 08:06:40 (GMT)
commit: 307fa78107c39ffda1eb4ad18201d25650354c4e (patch)
tree: 6b2daf6dc3169cfc2054ebc4162647b5c647d875
parent: 4533f1fb7fbf1fa8a9cb264ff6f1f0aba043e80d (diff)
download: cpython-307fa78107c39ffda1eb4ad18201d25650354c4e.zip
cpython-307fa78107c39ffda1eb4ad18201d25650354c4e.tar.gz
cpython-307fa78107c39ffda1eb4ad18201d25650354c4e.tar.bz2
3 files changed, 318 insertions, 11 deletions
diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py
index 1a04ce9..74ae7c6 100644
--- a/Lib/test/test_long.py
+++ b/Lib/test/test_long.py
@@ -387,8 +387,7 @@ def test_float_overflow():
                  "1. ** huge", "huge ** 1.", "1. ** mhuge", "mhuge ** 1.",
                  "math.sin(huge)", "math.sin(mhuge)",
                  "math.sqrt(huge)", "math.sqrt(mhuge)", # should do better
-                 "math.floor(huge)", "math.floor(mhuge)",
-                 "float(shuge) == int(shuge)"]:
+                 "math.floor(huge)", "math.floor(mhuge)"]:
 
         try:
             eval(test, namespace)
@@ -397,6 +396,11 @@ def test_float_overflow():
         else:
             raise TestFailed("expected OverflowError from %s" % test)
 
+        # XXX Perhaps float(shuge) can raise OverflowError on some box?
+        # The comparison should not.
+        if float(shuge) == int(shuge):
+            raise TestFailed("float(shuge) should not equal int(shuge)")
+
 # ---------------------------------------------- test huge log and log10
 
 def test_logs():
@@ -431,6 +435,101 @@ def test_logs():
         except ValueError:
             pass
 
+# ----------------------------------------------- test mixed comparisons
+
+def test_mixed_compares():
+    import math
+    import sys
+
+    if verbose:
+        print "mixed comparisons"
+
+    # We're mostly concerned with that mixing floats and longs does the
+    # right stuff, even when longs are too large to fit in a float.
+    # The safest way to check the results is to use an entirely different
+    # method, which we do here via a skeletal rational class (which
+    # represents all Python ints, longs and floats exactly).
+    class Rat:
+        def __init__(self, value):
+            if isinstance(value, (int, long)):
+                self.n = value
+                self.d = 1
+
+            elif isinstance(value, float):
+                # Convert to exact rational equivalent.
+                f, e = math.frexp(abs(value))
+                assert f == 0 or 0.5 <= f < 1.0
+                # |value| = f * 2**e exactly
+
+                # Suck up CHUNK bits at a time; 28 is enough so that we suck
+                # up all bits in 2 iterations for all known binary double-
+                # precision formats, and small enough to fit in an int.
+                CHUNK = 28
+                top = 0
+                # invariant: |value| = (top + f) * 2**e exactly
+                while f:
+                    f = math.ldexp(f, CHUNK)
+                    digit = int(f)
+                    assert digit >> CHUNK == 0
+                    top = (top << CHUNK) | digit
+                    f -= digit
+                    assert 0.0 <= f < 1.0
+                    e -= CHUNK
+
+                # Now |value| = top * 2**e exactly.
+                if e >= 0:
+                    n = top << e
+                    d = 1
+                else:
+                    n = top
+                    d = 1 << -e
+                if value < 0:
+                    n = -n
+                self.n = n
+                self.d = d
+                assert float(n) / float(d) == value
+
+            else:
+                raise TypeError("can't deal with %r" % val)
+
+        def __cmp__(self, other):
+            if not isinstance(other, Rat):
+                other = Rat(other)
+            return cmp(self.n * other.d, self.d * other.n)
+
+    cases = [0, 0.001, 0.99, 1.0, 1.5, 1e20, 1e200]
+    # 2**48 is an important boundary in the internals.  2**53 is an
+    # important boundary for IEEE double precision.
+    for t in 2.0**48, 2.0**50, 2.0**53:
+        cases.extend([t - 1.0, t - 0.3, t, t + 0.3, t + 1.0,
+                      long(t-1), long(t), long(t+1)])
+    cases.extend([0, 1, 2, sys.maxint, float(sys.maxint)])
+    # 1L<<20000 should exceed all double formats.  long(1e200) is to
+    # check that we get equality with 1e200 above.
+    t = long(1e200)
+    cases.extend([0L, 1L, 2L, 1L << 20000, t-1, t, t+1])
+    cases.extend([-x for x in cases])
+    for x in cases:
+        Rx = Rat(x)
+        for y in cases:
+            Ry = Rat(y)
+            Rcmp = cmp(Rx, Ry)
+            xycmp = cmp(x, y)
+            if Rcmp != xycmp:
+                raise TestFailed('%r %r %d %d' % (x, y, Rcmp, xycmp))
+            if (x == y) != (Rcmp == 0):
+                raise TestFailed('%r == %r %d' % (x, y, Rcmp))
+            if (x != y) != (Rcmp != 0):
+                raise TestFailed('%r != %r %d' % (x, y, Rcmp))
+            if (x < y) != (Rcmp < 0):
+                raise TestFailed('%r < %r %d' % (x, y, Rcmp))
+            if (x <= y) != (Rcmp <= 0):
+                raise TestFailed('%r <= %r %d' % (x, y, Rcmp))
+            if (x > y) != (Rcmp > 0):
+                raise TestFailed('%r > %r %d' % (x, y, Rcmp))
+            if (x >= y) != (Rcmp >= 0):
+                raise TestFailed('%r >= %r %d' % (x, y, Rcmp))
+
 # ---------------------------------------------------------------- do it
 
 test_division()
@@ -441,3 +540,4 @@ test_misc()
 test_auto_overflow()
 test_float_overflow()
 test_logs()
+test_mixed_compares()
diff --git a/Misc/NEWS b/Misc/NEWS
index 3c2734b..89a6c57 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -15,7 +15,14 @@ Core and builtins
 - The bytecode optimizer now folds tuples of constants into a single
   constant.
 
-- PyLong_AsUnsignedLong[Mask] now support int objects as well.
+- SF bug #513866:  Float/long comparison anomaly.  Prior to 2.4b1, when
+  an integer was compared to a float, the integer was coerced to a float.
+  That could yield spurious overflow errors (if the integer was very
+  large), and to anomalies such as
+  ``long(1e200)+1 == 1e200 == long(1e200)-1``.  Coercion to float is no
+  longer performed, and cases like ``long(1e200)-1 < 1e200``,
+  ``long(1e200)+1 > 1e200`` and ``(1 << 20000) > 1e200`` are computed
+  correctly now.
 
 Extension modules
 -----------------
@@ -72,6 +79,8 @@ Build
 C API
 -----
 
+- PyLong_AsUnsignedLong[Mask] now support int objects as well.
+
 - SF patch #998993: ``PyUnicode_DecodeUTF8Stateful`` and
   ``PyUnicode_DecodeUTF16Stateful`` have been added, which implement stateful
   decoding.
diff --git a/Objects/floatobject.c b/Objects/floatobject.c
index 774d2ea..46c05b2 100644
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -354,38 +354,236 @@ float_str(PyFloatObject *v)
 	return PyString_FromString(buf);
 }
 
+/* Comparison is pretty much a nightmare.  When comparing float to float,
+ * we do it as straightforwardly (and long-windedly) as conceivable, so
+ * that, e.g., Python x == y delivers the same result as the platform
+ * C x == y when x and/or y is a NaN.
+ * When mixing float with an integer type, there's no good *uniform* approach.
+ * Converting the double to an integer obviously doesn't work, since we
+ * may lose info from fractional bits.  Converting the integer to a double
+ * also has two failure modes:  (1) a long int may trigger overflow (too
+ * large to fit in the dynamic range of a C double); (2) even a C long may have
+ * more bits than fit in a C double (e.g., on a a 64-bit box long may have
+ * 63 bits of precision, but a C double probably has only 53), and then
+ * we can falsely claim equality when low-order integer bits are lost by
+ * coercion to double.  So this part is painful too.
+ */
+
 static PyObject*
 float_richcompare(PyObject *v, PyObject *w, int op)
 {
 	double i, j;
 	int r = 0;
 
-	CONVERT_TO_DOUBLE(v, i);
-	CONVERT_TO_DOUBLE(w, j);
+	assert(PyFloat_Check(v));
+	i = PyFloat_AS_DOUBLE(v);
+
+	/* Switch on the type of w.  Set i and j to doubles to be compared,
+	 * and op to the richcomp to use.
+	 */
+	if (PyFloat_Check(w))
+		j = PyFloat_AS_DOUBLE(w);
 
+	else if (Py_IS_INFINITY(i)) {
+		/* XXX If we had a reliable way to check whether i is a
+		 * XXX NaN, it would belong in this branch too.
+		 */
+		if (PyInt_Check(w) || PyLong_Check(w))
+			/* The magnitude of i exceeds any finite integer,
+			 * so it doesn't matter which int we compare i with.
+			 */
+			j = 0.0;
+		else
+			goto Unimplemented;
+	}
+
+	else if (PyInt_Check(w)) {
+		long jj = PyInt_AS_LONG(w);
+		/* In the worst realistic case I can imagine, C double is a
+		 * Cray single with 48 bits of precision, and long has 64
+		 * bits.
+		 */
+#if SIZEOF_LONG > 4
+		unsigned long abs = (unsigned long)(jj < 0 ? -jj : jj);
+		if (abs >> 48) {
+			/* Needs more than 48 bits.  Make it take the
+			 * PyLong path.
+			 */
+			PyObject *result;
+			PyObject *ww = PyLong_FromLong(jj);
+
+			if (ww == NULL)
+				return NULL;
+			result = float_richcompare(v, ww, op);
+			Py_DECREF(ww);
+			return result;
+		}
+#endif
+		j = (double)jj;
+		assert((long)j == jj);
+	}
+
+	else if (PyLong_Check(w)) {
+		int vsign = i == 0.0 ? 0 : i < 0.0 ? -1 : 1;
+		int wsign = _PyLong_Sign(w);
+		size_t nbits;
+		double mant;
+		int exponent;
+
+		if (vsign != wsign) {
+			/* Magnitudes are irrelevant -- the signs alone
+			 * determine the outcome.
+			 */
+			i = (double)vsign;
+			j = (double)wsign;
+			goto Compare;
+		}
+		/* The signs are the same. */
+		/* Convert w to a double if it fits.  In particular, 0 fits. */
+		nbits = _PyLong_NumBits(w);
+		if (nbits == (size_t)-1 && PyErr_Occurred()) {
+			/* This long is so large that size_t isn't big enough
+			 * to hold the # of Python digits.  Replace with
+			 * little doubles that give the same outcome --
+			 * w is so large that its magnitude must exceed
+			 * the magnitude of any finite float.
+			 */
+			PyErr_Clear();
+			i = (double)vsign;
+			assert(wsign != 0);
+			j = wsign * 2.0;
+			goto Compare;
+		}
+		if (nbits <= 48) {
+			j = PyLong_AsDouble(w);
+			/* It's impossible that <= 48 bits overflowed. */
+			assert(j != -1.0 || ! PyErr_Occurred());
+			goto Compare;
+		}
+		assert(wsign != 0); /* else nbits was 0 */
+		assert(vsign != 0); /* if vsign were 0, then since wsign is
+		                     * not 0, we would have taken the
+		                     * vsign != wsign branch at the start */
+		/* We want to work with non-negative numbers. */
+		if (vsign < 0) {
+			/* "Multiply both sides" by -1; this also swaps the
+			 * comparator.
+			 */
+			i = -i;
+			op = _Py_SwappedOp[op];
+		}
+		assert(i > 0.0);
+		mant = frexp(i, &exponent);
+		/* exponent is the # of bits in v before the radix point;
+		 * we know that nbits (the # of bits in w) > 48 at this point
+		 */
+		if (exponent < 0 || (size_t)exponent < nbits) {
+			i = 1.0;
+			j = 2.0;
+			goto Compare;
+		}
+		if ((size_t)exponent > nbits) {
+			i = 2.0;
+			j = 1.0;
+			goto Compare;
+		}
+		/* v and w have the same number of bits before the radix
+		 * point.  Construct two longs that have the same comparison
+		 * outcome.
+		 */
+		{
+			double fracpart;
+			double intpart;
+			PyObject *result = NULL;
+			PyObject *one = NULL;
+			PyObject *vv = NULL;
+			PyObject *ww = w;
+
+			if (wsign < 0) {
+				ww = PyNumber_Negative(w);
+				if (ww == NULL)
+					goto Error;
+			}
+			else
+				Py_INCREF(ww);
+
+			fracpart = modf(i, &intpart);
+			vv = PyLong_FromDouble(intpart);
+			if (vv == NULL)
+				goto Error;
+
+			if (fracpart != 0.0) {
+				/* Shift left, and or a 1 bit into vv
+				 * to represent the lost fraction.
+				 */
+				PyObject *temp;
+
+				one = PyInt_FromLong(1);
+				if (one == NULL)
+					goto Error;
+
+				temp = PyNumber_Lshift(ww, one);
+				if (temp == NULL)
+					goto Error;
+				Py_DECREF(ww);
+				ww = temp;
+
+				temp = PyNumber_Lshift(vv, one);
+				if (temp == NULL)
+					goto Error;
+				Py_DECREF(vv);
+				vv = temp;
+
+				temp = PyNumber_Or(vv, one);
+				if (temp == NULL)
+					goto Error;
+				Py_DECREF(vv);
+				vv = temp;
+			}
+
+			r = PyObject_RichCompareBool(vv, ww, op);
+			if (r < 0)
+				goto Error;
+			result = PyBool_FromLong(r);
+ 		 Error:
+ 		 	Py_XDECREF(vv);
+ 		 	Py_XDECREF(ww);
+ 		 	Py_XDECREF(one);
+ 		 	return result;
+		}
+	} /* else if (PyLong_Check(w)) */
+
+	else	/* w isn't float, int, or long */
+		goto Unimplemented;
+
+ Compare:
 	PyFPE_START_PROTECT("richcompare", return NULL)
 	switch (op) {
 	case Py_EQ:
-		r = i==j;
+		r = i == j;
 		break;
 	case Py_NE:
-		r = i!=j;
+		r = i != j;
 		break;
 	case Py_LE:
-		r = i<=j;
+		r = i <= j;
 		break;
 	case Py_GE:
-		r = i>=j;
+		r = i >= j;
 		break;
 	case Py_LT:
-		r = i<j;
+		r = i < j;
 		break;
 	case Py_GT:
-		r = i>j;
+		r = i > j;
 		break;
 	}
 	PyFPE_END_PROTECT(r)
 	return PyBool_FromLong(r);
+
+ Unimplemented:
+	Py_INCREF(Py_NotImplemented);
+	return Py_NotImplemented;
 }
 
 static long
author	Tim Peters <tim.peters@gmail.com>	2004-09-23 08:06:40 (GMT)
committer	Tim Peters <tim.peters@gmail.com>	2004-09-23 08:06:40 (GMT)
commit	307fa78107c39ffda1eb4ad18201d25650354c4e (patch)
tree	6b2daf6dc3169cfc2054ebc4162647b5c647d875
parent	4533f1fb7fbf1fa8a9cb264ff6f1f0aba043e80d (diff)
download	cpython-307fa78107c39ffda1eb4ad18201d25650354c4e.zip cpython-307fa78107c39ffda1eb4ad18201d25650354c4e.tar.gz cpython-307fa78107c39ffda1eb4ad18201d25650354c4e.tar.bz2