From a3c01ce696cdbc0d8236a002cc76103db5662a1b Mon Sep 17 00:00:00 2001
From: Tim Peters <tim.peters@gmail.com>
Date: Tue, 4 Dec 2001 23:05:10 +0000
Subject: SF bug #488480: integer multiply to return -max_int-1. int_mul(): 
 new and vastly simpler overflow checking.  Whether it's faster or slower will
 likely vary across platforms, favoring boxes with fast floating point.  OTOH,
 we no longer have to worry about people shipping broken LONG_BIT definitions
 <0.9 wink>.

---
 Lib/test/test_types.py |  26 +++++++
 Objects/intobject.c    | 190 ++++++++++++++++---------------------------------
 2 files changed, 89 insertions(+), 127 deletions(-)

diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py
index c0c1c88..3af3113 100644
--- a/Lib/test/test_types.py
+++ b/Lib/test/test_types.py
@@ -71,6 +71,32 @@ if not -24 < -12: raise TestFailed, 'int op'
 xsize, ysize, zsize = 238, 356, 4
 if not (xsize*ysize*zsize == zsize*xsize*ysize == 338912):
     raise TestFailed, 'int mul commutativity'
+# And another.
+m = -sys.maxint - 1
+for divisor in 1, 2, 4, 8, 16, 32:
+    j = m / divisor
+    prod = divisor * j
+    if prod != m:
+        raise TestFailed, "%r * %r == %r != %r" % (divisor, j, prod, m)
+    if type(prod) is not int:
+        raise TestFailed, ("expected type(prod) to be int, not %r" %
+                           type(prod))
+# Check for expected * overflow to long.
+for divisor in 1, 2, 4, 8, 16, 32:
+    j = m / divisor - 1
+    prod = divisor * j
+    if type(prod) is not long:
+        raise TestFailed, ("expected type(%r) to be long, not %r" %
+                           (prod, type(prod)))
+# Check for expected * overflow to long.
+m = sys.maxint
+for divisor in 1, 2, 4, 8, 16, 32:
+    j = m / divisor + 1
+    prod = divisor * j
+    if type(prod) is not long:
+        raise TestFailed, ("expected type(%r) to be long, not %r" %
+                           (prod, type(prod)))
+
 print '6.4.2 Long integers'
 if 12L + 24L != 36L: raise TestFailed, 'long op'
 if 12L + (-24L) != -12L: raise TestFailed, 'long op'
diff --git a/Objects/intobject.c b/Objects/intobject.c
index 46c3a09..dfb03a9 100644
--- a/Objects/intobject.c
+++ b/Objects/intobject.c
@@ -148,16 +148,16 @@ PyInt_AsLong(register PyObject *op)
 	PyNumberMethods *nb;
 	PyIntObject *io;
 	long val;
-	
+
 	if (op && PyInt_Check(op))
 		return PyInt_AS_LONG((PyIntObject*) op);
-	
+
 	if (op == NULL || (nb = op->ob_type->tp_as_number) == NULL ||
 	    nb->nb_int == NULL) {
 		PyErr_SetString(PyExc_TypeError, "an integer is required");
 		return -1;
 	}
-	
+
 	io = (PyIntObject*) (*nb->nb_int) (op);
 	if (io == NULL)
 		return -1;
@@ -166,10 +166,10 @@ PyInt_AsLong(register PyObject *op)
 				"nb_int should return int object");
 		return -1;
 	}
-	
+
 	val = PyInt_AS_LONG(io);
 	Py_DECREF(io);
-	
+
 	return val;
 }
 
@@ -219,7 +219,7 @@ PyObject *
 PyInt_FromUnicode(Py_UNICODE *s, int length, int base)
 {
 	char buffer[256];
-	
+
 	if (length >= sizeof(buffer)) {
 		PyErr_SetString(PyExc_ValueError,
 				"int() literal too large to convert");
@@ -312,39 +312,38 @@ int_sub(PyIntObject *v, PyIntObject *w)
 }
 
 /*
-Integer overflow checking used to be done using a double, but on 64
-bit machines (where both long and double are 64 bit) this fails
-because the double doesn't have enough precision.  John Tromp suggests
-the following algorithm:
-
-Suppose again we normalize a and b to be nonnegative.
-Let ah and al (bh and bl) be the high and low 32 bits of a (b, resp.).
-Now we test ah and bh against zero and get essentially 3 possible outcomes.
-
-1) both ah and bh > 0 : then report overflow
-
-2) both ah and bh = 0 : then compute a*b and report overflow if it comes out
-                        negative
-
-3) ah > 0 and bh = 0  : compute ah*bl and report overflow if it's >= 2^31
-                        compute al*bl and report overflow if it's negative
-                        add (ah*bl)<<32 to al*bl and report overflow if
-                        it's negative
-
-In case of no overflow the result is then negated if necessary.
-
-The majority of cases will be 2), in which case this method is the same as
-what I suggested before. If multiplication is expensive enough, then the
-other method is faster on case 3), but also more work to program, so I
-guess the above is the preferred solution.
-
+Integer overflow checking for * is painful:  Python tried a couple ways, but
+they didn't work on all platforms, or failed in endcases (a product of
+-sys.maxint-1 has been a particular pain).
+
+Here's another way:
+
+The native long product x*y is either exactly right or *way* off, being
+just the last n bits of the true product, where n is the number of bits
+in a long (the delivered product is the true product plus i*2**n for
+some integer i).
+
+The native double product (double)x * (double)y is subject to three
+rounding errors:  on a sizeof(long)==8 box, each cast to double can lose
+info, and even on a sizeof(long)==4 box, the multiplication can lose info.
+But, unlike the native long product, it's not in *range* trouble:  even
+if sizeof(long)==32 (256-bit longs), the product easily fits in the
+dynamic range of a double.  So the leading 50 (or so) bits of the double
+product are correct.
+
+We check these two ways against each other, and declare victory if they're
+approximately the same.  Else, because the native long product is the only
+one that can lose catastrophic amounts of information, it's the native long
+product that must have overflowed.
 */
 
 static PyObject *
 int_mul(PyObject *v, PyObject *w)
 {
-	long a, b, ah, bh, x, y;
-	int s = 1;
+	long a, b;
+	long longprod;			/* a*b in native long arithmetic */
+	double doubled_longprod;	/* (double)longprod */
+	double doubleprod;		/* (double)a * (double)b */
 
 	if (!PyInt_Check(v) &&
 	    v->ob_type->tp_as_sequence &&
@@ -363,97 +362,34 @@ int_mul(PyObject *v, PyObject *w)
 
 	CONVERT_TO_LONG(v, a);
 	CONVERT_TO_LONG(w, b);
-	ah = a >> (LONG_BIT/2);
-	bh = b >> (LONG_BIT/2);
-
-	/* Quick test for common case: two small positive ints */
-
-	if (ah == 0 && bh == 0) {
-		x = a*b;
-		if (x < 0)
-			goto bad;
-		return PyInt_FromLong(x);
-	}
-
-	/* Arrange that a >= b >= 0 */
-
-	if (a < 0) {
-		a = -a;
-		if (a < 0) {
-			/* Largest negative */
-			if (b == 0 || b == 1) {
-				x = a*b;
-				goto ok;
-			}
-			else
-				goto bad;
-		}
-		s = -s;
-		ah = a >> (LONG_BIT/2);
-	}
-	if (b < 0) {
-		b = -b;
-		if (b < 0) {
-			/* Largest negative */
-			if (a == 0 || (a == 1 && s == 1)) {
-				x = a*b;
-				goto ok;
-			}
-			else
-				goto bad;
-		}
-		s = -s;
-		bh = b >> (LONG_BIT/2);
-	}
-
-	/* 1) both ah and bh > 0 : then report overflow */
-
-	if (ah != 0 && bh != 0)
-		goto bad;
-
-	/* 2) both ah and bh = 0 : then compute a*b and report
-				   overflow if it comes out negative */
-
-	if (ah == 0 && bh == 0) {
-		x = a*b;
-		if (x < 0)
-			goto bad;
-		return PyInt_FromLong(x*s);
-	}
-
-	if (a < b) {
-		/* Swap */
-		x = a;
-		a = b;
-		b = x;
-		ah = bh;
-		/* bh not used beyond this point */
+	longprod = a * b;
+	doubleprod = (double)a * (double)b;
+	doubled_longprod = (double)longprod;
+
+	/* Fast path for normal case:  small multiplicands, and no info
+	   is lost in either method. */
+	if (doubled_longprod == doubleprod)
+		return PyInt_FromLong(longprod);
+
+	/* Somebody somewhere lost info.  Close enough, or way off?  Note
+	   that a != 0 and b != 0 (else doubled_longprod == doubleprod == 0).
+	   The difference either is or isn't significant compared to the
+	   true value (of which doubleprod is a good approximation).
+	*/
+	{
+		const double diff = doubled_longprod - doubleprod;
+		const double absdiff = diff >= 0.0 ? diff : -diff;
+		const double absprod = doubleprod >= 0.0 ? doubleprod :
+							  -doubleprod;
+		/* absdiff/absprod <= 1/32 iff
+		   32 * absdiff <= absprod -- 5 good bits is "close enough" */
+		if (32.0 * absdiff <= absprod)
+			return PyInt_FromLong(longprod);
+		else if (err_ovf("integer multiplication"))
+			return NULL;
+		else
+			return PyLong_Type.tp_as_number->nb_multiply(v, w);
 	}
-
-	/* 3) ah > 0 and bh = 0  : compute ah*bl and report overflow if
-				   it's >= 2^31
-                        compute al*bl and report overflow if it's negative
-                        add (ah*bl)<<32 to al*bl and report overflow if
-                        it's negative
-			(NB b == bl in this case, and we make a = al) */
-
-	y = ah*b;
-	if (y >= (1L << (LONG_BIT/2 - 1)))
-		goto bad;
-	a &= (1L << (LONG_BIT/2)) - 1;
-	x = a*b;
-	if (x < 0)
-		goto bad;
-	x += y << (LONG_BIT/2);
-	if (x < 0)
-		goto bad;
- ok:
-	return PyInt_FromLong(x * s);
-
- bad:
-	if (err_ovf("integer multiplication"))
-		return NULL;
-	return PyLong_Type.tp_as_number->nb_multiply(v, w);
 }
 
 /* Return type of i_divmod */
@@ -468,7 +404,7 @@ i_divmod(register long x, register long y,
          long *p_xdivy, long *p_xmody)
 {
 	long xdivy, xmody;
-	
+
 	if (y == 0) {
 		PyErr_SetString(PyExc_ZeroDivisionError,
 				"integer division or modulo by zero");
@@ -623,7 +559,7 @@ int_pow(PyIntObject *v, PyIntObject *w, PyIntObject *z)
 	ix = 1;
 	while (iw > 0) {
 	 	prev = ix;	/* Save value for overflow check */
-	 	if (iw & 1) {	
+	 	if (iw & 1) {
 		 	ix = ix*temp;
 			if (temp == 0)
 				break; /* Avoid ix / 0 */
@@ -666,7 +602,7 @@ int_pow(PyIntObject *v, PyIntObject *w, PyIntObject *z)
 		}
 	}
 	return PyInt_FromLong(ix);
-}				
+}
 
 static PyObject *
 int_neg(PyIntObject *v)
-- 
cgit v0.12