From 8d87dc0c2995a4e1eb5bb889372bc7efe218c3a4 Mon Sep 17 00:00:00 2001
From: Mark Dickinson <dickinsm@gmail.com>
Date: Sun, 25 Oct 2009 20:39:06 +0000
Subject: Issue #1087418: Small performance boost for bitwise operations on
 longs. Initial patch by Gregory Smith;  some tweaks added.

---
 Misc/NEWS            |   2 +
 Objects/longobject.c | 166 +++++++++++++++++++++++++++++++--------------------
 2 files changed, 102 insertions(+), 66 deletions(-)

diff --git a/Misc/NEWS b/Misc/NEWS
index e26cd72..2810cab 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,8 @@ What's New in Python 2.7 alpha 1
 Core and Builtins
 -----------------
 
+- Issue #1087418: Boost performance of bitwise operations for longs.
+
 - Issue #1722344: threading._shutdown() is now called in Py_Finalize(), which
   fixes the problem of some exceptions being thrown at shutdown when the
   interpreter is killed. Patch by Adam Olsen.
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 376973e..8bdb283 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -3411,6 +3411,22 @@ lshift_error:
 	return (PyObject *) z;
 }
 
+/* Compute two's complement of digit vector a[0:m], writing result to
+   z[0:m].  The digit vector a need not be normalized, but should not
+   be entirely zero.  a and z may point to the same digit vector. */
+
+static void
+v_complement(digit *z, digit *a, Py_ssize_t m)
+{
+	Py_ssize_t i;
+	digit carry = 1;
+	for (i = 0; i < m; ++i) {
+		carry += a[i] ^ PyLong_MASK;
+		z[i] = carry & PyLong_MASK;
+		carry >>= PyLong_SHIFT;
+	}
+	assert(carry == 0);
+}
 
 /* Bitwise and/xor/or operations */
 
@@ -3419,104 +3435,122 @@ long_bitwise(PyLongObject *a,
 	     int op,  /* '&', '|', '^' */
 	     PyLongObject *b)
 {
-	digit maska, maskb; /* 0 or PyLong_MASK */
-	int negz;
+	int nega, negb, negz;
 	Py_ssize_t size_a, size_b, size_z, i;
 	PyLongObject *z;
-	digit diga, digb;
-	PyObject *v;
 
-	if (Py_SIZE(a) < 0) {
-		a = (PyLongObject *) long_invert(a);
-		if (a == NULL)
+	/* Bitwise operations for negative numbers operate as though
+	   on a two's complement representation.  So convert arguments
+	   from sign-magnitude to two's complement, and convert the
+	   result back to sign-magnitude at the end. */
+
+	/* If a is negative, replace it by its two's complement. */
+	size_a = ABS(Py_SIZE(a));
+	nega = Py_SIZE(a) < 0;
+	if (nega) {
+		z = _PyLong_New(size_a);
+		if (z == NULL)
 			return NULL;
-		maska = PyLong_MASK;
+		v_complement(z->ob_digit, a->ob_digit, size_a);
+		a = z;
 	}
-	else {
+	else
+		/* Keep reference count consistent. */
 		Py_INCREF(a);
-		maska = 0;
-	}
-	if (Py_SIZE(b) < 0) {
-		b = (PyLongObject *) long_invert(b);
-		if (b == NULL) {
+
+	/* Same for b. */
+	size_b = ABS(Py_SIZE(b));
+	negb = Py_SIZE(b) < 0;
+	if (negb) {
+		z = _PyLong_New(size_b);
+		if (z == NULL) {
 			Py_DECREF(a);
 			return NULL;
 		}
-		maskb = PyLong_MASK;
+		v_complement(z->ob_digit, b->ob_digit, size_b);
+		b = z;
 	}
-	else {
+	else
 		Py_INCREF(b);
-		maskb = 0;
+
+	/* Swap a and b if necessary to ensure size_a >= size_b. */
+	if (size_a < size_b) {
+		z = a; a = b; b = z;
+		size_z = size_a; size_a = size_b; size_b = size_z;
+		negz = nega; nega = negb; negb = negz;
 	}
 
-	negz = 0;
+	/* JRH: The original logic here was to allocate the result value (z)
+	   as the longer of the two operands.  However, there are some cases
+	   where the result is guaranteed to be shorter than that: AND of two
+	   positives, OR of two negatives: use the shorter number.  AND with
+	   mixed signs: use the positive number.  OR with mixed signs: use the
+	   negative number.
+	*/
 	switch (op) {
 	case '^':
-		if (maska != maskb) {
-			maska ^= PyLong_MASK;
-			negz = -1;
-		}
+		negz = nega ^ negb;
+		size_z = size_a;
 		break;
 	case '&':
-		if (maska && maskb) {
-			op = '|';
-			maska ^= PyLong_MASK;
-			maskb ^= PyLong_MASK;
-			negz = -1;
-		}
+		negz = nega & negb;
+		size_z = negb ? size_a : size_b;
 		break;
 	case '|':
-		if (maska || maskb) {
-			op = '&';
-			maska ^= PyLong_MASK;
-			maskb ^= PyLong_MASK;
-			negz = -1;
-		}
+		negz = nega | negb;
+		size_z = negb ? size_b : size_a;
 		break;
+	default:
+		PyErr_BadArgument();
+		return NULL;
 	}
 
-	/* JRH: The original logic here was to allocate the result value (z)
-	   as the longer of the two operands.  However, there are some cases
-	   where the result is guaranteed to be shorter than that: AND of two
-	   positives, OR of two negatives: use the shorter number.  AND with
-	   mixed signs: use the positive number.  OR with mixed signs: use the
-	   negative number.  After the transformations above, op will be '&'
-	   iff one of these cases applies, and mask will be non-0 for operands
-	   whose length should be ignored.
-	*/
-
-	size_a = Py_SIZE(a);
-	size_b = Py_SIZE(b);
-	size_z = op == '&'
-		? (maska
-		   ? size_b
-		   : (maskb ? size_a : MIN(size_a, size_b)))
-		: MAX(size_a, size_b);
-	z = _PyLong_New(size_z);
+	/* We allow an extra digit if z is negative, to make sure that
+	   the final two's complement of z doesn't overflow. */
+	z = _PyLong_New(size_z + negz);
 	if (z == NULL) {
 		Py_DECREF(a);
 		Py_DECREF(b);
 		return NULL;
 	}
 
-	for (i = 0; i < size_z; ++i) {
-		diga = (i < size_a ? a->ob_digit[i] : 0) ^ maska;
-		digb = (i < size_b ? b->ob_digit[i] : 0) ^ maskb;
-		switch (op) {
-		case '&': z->ob_digit[i] = diga & digb; break;
-		case '|': z->ob_digit[i] = diga | digb; break;
-		case '^': z->ob_digit[i] = diga ^ digb; break;
-		}
+	/* Compute digits for overlap of a and b. */
+	switch(op) {
+	case '&':
+		for (i = 0; i < size_b; ++i)
+			z->ob_digit[i] = a->ob_digit[i] & b->ob_digit[i];
+		break;
+	case '|':
+		for (i = 0; i < size_b; ++i)
+			z->ob_digit[i] = a->ob_digit[i] | b->ob_digit[i];
+		break;
+	case '^':
+		for (i = 0; i < size_b; ++i)
+			z->ob_digit[i] = a->ob_digit[i] ^ b->ob_digit[i];
+		break;
+	default:
+		PyErr_BadArgument();
+		return NULL;
+	}
+
+	/* Copy any remaining digits of a, inverting if necessary. */
+	if (op == '^' && negb)
+		for (; i < size_z; ++i)
+			z->ob_digit[i] = a->ob_digit[i] ^ PyLong_MASK;
+	else if (i < size_z)
+		memcpy(&z->ob_digit[i], &a->ob_digit[i],
+		       (size_z-i)*sizeof(digit));
+
+	/* Complement result if negative. */
+	if (negz) {
+		Py_SIZE(z) = -(Py_SIZE(z));
+		z->ob_digit[size_z] = PyLong_MASK;
+		v_complement(z->ob_digit, z->ob_digit, size_z+1);
 	}
 
 	Py_DECREF(a);
 	Py_DECREF(b);
-	z = long_normalize(z);
-	if (negz == 0)
-		return (PyObject *) z;
-	v = long_invert(z);
-	Py_DECREF(z);
-	return v;
+	return (PyObject *)long_normalize(z);
 }
 
 static PyObject *
-- 
cgit v0.12