summaryrefslogtreecommitdiffstats
path: root/Objects/longobject.c
diff options
context:
space:
mode:
authorTim Peters <tim.peters@gmail.com>2002-08-12 22:01:34 (GMT)
committerTim Peters <tim.peters@gmail.com>2002-08-12 22:01:34 (GMT)
commit6000464d08b94114baeef0a464896654fb0faa62 (patch)
tree1f580137cffdcdc2f4c82624f964f9448b3a7489 /Objects/longobject.c
parent558fc977c5eea1af25fca4bf7f28a41756ba2ad6 (diff)
downloadcpython-6000464d08b94114baeef0a464896654fb0faa62.zip
cpython-6000464d08b94114baeef0a464896654fb0faa62.tar.gz
cpython-6000464d08b94114baeef0a464896654fb0faa62.tar.bz2
Added new function k_lopsided_mul(), which is much more efficient than
k_mul() when inputs have vastly different sizes, and a little more efficient when they're close to a factor of 2 out of whack. I consider this done now, although I'll set up some more correctness tests to run overnight.
Diffstat (limited to 'Objects/longobject.c')
-rw-r--r--Objects/longobject.c82
1 files changed, 72 insertions, 10 deletions
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 8c9f69a..3cc6f13 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -1592,6 +1592,8 @@ kmul_split(PyLongObject *n, int size, PyLongObject **high, PyLongObject **low)
return 0;
}
+static PyLongObject *k_lopsided_mul(PyLongObject *a, PyLongObject *b);
+
/* Karatsuba multiplication. Ignores the input signs, and returns the
* absolute value of the product (or NULL if error).
* See Knuth Vol. 2 Chapter 4.3.3 (Pp. 294-295).
@@ -1633,15 +1635,21 @@ k_mul(PyLongObject *a, PyLongObject *b)
/* Use gradeschool math when either number is too small. */
if (asize <= KARATSUBA_CUTOFF) {
- /* 0 is inevitable if one kmul arg has more than twice
- * the digits of another, so it's worth special-casing.
- */
if (asize == 0)
return _PyLong_New(0);
else
return x_mul(a, b);
}
+ /* If a is small compared to b, splitting on b gives a degenerate
+ * case with ah==0, and Karatsuba may be (even much) less efficient
+ * than "grade school" then. However, we can still win, by viewing
+ * b as a string of "big digits", each of width a->ob_size. That
+ * leads to a sequence of balanced calls to k_mul.
+ */
+ if (2 * asize <= bsize)
+ return k_lopsided_mul(a, b);
+
shift = bsize >> 1;
if (kmul_split(a, shift, &ah, &al) < 0) goto fail;
if (kmul_split(b, shift, &bh, &bl) < 0) goto fail;
@@ -1750,6 +1758,67 @@ k_mul(PyLongObject *a, PyLongObject *b)
return NULL;
}
+/* b has at least twice the digits of a, and a is big enough that Karatsuba
+ * would pay off *if* the inputs had balanced sizes. View b as a sequence
+ * of slices, each with a->ob_size digits, and multiply the slices by a,
+ * one at a time. This gives k_mul balanced inputs to work with, and is
+ * also cache-friendly (we compute one double-width slice of the result
+ * at a time, then move on, never bactracking except for the helpful
+ * single-width slice overlap between successive partial sums).
+ */
+static PyLongObject *
+k_lopsided_mul(PyLongObject *a, PyLongObject *b)
+{
+ const int asize = ABS(a->ob_size);
+ int bsize = ABS(b->ob_size);
+ int nbdone; /* # of b digits already multiplied */
+ PyLongObject *ret;
+ PyLongObject *bslice = NULL;
+
+ assert(asize > KARATSUBA_CUTOFF);
+ assert(2 * asize <= bsize);
+
+ /* Allocate result space, and zero it out. */
+ ret = _PyLong_New(asize + bsize);
+ if (ret == NULL)
+ return NULL;
+ memset(ret->ob_digit, 0, ret->ob_size * sizeof(digit));
+
+ /* Successive slices of b are copied into bslice. */
+ bslice = _PyLong_New(bsize);
+ if (bslice == NULL)
+ goto fail;
+
+ nbdone = 0;
+ while (bsize > 0) {
+ PyLongObject *product;
+ const int nbtouse = MIN(bsize, asize);
+
+ /* Multiply the next slice of b by a. */
+ memcpy(bslice->ob_digit, b->ob_digit + nbdone,
+ nbtouse * sizeof(digit));
+ bslice->ob_size = nbtouse;
+ product = k_mul(a, bslice);
+ if (product == NULL)
+ goto fail;
+
+ /* Add into result. */
+ (void)v_iadd(ret->ob_digit + nbdone, ret->ob_size - nbdone,
+ product->ob_digit, product->ob_size);
+ Py_DECREF(product);
+
+ bsize -= nbtouse;
+ nbdone += nbtouse;
+ }
+
+ Py_DECREF(bslice);
+ return long_normalize(ret);
+
+ fail:
+ Py_DECREF(ret);
+ Py_XDECREF(bslice);
+ return NULL;
+}
static PyObject *
long_mul(PyLongObject *v, PyLongObject *w)
@@ -1769,14 +1838,7 @@ long_mul(PyLongObject *v, PyLongObject *w)
return Py_NotImplemented;
}
-#if 0
- if (Py_GETENV("KARAT") != NULL)
- z = k_mul(a, b);
- else
- z = x_mul(a, b);
-#else
z = k_mul(a, b);
-#endif
if(z == NULL) {
Py_DECREF(a);
Py_DECREF(b);