Issue #8692: Improve performance of math.factorial:

(1) use a different algorithm that roughly halves the total number of multiplications required and results in more balanced multiplications (2) use a lookup table for small arguments (3) fast accumulation of products in C integer arithmetic rather than PyLong arithmetic when possible. Typical speedup, from unscientific testing on a 64-bit laptop, is 4.5x to 6.5x for arguments in the range 100 - 10000. Patch by Daniel Stutzbach; extensive reviews by Alexander Belopolsky.
author: Mark Dickinson <dickinsm@gmail.com> 2010-05-15 17:02:38 (GMT)
committer: Mark Dickinson <dickinsm@gmail.com> 2010-05-15 17:02:38 (GMT)
commit: 4c8a9a2df3c31b1c29d0b3cf74523e3c8b3dae72 (patch)
tree: f28718e84ae7a59ec3ec6780fa5fa2328362edf2 /Lib
parent: ae6265f8d06dbec7d08c73ca23dad0f040d09b8e (diff)
download: cpython-4c8a9a2df3c31b1c29d0b3cf74523e3c8b3dae72.zip
cpython-4c8a9a2df3c31b1c29d0b3cf74523e3c8b3dae72.tar.gz
cpython-4c8a9a2df3c31b1c29d0b3cf74523e3c8b3dae72.tar.bz2
1 files changed, 61 insertions, 10 deletions
diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py
index 48d9b1a..6c44435 100644
--- a/Lib/test/test_math.py
+++ b/Lib/test/test_math.py
@@ -60,6 +60,56 @@ def ulps_check(expected, got, ulps=20):
     return "error = {} ulps; permitted error = {} ulps".format(ulps_error,
                                                                ulps)
 
+# Here's a pure Python version of the math.factorial algorithm, for
+# documentation and comparison purposes.
+#
+# Formula:
+#
+#   factorial(n) = factorial_odd_part(n) << (n - count_set_bits(n))
+#
+# where
+#
+#   factorial_odd_part(n) = product_{i >= 0} product_{0 < j <= n >> i; j odd} j
+#
+# The outer product above is an infinite product, but once i >= n.bit_length,
+# (n >> i) < 1 and the corresponding term of the product is empty.  So only the
+# finitely many terms for 0 <= i < n.bit_length() contribute anything.
+#
+# We iterate downwards from i == n.bit_length() - 1 to i == 0.  The inner
+# product in the formula above starts at 1 for i == n.bit_length(); for each i
+# < n.bit_length() we get the inner product for i from that for i + 1 by
+# multiplying by all j in {n >> i+1 < j <= n >> i; j odd}.  In Python terms,
+# this set is range((n >> i+1) + 1 | 1, (n >> i) + 1 | 1, 2).
+
+def count_set_bits(n):
+    """Number of '1' bits in binary expansion of a nonnnegative integer."""
+    return 1 + count_set_bits(n & n - 1) if n else 0
+
+def partial_product(start, stop):
+    """Product of integers in range(start, stop, 2), computed recursively.
+    start and stop should both be odd, with start <= stop.
+
+    """
+    numfactors = (stop - start) >> 1
+    if not numfactors:
+        return 1
+    elif numfactors == 1:
+        return start
+    else:
+        mid = (start + numfactors) | 1
+        return partial_product(start, mid) * partial_product(mid, stop)
+
+def py_factorial(n):
+    """Factorial of nonnegative integer n, via "Binary Split Factorial Formula"
+    described at http://www.luschny.de/math/factorial/binarysplitfact.html
+
+    """
+    inner = outer = 1
+    for i in reversed(range(n.bit_length())):
+        inner *= partial_product((n >> i + 1) + 1 | 1, (n >> i) + 1 | 1)
+        outer *= inner
+    return outer << (n - count_set_bits(n))
+
 def acc_check(expected, got, rel_err=2e-15, abs_err = 5e-323):
     """Determine whether non-NaN floats a and b are equal to within a
     (small) rounding error.  The default values for rel_err and
@@ -365,18 +415,19 @@ class MathTests(unittest.TestCase):
         self.ftest('fabs(1)', math.fabs(1), 1)
 
     def testFactorial(self):
-        def fact(n):
-            result = 1
-            for i in range(1, int(n)+1):
-                result *= i
-            return result
-        values = list(range(10)) + [50, 100, 500]
-        random.shuffle(values)
-        for x in values:
-            for cast in (int, float):
-                self.assertEqual(math.factorial(cast(x)), fact(x), (x, fact(x), math.factorial(x)))
+        self.assertEqual(math.factorial(0), 1)
+        self.assertEqual(math.factorial(0.0), 1)
+        total = 1
+        for i in range(1, 1000):
+            total *= i
+            self.assertEqual(math.factorial(i), total)
+            self.assertEqual(math.factorial(float(i)), total)
+            self.assertEqual(math.factorial(i), py_factorial(i))
         self.assertRaises(ValueError, math.factorial, -1)
+        self.assertRaises(ValueError, math.factorial, -1.0)
         self.assertRaises(ValueError, math.factorial, math.pi)
+        self.assertRaises(OverflowError, math.factorial, sys.maxsize+1)
+        self.assertRaises(OverflowError, math.factorial, 10e100)
 
     def testFloor(self):
         self.assertRaises(TypeError, math.floor)
author	Mark Dickinson <dickinsm@gmail.com>	2010-05-15 17:02:38 (GMT)
committer	Mark Dickinson <dickinsm@gmail.com>	2010-05-15 17:02:38 (GMT)
commit	4c8a9a2df3c31b1c29d0b3cf74523e3c8b3dae72 (patch)
tree	f28718e84ae7a59ec3ec6780fa5fa2328362edf2 /Lib
parent	ae6265f8d06dbec7d08c73ca23dad0f040d09b8e (diff)
download	cpython-4c8a9a2df3c31b1c29d0b3cf74523e3c8b3dae72.zip cpython-4c8a9a2df3c31b1c29d0b3cf74523e3c8b3dae72.tar.gz cpython-4c8a9a2df3c31b1c29d0b3cf74523e3c8b3dae72.tar.bz2