Gave this a facelift: "/" vs "//", whrandom vs random, etc. Boosted

the default range to end at 2**20 (machines are much faster now). Fixed what was quite a arguably a bug, explaining an old mystery: the "!sort" case here contructs what *was* a quadratic-time disaster for the old quicksort implementation. But under the current samplesort, it always ran much faster than *sort (the random case). This never made sense. Turns out it was because !sort was sorting an integer array, while all the other cases sort floats; and comparing ints goes much quicker than comparing floats in Python. After changing !sort to chew on floats instead, it's now slower than the random sort case, which makes more sense (but is just a few percent slower; samplesort is massively less sensitive to "bad patterns" than quicksort).
author: Tim Peters <tim.peters@gmail.com> 2002-07-18 15:53:32 (GMT)
committer: Tim Peters <tim.peters@gmail.com> 2002-07-18 15:53:32 (GMT)
commit: 8b6ec79b74284873696b24ab979fb1cb579b86f8 (patch)
tree: c8754c65540f83d4ff624eed6a037d067382fddb /Lib
parent: 30d489651101ef9f34cad4124b404371a2eeef72 (diff)
download: cpython-8b6ec79b74284873696b24ab979fb1cb579b86f8.zip
cpython-8b6ec79b74284873696b24ab979fb1cb579b86f8.tar.gz
cpython-8b6ec79b74284873696b24ab979fb1cb579b86f8.tar.bz2
1 files changed, 46 insertions, 35 deletions
diff --git a/Lib/test/sortperf.py b/Lib/test/sortperf.py
index 4ad82a6..546afd5 100644
--- a/Lib/test/sortperf.py
+++ b/Lib/test/sortperf.py
@@ -10,20 +10,21 @@ import time
 import random
 import marshal
 import tempfile
-import operator
 import os
 
 td = tempfile.gettempdir()
 
-def randrange(n):
-    """Return a random shuffle of range(n)."""
+def randfloats(n):
+    """Return a list of n random floats in [0, 1)."""
+    # Generating floats is expensive, so this writes them out to a file in
+    # a temp directory.  If the file already exists, it just reads them
+    # back in and shuffles them a bit.
     fn = os.path.join(td, "rr%06d" % n)
     try:
         fp = open(fn, "rb")
     except IOError:
-        result = []
-        for i in range(n):
-            result.append(random.random())
+        r = random.random
+        result = [r() for i in xrange(n)]
         try:
             try:
                 fp = open(fn, "wb")
@@ -41,18 +42,18 @@ def randrange(n):
     else:
         result = marshal.load(fp)
         fp.close()
-        ##assert len(result) == n
         # Shuffle it a bit...
         for i in range(10):
-            i = random.randrange(0, n)
+            i = random.randrange(n)
             temp = result[:i]
             del result[:i]
             temp.reverse()
-            result[len(result):] = temp
+            result.extend(temp)
             del temp
+    assert len(result) == n
     return result
 
-def fl():
+def flush():
     sys.stdout.flush()
 
 def doit(L):
@@ -60,7 +61,7 @@ def doit(L):
     L.sort()
     t1 = time.clock()
     print "%6.2f" % (t1-t0),
-    fl()
+    flush()
 
 def tabulate(r):
     """Tabulate sort speed for lists of various sizes.
@@ -74,33 +75,50 @@ def tabulate(r):
     \sort: descending data
     /sort: ascending data
     ~sort: many duplicates
-    -sort: all equal
+    =sort: all equal
     !sort: worst case scenario
 
     """
-    cases = ("*sort", "\\sort", "/sort", "~sort", "-sort", "!sort")
-    fmt = ("%2s %6s" + " %6s"*len(cases))
+    cases = ("*sort", "\\sort", "/sort", "~sort", "=sort", "!sort")
+    fmt = ("%2s %7s" + " %6s"*len(cases))
     print fmt % (("i", "2**i") + cases)
     for i in r:
-        n = 1<<i
-        L = randrange(n)
-        ##assert len(L) == n
-        print "%2d %6d" % (i, n),
-        fl()
+        n = 1 << i
+        L = randfloats(n)
+        print "%2d %7d" % (i, n),
+        flush()
         doit(L) # *sort
         L.reverse()
         doit(L) # \sort
         doit(L) # /sort
+
+        # Arrange for lots of duplicates.
         if n > 4:
             del L[4:]
-            L = L*(n/4)
+            L = L * (n // 4)
+            # Force the elements to be distinct objects, else timings can be
+            # artificially low.
             L = map(lambda x: --x, L)
         doit(L) # ~sort
         del L
-        L = map(abs, [-0.5]*n)
-        doit(L) # -sort
-        L = range(n/2-1, -1, -1)
-        L[len(L):] = range(n/2)
+
+        # All equal.  Again, force the elements to be distinct objects.
+        L = map(abs, [-0.5] * n)
+        doit(L) # =sort
+        del L
+
+        # This one looks like [3, 2, 1, 0, 0, 1, 2, 3].  It was a bad case
+        # for an older implementation of quicksort, which used the median
+        # of the first, last and middle elements as the pivot.  It's still
+        # a worse-than-average case for samplesort, but on the order of a
+        # measly 5% worse, not a quadratic-time disaster as it was with
+        # quicksort.
+        half = n // 2
+        L = range(half - 1, -1, -1)
+        L.extend(range(half))
+        # Force to float, so that the timings are comparable.  This is
+        # significantly faster if we leave tham as ints.
+        L = map(float, L)
         doit(L) # !sort
         print
 
@@ -114,7 +132,7 @@ def main():
     """
     # default range (inclusive)
     k1 = 15
-    k2 = 19
+    k2 = 20
     if sys.argv[1:]:
         # one argument: single point
         k1 = k2 = int(sys.argv[1])
@@ -123,17 +141,10 @@ def main():
             k2 = int(sys.argv[2])
             if sys.argv[3:]:
                 # derive random seed from remaining arguments
-                x, y, z = 0, 0, 0
+                x = 1
                 for a in sys.argv[3:]:
-                    h = hash(a)
-                    h, d = divmod(h, 256)
-                    h = h & 0xffffff
-                    x = (x^h^d) & 255
-                    h = h>>8
-                    y = (y^h^d) & 255
-                    h = h>>8
-                    z = (z^h^d) & 255
-                whrandom.seed(x, y, z)
+                    x = 69069 * x + hash(a)
+                random.seed(x)
     r = range(k1, k2+1)                 # include the end point
     tabulate(r)
author	Tim Peters <tim.peters@gmail.com>	2002-07-18 15:53:32 (GMT)
committer	Tim Peters <tim.peters@gmail.com>	2002-07-18 15:53:32 (GMT)
commit	8b6ec79b74284873696b24ab979fb1cb579b86f8 (patch)
tree	c8754c65540f83d4ff624eed6a037d067382fddb /Lib
parent	30d489651101ef9f34cad4124b404371a2eeef72 (diff)
download	cpython-8b6ec79b74284873696b24ab979fb1cb579b86f8.zip cpython-8b6ec79b74284873696b24ab979fb1cb579b86f8.tar.gz cpython-8b6ec79b74284873696b24ab979fb1cb579b86f8.tar.bz2