summaryrefslogtreecommitdiffstats
path: root/Lib/test/sortperf.py
blob: cc83ee43d9e52bc655088ac3b9b709432a2a9a7b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
"""Sort performance test.

See main() for command line syntax.
See tabulate() for output format.

"""

import sys
import time
import random
import marshal
import tempfile
import os

# Directory in which randfloats() keeps its cache files of generated floats.
td = tempfile.gettempdir()

def randfloats(n):
    """Return a list of n random floats in [0, 1)."""
    # Generating floats is expensive, so this writes them out to a file in
    # a temp directory.  If the file already exists, it just reads them
    # back in and shuffles them a bit.
    fn = os.path.join(td, "rr%06d" % n)
    try:
        fp = open(fn, "rb")
    except IOError:
        r = random.random
        result = [r() for i in xrange(n)]
        try:
            try:
                fp = open(fn, "wb")
                marshal.dump(result, fp)
                fp.close()
                fp = None
            finally:
                if fp:
                    try:
                        os.unlink(fn)
                    except os.error:
                        pass
        except IOError, msg:
            print "can't write", fn, ":", msg
    else:
        result = marshal.load(fp)
        fp.close()
        # Shuffle it a bit...
        for i in range(10):
            i = random.randrange(n)
            temp = result[:i]
            del result[:i]
            temp.reverse()
            result.extend(temp)
            del temp
    assert len(result) == n
    return result

def flush():
    """Flush any buffered output on sys.stdout."""
    out = sys.stdout
    out.flush()

def doit(L):
    t0 = time.clock()
    L.sort()
    t1 = time.clock()
    print "%6.2f" % (t1-t0),
    flush()

def tabulate(r):
    """Tabulate sort speed for lists of various sizes.

    The sizes are 2**i for i in r (the argument, a list).

    The output displays i, 2**i, and the time to sort arrays of 2**i
    floating point numbers with the following properties:

    *sort: random data
    \sort: descending data
    /sort: ascending data
    3sort: ascending, then 3 random exchanges
    +sort: ascending, then 10 random at the end
    %sort: ascending, then randomly replace 1% of the elements w/ random values
    ~sort: many duplicates
    =sort: all equal
    !sort: worst case scenario

    """
    cases = tuple([ch + "sort" for ch in r"*\/3+%~=!"])
    fmt = ("%2s %7s" + " %6s"*len(cases))
    print fmt % (("i", "2**i") + cases)
    for i in r:
        n = 1 << i
        L = randfloats(n)
        print "%2d %7d" % (i, n),
        flush()
        doit(L) # *sort
        L.reverse()
        doit(L) # \sort
        doit(L) # /sort

        # Do 3 random exchanges.
        for dummy in range(3):
            i1 = random.randrange(n)
            i2 = random.randrange(n)
            L[i1], L[i2] = L[i2], L[i1]
        doit(L) # 3sort

        # Replace the last 10 with random floats.
        if n >= 10:
            L[-10:] = [random.random() for dummy in range(10)]
        doit(L) # +sort

        # Replace 1% of the elements at random.
        for dummy in xrange(n // 100):
            L[random.randrange(n)] = random.random()
        doit(L) # %sort

        # Arrange for lots of duplicates.
        if n > 4:
            del L[4:]
            L = L * (n // 4)
            # Force the elements to be distinct objects, else timings can be
            # artificially low.
            L = map(lambda x: --x, L)
        doit(L) # ~sort
        del L

        # All equal.  Again, force the elements to be distinct objects.
        L = map(abs, [-0.5] * n)
        doit(L) # =sort
        del L

        # This one looks like [3, 2, 1, 0, 0, 1, 2, 3].  It was a bad case
        # for an older implementation of quicksort, which used the median
        # of the first, last and middle elements as the pivot.
        half = n // 2
        L = range(half - 1, -1, -1)
        L.extend(range(half))
        # Force to float, so that the timings are comparable.  This is
        # significantly faster if we leave tham as ints.
        L = map(float, L)
        doit(L) # !sort
        print

def main():
    """Main program when invoked as a script.

    No arguments: tabulate rows for i from 15 through 20.
    One argument: tabulate a single row.
    Two arguments: tabulate a range (inclusive).
    Extra arguments are used to seed the random generator.

    """
    args = sys.argv[1:]
    # Default range (inclusive), used when no arguments are given.
    k1, k2 = 15, 20
    if len(args) >= 1:
        # One argument: a single point.
        k1 = k2 = int(args[0])
    if len(args) >= 2:
        # Two arguments: the second one is the end of the range.
        k2 = int(args[1])
    if len(args) >= 3:
        # Fold the hashes of all remaining arguments into one seed value.
        x = 1
        for a in args[2:]:
            x = 69069 * x + hash(a)
        random.seed(x)
    # range() excludes its stop value, so add one to include the end point.
    tabulate(range(k1, k2 + 1))

# Run the benchmark only when this file is executed as a script.
if __name__ == '__main__':
    main()