1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 | Lib/test/sortperf.py
"""Sort performance test. See main() for command line syntax. See tabulate() for output format. """ import sys import time import random import marshal import tempfile import os td = tempfile.gettempdir() def randfloats(n): """Return a list of n random floats in [0, 1).""" # Generating floats is expensive, so this writes them out to a file in # a temp directory. If the file already exists, it just reads them # back in and shuffles them a bit. fn = os.path.join(td, "rr%06d" % n) try: fp = open(fn, "rb") except IOError: r = random.random result = [r() for i in xrange(n)] try: try: fp = open(fn, "wb") marshal.dump(result, fp) fp.close() fp = None finally: if fp: try: os.unlink(fn) except os.error: pass except IOError, msg: print "can't write", fn, ":", msg else: result = marshal.load(fp) fp.close() # Shuffle it a bit... for i in range(10): i = random.randrange(n) temp = result[:i] del result[:i] temp.reverse() result.extend(temp) del temp assert len(result) == n return result def flush(): sys.stdout.flush() def doit(L): t0 = time.clock() L.sort() t1 = time.clock() print "%6.2f" % (t1-t0), flush() def tabulate(r): """Tabulate sort speed for lists of various sizes. The sizes are 2**i for i in r (the argument, a list). The output displays i, 2**i, and the time to sort arrays of 2**i floating point numbers with the following properties: *sort: random data \sort: descending data /sort: ascending data 3sort: ascending, then 3 random exchanges +sort: ascending, then 10 random at the end %sort: ascending, then randomly replace 1% of the elements w/ random values ~sort: many duplicates =sort: all equal !sort: worst case scenario """ cases = tuple([ch + "sort" for ch in r"*\/3+%~=!"]) fmt = ("%2s %7s" + " %6s"*len(cases)) print fmt % (("i", "2**i") + cases) for i in r: n = 1 << i L = randfloats(n) print "%2d %7d" % (i, n), flush() doit(L) # *sort L.reverse() doit(L) # \sort doit(L) # /sort # Do 3 random exchanges. for dummy in range(3): i1 = random.randrange(n) i2 = random.randrange(n) L[i1], L[i2] = L[i2], L[i1] doit(L) # 3sort # Replace the last 10 with random floats. if n >= 10: L[-10:] = [random.random() for dummy in range(10)] doit(L) # +sort # Replace 1% of the elements at random. for dummy in xrange(n // 100): L[random.randrange(n)] = random.random() doit(L) # %sort # Arrange for lots of duplicates. if n > 4: del L[4:] L = L * (n // 4) # Force the elements to be distinct objects, else timings can be # artificially low. L = map(lambda x: --x, L) doit(L) # ~sort del L # All equal. Again, force the elements to be distinct objects. L = map(abs, [-0.5] * n) doit(L) # =sort del L # This one looks like [3, 2, 1, 0, 0, 1, 2, 3]. It was a bad case # for an older implementation of quicksort, which used the median # of the first, last and middle elements as the pivot. half = n // 2 L = range(half - 1, -1, -1) L.extend(range(half)) # Force to float, so that the timings are comparable. This is # significantly faster if we leave tham as ints. L = map(float, L) doit(L) # !sort print def main(): """Main program when invoked as a script. One argument: tabulate a single row. Two arguments: tabulate a range (inclusive). Extra arguments are used to seed the random generator. """ # default range (inclusive) k1 = 15 k2 = 20 if sys.argv[1:]: # one argument: single point k1 = k2 = int(sys.argv[1]) if sys.argv[2:]: # two arguments: specify range k2 = int(sys.argv[2]) if sys.argv[3:]: # derive random seed from remaining arguments x = 1 for a in sys.argv[3:]: x = 69069 * x + hash(a) random.seed(x) r = range(k1, k2+1) # include the end point tabulate(r) if __name__ == '__main__': main() |