diff options
author | Raymond Hettinger <python@rcn.com> | 2002-12-29 23:03:38 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2002-12-29 23:03:38 (GMT) |
commit | 40f621709286a7a0f7e6f260c0fd020d0fac0de0 (patch) | |
tree | bd602fee432a253a0f454fc696d14734f18dd915 /Lib | |
parent | 5e65ce671c3d3113035dd7783b79d395c9d71b3d (diff) | |
download | cpython-40f621709286a7a0f7e6f260c0fd020d0fac0de0.zip cpython-40f621709286a7a0f7e6f260c0fd020d0fac0de0.tar.gz cpython-40f621709286a7a0f7e6f260c0fd020d0fac0de0.tar.bz2 |
SF patch 658251: Install a C implementation of the Mersenne Twister as the
core generator for random.py.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/random.py | 403 | ||||
-rw-r--r-- | Lib/test/test_random.py | 215 |
2 files changed, 379 insertions, 239 deletions
diff --git a/Lib/random.py b/Lib/random.py index 057571a..8462061 100644 --- a/Lib/random.py +++ b/Lib/random.py @@ -18,61 +18,26 @@ negative exponential gamma beta + pareto + Weibull distributions on the circle (angles 0 to 2pi) --------------------------------------------- circular uniform von Mises -Translated from anonymously contributed C/C++ source. - -Multi-threading note: the random number generator used here is not thread- -safe; it is possible that two calls return the same random value. However, -you can instantiate a different instance of Random() in each thread to get -generators that don't share state, then use .setstate() and .jumpahead() to -move the generators to disjoint segments of the full period. For example, - -def create_generators(num, delta, firstseed=None): - ""\"Return list of num distinct generators. - Each generator has its own unique segment of delta elements from - Random.random()'s full period. - Seed the first generator with optional arg firstseed (default is - None, to seed from current time). - ""\" - - from random import Random - g = Random(firstseed) - result = [g] - for i in range(num - 1): - laststate = g.getstate() - g = Random() - g.setstate(laststate) - g.jumpahead(delta) - result.append(g) - return result - -gens = create_generators(10, 1000000) - -That creates 10 distinct generators, which can be passed out to 10 distinct -threads. The generators don't share state so can be called safely in -parallel. So long as no thread calls its g.random() more than a million -times (the second argument to create_generators), the sequences seen by -each thread will not overlap. - -The period of the underlying Wichmann-Hill generator is 6,953,607,871,644, -and that limits how far this technique can be pushed. - -Just for fun, note that since we know the period, .jumpahead() can also be -used to "move backward in time": - ->>> g = Random(42) # arbitrary ->>> g.random() -0.25420336316883324 ->>> g.jumpahead(6953607871644L - 1) # move *back* one ->>> g.random() -0.25420336316883324 +General notes on the underlying Mersenne Twister core generator: + +* The period is 2**19937-1. +* It is one of the most extensively tested generators in existence +* Without a direct way to compute N steps forward, the + semantics of jumpahead(n) are weakened to simply jump + to another distant state and rely on the large period + to avoid overlapping sequences. +* The random() method is implemented in C, executes in + a single Python step, and is, therefore, threadsafe. + """ -# XXX The docstring sucks. from math import log as _log, exp as _exp, pi as _pi, e as _e from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin @@ -82,32 +47,20 @@ __all__ = ["Random","seed","random","uniform","randint","choice","sample", "randrange","shuffle","normalvariate","lognormvariate", "cunifvariate","expovariate","vonmisesvariate","gammavariate", "stdgamma","gauss","betavariate","paretovariate","weibullvariate", - "getstate","setstate","jumpahead","whseed"] - -def _verify(name, computed, expected): - if abs(computed - expected) > 1e-7: - raise ValueError( - "computed value for %s deviates too much " - "(computed %g, expected %g)" % (name, computed, expected)) + "getstate","setstate","jumpahead"] NV_MAGICCONST = 4 * _exp(-0.5)/_sqrt(2.0) -_verify('NV_MAGICCONST', NV_MAGICCONST, 1.71552776992141) - TWOPI = 2.0*_pi -_verify('TWOPI', TWOPI, 6.28318530718) - LOG4 = _log(4.0) -_verify('LOG4', LOG4, 1.38629436111989) - SG_MAGICCONST = 1.0 + _log(4.5) -_verify('SG_MAGICCONST', SG_MAGICCONST, 2.50407739677627) - -del _verify # Translated by Guido van Rossum from C source provided by -# Adrian Baddeley. +# Adrian Baddeley. Adapted by Raymond Hettinger for use with +# the Mersenne Twister core generator. -class Random: +from _random import Random as CoreGenerator + +class Random(CoreGenerator): """Random number generator base class used by bound module functions. Used to instantiate instances of Random to get generators that don't @@ -122,7 +75,7 @@ class Random: """ - VERSION = 1 # used by getstate/setstate + VERSION = 2 # used by getstate/setstate def __init__(self, x=None): """Initialize an instance. @@ -131,12 +84,7 @@ class Random: """ self.seed(x) - -## -------------------- core generator ------------------- - - # Specific to Wichmann-Hill generator. Subclasses wishing to use a - # different core generator should override the seed(), random(), - # getstate(), setstate() and jumpahead() methods. + self.gauss_next = None def seed(self, a=None): """Initialize internal state from hashable object. @@ -144,141 +92,26 @@ class Random: None or no argument seeds from current time. If a is not None or an int or long, hash(a) is used instead. - - If a is an int or long, a is used directly. Distinct values between - 0 and 27814431486575L inclusive are guaranteed to yield distinct - internal states (this guarantee is specific to the default - Wichmann-Hill generator). """ - if a is None: - # Initialize from current time - import time - a = long(time.time() * 256) - - if type(a) not in (type(3), type(3L)): - a = hash(a) - - a, x = divmod(a, 30268) - a, y = divmod(a, 30306) - a, z = divmod(a, 30322) - self._seed = int(x)+1, int(y)+1, int(z)+1 - + CoreGenerator.seed(self, a) self.gauss_next = None - def random(self): - """Get the next random number in the range [0.0, 1.0).""" - - # Wichman-Hill random number generator. - # - # Wichmann, B. A. & Hill, I. D. (1982) - # Algorithm AS 183: - # An efficient and portable pseudo-random number generator - # Applied Statistics 31 (1982) 188-190 - # - # see also: - # Correction to Algorithm AS 183 - # Applied Statistics 33 (1984) 123 - # - # McLeod, A. I. (1985) - # A remark on Algorithm AS 183 - # Applied Statistics 34 (1985),198-200 - - # This part is thread-unsafe: - # BEGIN CRITICAL SECTION - x, y, z = self._seed - x = (171 * x) % 30269 - y = (172 * y) % 30307 - z = (170 * z) % 30323 - self._seed = x, y, z - # END CRITICAL SECTION - - # Note: on a platform using IEEE-754 double arithmetic, this can - # never return 0.0 (asserted by Tim; proof too long for a comment). - return (x/30269.0 + y/30307.0 + z/30323.0) % 1.0 - def getstate(self): """Return internal state; can be passed to setstate() later.""" - return self.VERSION, self._seed, self.gauss_next + return self.VERSION, CoreGenerator.getstate(self), self.gauss_next def setstate(self, state): """Restore internal state from object returned by getstate().""" version = state[0] - if version == 1: - version, self._seed, self.gauss_next = state + if version == 2: + version, internalstate, self.gauss_next = state + CoreGenerator.setstate(self, internalstate) else: raise ValueError("state with version %s passed to " "Random.setstate() of version %s" % (version, self.VERSION)) - def jumpahead(self, n): - """Act as if n calls to random() were made, but quickly. - - n is an int, greater than or equal to 0. - - Example use: If you have 2 threads and know that each will - consume no more than a million random numbers, create two Random - objects r1 and r2, then do - r2.setstate(r1.getstate()) - r2.jumpahead(1000000) - Then r1 and r2 will use guaranteed-disjoint segments of the full - period. - """ - - if not n >= 0: - raise ValueError("n must be >= 0") - x, y, z = self._seed - x = int(x * pow(171, n, 30269)) % 30269 - y = int(y * pow(172, n, 30307)) % 30307 - z = int(z * pow(170, n, 30323)) % 30323 - self._seed = x, y, z - - def __whseed(self, x=0, y=0, z=0): - """Set the Wichmann-Hill seed from (x, y, z). - - These must be integers in the range [0, 256). - """ - - if not type(x) == type(y) == type(z) == int: - raise TypeError('seeds must be integers') - if not (0 <= x < 256 and 0 <= y < 256 and 0 <= z < 256): - raise ValueError('seeds must be in range(0, 256)') - if 0 == x == y == z: - # Initialize from current time - import time - t = long(time.time() * 256) - t = int((t&0xffffff) ^ (t>>24)) - t, x = divmod(t, 256) - t, y = divmod(t, 256) - t, z = divmod(t, 256) - # Zero is a poor seed, so substitute 1 - self._seed = (x or 1, y or 1, z or 1) - - self.gauss_next = None - - def whseed(self, a=None): - """Seed from hashable object's hash code. - - None or no argument seeds from current time. It is not guaranteed - that objects with distinct hash codes lead to distinct internal - states. - - This is obsolete, provided for compatibility with the seed routine - used prior to Python 2.1. Use the .seed() method instead. - """ - - if a is None: - self.__whseed() - return - a = hash(a) - a, x = divmod(a, 256) - a, y = divmod(a, 256) - a, z = divmod(a, 256) - x = (x + a) % 256 or 1 - y = (y + a) % 256 or 1 - z = (z + a) % 256 or 1 - self.__whseed(x, y, z) - ## ---- Methods below this point do not need to be overridden when ## ---- subclassing for the purpose of using a different core generator. @@ -744,6 +577,153 @@ class Random: u = self.random() return alpha * pow(-_log(u), 1.0/beta) +## -------------------- Wichmann-Hill ------------------- + +class WichmannHill(Random): + + VERSION = 1 # used by getstate/setstate + + def seed(self, a=None): + """Initialize internal state from hashable object. + + None or no argument seeds from current time. + + If a is not None or an int or long, hash(a) is used instead. + + If a is an int or long, a is used directly. Distinct values between + 0 and 27814431486575L inclusive are guaranteed to yield distinct + internal states (this guarantee is specific to the default + Wichmann-Hill generator). + """ + + if a is None: + # Initialize from current time + import time + a = long(time.time() * 256) + + if not isinstance(a, (int, long)): + a = hash(a) + + a, x = divmod(a, 30268) + a, y = divmod(a, 30306) + a, z = divmod(a, 30322) + self._seed = int(x)+1, int(y)+1, int(z)+1 + + self.gauss_next = None + + def random(self): + """Get the next random number in the range [0.0, 1.0).""" + + # Wichman-Hill random number generator. + # + # Wichmann, B. A. & Hill, I. D. (1982) + # Algorithm AS 183: + # An efficient and portable pseudo-random number generator + # Applied Statistics 31 (1982) 188-190 + # + # see also: + # Correction to Algorithm AS 183 + # Applied Statistics 33 (1984) 123 + # + # McLeod, A. I. (1985) + # A remark on Algorithm AS 183 + # Applied Statistics 34 (1985),198-200 + + # This part is thread-unsafe: + # BEGIN CRITICAL SECTION + x, y, z = self._seed + x = (171 * x) % 30269 + y = (172 * y) % 30307 + z = (170 * z) % 30323 + self._seed = x, y, z + # END CRITICAL SECTION + + # Note: on a platform using IEEE-754 double arithmetic, this can + # never return 0.0 (asserted by Tim; proof too long for a comment). + return (x/30269.0 + y/30307.0 + z/30323.0) % 1.0 + + def getstate(self): + """Return internal state; can be passed to setstate() later.""" + return self.VERSION, self._seed, self.gauss_next + + def setstate(self, state): + """Restore internal state from object returned by getstate().""" + version = state[0] + if version == 1: + version, self._seed, self.gauss_next = state + else: + raise ValueError("state with version %s passed to " + "Random.setstate() of version %s" % + (version, self.VERSION)) + + def jumpahead(self, n): + """Act as if n calls to random() were made, but quickly. + + n is an int, greater than or equal to 0. + + Example use: If you have 2 threads and know that each will + consume no more than a million random numbers, create two Random + objects r1 and r2, then do + r2.setstate(r1.getstate()) + r2.jumpahead(1000000) + Then r1 and r2 will use guaranteed-disjoint segments of the full + period. + """ + + if not n >= 0: + raise ValueError("n must be >= 0") + x, y, z = self._seed + x = int(x * pow(171, n, 30269)) % 30269 + y = int(y * pow(172, n, 30307)) % 30307 + z = int(z * pow(170, n, 30323)) % 30323 + self._seed = x, y, z + + def __whseed(self, x=0, y=0, z=0): + """Set the Wichmann-Hill seed from (x, y, z). + + These must be integers in the range [0, 256). + """ + + if not type(x) == type(y) == type(z) == int: + raise TypeError('seeds must be integers') + if not (0 <= x < 256 and 0 <= y < 256 and 0 <= z < 256): + raise ValueError('seeds must be in range(0, 256)') + if 0 == x == y == z: + # Initialize from current time + import time + t = long(time.time() * 256) + t = int((t&0xffffff) ^ (t>>24)) + t, x = divmod(t, 256) + t, y = divmod(t, 256) + t, z = divmod(t, 256) + # Zero is a poor seed, so substitute 1 + self._seed = (x or 1, y or 1, z or 1) + + self.gauss_next = None + + def whseed(self, a=None): + """Seed from hashable object's hash code. + + None or no argument seeds from current time. It is not guaranteed + that objects with distinct hash codes lead to distinct internal + states. + + This is obsolete, provided for compatibility with the seed routine + used prior to Python 2.1. Use the .seed() method instead. + """ + + if a is None: + self.__whseed() + return + a = hash(a) + a, x = divmod(a, 256) + a, y = divmod(a, 256) + a, z = divmod(a, 256) + x = (x + a) % 256 or 1 + y = (y + a) % 256 or 1 + z = (z + a) % 256 or 1 + self.__whseed(x, y, z) + ## -------------------- test program -------------------- def _test_generator(n, funccall): @@ -768,25 +748,11 @@ def _test_generator(n, funccall): print 'avg %g, stddev %g, min %g, max %g' % \ (avg, stddev, smallest, largest) -def _test_sample(n): - # For the entire allowable range of 0 <= k <= n, validate that - # the sample is of the correct length and contains only unique items - population = xrange(n) - for k in xrange(n+1): - s = sample(population, k) - uniq = dict.fromkeys(s) - assert len(uniq) == len(s) == k - assert None not in uniq - def _sample_generator(n, k): # Return a fixed element from the sample. Validates random ordering. return sample(xrange(n), k)[k//2] def _test(N=2000): - print 'TWOPI =', TWOPI - print 'LOG4 =', LOG4 - print 'NV_MAGICCONST =', NV_MAGICCONST - print 'SG_MAGICCONST =', SG_MAGICCONST _test_generator(N, 'random()') _test_generator(N, 'normalvariate(0.0, 1.0)') _test_generator(N, 'lognormvariate(0.0, 1.0)') @@ -808,25 +774,13 @@ def _test(N=2000): _test_generator(N, 'weibullvariate(1.0, 1.0)') _test_generator(N, '_sample_generator(50, 5)') # expected s.d.: 14.4 _test_generator(N, '_sample_generator(50, 45)') # expected s.d.: 14.4 - _test_sample(500) - - # Test jumpahead. - s = getstate() - jumpahead(N) - r1 = random() - # now do it the slow way - setstate(s) - for i in range(N): - random() - r2 = random() - if r1 != r2: - raise ValueError("jumpahead test failed " + `(N, r1, r2)`) # Create one instance, seeded from current time, and export its methods -# as module-level functions. The functions are not threadsafe, and state -# is shared across all uses (both in the user's code and in the Python -# libraries), but that's fine for most programs and is easier for the -# casual user than making them instantiate their own Random() instance. +# as module-level functions. The functions share state across all uses +#(both in the user's code and in the Python libraries), but that's fine +# for most programs and is easier for the casual user than making them +# instantiate their own Random() instance. + _inst = Random() seed = _inst.seed random = _inst.random @@ -850,7 +804,6 @@ weibullvariate = _inst.weibullvariate getstate = _inst.getstate setstate = _inst.setstate jumpahead = _inst.jumpahead -whseed = _inst.whseed if __name__ == '__main__': _test() diff --git a/Lib/test/test_random.py b/Lib/test/test_random.py index 5f60f4b..d0a2a15 100644 --- a/Lib/test/test_random.py +++ b/Lib/test/test_random.py @@ -1,19 +1,206 @@ -from test import test_support +#!/usr/bin/env python + +import unittest import random +import time +from test import test_support + +class TestBasicOps(unittest.TestCase): + # Superclass with tests common to all generators. + # Subclasses must arrange for self.gen to retrieve the Random instance + # to be tested. + + def randomlist(self, n): + """Helper function to make a list of random numbers""" + return [self.gen.random() for i in xrange(n)] + + def test_autoseed(self): + self.gen.seed() + state1 = self.gen.getstate() + time.sleep(1) + self.gen.seed() # diffent seeds at different times + state2 = self.gen.getstate() + self.assertNotEqual(state1, state2) + + def test_saverestore(self): + N = 1000 + self.gen.seed() + state = self.gen.getstate() + randseq = self.randomlist(N) + self.gen.setstate(state) # should regenerate the same sequence + self.assertEqual(randseq, self.randomlist(N)) + + def test_seedargs(self): + for arg in [None, 0, 0L, 1, 1L, -1, -1L, 10**20, -(10**20), + 3.14, 1+2j, 'a', tuple('abc')]: + self.gen.seed(arg) + for arg in [range(3), dict(one=1)]: + self.assertRaises(TypeError, self.gen.seed, arg) + + def test_jumpahead(self): + self.gen.seed() + state1 = self.gen.getstate() + self.gen.jumpahead(100) + state2 = self.gen.getstate() # s/b distinct from state1 + self.assertNotEqual(state1, state2) + self.gen.jumpahead(100) + state3 = self.gen.getstate() # s/b distinct from state2 + self.assertNotEqual(state2, state3) + + self.assertRaises(TypeError, self.gen.jumpahead) # needs an arg + self.assertRaises(TypeError, self.gen.jumpahead, "ick") # wrong type + self.assertRaises(TypeError, self.gen.jumpahead, 2.3) # wrong type + self.assertRaises(TypeError, self.gen.jumpahead, 2, 3) # too many + + def test_sample(self): + # For the entire allowable range of 0 <= k <= N, validate that + # the sample is of the correct length and contains only unique items + N = 100 + population = xrange(N) + for k in xrange(N+1): + s = self.gen.sample(population, k) + self.assertEqual(len(s), k) + uniq = dict.fromkeys(s) + self.assertEqual(len(uniq), k) + self.failIf(None in uniq) + + def test_gauss(self): + # Ensure that the seed() method initializes all the hidden state. In + # particular, through 2.2.1 it failed to reset a piece of state used + # by (and only by) the .gauss() method. + + for seed in 1, 12, 123, 1234, 12345, 123456, 654321: + self.gen.seed(seed) + x1 = self.gen.random() + y1 = self.gen.gauss(0, 1) + + self.gen.seed(seed) + x2 = self.gen.random() + y2 = self.gen.gauss(0, 1) + + self.assertEqual(x1, x2) + self.assertEqual(y1, y2) + + +class WichmannHill_TestBasicOps(TestBasicOps): + gen = random.WichmannHill() + + def test_strong_jumpahead(self): + # tests that jumpahead(n) semantics correspond to n calls to random() + N = 1000 + s = self.gen.getstate() + self.gen.jumpahead(N) + r1 = self.gen.random() + # now do it the slow way + self.gen.setstate(s) + for i in xrange(N): + self.gen.random() + r2 = self.gen.random() + self.assertEqual(r1, r2) + + def test_gauss_with_whseed(self): + # Ensure that the seed() method initializes all the hidden state. In + # particular, through 2.2.1 it failed to reset a piece of state used + # by (and only by) the .gauss() method. + + for seed in 1, 12, 123, 1234, 12345, 123456, 654321: + self.gen.whseed(seed) + x1 = self.gen.random() + y1 = self.gen.gauss(0, 1) + + self.gen.whseed(seed) + x2 = self.gen.random() + y2 = self.gen.gauss(0, 1) + + self.assertEqual(x1, x2) + self.assertEqual(y1, y2) + +class MersenneTwister_TestBasicOps(TestBasicOps): + gen = random.Random() + + def test_referenceImplementation(self): + # Compare the python implementation with results from the original + # code. Create 2000 53-bit precision random floats. Compare only + # the last ten entries to show that the independent implementations + # are tracking. Here is the main() function needed to create the + # list of expected random numbers: + # void main(void){ + # int i; + # unsigned long init[4]={61731, 24903, 614, 42143}, length=4; + # init_by_array(init, length); + # for (i=0; i<2000; i++) { + # printf("%.15f ", genrand_res53()); + # if (i%5==4) printf("\n"); + # } + # } + expected = [0.45839803073713259, + 0.86057815201978782, + 0.92848331726782152, + 0.35932681119782461, + 0.081823493762449573, + 0.14332226470169329, + 0.084297823823520024, + 0.53814864671831453, + 0.089215024911993401, + 0.78486196105372907] + + self.gen.seed(61731L + (24903L<<32) + (614L<<64) + (42143L<<96)) + actual = self.randomlist(2000)[-10:] + for a, e in zip(actual, expected): + self.assertAlmostEqual(a,e,places=14) + + def test_strong_reference_implementation(self): + # Like test_referenceImplementation, but checks for exact bit-level + # equality. This should pass on any box where C double contains + # at least 53 bits of precision (the underlying algorithm suffers + # no rounding errors -- all results are exact). + from math import ldexp + + expected = [0x0eab3258d2231fL, + 0x1b89db315277a5L, + 0x1db622a5518016L, + 0x0b7f9af0d575bfL, + 0x029e4c4db82240L, + 0x04961892f5d673L, + 0x02b291598e4589L, + 0x11388382c15694L, + 0x02dad977c9e1feL, + 0x191d96d4d334c6L] + + self.gen.seed(61731L + (24903L<<32) + (614L<<64) + (42143L<<96)) + actual = self.randomlist(2000)[-10:] + for a, e in zip(actual, expected): + self.assertEqual(long(ldexp(a, 53)), e) + + def test_long_seed(self): + # This is most interesting to run in debug mode, just to make sure + # nothing blows up. Under the covers, a dynamically resized array + # is allocated, consuming space proportional to the number of bits + # in the seed. Unfortunately, that's a quadratic-time algorithm, + # so don't make this horribly big. + seed = (1L << (10000 * 8)) - 1 # about 10K bytes + self.gen.seed(seed) -# Ensure that the seed() method initializes all the hidden state. In -# particular, through 2.2.1 it failed to reset a piece of state used by -# (and only by) the .gauss() method. +class TestModule(unittest.TestCase): + def testMagicConstants(self): + self.assertAlmostEqual(random.NV_MAGICCONST, 1.71552776992141) + self.assertAlmostEqual(random.TWOPI, 6.28318530718) + self.assertAlmostEqual(random.LOG4, 1.38629436111989) + self.assertAlmostEqual(random.SG_MAGICCONST, 2.50407739677627) -for seed in 1, 12, 123, 1234, 12345, 123456, 654321: - for seeder in random.seed, random.whseed: - seeder(seed) - x1 = random.random() - y1 = random.gauss(0, 1) + def test__all__(self): + # tests validity but not completeness of the __all__ list + defined = dict.fromkeys(dir(random)) + for entry in random.__all__: + self.failUnless(entry in defined) - seeder(seed) - x2 = random.random() - y2 = random.gauss(0, 1) +def test_main(): + suite = unittest.TestSuite() + for testclass in (WichmannHill_TestBasicOps, + MersenneTwister_TestBasicOps, + TestModule): + suite.addTest(unittest.makeSuite(testclass)) + test_support.run_suite(suite) - test_support.vereq(x1, x2) - test_support.vereq(y1, y2) +if __name__ == "__main__": + test_main() |