diff options
author | Raymond Hettinger <python@rcn.com> | 2002-12-29 23:03:38 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2002-12-29 23:03:38 (GMT) |
commit | 40f621709286a7a0f7e6f260c0fd020d0fac0de0 (patch) | |
tree | bd602fee432a253a0f454fc696d14734f18dd915 /Lib/random.py | |
parent | 5e65ce671c3d3113035dd7783b79d395c9d71b3d (diff) | |
download | cpython-40f621709286a7a0f7e6f260c0fd020d0fac0de0.zip cpython-40f621709286a7a0f7e6f260c0fd020d0fac0de0.tar.gz cpython-40f621709286a7a0f7e6f260c0fd020d0fac0de0.tar.bz2 |
SF patch 658251: Install a C implementation of the Mersenne Twister as the
core generator for random.py.
Diffstat (limited to 'Lib/random.py')
-rw-r--r-- | Lib/random.py | 403 |
1 files changed, 178 insertions, 225 deletions
diff --git a/Lib/random.py b/Lib/random.py index 057571a..8462061 100644 --- a/Lib/random.py +++ b/Lib/random.py @@ -18,61 +18,26 @@ negative exponential gamma beta + pareto + Weibull distributions on the circle (angles 0 to 2pi) --------------------------------------------- circular uniform von Mises -Translated from anonymously contributed C/C++ source. - -Multi-threading note: the random number generator used here is not thread- -safe; it is possible that two calls return the same random value. However, -you can instantiate a different instance of Random() in each thread to get -generators that don't share state, then use .setstate() and .jumpahead() to -move the generators to disjoint segments of the full period. For example, - -def create_generators(num, delta, firstseed=None): - ""\"Return list of num distinct generators. - Each generator has its own unique segment of delta elements from - Random.random()'s full period. - Seed the first generator with optional arg firstseed (default is - None, to seed from current time). - ""\" - - from random import Random - g = Random(firstseed) - result = [g] - for i in range(num - 1): - laststate = g.getstate() - g = Random() - g.setstate(laststate) - g.jumpahead(delta) - result.append(g) - return result - -gens = create_generators(10, 1000000) - -That creates 10 distinct generators, which can be passed out to 10 distinct -threads. The generators don't share state so can be called safely in -parallel. So long as no thread calls its g.random() more than a million -times (the second argument to create_generators), the sequences seen by -each thread will not overlap. - -The period of the underlying Wichmann-Hill generator is 6,953,607,871,644, -and that limits how far this technique can be pushed. - -Just for fun, note that since we know the period, .jumpahead() can also be -used to "move backward in time": - ->>> g = Random(42) # arbitrary ->>> g.random() -0.25420336316883324 ->>> g.jumpahead(6953607871644L - 1) # move *back* one ->>> g.random() -0.25420336316883324 +General notes on the underlying Mersenne Twister core generator: + +* The period is 2**19937-1. +* It is one of the most extensively tested generators in existence +* Without a direct way to compute N steps forward, the + semantics of jumpahead(n) are weakened to simply jump + to another distant state and rely on the large period + to avoid overlapping sequences. +* The random() method is implemented in C, executes in + a single Python step, and is, therefore, threadsafe. + """ -# XXX The docstring sucks. from math import log as _log, exp as _exp, pi as _pi, e as _e from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin @@ -82,32 +47,20 @@ __all__ = ["Random","seed","random","uniform","randint","choice","sample", "randrange","shuffle","normalvariate","lognormvariate", "cunifvariate","expovariate","vonmisesvariate","gammavariate", "stdgamma","gauss","betavariate","paretovariate","weibullvariate", - "getstate","setstate","jumpahead","whseed"] - -def _verify(name, computed, expected): - if abs(computed - expected) > 1e-7: - raise ValueError( - "computed value for %s deviates too much " - "(computed %g, expected %g)" % (name, computed, expected)) + "getstate","setstate","jumpahead"] NV_MAGICCONST = 4 * _exp(-0.5)/_sqrt(2.0) -_verify('NV_MAGICCONST', NV_MAGICCONST, 1.71552776992141) - TWOPI = 2.0*_pi -_verify('TWOPI', TWOPI, 6.28318530718) - LOG4 = _log(4.0) -_verify('LOG4', LOG4, 1.38629436111989) - SG_MAGICCONST = 1.0 + _log(4.5) -_verify('SG_MAGICCONST', SG_MAGICCONST, 2.50407739677627) - -del _verify # Translated by Guido van Rossum from C source provided by -# Adrian Baddeley. +# Adrian Baddeley. Adapted by Raymond Hettinger for use with +# the Mersenne Twister core generator. -class Random: +from _random import Random as CoreGenerator + +class Random(CoreGenerator): """Random number generator base class used by bound module functions. Used to instantiate instances of Random to get generators that don't @@ -122,7 +75,7 @@ class Random: """ - VERSION = 1 # used by getstate/setstate + VERSION = 2 # used by getstate/setstate def __init__(self, x=None): """Initialize an instance. @@ -131,12 +84,7 @@ class Random: """ self.seed(x) - -## -------------------- core generator ------------------- - - # Specific to Wichmann-Hill generator. Subclasses wishing to use a - # different core generator should override the seed(), random(), - # getstate(), setstate() and jumpahead() methods. + self.gauss_next = None def seed(self, a=None): """Initialize internal state from hashable object. @@ -144,141 +92,26 @@ class Random: None or no argument seeds from current time. If a is not None or an int or long, hash(a) is used instead. - - If a is an int or long, a is used directly. Distinct values between - 0 and 27814431486575L inclusive are guaranteed to yield distinct - internal states (this guarantee is specific to the default - Wichmann-Hill generator). """ - if a is None: - # Initialize from current time - import time - a = long(time.time() * 256) - - if type(a) not in (type(3), type(3L)): - a = hash(a) - - a, x = divmod(a, 30268) - a, y = divmod(a, 30306) - a, z = divmod(a, 30322) - self._seed = int(x)+1, int(y)+1, int(z)+1 - + CoreGenerator.seed(self, a) self.gauss_next = None - def random(self): - """Get the next random number in the range [0.0, 1.0).""" - - # Wichman-Hill random number generator. - # - # Wichmann, B. A. & Hill, I. D. (1982) - # Algorithm AS 183: - # An efficient and portable pseudo-random number generator - # Applied Statistics 31 (1982) 188-190 - # - # see also: - # Correction to Algorithm AS 183 - # Applied Statistics 33 (1984) 123 - # - # McLeod, A. I. (1985) - # A remark on Algorithm AS 183 - # Applied Statistics 34 (1985),198-200 - - # This part is thread-unsafe: - # BEGIN CRITICAL SECTION - x, y, z = self._seed - x = (171 * x) % 30269 - y = (172 * y) % 30307 - z = (170 * z) % 30323 - self._seed = x, y, z - # END CRITICAL SECTION - - # Note: on a platform using IEEE-754 double arithmetic, this can - # never return 0.0 (asserted by Tim; proof too long for a comment). - return (x/30269.0 + y/30307.0 + z/30323.0) % 1.0 - def getstate(self): """Return internal state; can be passed to setstate() later.""" - return self.VERSION, self._seed, self.gauss_next + return self.VERSION, CoreGenerator.getstate(self), self.gauss_next def setstate(self, state): """Restore internal state from object returned by getstate().""" version = state[0] - if version == 1: - version, self._seed, self.gauss_next = state + if version == 2: + version, internalstate, self.gauss_next = state + CoreGenerator.setstate(self, internalstate) else: raise ValueError("state with version %s passed to " "Random.setstate() of version %s" % (version, self.VERSION)) - def jumpahead(self, n): - """Act as if n calls to random() were made, but quickly. - - n is an int, greater than or equal to 0. - - Example use: If you have 2 threads and know that each will - consume no more than a million random numbers, create two Random - objects r1 and r2, then do - r2.setstate(r1.getstate()) - r2.jumpahead(1000000) - Then r1 and r2 will use guaranteed-disjoint segments of the full - period. - """ - - if not n >= 0: - raise ValueError("n must be >= 0") - x, y, z = self._seed - x = int(x * pow(171, n, 30269)) % 30269 - y = int(y * pow(172, n, 30307)) % 30307 - z = int(z * pow(170, n, 30323)) % 30323 - self._seed = x, y, z - - def __whseed(self, x=0, y=0, z=0): - """Set the Wichmann-Hill seed from (x, y, z). - - These must be integers in the range [0, 256). - """ - - if not type(x) == type(y) == type(z) == int: - raise TypeError('seeds must be integers') - if not (0 <= x < 256 and 0 <= y < 256 and 0 <= z < 256): - raise ValueError('seeds must be in range(0, 256)') - if 0 == x == y == z: - # Initialize from current time - import time - t = long(time.time() * 256) - t = int((t&0xffffff) ^ (t>>24)) - t, x = divmod(t, 256) - t, y = divmod(t, 256) - t, z = divmod(t, 256) - # Zero is a poor seed, so substitute 1 - self._seed = (x or 1, y or 1, z or 1) - - self.gauss_next = None - - def whseed(self, a=None): - """Seed from hashable object's hash code. - - None or no argument seeds from current time. It is not guaranteed - that objects with distinct hash codes lead to distinct internal - states. - - This is obsolete, provided for compatibility with the seed routine - used prior to Python 2.1. Use the .seed() method instead. - """ - - if a is None: - self.__whseed() - return - a = hash(a) - a, x = divmod(a, 256) - a, y = divmod(a, 256) - a, z = divmod(a, 256) - x = (x + a) % 256 or 1 - y = (y + a) % 256 or 1 - z = (z + a) % 256 or 1 - self.__whseed(x, y, z) - ## ---- Methods below this point do not need to be overridden when ## ---- subclassing for the purpose of using a different core generator. @@ -744,6 +577,153 @@ class Random: u = self.random() return alpha * pow(-_log(u), 1.0/beta) +## -------------------- Wichmann-Hill ------------------- + +class WichmannHill(Random): + + VERSION = 1 # used by getstate/setstate + + def seed(self, a=None): + """Initialize internal state from hashable object. + + None or no argument seeds from current time. + + If a is not None or an int or long, hash(a) is used instead. + + If a is an int or long, a is used directly. Distinct values between + 0 and 27814431486575L inclusive are guaranteed to yield distinct + internal states (this guarantee is specific to the default + Wichmann-Hill generator). + """ + + if a is None: + # Initialize from current time + import time + a = long(time.time() * 256) + + if not isinstance(a, (int, long)): + a = hash(a) + + a, x = divmod(a, 30268) + a, y = divmod(a, 30306) + a, z = divmod(a, 30322) + self._seed = int(x)+1, int(y)+1, int(z)+1 + + self.gauss_next = None + + def random(self): + """Get the next random number in the range [0.0, 1.0).""" + + # Wichman-Hill random number generator. + # + # Wichmann, B. A. & Hill, I. D. (1982) + # Algorithm AS 183: + # An efficient and portable pseudo-random number generator + # Applied Statistics 31 (1982) 188-190 + # + # see also: + # Correction to Algorithm AS 183 + # Applied Statistics 33 (1984) 123 + # + # McLeod, A. I. (1985) + # A remark on Algorithm AS 183 + # Applied Statistics 34 (1985),198-200 + + # This part is thread-unsafe: + # BEGIN CRITICAL SECTION + x, y, z = self._seed + x = (171 * x) % 30269 + y = (172 * y) % 30307 + z = (170 * z) % 30323 + self._seed = x, y, z + # END CRITICAL SECTION + + # Note: on a platform using IEEE-754 double arithmetic, this can + # never return 0.0 (asserted by Tim; proof too long for a comment). + return (x/30269.0 + y/30307.0 + z/30323.0) % 1.0 + + def getstate(self): + """Return internal state; can be passed to setstate() later.""" + return self.VERSION, self._seed, self.gauss_next + + def setstate(self, state): + """Restore internal state from object returned by getstate().""" + version = state[0] + if version == 1: + version, self._seed, self.gauss_next = state + else: + raise ValueError("state with version %s passed to " + "Random.setstate() of version %s" % + (version, self.VERSION)) + + def jumpahead(self, n): + """Act as if n calls to random() were made, but quickly. + + n is an int, greater than or equal to 0. + + Example use: If you have 2 threads and know that each will + consume no more than a million random numbers, create two Random + objects r1 and r2, then do + r2.setstate(r1.getstate()) + r2.jumpahead(1000000) + Then r1 and r2 will use guaranteed-disjoint segments of the full + period. + """ + + if not n >= 0: + raise ValueError("n must be >= 0") + x, y, z = self._seed + x = int(x * pow(171, n, 30269)) % 30269 + y = int(y * pow(172, n, 30307)) % 30307 + z = int(z * pow(170, n, 30323)) % 30323 + self._seed = x, y, z + + def __whseed(self, x=0, y=0, z=0): + """Set the Wichmann-Hill seed from (x, y, z). + + These must be integers in the range [0, 256). + """ + + if not type(x) == type(y) == type(z) == int: + raise TypeError('seeds must be integers') + if not (0 <= x < 256 and 0 <= y < 256 and 0 <= z < 256): + raise ValueError('seeds must be in range(0, 256)') + if 0 == x == y == z: + # Initialize from current time + import time + t = long(time.time() * 256) + t = int((t&0xffffff) ^ (t>>24)) + t, x = divmod(t, 256) + t, y = divmod(t, 256) + t, z = divmod(t, 256) + # Zero is a poor seed, so substitute 1 + self._seed = (x or 1, y or 1, z or 1) + + self.gauss_next = None + + def whseed(self, a=None): + """Seed from hashable object's hash code. + + None or no argument seeds from current time. It is not guaranteed + that objects with distinct hash codes lead to distinct internal + states. + + This is obsolete, provided for compatibility with the seed routine + used prior to Python 2.1. Use the .seed() method instead. + """ + + if a is None: + self.__whseed() + return + a = hash(a) + a, x = divmod(a, 256) + a, y = divmod(a, 256) + a, z = divmod(a, 256) + x = (x + a) % 256 or 1 + y = (y + a) % 256 or 1 + z = (z + a) % 256 or 1 + self.__whseed(x, y, z) + ## -------------------- test program -------------------- def _test_generator(n, funccall): @@ -768,25 +748,11 @@ def _test_generator(n, funccall): print 'avg %g, stddev %g, min %g, max %g' % \ (avg, stddev, smallest, largest) -def _test_sample(n): - # For the entire allowable range of 0 <= k <= n, validate that - # the sample is of the correct length and contains only unique items - population = xrange(n) - for k in xrange(n+1): - s = sample(population, k) - uniq = dict.fromkeys(s) - assert len(uniq) == len(s) == k - assert None not in uniq - def _sample_generator(n, k): # Return a fixed element from the sample. Validates random ordering. return sample(xrange(n), k)[k//2] def _test(N=2000): - print 'TWOPI =', TWOPI - print 'LOG4 =', LOG4 - print 'NV_MAGICCONST =', NV_MAGICCONST - print 'SG_MAGICCONST =', SG_MAGICCONST _test_generator(N, 'random()') _test_generator(N, 'normalvariate(0.0, 1.0)') _test_generator(N, 'lognormvariate(0.0, 1.0)') @@ -808,25 +774,13 @@ def _test(N=2000): _test_generator(N, 'weibullvariate(1.0, 1.0)') _test_generator(N, '_sample_generator(50, 5)') # expected s.d.: 14.4 _test_generator(N, '_sample_generator(50, 45)') # expected s.d.: 14.4 - _test_sample(500) - - # Test jumpahead. - s = getstate() - jumpahead(N) - r1 = random() - # now do it the slow way - setstate(s) - for i in range(N): - random() - r2 = random() - if r1 != r2: - raise ValueError("jumpahead test failed " + `(N, r1, r2)`) # Create one instance, seeded from current time, and export its methods -# as module-level functions. The functions are not threadsafe, and state -# is shared across all uses (both in the user's code and in the Python -# libraries), but that's fine for most programs and is easier for the -# casual user than making them instantiate their own Random() instance. +# as module-level functions. The functions share state across all uses +#(both in the user's code and in the Python libraries), but that's fine +# for most programs and is easier for the casual user than making them +# instantiate their own Random() instance. + _inst = Random() seed = _inst.seed random = _inst.random @@ -850,7 +804,6 @@ weibullvariate = _inst.weibullvariate getstate = _inst.getstate setstate = _inst.setstate jumpahead = _inst.jumpahead -whseed = _inst.whseed if __name__ == '__main__': _test() |