diff options
author | Raymond Hettinger <python@rcn.com> | 2003-10-05 09:09:15 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2003-10-05 09:09:15 (GMT) |
commit | 2f726e9093381572b21edbfc42659ea89fbdf686 (patch) | |
tree | 9625e748344e1709fc69a8b98298efdd602b4cc2 | |
parent | 5c68ef04b7f0c0c1d342647a7db2d3f76637d3fa (diff) | |
download | cpython-2f726e9093381572b21edbfc42659ea89fbdf686.zip cpython-2f726e9093381572b21edbfc42659ea89fbdf686.tar.gz cpython-2f726e9093381572b21edbfc42659ea89fbdf686.tar.bz2 |
SF bug #812202: randint is always even
* Added C coded getrandbits(k) method that runs in linear time.
* Call the new method from randrange() for ranges >= 2**53.
* Adds a warning for generators not defining getrandbits() whenever they
have a call to randrange() with too large of a population.
-rw-r--r-- | Doc/lib/librandom.tex | 12 | ||||
-rw-r--r-- | Lib/random.py | 64 | ||||
-rw-r--r-- | Lib/test/test_random.py | 78 | ||||
-rw-r--r-- | Misc/NEWS | 8 | ||||
-rw-r--r-- | Modules/_randommodule.c | 44 |
5 files changed, 196 insertions, 10 deletions
diff --git a/Doc/lib/librandom.tex b/Doc/lib/librandom.tex index 5483ff2..2b686ec 100644 --- a/Doc/lib/librandom.tex +++ b/Doc/lib/librandom.tex @@ -41,6 +41,10 @@ Class \class{Random} can also be subclassed if you want to use a different basic generator of your own devising: in that case, override the \method{random()}, \method{seed()}, \method{getstate()}, \method{setstate()} and \method{jumpahead()} methods. +Optionally, a new generator can supply a \method{getrandombits()} +method --- this allows \method{randrange()} to produce selections +over an arbitrarily large range. +\versionadded[the \method{getrandombits()} method]{2.4} As an example of subclassing, the \module{random} module provides the \class{WichmannHill} class which implements an alternative generator @@ -92,6 +96,14 @@ Bookkeeping functions: separated by many steps.]{2.3} \end{funcdesc} +\begin{funcdesc}{getrandbits}{k} + Returns a python \class{long} int with \var{k} random bits. + This method is supplied with the MersenneTwister generator and some + other generators may also provide it as an optional part of the API. + When available, \method{getrandbits()} enables \method{randrange()} + to handle arbitrarily large ranges. + \versionadded{2.4} +\end{funcdesc} Functions for integers: diff --git a/Lib/random.py b/Lib/random.py index 2530c39..5916864 100644 --- a/Lib/random.py +++ b/Lib/random.py @@ -39,6 +39,8 @@ General notes on the underlying Mersenne Twister core generator: """ +from warnings import warn as _warn +from types import MethodType as _MethodType, BuiltinMethodType as _BuiltinMethodType from math import log as _log, exp as _exp, pi as _pi, e as _e from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin from math import floor as _floor @@ -47,12 +49,14 @@ __all__ = ["Random","seed","random","uniform","randint","choice","sample", "randrange","shuffle","normalvariate","lognormvariate", "expovariate","vonmisesvariate","gammavariate", "gauss","betavariate","paretovariate","weibullvariate", - "getstate","setstate","jumpahead"] + "getstate","setstate","jumpahead", "WichmannHill", "getrandbits", + "Random"] NV_MAGICCONST = 4 * _exp(-0.5)/_sqrt(2.0) TWOPI = 2.0*_pi LOG4 = _log(4.0) SG_MAGICCONST = 1.0 + _log(4.5) +BPF = 53 # Number of bits in a float # Translated by Guido van Rossum from C source provided by # Adrian Baddeley. Adapted by Raymond Hettinger for use with @@ -72,6 +76,8 @@ class Random(_random.Random): Class Random can also be subclassed if you want to use a different basic generator of your own devising: in that case, override the following methods: random(), seed(), getstate(), setstate() and jumpahead(). + Optionally, implement a getrandombits() method so that randrange() + can cover arbitrarily large ranges. """ @@ -131,12 +137,13 @@ class Random(_random.Random): ## -------------------- integer methods ------------------- - def randrange(self, start, stop=None, step=1, int=int, default=None): + def randrange(self, start, stop=None, step=1, int=int, default=None, + maxwidth=1L<<BPF): """Choose a random item from range(start, stop[, step]). This fixes the problem with randint() which includes the endpoint; in Python this is usually not what you want. - Do not supply the 'int' and 'default' arguments. + Do not supply the 'int', 'default', and 'maxwidth' arguments. """ # This code is a bit messy to make it fast for the @@ -146,6 +153,8 @@ class Random(_random.Random): raise ValueError, "non-integer arg 1 for randrange()" if stop is default: if istart > 0: + if istart >= maxwidth: + return self._randbelow(istart) return int(self.random() * istart) raise ValueError, "empty range for randrange()" @@ -153,36 +162,43 @@ class Random(_random.Random): istop = int(stop) if istop != stop: raise ValueError, "non-integer stop for randrange()" - if step == 1 and istart < istop: + width = istop - istart + if step == 1 and width > 0: # Note that - # int(istart + self.random()*(istop - istart)) + # int(istart + self.random()*width) # instead would be incorrect. For example, consider istart # = -2 and istop = 0. Then the guts would be in # -2.0 to 0.0 exclusive on both ends (ignoring that random() # might return 0.0), and because int() truncates toward 0, the # final result would be -1 or 0 (instead of -2 or -1). - # istart + int(self.random()*(istop - istart)) + # istart + int(self.random()*width) # would also be incorrect, for a subtler reason: the RHS # can return a long, and then randrange() would also return # a long, but we're supposed to return an int (for backward # compatibility). - return int(istart + int(self.random()*(istop - istart))) + + if width >= maxwidth: + return int(istart + self._randbelow(width)) + return int(istart + int(self.random()*width)) if step == 1: - raise ValueError, "empty range for randrange()" + raise ValueError, "empty range for randrange() (%d,%d, %d)" % (istart, istop, width) # Non-unit step argument supplied. istep = int(step) if istep != step: raise ValueError, "non-integer step for randrange()" if istep > 0: - n = (istop - istart + istep - 1) / istep + n = (width + istep - 1) / istep elif istep < 0: - n = (istop - istart + istep + 1) / istep + n = (width + istep + 1) / istep else: raise ValueError, "zero step for randrange()" if n <= 0: raise ValueError, "empty range for randrange()" + + if n >= maxwidth: + return istart + self._randbelow(n) return istart + istep*int(self.random() * n) def randint(self, a, b): @@ -191,6 +207,33 @@ class Random(_random.Random): return self.randrange(a, b+1) + def _randbelow(self, n, _log=_log, int=int, _maxwidth=1L<<BPF, + _Method=_MethodType, _BuiltinMethod=_BuiltinMethodType): + """Return a random int in the range [0,n) + + Handles the case where n has more bits than returned + by a single call to the underlying generator. + """ + + try: + getrandbits = self.getrandbits + except AttributeError: + pass + else: + # Only call self.getrandbits if the original random() builtin method + # has not been overridden or if a new getrandbits() was supplied. + # This assures that the two methods correspond. + if type(self.random) is _BuiltinMethod or type(getrandbits) is _Method: + k = int(1.00001 + _log(n-1, 2.0)) # 2**k > n-1 > 2**(k-2) + r = getrandbits(k) + while r >= n: + r = getrandbits(k) + return r + if n >= _maxwidth: + _warn("Underlying random() generator does not supply \n" + "enough bits to choose from a population range this large") + return int(self.random() * n) + ## -------------------- sequence methods ------------------- def choice(self, seq): @@ -757,6 +800,7 @@ weibullvariate = _inst.weibullvariate getstate = _inst.getstate setstate = _inst.setstate jumpahead = _inst.jumpahead +getrandbits = _inst.getrandbits if __name__ == '__main__': _test() diff --git a/Lib/test/test_random.py b/Lib/test/test_random.py index fbd4184..3796c3b 100644 --- a/Lib/test/test_random.py +++ b/Lib/test/test_random.py @@ -4,6 +4,7 @@ import unittest import random import time import pickle +import warnings from math import log, exp, sqrt, pi from sets import Set from test import test_support @@ -153,6 +154,13 @@ class WichmannHill_TestBasicOps(TestBasicOps): self.assertEqual(x1, x2) self.assertEqual(y1, y2) + def test_bigrand(self): + # Verify warnings are raised when randrange is too large for random() + oldfilters = warnings.filters[:] + warnings.filterwarnings("error", "Underlying random") + self.assertRaises(UserWarning, self.gen.randrange, 2**60) + warnings.filters[:] = oldfilters + class MersenneTwister_TestBasicOps(TestBasicOps): gen = random.Random() @@ -219,6 +227,76 @@ class MersenneTwister_TestBasicOps(TestBasicOps): seed = (1L << (10000 * 8)) - 1 # about 10K bytes self.gen.seed(seed) + def test_53_bits_per_float(self): + # This should pass whenever a C double has 53 bit precision. + span = 2 ** 53 + cum = 0 + for i in xrange(100): + cum |= int(self.gen.random() * span) + self.assertEqual(cum, span-1) + + def test_bigrand(self): + # The randrange routine should build-up the required number of bits + # in stages so that all bit positions are active. + span = 2 ** 500 + cum = 0 + for i in xrange(100): + r = self.gen.randrange(span) + self.assert_(0 <= r < span) + cum |= r + self.assertEqual(cum, span-1) + + def test_bigrand_ranges(self): + for i in [40,80, 160, 200, 211, 250, 375, 512, 550]: + start = self.gen.randrange(2 ** i) + stop = self.gen.randrange(2 ** (i-2)) + if stop <= start: + return + self.assert_(start <= self.gen.randrange(start, stop) < stop) + + def test_rangelimits(self): + for start, stop in [(-2,0), (-(2**60)-2,-(2**60)), (2**60,2**60+2)]: + self.assertEqual(Set(range(start,stop)), + Set([self.gen.randrange(start,stop) for i in xrange(100)])) + + def test_genrandbits(self): + # Verify cross-platform repeatability + self.gen.seed(1234567) + self.assertEqual(self.gen.getrandbits(100), + 97904845777343510404718956115L) + # Verify ranges + for k in xrange(1, 1000): + self.assert_(0 <= self.gen.getrandbits(k) < 2**k) + + # Verify all bits active + getbits = self.gen.getrandbits + for span in [1, 2, 3, 4, 31, 32, 32, 52, 53, 54, 119, 127, 128, 129]: + cum = 0 + for i in xrange(100): + cum |= getbits(span) + self.assertEqual(cum, 2**span-1) + + def test_randbelow_logic(self, _log=log, int=int): + # check bitcount transition points: 2**i and 2**(i+1)-1 + # show that: k = int(1.001 + _log(n, 2)) + # is equal to or one greater than the number of bits in n + for i in xrange(1, 1000): + n = 1L << i # check an exact power of two + numbits = i+1 + k = int(1.00001 + _log(n, 2)) + self.assertEqual(k, numbits) + self.assert_(n == 2**(k-1)) + + n += n - 1 # check 1 below the next power of two + k = int(1.00001 + _log(n, 2)) + self.assert_(k in [numbits, numbits+1]) + self.assert_(2**k > n > 2**(k-2)) + + n -= n >> 15 # check a little farther below the next power of two + k = int(1.00001 + _log(n, 2)) + self.assertEqual(k, numbits) # note the stronger assertion + self.assert_(2**k > n > 2**(k-1)) # note the stronger assertion + _gammacoeff = (0.9999999999995183, 676.5203681218835, -1259.139216722289, 771.3234287757674, -176.6150291498386, 12.50734324009056, -0.1385710331296526, 0.9934937113930748e-05, 0.1659470187408462e-06) @@ -84,6 +84,14 @@ Library seed. Modified to match Py2.2 behavior and use fractional seconds so that successive runs are more likely to produce different sequences. +- random.Random has a new method, getrandbits(k), which returns an int + with k random bits. This method is now an optional part of the API + for user defined generators. Any generator that defines genrandbits() + can now use randrange() for ranges with a length >= 2**53. Formerly, + randrange would return only even numbers for ranges that large (see + SF bug #812202). Generators that do not define genrandbits() now + issue a warning when randrange() is called with a range that large. + - itertools.izip() with no arguments now returns an empty iterator instead of raising a TypeError exception. diff --git a/Modules/_randommodule.c b/Modules/_randommodule.c index 4a87a8e..1189036 100644 --- a/Modules/_randommodule.c +++ b/Modules/_randommodule.c @@ -435,6 +435,47 @@ random_jumpahead(RandomObject *self, PyObject *n) } static PyObject * +random_getrandbits(RandomObject *self, PyObject *args) +{ + int k, i, bytes; + unsigned long r; + unsigned char *bytearray; + PyObject *result; + + if (!PyArg_ParseTuple(args, "i:getrandbits", &k)) + return NULL; + + if (k <= 0) { + PyErr_SetString(PyExc_ValueError, + "number of bits must be greater than zero"); + return NULL; + } + + bytes = ((k - 1) / 32 + 1) * 4; + bytearray = (unsigned char *)PyMem_Malloc(bytes); + if (bytearray == NULL) { + PyErr_NoMemory(); + return NULL; + } + + /* Fill-out whole words, byte-by-byte to avoid endianness issues */ + for (i=0 ; i<bytes ; i+=4, k-=32) { + r = genrand_int32(self); + if (k < 32) + r >>= (32 - k); + bytearray[i+0] = (unsigned char)r; + bytearray[i+1] = (unsigned char)(r >> 8); + bytearray[i+2] = (unsigned char)(r >> 16); + bytearray[i+3] = (unsigned char)(r >> 24); + } + + /* little endian order to match bytearray assignment order */ + result = _PyLong_FromByteArray(bytearray, bytes, 1, 0); + PyMem_Free(bytearray); + return result; +} + +static PyObject * random_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { RandomObject *self; @@ -464,6 +505,9 @@ static PyMethodDef random_methods[] = { {"jumpahead", (PyCFunction)random_jumpahead, METH_O, PyDoc_STR("jumpahead(int) -> None. Create new state from " "existing state and integer.")}, + {"getrandbits", (PyCFunction)random_getrandbits, METH_VARARGS, + PyDoc_STR("getrandbits(k) -> x. Generates a long int with " + "k random bits.")}, {NULL, NULL} /* sentinel */ }; |