summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Raymond Hettinger <python@rcn.com> 2010-09-07 04:44:52 (GMT)
committer: Raymond Hettinger <python@rcn.com> 2010-09-07 04:44:52 (GMT)
commit: 0515661314c4e5b9235e07b2c46b8f456c7fadc3 (patch)
tree: b8f06b2f3a9d5f9d99b91de02052f9bf38f6aff7
parent: 3051cc3a0d390ba153c07db9ce31a44700e332f2 (diff)
download: cpython-0515661314c4e5b9235e07b2c46b8f456c7fadc3.zip
cpython-0515661314c4e5b9235e07b2c46b8f456c7fadc3.tar.gz
cpython-0515661314c4e5b9235e07b2c46b8f456c7fadc3.tar.bz2
Issues #7889, #9025 and #9379: Improvements to the random module.
-rw-r--r-- Doc/library/random.rst | 7
-rw-r--r-- Lib/random.py | 43
-rw-r--r-- Lib/test/test_random.py | 10
-rw-r--r-- Misc/NEWS | 11
4 files changed, 46 insertions, 25 deletions
diff --git a/Doc/library/random.rst b/Doc/library/random.rst
index 270518c..c690eea 100644
--- a/Doc/library/random.rst
+++ b/Doc/library/random.rst
@@ -98,6 +98,13 @@ Functions for integers:
equivalent to ``choice(range(start, stop, step))``, but doesn't actually build a
range object.
+ The positional argument pattern matches that of :func:`range`. Keyword arguments
+ should not be used because the function may use them in unexpected ways.
+
+ .. versionchanged:: 3.2
+ :meth:`randrange` is more sophisticated about producing equally distributed
+ values. Formerly it used a style like ``int(random()*n)`` which could produce
+ slightly uneven distributions.
.. function:: randint(a, b)
diff --git a/Lib/random.py b/Lib/random.py
index 8bfae1d..636d102 100644
--- a/Lib/random.py
+++ b/Lib/random.py
@@ -161,7 +161,7 @@ class Random(_random.Random):
## -------------------- integer methods -------------------
- def randrange(self, start, stop=None, step=1, int=int, maxwidth=1<<BPF):
+ def randrange(self, start, stop=None, step=1, int=int):
"""Choose a random item from range(start, stop[, step]).
This fixes the problem with randint() which includes the
@@ -177,9 +177,7 @@ class Random(_random.Random):
raise ValueError("non-integer arg 1 for randrange()")
if stop is None:
if istart > 0:
- if istart >= maxwidth:
- return self._randbelow(istart)
- return int(self.random() * istart)
+ return self._randbelow(istart)
raise ValueError("empty range for randrange()")
# stop argument supplied.
@@ -201,9 +199,7 @@ class Random(_random.Random):
# a long, but we're supposed to return an int (for backward
# compatibility).
- if width >= maxwidth:
- return int(istart + self._randbelow(width))
- return int(istart + int(self.random()*width))
+ return int(istart + self._randbelow(width))
if step == 1:
raise ValueError("empty range for randrange() (%d,%d, %d)" % (istart, istop, width))
@@ -221,9 +217,7 @@ class Random(_random.Random):
if n <= 0:
raise ValueError("empty range for randrange()")
- if n >= maxwidth:
- return istart + istep*self._randbelow(n)
- return istart + istep*int(self.random() * n)
+ return istart + istep*self._randbelow(n)
def randint(self, a, b):
"""Return random integer in range [a, b], including both end points.
@@ -231,7 +225,7 @@ class Random(_random.Random):
return self.randrange(a, b+1)
- def _randbelow(self, n, _log=_log, int=int, _maxwidth=1<<BPF,
+ def _randbelow(self, n, int=int, _maxwidth=1<<BPF, type=type,
_Method=_MethodType, _BuiltinMethod=_BuiltinMethodType):
"""Return a random int in the range [0,n)
@@ -248,8 +242,8 @@ class Random(_random.Random):
# has not been overridden or if a new getrandbits() was supplied.
# This assures that the two methods correspond.
if type(self.random) is _BuiltinMethod or type(getrandbits) is _Method:
- k = int(1.00001 + _log(n-1, 2.0)) # 2**k > n-1 > 2**(k-2)
- r = getrandbits(k)
+ k = n.bit_length() # don't use (n-1) here because n can be 1
+ r = getrandbits(k) # 0 <= r < 2**k
while r >= n:
r = getrandbits(k)
return r
@@ -262,7 +256,7 @@ class Random(_random.Random):
def choice(self, seq):
"""Choose a random element from a non-empty sequence."""
- return seq[int(self.random() * len(seq))] # raises IndexError if seq is empty
+ return seq[self._randbelow(len(seq))] # raises IndexError if seq is empty
def shuffle(self, x, random=None, int=int):
"""x, random=random.random -> shuffle list x in place; return None.
@@ -272,11 +266,15 @@ class Random(_random.Random):
"""
if random is None:
- random = self.random
- for i in reversed(range(1, len(x))):
- # pick an element in x[:i+1] with which to exchange x[i]
- j = int(random() * (i+1))
- x[i], x[j] = x[j], x[i]
+ for i in reversed(range(1, len(x))):
+ # pick an element in x[:i+1] with which to exchange x[i]
+ j = self._randbelow(i+1)
+ x[i], x[j] = x[j], x[i]
+ else:
+ for i in reversed(range(1, len(x))):
+ # pick an element in x[:i+1] with which to exchange x[i]
+ j = int(random() * (i+1))
+ x[i], x[j] = x[j], x[i]
def sample(self, population, k):
"""Chooses k unique random elements from a population sequence or set.
@@ -314,7 +312,6 @@ class Random(_random.Random):
n = len(population)
if not 0 <= k <= n:
raise ValueError("Sample larger than population")
- _int = int
result = [None] * k
setsize = 21 # size of a small set minus size of an empty list
if k > 5:
@@ -323,16 +320,16 @@ class Random(_random.Random):
# An n-length list is smaller than a k-length set
pool = list(population)
for i in range(k): # invariant: non-selected at [0,n-i)
- j = _int(random() * (n-i))
+ j = self._randbelow(n-i)
result[i] = pool[j]
pool[j] = pool[n-i-1] # move non-selected item into vacancy
else:
selected = set()
selected_add = selected.add
for i in range(k):
- j = _int(random() * n)
+ j = self._randbelow(n)
while j in selected:
- j = _int(random() * n)
+ j = self._randbelow(n)
selected_add(j)
result[i] = population[j]
return result
diff --git a/Lib/test/test_random.py b/Lib/test/test_random.py
index 78cd4d5..f5c0030 100644
--- a/Lib/test/test_random.py
+++ b/Lib/test/test_random.py
@@ -121,7 +121,15 @@ class TestBasicOps(unittest.TestCase):
f = open(support.findfile(file),"rb")
r = pickle.load(f)
f.close()
- self.assertEqual(r.randrange(1000), value)
+ self.assertEqual(int(r.random()*1000), value)
+
+ def test_bug_9025(self):
+ # Had problem with an uneven distribution in int(n*random())
+ # Verify the fix by checking that distributions fall within expectations.
+ n = 100000
+ randrange = self.gen.randrange
+ k = sum(randrange(6755399441055744) % 3 == 2 for i in range(n))
+ self.assertTrue(0.30 < k/n < .37, (k/n))
class SystemRandom_TestBasicOps(TestBasicOps):
gen = random.SystemRandom()
diff --git a/Misc/NEWS b/Misc/NEWS
index a69064e..600c000 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -19,7 +19,16 @@ Library
across versions and which parts are subject to change.
* Update the seed() method to use all of the bits in a string
- instead of just the hash value.
+ instead of just the hash value. This makes better use of the
+ seed value and assures the seeding is platform independent.
+ Issue #7889.
+
+ * Improved the random()-->integer algorithm used in choice(),
+ shuffle(), sample(), randrange(), and randint(). Formerly, it
+ used int(n*random()) which has a slight bias whenever n is not
+ a power of two. Issue #9025.
+
+ * Improved documentation of arguments to randrange(). Issue #9379.
- collections.OrderedDict now supports a new method for repositioning
keys to either end.