Issues #7889, #9025 and #9379: Improvements to the random module.

author: Raymond Hettinger <python@rcn.com> 2010-09-07 04:44:52 (GMT)
committer: Raymond Hettinger <python@rcn.com> 2010-09-07 04:44:52 (GMT)
commit: 0515661314c4e5b9235e07b2c46b8f456c7fadc3 (patch)
tree: b8f06b2f3a9d5f9d99b91de02052f9bf38f6aff7
parent: 3051cc3a0d390ba153c07db9ce31a44700e332f2 (diff)
download: cpython-0515661314c4e5b9235e07b2c46b8f456c7fadc3.zip cpython-0515661314c4e5b9235e07b2c46b8f456c7fadc3.tar.gz cpython-0515661314c4e5b9235e07b2c46b8f456c7fadc3.tar.bz2
4 files changed, 46 insertions, 25 deletions
diff --git a/Doc/library/random.rst b/Doc/library/random.rst
index 270518c..c690eea 100644
--- a/Doc/library/random.rst
+++ b/Doc/library/random.rst
@@ -98,6 +98,13 @@ Functions for integers:
    equivalent to ``choice(range(start, stop, step))``, but doesn't actually build a
    range object.
 
+   The positional argument pattern matches that of :func:`range`.  Keyword arguments
+   should not be used because the function may use them in unexpected ways.
+
+   .. versionchanged:: 3.2
+      :meth:`randrange` is more sophisticated about producing equally distributed
+      values.  Formerly it used a style like ``int(random()*n)`` which could produce
+      slightly uneven distributions.
 
 .. function:: randint(a, b)
 
diff --git a/Lib/random.py b/Lib/random.py
index 8bfae1d..636d102 100644
--- a/Lib/random.py
+++ b/Lib/random.py
@@ -161,7 +161,7 @@ class Random(_random.Random):
 
 ## -------------------- integer methods  -------------------
 
-    def randrange(self, start, stop=None, step=1, int=int, maxwidth=1<<BPF):
+    def randrange(self, start, stop=None, step=1, int=int):
         """Choose a random item from range(start, stop[, step]).
 
         This fixes the problem with randint() which includes the
@@ -177,9 +177,7 @@ class Random(_random.Random):
             raise ValueError("non-integer arg 1 for randrange()")
         if stop is None:
             if istart > 0:
-                if istart >= maxwidth:
-                    return self._randbelow(istart)
-                return int(self.random() * istart)
+                return self._randbelow(istart)
             raise ValueError("empty range for randrange()")
 
         # stop argument supplied.
@@ -201,9 +199,7 @@ class Random(_random.Random):
             # a long, but we're supposed to return an int (for backward
             # compatibility).
 
-            if width >= maxwidth:
-                return int(istart + self._randbelow(width))
-            return int(istart + int(self.random()*width))
+            return int(istart + self._randbelow(width))
         if step == 1:
             raise ValueError("empty range for randrange() (%d,%d, %d)" % (istart, istop, width))
 
@@ -221,9 +217,7 @@ class Random(_random.Random):
         if n <= 0:
             raise ValueError("empty range for randrange()")
 
-        if n >= maxwidth:
-            return istart + istep*self._randbelow(n)
-        return istart + istep*int(self.random() * n)
+        return istart + istep*self._randbelow(n)
 
     def randint(self, a, b):
         """Return random integer in range [a, b], including both end points.
@@ -231,7 +225,7 @@ class Random(_random.Random):
 
         return self.randrange(a, b+1)
 
-    def _randbelow(self, n, _log=_log, int=int, _maxwidth=1<<BPF,
+    def _randbelow(self, n, int=int, _maxwidth=1<<BPF, type=type,
                    _Method=_MethodType, _BuiltinMethod=_BuiltinMethodType):
         """Return a random int in the range [0,n)
 
@@ -248,8 +242,8 @@ class Random(_random.Random):
             # has not been overridden or if a new getrandbits() was supplied.
             # This assures that the two methods correspond.
             if type(self.random) is _BuiltinMethod or type(getrandbits) is _Method:
-                k = int(1.00001 + _log(n-1, 2.0))   # 2**k > n-1 > 2**(k-2)
-                r = getrandbits(k)
+                k = n.bit_length()  # don't use (n-1) here because n can be 1
+                r = getrandbits(k)  # 0 <= r < 2**k
                 while r >= n:
                     r = getrandbits(k)
                 return r
@@ -262,7 +256,7 @@ class Random(_random.Random):
 
     def choice(self, seq):
         """Choose a random element from a non-empty sequence."""
-        return seq[int(self.random() * len(seq))]  # raises IndexError if seq is empty
+        return seq[self._randbelow(len(seq))]   # raises IndexError if seq is empty
 
     def shuffle(self, x, random=None, int=int):
         """x, random=random.random -> shuffle list x in place; return None.
@@ -272,11 +266,15 @@ class Random(_random.Random):
         """
 
         if random is None:
-            random = self.random
-        for i in reversed(range(1, len(x))):
-            # pick an element in x[:i+1] with which to exchange x[i]
-            j = int(random() * (i+1))
-            x[i], x[j] = x[j], x[i]
+            for i in reversed(range(1, len(x))):
+                # pick an element in x[:i+1] with which to exchange x[i]
+                j = self._randbelow(i+1)
+                x[i], x[j] = x[j], x[i]
+        else:
+            for i in reversed(range(1, len(x))):
+                # pick an element in x[:i+1] with which to exchange x[i]
+                j = int(random() * (i+1))
+                x[i], x[j] = x[j], x[i]
 
     def sample(self, population, k):
         """Chooses k unique random elements from a population sequence or set.
@@ -314,7 +312,6 @@ class Random(_random.Random):
         n = len(population)
         if not 0 <= k <= n:
             raise ValueError("Sample larger than population")
-        _int = int
         result = [None] * k
         setsize = 21        # size of a small set minus size of an empty list
         if k > 5:
@@ -323,16 +320,16 @@ class Random(_random.Random):
             # An n-length list is smaller than a k-length set
             pool = list(population)
             for i in range(k):         # invariant:  non-selected at [0,n-i)
-                j = _int(random() * (n-i))
+                j = self._randbelow(n-i)
                 result[i] = pool[j]
                 pool[j] = pool[n-i-1]   # move non-selected item into vacancy
         else:
             selected = set()
             selected_add = selected.add
             for i in range(k):
-                j = _int(random() * n)
+                j = self._randbelow(n)
                 while j in selected:
-                    j = _int(random() * n)
+                    j = self._randbelow(n)
                 selected_add(j)
                 result[i] = population[j]
         return result
diff --git a/Lib/test/test_random.py b/Lib/test/test_random.py
index 78cd4d5..f5c0030 100644
--- a/Lib/test/test_random.py
+++ b/Lib/test/test_random.py
@@ -121,7 +121,15 @@ class TestBasicOps(unittest.TestCase):
             f = open(support.findfile(file),"rb")
             r = pickle.load(f)
             f.close()
-            self.assertEqual(r.randrange(1000), value)
+            self.assertEqual(int(r.random()*1000), value)
+
+    def test_bug_9025(self):
+        # Had problem with an uneven distribution in int(n*random())
+        # Verify the fix by checking that distributions fall within expectations.
+        n = 100000
+        randrange = self.gen.randrange
+        k = sum(randrange(6755399441055744) % 3 == 2 for i in range(n))
+        self.assertTrue(0.30 < k/n < .37, (k/n))
 
 class SystemRandom_TestBasicOps(TestBasicOps):
     gen = random.SystemRandom()
diff --git a/Misc/NEWS b/Misc/NEWS
index a69064e..600c000 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -19,7 +19,16 @@ Library
     across versions and which parts are subject to change.
 
   * Update the seed() method to use all of the bits in a string
-    instead of just the hash value.
+    instead of just the hash value.  This makes better use of the
+    seed value and assures the seeding is platform independent.
+    Issue #7889.
+
+  * Improved the random()-->integer algorithm used in choice(),
+    shuffle(), sample(), randrange(), and randint().  Formerly, it
+    used int(n*random()) which has a slight bias whenever n is not
+    a power of two.  Issue #9025.
+
+  * Improved documentation of arguments to randrange().  Issue #9379.
 
 - collections.OrderedDict now supports a new method for repositioning
   keys to either end.
author	Raymond Hettinger <python@rcn.com>	2010-09-07 04:44:52 (GMT)
committer	Raymond Hettinger <python@rcn.com>	2010-09-07 04:44:52 (GMT)
commit	0515661314c4e5b9235e07b2c46b8f456c7fadc3 (patch)
tree	b8f06b2f3a9d5f9d99b91de02052f9bf38f6aff7
parent	3051cc3a0d390ba153c07db9ce31a44700e332f2 (diff)
download	cpython-0515661314c4e5b9235e07b2c46b8f456c7fadc3.zip cpython-0515661314c4e5b9235e07b2c46b8f456c7fadc3.tar.gz cpython-0515661314c4e5b9235e07b2c46b8f456c7fadc3.tar.bz2