summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGeorg Brandl <georg@python.org>2012-02-20 20:31:46 (GMT)
committerGeorg Brandl <georg@python.org>2012-02-20 20:31:46 (GMT)
commit09a7c72cad48f568e0781541167cf9ea6a3f0760 (patch)
treed925894bfc3662e33c03ff7b6b2c5e9e38749b73
parentfee358b0df547e9451cfb0b3d25980e6cc7177cc (diff)
parent2daf6ae2495c862adf8bc717bfe9964081ea0b10 (diff)
downloadcpython-09a7c72cad48f568e0781541167cf9ea6a3f0760.zip
cpython-09a7c72cad48f568e0781541167cf9ea6a3f0760.tar.gz
cpython-09a7c72cad48f568e0781541167cf9ea6a3f0760.tar.bz2
Merge from 3.1: Issue #13703: add a way to randomize the hash values of basic types (str, bytes, datetime)
in order to make algorithmic complexity attacks on (e.g.) web apps much more complicated. The environment variable PYTHONHASHSEED and the new command line flag -R control this behavior.
-rw-r--r--Doc/library/sys.rst4
-rw-r--r--Doc/reference/datamodel.rst2
-rw-r--r--Doc/using/cmdline.rst47
-rw-r--r--Include/object.h6
-rw-r--r--Include/pydebug.h1
-rw-r--r--Include/pythonrun.h2
-rw-r--r--Lib/json/__init__.py4
-rw-r--r--Lib/os.py17
-rw-r--r--Lib/test/mapping_tests.py2
-rwxr-xr-xLib/test/regrtest.py5
-rw-r--r--Lib/test/script_helper.py7
-rw-r--r--Lib/test/test_cmd_line.py16
-rw-r--r--Lib/test/test_descr.py12
-rw-r--r--Lib/test/test_gdb.py22
-rw-r--r--Lib/test/test_hash.py92
-rw-r--r--Lib/test/test_os.py36
-rw-r--r--Lib/test/test_set.py23
-rw-r--r--Lib/test/test_sys.py2
-rw-r--r--Lib/test/test_urllib.py4
-rwxr-xr-x[-rw-r--r--]Lib/test/test_urlparse.py3
-rw-r--r--Lib/tkinter/test/test_ttk/test_functions.py2
-rw-r--r--Makefile.pre.in1
-rw-r--r--Misc/NEWS5
-rw-r--r--Misc/python.man29
-rw-r--r--Modules/_datetimemodule.c4
-rw-r--r--Modules/main.c20
-rw-r--r--Modules/posixmodule.c124
-rw-r--r--Objects/bytesobject.c12
-rw-r--r--Objects/object.c2
-rw-r--r--Objects/unicodeobject.c12
-rw-r--r--PCbuild/pythoncore.vcproj4
-rw-r--r--Python/pythonrun.c8
-rw-r--r--Python/random.c302
-rw-r--r--Python/sysmodule.c6
34 files changed, 676 insertions, 162 deletions
diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst
index d611acf..063e0a5 100644
--- a/Doc/library/sys.rst
+++ b/Doc/library/sys.rst
@@ -253,11 +253,15 @@ always available.
:const:`verbose` :option:`-v`
:const:`bytes_warning` :option:`-b`
:const:`quiet` :option:`-q`
+ :const:`hash_randomization` :option:`-R`
============================= =============================
.. versionchanged:: 3.2
Added ``quiet`` attribute for the new :option:`-q` flag.
+ .. versionadded:: 3.2.3
+ The ``hash_randomization`` attribute.
+
.. data:: float_info
diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst
index d01b1f2..7dcd459 100644
--- a/Doc/reference/datamodel.rst
+++ b/Doc/reference/datamodel.rst
@@ -1272,6 +1272,8 @@ Basic customization
inheritance of :meth:`__hash__` will be blocked, just as if :attr:`__hash__`
had been explicitly set to :const:`None`.
+ See also the :option:`-R` command-line option.
+
.. method:: object.__bool__(self)
diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst
index c386fb4..d0b330d 100644
--- a/Doc/using/cmdline.rst
+++ b/Doc/using/cmdline.rst
@@ -24,7 +24,7 @@ Command line
When invoking Python, you may specify any of these options::
- python [-bBdEhiOsSuvVWx?] [-c command | -m module-name | script | - ] [args]
+ python [-bBdEhiORqsSuvVWx?] [-c command | -m module-name | script | - ] [args]
The most common use case is, of course, a simple invocation of a script::
@@ -227,6 +227,29 @@ Miscellaneous options
.. versionadded:: 3.2
+.. cmdoption:: -R
+
+ Turn on hash randomization, so that the :meth:`__hash__` values of str, bytes
+ and datetime objects are "salted" with an unpredictable random value.
+ Although they remain constant within an individual Python process, they are
+ not predictable between repeated invocations of Python.
+
+ This is intended to provide protection against a denial-of-service caused by
+ carefully-chosen inputs that exploit the worst case performance of a dict
+ insertion, O(n^2) complexity. See
+ http://www.ocert.org/advisories/ocert-2011-003.html for details.
+
+ Changing hash values affects the order in which keys are retrieved from a
+ dict. Although Python has never made guarantees about this ordering (and it
+ typically varies between 32-bit and 64-bit builds), enough real-world code
+ implicitly relies on this non-guaranteed behavior that the randomization is
+ disabled by default.
+
+ See also :envvar:`PYTHONHASHSEED`.
+
+ .. versionadded:: 3.2.3
+
+
.. cmdoption:: -s
Don't add the :data:`user site-packages directory <site.USER_SITE>` to
@@ -350,6 +373,7 @@ Options you shouldn't use
.. _Jython: http://jython.org
+
.. _using-on-envvars:
Environment variables
@@ -458,6 +482,27 @@ These environment variables influence Python's behavior.
option.
+.. envvar:: PYTHONHASHSEED
+
+ If this variable is set to ``random``, the effect is the same as specifying
+ the :option:`-R` option: a random value is used to seed the hashes of str,
+ bytes and datetime objects.
+
+ If :envvar:`PYTHONHASHSEED` is set to an integer value, it is used as a fixed
+ seed for generating the hash() of the types covered by the hash
+ randomization.
+
+ Its purpose is to allow repeatable hashing, such as for selftests for the
+ interpreter itself, or to allow a cluster of python processes to share hash
+ values.
+
+ The integer must be a decimal number in the range [0,4294967295]. Specifying
+ the value 0 will lead to the same hash values as when hash randomization is
+ disabled.
+
+ .. versionadded:: 3.2.3
+
+
.. envvar:: PYTHONIOENCODING
If this is set before running the interpreter, it overrides the encoding used
diff --git a/Include/object.h b/Include/object.h
index 2528841..a54c400 100644
--- a/Include/object.h
+++ b/Include/object.h
@@ -517,6 +517,12 @@ PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double);
PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*);
#endif
+typedef struct {
+ Py_hash_t prefix;
+ Py_hash_t suffix;
+} _Py_HashSecret_t;
+PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;
+
/* Helper for passing objects to printf and the like */
#define PyObject_REPR(obj) _PyUnicode_AsString(PyObject_Repr(obj))
diff --git a/Include/pydebug.h b/Include/pydebug.h
index 70c88f6..e23cbdc 100644
--- a/Include/pydebug.h
+++ b/Include/pydebug.h
@@ -20,6 +20,7 @@ PyAPI_DATA(int) Py_DivisionWarningFlag;
PyAPI_DATA(int) Py_DontWriteBytecodeFlag;
PyAPI_DATA(int) Py_NoUserSiteDirectory;
PyAPI_DATA(int) Py_UnbufferedStdioFlag;
+PyAPI_DATA(int) Py_HashRandomizationFlag;
/* this is a wrapper around getenv() that pays attention to
Py_IgnoreEnvironmentFlag. It should be used for getting variables like
diff --git a/Include/pythonrun.h b/Include/pythonrun.h
index 00b4972..5054932 100644
--- a/Include/pythonrun.h
+++ b/Include/pythonrun.h
@@ -248,6 +248,8 @@ typedef void (*PyOS_sighandler_t)(int);
PyAPI_FUNC(PyOS_sighandler_t) PyOS_getsig(int);
PyAPI_FUNC(PyOS_sighandler_t) PyOS_setsig(int, PyOS_sighandler_t);
+/* Random */
+PyAPI_FUNC(int) _PyOS_URandom (void *buffer, Py_ssize_t size);
#ifdef __cplusplus
}
diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py
index 6d88931..ba2bc1d 100644
--- a/Lib/json/__init__.py
+++ b/Lib/json/__init__.py
@@ -31,7 +31,9 @@ Encoding basic Python object hierarchies::
Compact encoding::
>>> import json
- >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',', ':'))
+ >>> from collections import OrderedDict
+ >>> mydict = OrderedDict([('4', 5), ('6', 7)])
+ >>> json.dumps([1,2,3,mydict], separators=(',', ':'))
'[1,2,3,{"4":5,"6":7}]'
Pretty printing::
diff --git a/Lib/os.py b/Lib/os.py
index a894ee0..5862383 100644
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -761,23 +761,6 @@ try:
except NameError: # statvfs_result may not exist
pass
-if not _exists("urandom"):
- def urandom(n):
- """urandom(n) -> str
-
- Return a string of n random bytes suitable for cryptographic use.
-
- """
- try:
- _urandomfd = open("/dev/urandom", O_RDONLY)
- except (OSError, IOError):
- raise NotImplementedError("/dev/urandom (or equivalent) not found")
- bs = b""
- while len(bs) < n:
- bs += read(_urandomfd, n - len(bs))
- close(_urandomfd)
- return bs
-
# Supply os.popen()
def popen(cmd, mode="r", buffering=-1):
if not isinstance(cmd, str):
diff --git a/Lib/test/mapping_tests.py b/Lib/test/mapping_tests.py
index d2b7a59..bc12c77 100644
--- a/Lib/test/mapping_tests.py
+++ b/Lib/test/mapping_tests.py
@@ -14,7 +14,7 @@ class BasicTestMappingProtocol(unittest.TestCase):
def _reference(self):
"""Return a dictionary of values which are invariant by storage
in the object under test."""
- return {1:2, "key1":"value1", "key2":(1,2,3)}
+ return {"1": "2", "key1":"value1", "key2":(1,2,3)}
def _empty_mapping(self):
"""Return an empty mapping object"""
return self.type2test()
diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py
index 135a90e..26ba982 100755
--- a/Lib/test/regrtest.py
+++ b/Lib/test/regrtest.py
@@ -496,6 +496,11 @@ def main(tests=None, testdir=None, verbose=0, quiet=False,
except ValueError:
print("Couldn't find starting test (%s), using all tests" % start)
if randomize:
+ hashseed = os.getenv('PYTHONHASHSEED')
+ if not hashseed:
+ os.environ['PYTHONHASHSEED'] = str(random_seed)
+ os.execv(sys.executable, [sys.executable] + sys.argv)
+ return
random.seed(random_seed)
print("Using random seed", random_seed)
random.shuffle(selected)
diff --git a/Lib/test/script_helper.py b/Lib/test/script_helper.py
index 371c33d..ba446cd 100644
--- a/Lib/test/script_helper.py
+++ b/Lib/test/script_helper.py
@@ -3,7 +3,6 @@
import sys
import os
-import re
import os.path
import tempfile
import subprocess
@@ -20,11 +19,15 @@ def _assert_python(expected_success, *args, **env_vars):
cmd_line = [sys.executable]
if not env_vars:
cmd_line.append('-E')
- cmd_line.extend(args)
# Need to preserve the original environment, for in-place testing of
# shared library builds.
env = os.environ.copy()
+ # But a special flag that can be set to override -- in this case, the
+ # caller is responsible to pass the full environment.
+ if env_vars.pop('__cleanenv', None):
+ env = {}
env.update(env_vars)
+ cmd_line.extend(args)
p = subprocess.Popen(cmd_line, stdin=subprocess.PIPE,
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
env=env)
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index 2fca25e..1a21281 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -330,6 +330,22 @@ class CmdLineTest(unittest.TestCase):
def test_no_std_streams(self):
self._test_no_stdio(['stdin', 'stdout', 'stderr'])
+ def test_hash_randomization(self):
+ # Verify that -R enables hash randomization:
+ self.verify_valid_flag('-R')
+ hashes = []
+ for i in range(2):
+ code = 'print(hash("spam"))'
+ rc, out, err = assert_python_ok('-R', '-c', code)
+ self.assertEqual(rc, 0)
+ hashes.append(out)
+ self.assertNotEqual(hashes[0], hashes[1])
+
+ # Verify that sys.flags contains hash_randomization
+ code = 'import sys; print("random is", sys.flags.hash_randomization)'
+ rc, out, err = assert_python_ok('-R', '-c', code)
+ self.assertEqual(rc, 0)
+ self.assertIn(b'random is 1', out)
def test_main():
test.support.run_unittest(CmdLineTest)
diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py
index b214996..77cadc0 100644
--- a/Lib/test/test_descr.py
+++ b/Lib/test/test_descr.py
@@ -4474,8 +4474,18 @@ class DictProxyTests(unittest.TestCase):
def test_repr(self):
# Testing dict_proxy.__repr__
+ def sorted_dict_repr(repr_):
+ # Given the repr of a dict, sort the keys
+ assert repr_.startswith('{')
+ assert repr_.endswith('}')
+ kvs = repr_[1:-1].split(', ')
+ return '{' + ', '.join(sorted(kvs)) + '}'
dict_ = {k: v for k, v in self.C.__dict__.items()}
- self.assertEqual(repr(self.C.__dict__), 'dict_proxy({!r})'.format(dict_))
+ repr_ = repr(self.C.__dict__)
+ self.assert_(repr_.startswith('dict_proxy('))
+ self.assert_(repr_.endswith(')'))
+ self.assertEqual(sorted_dict_repr(repr_[len('dict_proxy('):-len(')')]),
+ sorted_dict_repr('{!r}'.format(dict_)))
class PTypesLongInitTest(unittest.TestCase):
diff --git a/Lib/test/test_gdb.py b/Lib/test/test_gdb.py
index 651aaec..aea7c0c 100644
--- a/Lib/test/test_gdb.py
+++ b/Lib/test/test_gdb.py
@@ -52,13 +52,18 @@ class DebuggerTests(unittest.TestCase):
"""Test that the debugger can debug Python."""
- def run_gdb(self, *args):
+ def run_gdb(self, *args, **env_vars):
"""Runs gdb with the command line given by *args.
Returns its stdout, stderr
"""
+ if env_vars:
+ env = os.environ.copy()
+ env.update(env_vars)
+ else:
+ env = None
out, err = subprocess.Popen(
- args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+ args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env,
).communicate()
return out.decode('utf-8', 'replace'), err.decode('utf-8', 'replace')
@@ -118,7 +123,7 @@ class DebuggerTests(unittest.TestCase):
# print ' '.join(args)
# Use "args" to invoke gdb, capturing stdout, stderr:
- out, err = self.run_gdb(*args)
+ out, err = self.run_gdb(*args, PYTHONHASHSEED='0')
# Ignore some noise on stderr due to the pending breakpoint:
err = err.replace('Function "%s" not defined.\n' % breakpoint, '')
@@ -207,7 +212,8 @@ class PrettyPrintTests(DebuggerTests):
'Verify the pretty-printing of dictionaries'
self.assertGdbRepr({})
self.assertGdbRepr({'foo': 'bar'})
- self.assertGdbRepr({'foo': 'bar', 'douglas':42})
+ self.assertGdbRepr({'foo': 'bar', 'douglas': 42},
+ "{'foo': 'bar', 'douglas': 42}")
def test_lists(self):
'Verify the pretty-printing of lists'
@@ -269,8 +275,8 @@ class PrettyPrintTests(DebuggerTests):
def test_sets(self):
'Verify the pretty-printing of sets'
self.assertGdbRepr(set())
- self.assertGdbRepr(set(['a', 'b']))
- self.assertGdbRepr(set([4, 5, 6]))
+ self.assertGdbRepr(set(['a', 'b']), "{'a', 'b'}")
+ self.assertGdbRepr(set([4, 5, 6]), "{4, 5, 6}")
# Ensure that we handle sets containing the "dummy" key value,
# which happens on deletion:
@@ -282,8 +288,8 @@ id(s)''')
def test_frozensets(self):
'Verify the pretty-printing of frozensets'
self.assertGdbRepr(frozenset())
- self.assertGdbRepr(frozenset(['a', 'b']))
- self.assertGdbRepr(frozenset([4, 5, 6]))
+ self.assertGdbRepr(frozenset(['a', 'b']), "frozenset({'a', 'b'})")
+ self.assertGdbRepr(frozenset([4, 5, 6]), "frozenset({4, 5, 6})")
def test_exceptions(self):
# Test a RuntimeError
diff --git a/Lib/test/test_hash.py b/Lib/test/test_hash.py
index fea1025..8253610 100644
--- a/Lib/test/test_hash.py
+++ b/Lib/test/test_hash.py
@@ -3,10 +3,16 @@
#
# Also test that hash implementations are inherited as expected
+import datetime
+import os
+import sys
import unittest
from test import support
+from test.script_helper import assert_python_ok
from collections import Hashable
+IS_64BIT = sys.maxsize > 2**32
+
class HashEqualityTestCase(unittest.TestCase):
@@ -118,10 +124,92 @@ class HashBuiltinsTestCase(unittest.TestCase):
for obj in self.hashes_to_check:
self.assertEqual(hash(obj), _default_hash(obj))
+class HashRandomizationTests(unittest.TestCase):
+
+ # Each subclass should define a field "repr_", containing the repr() of
+ # an object to be tested
+
+ def get_hash_command(self, repr_):
+ return 'print(hash(%s))' % repr_
+
+ def get_hash(self, repr_, seed=None):
+ env = os.environ.copy()
+ env['__cleanenv'] = True # signal to assert_python not to do a copy
+ # of os.environ on its own
+ if seed is not None:
+ env['PYTHONHASHSEED'] = str(seed)
+ else:
+ env.pop('PYTHONHASHSEED', None)
+ out = assert_python_ok(
+ '-c', self.get_hash_command(repr_),
+ **env)
+ stdout = out[1].strip()
+ return int(stdout)
+
+ def test_randomized_hash(self):
+ # two runs should return different hashes
+ run1 = self.get_hash(self.repr_, seed='random')
+ run2 = self.get_hash(self.repr_, seed='random')
+ self.assertNotEqual(run1, run2)
+
+class StringlikeHashRandomizationTests(HashRandomizationTests):
+ def test_null_hash(self):
+ # PYTHONHASHSEED=0 disables the randomized hash
+ if IS_64BIT:
+ known_hash_of_obj = 1453079729188098211
+ else:
+ known_hash_of_obj = -1600925533
+
+ # Randomization is disabled by default:
+ self.assertEqual(self.get_hash(self.repr_), known_hash_of_obj)
+
+ # It can also be disabled by setting the seed to 0:
+ self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj)
+
+ def test_fixed_hash(self):
+ # test a fixed seed for the randomized hash
+ # Note that all types share the same values:
+ if IS_64BIT:
+ h = -4410911502303878509
+ else:
+ h = -206076799
+ self.assertEqual(self.get_hash(self.repr_, seed=42), h)
+
+class StrHashRandomizationTests(StringlikeHashRandomizationTests):
+ repr_ = repr('abc')
+
+ def test_empty_string(self):
+ self.assertEqual(hash(""), 0)
+
+class BytesHashRandomizationTests(StringlikeHashRandomizationTests):
+ repr_ = repr(b'abc')
+
+ def test_empty_string(self):
+ self.assertEqual(hash(b""), 0)
+
+class DatetimeTests(HashRandomizationTests):
+ def get_hash_command(self, repr_):
+ return 'import datetime; print(hash(%s))' % repr_
+
+class DatetimeDateTests(DatetimeTests):
+ repr_ = repr(datetime.date(1066, 10, 14))
+
+class DatetimeDatetimeTests(DatetimeTests):
+ repr_ = repr(datetime.datetime(1, 2, 3, 4, 5, 6, 7))
+
+class DatetimeTimeTests(DatetimeTests):
+ repr_ = repr(datetime.time(0))
+
+
def test_main():
support.run_unittest(HashEqualityTestCase,
- HashInheritanceTestCase,
- HashBuiltinsTestCase)
+ HashInheritanceTestCase,
+ HashBuiltinsTestCase,
+ StrHashRandomizationTests,
+ BytesHashRandomizationTests,
+ DatetimeDateTests,
+ DatetimeDatetimeTests,
+ DatetimeTimeTests)
if __name__ == "__main__":
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py
index e573bd2..8bc8ba9 100644
--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -15,6 +15,7 @@ from test import support
import contextlib
import mmap
import uuid
+from test.script_helper import assert_python_ok
# Detect whether we're on a Linux system that uses the (now outdated
# and unmaintained) linuxthreads threading library. There's an issue
@@ -611,14 +612,33 @@ class DevNullTests(unittest.TestCase):
self.assertEqual(f.read(), b'')
class URandomTests(unittest.TestCase):
- def test_urandom(self):
- try:
- self.assertEqual(len(os.urandom(1)), 1)
- self.assertEqual(len(os.urandom(10)), 10)
- self.assertEqual(len(os.urandom(100)), 100)
- self.assertEqual(len(os.urandom(1000)), 1000)
- except NotImplementedError:
- pass
+ def test_urandom_length(self):
+ self.assertEqual(len(os.urandom(0)), 0)
+ self.assertEqual(len(os.urandom(1)), 1)
+ self.assertEqual(len(os.urandom(10)), 10)
+ self.assertEqual(len(os.urandom(100)), 100)
+ self.assertEqual(len(os.urandom(1000)), 1000)
+
+ def test_urandom_value(self):
+ data1 = os.urandom(16)
+ data2 = os.urandom(16)
+ self.assertNotEqual(data1, data2)
+
+ def get_urandom_subprocess(self, count):
+ code = '\n'.join((
+ 'import os, sys',
+ 'data = os.urandom(%s)' % count,
+ 'sys.stdout.buffer.write(data)',
+ 'sys.stdout.buffer.flush()'))
+ out = assert_python_ok('-c', code)
+ stdout = out[1]
+ self.assertEqual(len(stdout), 16)
+ return stdout
+
+ def test_urandom_subprocess(self):
+ data1 = self.get_urandom_subprocess(16)
+ data2 = self.get_urandom_subprocess(16)
+ self.assertNotEqual(data1, data2)
@contextlib.contextmanager
def _execvpe_mockup(defpath=None):
diff --git a/Lib/test/test_set.py b/Lib/test/test_set.py
index 07bfe06..6642440 100644
--- a/Lib/test/test_set.py
+++ b/Lib/test/test_set.py
@@ -733,6 +733,17 @@ class TestBasicOps(unittest.TestCase):
if self.repr is not None:
self.assertEqual(repr(self.set), self.repr)
+ def check_repr_against_values(self):
+ text = repr(self.set)
+ self.assertTrue(text.startswith('{'))
+ self.assertTrue(text.endswith('}'))
+
+ result = text[1:-1].split(', ')
+ result.sort()
+ sorted_repr_values = [repr(value) for value in self.values]
+ sorted_repr_values.sort()
+ self.assertEqual(result, sorted_repr_values)
+
def test_print(self):
try:
fo = open(support.TESTFN, "w")
@@ -891,7 +902,9 @@ class TestBasicOpsString(TestBasicOps):
self.set = set(self.values)
self.dup = set(self.values)
self.length = 3
- self.repr = "{'a', 'c', 'b'}"
+
+ def test_repr(self):
+ self.check_repr_against_values()
#------------------------------------------------------------------------------
@@ -902,7 +915,9 @@ class TestBasicOpsBytes(TestBasicOps):
self.set = set(self.values)
self.dup = set(self.values)
self.length = 3
- self.repr = "{b'a', b'c', b'b'}"
+
+ def test_repr(self):
+ self.check_repr_against_values()
#------------------------------------------------------------------------------
@@ -916,11 +931,13 @@ class TestBasicOpsMixedStringBytes(TestBasicOps):
self.set = set(self.values)
self.dup = set(self.values)
self.length = 4
- self.repr = "{'a', b'a', 'b', b'b'}"
def tearDown(self):
self._warning_filters.__exit__(None, None, None)
+ def test_repr(self):
+ self.check_repr_against_values()
+
#==============================================================================
def baditer():
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 5d3404f..3268b1a 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -503,7 +503,7 @@ class SysModuleTest(unittest.TestCase):
attrs = ("debug", "division_warning",
"inspect", "interactive", "optimize", "dont_write_bytecode",
"no_user_site", "no_site", "ignore_environment", "verbose",
- "bytes_warning", "quiet")
+ "bytes_warning", "quiet", "hash_randomization")
for attr in attrs:
self.assertTrue(hasattr(sys.flags, attr), attr)
self.assertEqual(type(getattr(sys.flags, attr)), int, attr)
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index f6b48cb..c6f6f61 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -13,6 +13,7 @@ import sys
import tempfile
from base64 import b64encode
+import collections
def hexescape(char):
"""Escape char as RFC 2396 specifies"""
@@ -953,8 +954,9 @@ class urlencode_Tests(unittest.TestCase):
self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
self.assertEqual("a=None&a=a",
urllib.parse.urlencode({"a": [None, "a"]}, True))
+ data = collections.OrderedDict([("a", 1), ("b", 1)])
self.assertEqual("a=a&a=b",
- urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))
+ urllib.parse.urlencode({"a": data}, True))
def test_urlencode_encoding(self):
# ASCII encoding. Expect %3F with errors="replace'
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index a6e7ee8..ada0ca8 100644..100755
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -769,7 +769,8 @@ class UrlParseTestCase(unittest.TestCase):
# Other tests incidentally urlencode things; test non-covered cases:
# Sequence and object values.
result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
- self.assertEqual(result, 'a=1&a=2&b=3&b=4&b=5')
+ # we cannot rely on ordering here
+ assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'}
class Trivial:
def __str__(self):
diff --git a/Lib/tkinter/test/test_ttk/test_functions.py b/Lib/tkinter/test/test_ttk/test_functions.py
index df593cd..2303e4c 100644
--- a/Lib/tkinter/test/test_ttk/test_functions.py
+++ b/Lib/tkinter/test/test_ttk/test_functions.py
@@ -143,7 +143,7 @@ class InternalFunctionsTest(unittest.TestCase):
('a', 'b', 'c')), ("test {a b} c", ()))
# state spec and options
self.assertEqual(ttk._format_elemcreate('image', False, 'test',
- ('a', 'b'), a='x', b='y'), ("test a b", ("-a", "x", "-b", "y")))
+ ('a', 'b'), a='x'), ("test a b", ("-a", "x")))
# format returned values as a tcl script
# state spec with multiple states and an option with a multivalue
self.assertEqual(ttk._format_elemcreate('image', True, 'test',
diff --git a/Makefile.pre.in b/Makefile.pre.in
index c49d777..7ffc3ec 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -322,6 +322,7 @@ PYTHON_OBJS= \
Python/pystate.o \
Python/pythonrun.o \
Python/pytime.o \
+ Python/random.o \
Python/structmember.o \
Python/symtable.o \
Python/sysmodule.o \
diff --git a/Misc/NEWS b/Misc/NEWS
index b9cd764..63281cf 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,11 @@ What's New in Python 3.2.3?
Core and Builtins
-----------------
+- Issue #13703: oCERT-2011-003: add -R command-line option and PYTHONHASHSEED
+ environment variables, to provide an opt-in way to protect against denial of
+ service attacks due to hash collisions within the dict and set types. Patch
+ by David Malcolm, based on work by Victor Stinner.
+
- Issue #13020: Fix a reference leak when allocating a structsequence object
fails. Patch by Suman Saha.
diff --git a/Misc/python.man b/Misc/python.man
index 3dca604..fc3566f 100644
--- a/Misc/python.man
+++ b/Misc/python.man
@@ -37,6 +37,9 @@ python \- an interpreted, interactive, object-oriented programming language
.B \-OO
]
[
+.B \-R
+]
+[
.B -Q
.I argument
]
@@ -152,6 +155,18 @@ Discard docstrings in addition to the \fB-O\fP optimizations.
Do not print the version and copyright messages. These messages are
also suppressed in non-interactive mode.
.TP
+.B \-R
+Turn on "hash randomization", so that the hash() values of str, bytes and
+datetime objects are "salted" with an unpredictable pseudo-random value.
+Although they remain constant within an individual Python process, they are
+not predictable between repeated invocations of Python.
+.IP
+This is intended to provide protection against a denial of service
+caused by carefully-chosen inputs that exploit the worst case performance
+of a dict insertion, O(n^2) complexity. See
+http://www.ocert.org/advisories/ocert-2011-003.html
+for details.
+.TP
.BI "\-Q " argument
Division control; see PEP 238. The argument must be one of "old" (the
default, int/int and long/long return an int or long), "new" (new
@@ -413,6 +428,20 @@ specifying \fB\-v\fP multiple times.
.IP PYTHONWARNINGS
If this is set to a comma-separated string it is equivalent to
specifying the \fB\-W\fP option for each separate value.
+.IP PYTHONHASHSEED
+If this variable is set to "random", the effect is the same as specifying
+the \fB-R\fP option: a random value is used to seed the hashes of str,
+bytes and datetime objects.
+
+If PYTHONHASHSEED is set to an integer value, it is used as a fixed seed for
+generating the hash() of the types covered by the hash randomization. Its
+purpose is to allow repeatable hashing, such as for selftests for the
+interpreter itself, or to allow a cluster of python processes to share hash
+values.
+
+The integer must be a decimal number in the range [0,4294967295]. Specifying
+the value 0 will lead to the same hash values as when hash randomization is
+disabled.
.SH AUTHOR
The Python Software Foundation: http://www.python.org/psf
.SH INTERNET RESOURCES
diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c
index 6ee5317..85c5c4d 100644
--- a/Modules/_datetimemodule.c
+++ b/Modules/_datetimemodule.c
@@ -2785,10 +2785,12 @@ generic_hash(unsigned char *data, int len)
register Py_hash_t x;
p = (unsigned char *) data;
- x = *p << 7;
+ x = _Py_HashSecret.prefix;
+ x ^= *p << 7;
while (--len >= 0)
x = (1000003*x) ^ *p++;
x ^= len;
+ x ^= _Py_HashSecret.suffix;
if (x == -1)
x = -2;
diff --git a/Modules/main.c b/Modules/main.c
index 5a985a5..ed84aa0 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -46,7 +46,7 @@ static wchar_t **orig_argv;
static int orig_argc;
/* command line options */
-#define BASE_OPTS L"bBc:dEhiJm:OqsStuvVW:xX:?"
+#define BASE_OPTS L"bBc:dEhiJm:OqRsStuvVW:xX:?"
#define PROGRAM_OPTS BASE_OPTS
@@ -72,6 +72,9 @@ static char *usage_2 = "\
-O : optimize generated bytecode slightly; also PYTHONOPTIMIZE=x\n\
-OO : remove doc-strings in addition to the -O optimizations\n\
-q : don't print version and copyright messages on interactive startup\n\
+-R : use a pseudo-random salt to make hash() values of various types be\n\
+ unpredictable between separate invocations of the interpreter, as\n\
+ a defence against denial-of-service attacks\n\
-s : don't add user site directory to sys.path; also PYTHONNOUSERSITE\n\
-S : don't imply 'import site' on initialization\n\
";
@@ -99,8 +102,14 @@ static char *usage_5 =
"PYTHONHOME : alternate <prefix> directory (or <prefix>%c<exec_prefix>).\n"
" The default module search path uses %s.\n"
"PYTHONCASEOK : ignore case in 'import' statements (Windows).\n"
-"PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n"
-;
+"PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\
+";
+static char *usage_6 = "\
+PYTHONHASHSEED: if this variable is set to ``random``, the effect is the same \n\
+ as specifying the :option:`-R` option: a random value is used to seed the\n\
+ hashes of str, bytes and datetime objects. It can also be set to an integer\n\
+ in the range [0,4294967295] to get hash values with a predictable seed.\n\
+";
static int
usage(int exitcode, wchar_t* program)
@@ -116,6 +125,7 @@ usage(int exitcode, wchar_t* program)
fputs(usage_3, f);
fprintf(f, usage_4, DELIM);
fprintf(f, usage_5, DELIM, PYTHONHOMEHELP);
+ fputs(usage_6, f);
}
#if defined(__VMS)
if (exitcode == 0) {
@@ -429,6 +439,10 @@ Py_Main(int argc, wchar_t **argv)
Py_QuietFlag++;
break;
+ case 'R':
+ Py_HashRandomizationFlag++;
+ break;
+
/* This space reserved for other options */
default:
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c
index 0afab3c..9f57673 100644
--- a/Modules/posixmodule.c
+++ b/Modules/posixmodule.c
@@ -7614,82 +7614,6 @@ posix_getloadavg(PyObject *self, PyObject *noargs)
}
#endif
-#ifdef MS_WINDOWS
-
-PyDoc_STRVAR(win32_urandom__doc__,
-"urandom(n) -> str\n\n\
-Return n random bytes suitable for cryptographic use.");
-
-typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\
- LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\
- DWORD dwFlags );
-typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\
- BYTE *pbBuffer );
-
-static CRYPTGENRANDOM pCryptGenRandom = NULL;
-/* This handle is never explicitly released. Instead, the operating
- system will release it when the process terminates. */
-static HCRYPTPROV hCryptProv = 0;
-
-static PyObject*
-win32_urandom(PyObject *self, PyObject *args)
-{
- int howMany;
- PyObject* result;
-
- /* Read arguments */
- if (! PyArg_ParseTuple(args, "i:urandom", &howMany))
- return NULL;
- if (howMany < 0)
- return PyErr_Format(PyExc_ValueError,
- "negative argument not allowed");
-
- if (hCryptProv == 0) {
- HINSTANCE hAdvAPI32 = NULL;
- CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL;
-
- /* Obtain handle to the DLL containing CryptoAPI
- This should not fail */
- hAdvAPI32 = GetModuleHandle("advapi32.dll");
- if(hAdvAPI32 == NULL)
- return win32_error("GetModuleHandle", NULL);
-
- /* Obtain pointers to the CryptoAPI functions
- This will fail on some early versions of Win95 */
- pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress(
- hAdvAPI32,
- "CryptAcquireContextA");
- if (pCryptAcquireContext == NULL)
- return PyErr_Format(PyExc_NotImplementedError,
- "CryptAcquireContextA not found");
-
- pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(
- hAdvAPI32, "CryptGenRandom");
- if (pCryptGenRandom == NULL)
- return PyErr_Format(PyExc_NotImplementedError,
- "CryptGenRandom not found");
-
- /* Acquire context */
- if (! pCryptAcquireContext(&hCryptProv, NULL, NULL,
- PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
- return win32_error("CryptAcquireContext", NULL);
- }
-
- /* Allocate bytes */
- result = PyBytes_FromStringAndSize(NULL, howMany);
- if (result != NULL) {
- /* Get random data */
- memset(PyBytes_AS_STRING(result), 0, howMany); /* zero seed */
- if (! pCryptGenRandom(hCryptProv, howMany, (unsigned char*)
- PyBytes_AS_STRING(result))) {
- Py_DECREF(result);
- return win32_error("CryptGenRandom", NULL);
- }
- }
- return result;
-}
-#endif
-
PyDoc_STRVAR(device_encoding__doc__,
"device_encoding(fd) -> str\n\n\
Return a string describing the encoding of the device\n\
@@ -7727,41 +7651,35 @@ device_encoding(PyObject *self, PyObject *args)
return Py_None;
}
-#ifdef __VMS
-/* Use openssl random routine */
-#include <openssl/rand.h>
-PyDoc_STRVAR(vms_urandom__doc__,
+PyDoc_STRVAR(posix_urandom__doc__,
"urandom(n) -> str\n\n\
Return n random bytes suitable for cryptographic use.");
-static PyObject*
-vms_urandom(PyObject *self, PyObject *args)
+static PyObject *
+posix_urandom(PyObject *self, PyObject *args)
{
- int howMany;
- PyObject* result;
+ Py_ssize_t size;
+ PyObject *result;
+ int ret;
- /* Read arguments */
- if (! PyArg_ParseTuple(args, "i:urandom", &howMany))
+ /* Read arguments */
+ if (!PyArg_ParseTuple(args, "n:urandom", &size))
return NULL;
- if (howMany < 0)
+ if (size < 0)
return PyErr_Format(PyExc_ValueError,
"negative argument not allowed");
+ result = PyBytes_FromStringAndSize(NULL, size);
+ if (result == NULL)
+ return NULL;
- /* Allocate bytes */
- result = PyBytes_FromStringAndSize(NULL, howMany);
- if (result != NULL) {
- /* Get random data */
- if (RAND_pseudo_bytes((unsigned char*)
- PyBytes_AS_STRING(result),
- howMany) < 0) {
- Py_DECREF(result);
- return PyErr_Format(PyExc_ValueError,
- "RAND_pseudo_bytes");
- }
+ ret = _PyOS_URandom(PyBytes_AS_STRING(result),
+ PyBytes_GET_SIZE(result));
+ if (ret == -1) {
+ Py_DECREF(result);
+ return NULL;
}
return result;
}
-#endif
#ifdef HAVE_SETRESUID
PyDoc_STRVAR(posix_setresuid__doc__,
@@ -8137,12 +8055,7 @@ static PyMethodDef posix_methods[] = {
#ifdef HAVE_GETLOADAVG
{"getloadavg", posix_getloadavg, METH_NOARGS, posix_getloadavg__doc__},
#endif
- #ifdef MS_WINDOWS
- {"urandom", win32_urandom, METH_VARARGS, win32_urandom__doc__},
- #endif
- #ifdef __VMS
- {"urandom", vms_urandom, METH_VARARGS, vms_urandom__doc__},
- #endif
+ {"urandom", posix_urandom, METH_VARARGS, posix_urandom__doc__},
#ifdef HAVE_SETRESUID
{"setresuid", posix_setresuid, METH_VARARGS, posix_setresuid__doc__},
#endif
@@ -8155,7 +8068,6 @@ static PyMethodDef posix_methods[] = {
#ifdef HAVE_GETRESGID
{"getresgid", posix_getresgid, METH_NOARGS, posix_getresgid__doc__},
#endif
-
{NULL, NULL} /* Sentinel */
};
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index a0d4cbd..d63fabc 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -878,11 +878,21 @@ bytes_hash(PyBytesObject *a)
if (a->ob_shash != -1)
return a->ob_shash;
len = Py_SIZE(a);
+ /*
+ We make the hash of the empty string be 0, rather than using
+ (prefix ^ suffix), since this slightly obfuscates the hash secret
+ */
+ if (len == 0) {
+ a->ob_shash = 0;
+ return 0;
+ }
p = (unsigned char *) a->ob_sval;
- x = *p << 7;
+ x = _Py_HashSecret.prefix;
+ x ^= *p << 7;
while (--len >= 0)
x = (_PyHASH_MULTIPLIER*x) ^ *p++;
x ^= Py_SIZE(a);
+ x ^= _Py_HashSecret.suffix;
if (x == -1)
x = -2;
a->ob_shash = x;
diff --git a/Objects/object.c b/Objects/object.c
index 694e7e7..84ec2f3 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -754,6 +754,8 @@ PyObject_HashNotImplemented(PyObject *v)
return -1;
}
+_Py_HashSecret_t _Py_HashSecret;
+
Py_hash_t
PyObject_Hash(PyObject *v)
{
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 70856f5..467f95c 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -7676,11 +7676,21 @@ unicode_hash(PyUnicodeObject *self)
if (self->hash != -1)
return self->hash;
len = Py_SIZE(self);
+ /*
+ We make the hash of the empty string be 0, rather than using
+ (prefix ^ suffix), since this slightly obfuscates the hash secret
+ */
+ if (len == 0) {
+ self->hash = 0;
+ return 0;
+ }
p = self->str;
- x = *p << 7;
+ x = _Py_HashSecret.prefix;
+ x ^= *p << 7;
while (--len >= 0)
x = (_PyHASH_MULTIPLIER*x) ^ *p++;
x ^= Py_SIZE(self);
+ x ^= _Py_HashSecret.suffix;
if (x == -1)
x = -2;
self->hash = x;
diff --git a/PCbuild/pythoncore.vcproj b/PCbuild/pythoncore.vcproj
index f83f6b1..5e61308 100644
--- a/PCbuild/pythoncore.vcproj
+++ b/PCbuild/pythoncore.vcproj
@@ -1882,6 +1882,10 @@
RelativePath="..\Python\pythonrun.c"
>
</File>
+ <File
+ RelativePath="..\Python\random.c"
+ >
+ </File>
<File
RelativePath="..\Python\structmember.c"
>
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index ec69bcb..718362d 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -70,6 +70,7 @@ extern void _PyUnicode_Init(void);
extern void _PyUnicode_Fini(void);
extern int _PyLong_Init(void);
extern void PyLong_Fini(void);
+extern void _PyRandom_Init(void);
#ifdef WITH_THREAD
extern void _PyGILState_Init(PyInterpreterState *, PyThreadState *);
@@ -89,6 +90,7 @@ int Py_FrozenFlag; /* Needed by getpath.c */
int Py_IgnoreEnvironmentFlag; /* e.g. PYTHONPATH, PYTHONHOME */
int Py_NoUserSiteDirectory = 0; /* for -s and site.py */
int Py_UnbufferedStdioFlag = 0; /* Unbuffered binary std{in,out,err} */
+int Py_HashRandomizationFlag = 0; /* for -R and PYTHONHASHSEED */
PyThreadState *_Py_Finalizing = NULL;
@@ -207,6 +209,12 @@ Py_InitializeEx(int install_sigs)
Py_OptimizeFlag = add_flag(Py_OptimizeFlag, p);
if ((p = Py_GETENV("PYTHONDONTWRITEBYTECODE")) && *p != '\0')
Py_DontWriteBytecodeFlag = add_flag(Py_DontWriteBytecodeFlag, p);
+ /* The variable is only tested for existence here; _PyRandom_Init will
+ check its value further. */
+ if ((p = Py_GETENV("PYTHONHASHSEED")) && *p != '\0')
+ Py_HashRandomizationFlag = add_flag(Py_HashRandomizationFlag, p);
+
+ _PyRandom_Init();
interp = PyInterpreterState_New();
if (interp == NULL)
diff --git a/Python/random.c b/Python/random.c
new file mode 100644
index 0000000..327166e
--- /dev/null
+++ b/Python/random.c
@@ -0,0 +1,302 @@
+#include "Python.h"
+#ifdef MS_WINDOWS
+#include <windows.h>
+#else
+#include <fcntl.h>
+#endif
+
+static int random_initialized = 0;
+
+#ifdef MS_WINDOWS
+typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\
+ LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\
+ DWORD dwFlags );
+typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\
+ BYTE *pbBuffer );
+
+static CRYPTGENRANDOM pCryptGenRandom = NULL;
+/* This handle is never explicitly released. Instead, the operating
+ system will release it when the process terminates. */
+static HCRYPTPROV hCryptProv = 0;
+
+static int
+win32_urandom_init(int raise)
+{
+ HINSTANCE hAdvAPI32 = NULL;
+ CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL;
+
+ /* Obtain handle to the DLL containing CryptoAPI. This should not fail. */
+ hAdvAPI32 = GetModuleHandle("advapi32.dll");
+ if(hAdvAPI32 == NULL)
+ goto error;
+
+ /* Obtain pointers to the CryptoAPI functions. This will fail on some early
+ versions of Win95. */
+ pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress(
+ hAdvAPI32, "CryptAcquireContextA");
+ if (pCryptAcquireContext == NULL)
+ goto error;
+
+ pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(hAdvAPI32,
+ "CryptGenRandom");
+ if (pCryptGenRandom == NULL)
+ goto error;
+
+ /* Acquire context */
+ if (! pCryptAcquireContext(&hCryptProv, NULL, NULL,
+ PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
+ goto error;
+
+ return 0;
+
+error:
+ if (raise)
+ PyErr_SetFromWindowsErr(0);
+ else
+ Py_FatalError("Failed to initialize Windows random API (CryptoGen)");
+ return -1;
+}
+
+/* Fill buffer with size pseudo-random bytes generated by the Windows CryptoGen
+ API. Return 0 on success, or -1 on error. */
+static int
+win32_urandom(unsigned char *buffer, Py_ssize_t size, int raise)
+{
+ Py_ssize_t chunk;
+
+ if (hCryptProv == 0)
+ {
+ if (win32_urandom_init(raise) == -1)
+ return -1;
+ }
+
+ while (size > 0)
+ {
+ chunk = size > INT_MAX ? INT_MAX : size;
+ if (!pCryptGenRandom(hCryptProv, chunk, buffer))
+ {
+ /* CryptGenRandom() failed */
+ if (raise)
+ PyErr_SetFromWindowsErr(0);
+ else
+ Py_FatalError("Failed to initialized the randomized hash "
+ "secret using CryptoGen)");
+ return -1;
+ }
+ buffer += chunk;
+ size -= chunk;
+ }
+ return 0;
+}
+#endif /* MS_WINDOWS */
+
+
+#ifdef __VMS
+/* Use openssl random routine */
+#include <openssl/rand.h>
+static int
+vms_urandom(unsigned char *buffer, Py_ssize_t size, int raise)
+{
+ if (RAND_pseudo_bytes(buffer, size) < 0) {
+ if (raise) {
+ PyErr_Format(PyExc_ValueError,
+ "RAND_pseudo_bytes");
+ } else {
+ Py_FatalError("Failed to initialize the randomized hash "
+ "secret using RAND_pseudo_bytes");
+ }
+ return -1;
+ }
+ return 0;
+}
+#endif /* __VMS */
+
+
+#if !defined(MS_WINDOWS) && !defined(__VMS)
+
+/* Read size bytes from /dev/urandom into buffer.
+ Call Py_FatalError() on error. */
+static void
+dev_urandom_noraise(char *buffer, Py_ssize_t size)
+{
+ int fd;
+ Py_ssize_t n;
+
+ assert (0 < size);
+
+ fd = open("/dev/urandom", O_RDONLY);
+ if (fd < 0)
+ Py_FatalError("Failed to open /dev/urandom");
+
+ while (0 < size)
+ {
+ do {
+ n = read(fd, buffer, (size_t)size);
+ } while (n < 0 && errno == EINTR);
+ if (n <= 0)
+ {
+ /* stop on error or if read(size) returned 0 */
+ Py_FatalError("Failed to read bytes from /dev/urandom");
+ break;
+ }
+ buffer += n;
+ size -= (Py_ssize_t)n;
+ }
+ close(fd);
+}
+
+/* Read size bytes from /dev/urandom into buffer.
+ Return 0 on success, raise an exception and return -1 on error. */
+static int
+dev_urandom_python(char *buffer, Py_ssize_t size)
+{
+ int fd;
+ Py_ssize_t n;
+
+ if (size <= 0)
+ return 0;
+
+ Py_BEGIN_ALLOW_THREADS
+ fd = open("/dev/urandom", O_RDONLY);
+ Py_END_ALLOW_THREADS
+ if (fd < 0)
+ {
+ PyErr_SetFromErrnoWithFilename(PyExc_OSError, "/dev/urandom");
+ return -1;
+ }
+
+ Py_BEGIN_ALLOW_THREADS
+ do {
+ do {
+ n = read(fd, buffer, (size_t)size);
+ } while (n < 0 && errno == EINTR);
+ if (n <= 0)
+ break;
+ buffer += n;
+ size -= (Py_ssize_t)n;
+ } while (0 < size);
+ Py_END_ALLOW_THREADS
+
+ if (n <= 0)
+ {
+ /* stop on error or if read(size) returned 0 */
+ if (n < 0)
+ PyErr_SetFromErrno(PyExc_OSError);
+ else
+ PyErr_Format(PyExc_RuntimeError,
+ "Failed to read %zi bytes from /dev/urandom",
+ size);
+ close(fd);
+ return -1;
+ }
+ close(fd);
+ return 0;
+}
+#endif /* !defined(MS_WINDOWS) && !defined(__VMS) */
+
+/* Fill buffer with pseudo-random bytes generated by a linear congruent
+ generator (LCG):
+
+ x(n+1) = (x(n) * 214013 + 2531011) % 2^32
+
+ Use bits 23..16 of x(n) to generate a byte. */
+static void
+lcg_urandom(unsigned int x0, unsigned char *buffer, size_t size)
+{
+ size_t index;
+ unsigned int x;
+
+ x = x0;
+ for (index=0; index < size; index++) {
+ x *= 214013;
+ x += 2531011;
+ /* modulo 2 ^ (8 * sizeof(int)) */
+ buffer[index] = (x >> 16) & 0xff;
+ }
+}
+
+/* Fill buffer with size pseudo-random bytes, not suitable for cryptographic
+ use, from the operating random number generator (RNG).
+
+ Return 0 on success, raise an exception and return -1 on error. */
+int
+_PyOS_URandom(void *buffer, Py_ssize_t size)
+{
+ if (size < 0) {
+ PyErr_Format(PyExc_ValueError,
+ "negative argument not allowed");
+ return -1;
+ }
+ if (size == 0)
+ return 0;
+
+#ifdef MS_WINDOWS
+ return win32_urandom((unsigned char *)buffer, size, 1);
+#else
+# ifdef __VMS
+ return vms_urandom((unsigned char *)buffer, size, 1);
+# else
+ return dev_urandom_python((char*)buffer, size);
+# endif
+#endif
+}
+
+void
+_PyRandom_Init(void)
+{
+ char *env;
+ void *secret = &_Py_HashSecret;
+ Py_ssize_t secret_size = sizeof(_Py_HashSecret);
+
+ if (random_initialized)
+ return;
+ random_initialized = 1;
+
+ /*
+ By default, hash randomization is disabled, and only
+ enabled if PYTHONHASHSEED is set to non-empty or if
+ "-R" is provided at the command line:
+ */
+ if (!Py_HashRandomizationFlag) {
+ /* Disable the randomized hash: */
+ memset(secret, 0, secret_size);
+ return;
+ }
+
+ /*
+ Hash randomization is enabled. Generate a per-process secret,
+ using PYTHONHASHSEED if provided.
+ */
+
+ env = Py_GETENV("PYTHONHASHSEED");
+ if (env && *env != '\0' & strcmp(env, "random") != 0) {
+ char *endptr = env;
+ unsigned long seed;
+ seed = strtoul(env, &endptr, 10);
+ if (*endptr != '\0'
+ || seed > 4294967295UL
+ || (errno == ERANGE && seed == ULONG_MAX))
+ {
+ Py_FatalError("PYTHONHASHSEED must be \"random\" or an integer "
+ "in range [0; 4294967295]");
+ }
+ if (seed == 0) {
+ /* disable the randomized hash */
+ memset(secret, 0, secret_size);
+ }
+ else {
+ lcg_urandom(seed, (unsigned char*)secret, secret_size);
+ }
+ }
+ else {
+#ifdef MS_WINDOWS
+ (void)win32_urandom((unsigned char *)secret, secret_size, 0);
+#else /* #ifdef MS_WINDOWS */
+# ifdef __VMS
+ vms_urandom((unsigned char *)secret, secret_size, 0);
+# else
+ dev_urandom_noraise((char*)secret, secret_size);
+# endif
+#endif
+ }
+}
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 8a659c5..c4f27d0 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -1368,6 +1368,7 @@ static PyStructSequence_Field flags_fields[] = {
/* {"skip_first", "-x"}, */
{"bytes_warning", "-b"},
{"quiet", "-q"},
+ {"hash_randomization", "-R"},
{0}
};
@@ -1376,9 +1377,9 @@ static PyStructSequence_Desc flags_desc = {
flags__doc__, /* doc */
flags_fields, /* fields */
#ifdef RISCOS
- 13
+ 14
#else
- 12
+ 13
#endif
};
@@ -1412,6 +1413,7 @@ make_flags(void)
/* SetFlag(skipfirstline); */
SetFlag(Py_BytesWarningFlag);
SetFlag(Py_QuietFlag);
+ SetFlag(Py_HashRandomizationFlag);
#undef SetFlag
if (PyErr_Occurred()) {