From 2daf6ae2495c862adf8bc717bfe9964081ea0b10 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 19:54:16 +0100 Subject: Issue #13703: add a way to randomize the hash values of basic types (str, bytes, datetime) in order to make algorithmic complexity attacks on (e.g.) web apps much more complicated. The environment variable PYTHONHASHSEED and the new command line flag -R control this behavior. --- Doc/library/sys.rst | 4 + Doc/reference/datamodel.rst | 2 + Doc/using/cmdline.rst | 48 ++++- Include/object.h | 6 + Include/pydebug.h | 1 + Include/pythonrun.h | 2 + Lib/json/__init__.py | 4 +- Lib/os.py | 17 -- Lib/test/mapping_tests.py | 2 +- Lib/test/regrtest.py | 5 + Lib/test/script_helper.py | 7 +- Lib/test/test_cmd_line.py | 17 +- Lib/test/test_descr.py | 12 +- Lib/test/test_hash.py | 92 ++++++++- Lib/test/test_os.py | 36 +++- Lib/test/test_set.py | 23 ++- Lib/test/test_sys.py | 2 +- Lib/test/test_urllib.py | 4 +- Lib/tkinter/test/test_ttk/test_functions.py | 2 +- Makefile.pre.in | 1 + Misc/NEWS | 5 + Misc/python.man | 29 +++ Modules/datetimemodule.c | 4 +- Modules/main.c | 16 +- Modules/posixmodule.c | 125 ++---------- Objects/bytesobject.c | 12 +- Objects/object.c | 2 + Objects/unicodeobject.c | 12 +- PCbuild/pythoncore.vcproj | 4 + Python/pythonrun.c | 8 + Python/random.c | 302 ++++++++++++++++++++++++++++ Python/sysmodule.c | 6 +- 32 files changed, 660 insertions(+), 152 deletions(-) create mode 100644 Python/random.c diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index bb9f920..9594756 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -220,8 +220,12 @@ always available. :const:`ignore_environment` :option:`-E` :const:`verbose` :option:`-v` :const:`bytes_warning` :option:`-b` + :const:`hash_randomization` :option:`-R` ============================= ============================= + .. versionadded:: 3.1.5 + The ``hash_randomization`` attribute. + .. data:: float_info diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 6f874b6..7ded851 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -1265,6 +1265,8 @@ Basic customization inheritance of :meth:`__hash__` will be blocked, just as if :attr:`__hash__` had been explicitly set to :const:`None`. + See also the :option:`-R` command-line option. + .. method:: object.__bool__(self) diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 3fe0c7a..11e2d7d 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -21,7 +21,7 @@ Command line When invoking Python, you may specify any of these options:: - python [-bBdEhiOsSuvVWx?] [-c command | -m module-name | script | - ] [args] + python [-bBdEhiORsSuvVWx?] [-c command | -m module-name | script | - ] [args] The most common use case is, of course, a simple invocation of a script:: @@ -215,6 +215,29 @@ Miscellaneous options Discard docstrings in addition to the :option:`-O` optimizations. +.. cmdoption:: -R + + Turn on hash randomization, so that the :meth:`__hash__` values of str, bytes + and datetime objects are "salted" with an unpredictable random value. + Although they remain constant within an individual Python process, they are + not predictable between repeated invocations of Python. + + This is intended to provide protection against a denial-of-service caused by + carefully-chosen inputs that exploit the worst case performance of a dict + insertion, O(n^2) complexity. See + http://www.ocert.org/advisories/ocert-2011-003.html for details. + + Changing hash values affects the order in which keys are retrieved from a + dict. Although Python has never made guarantees about this ordering (and it + typically varies between 32-bit and 64-bit builds), enough real-world code + implicitly relies on this non-guaranteed behavior that the randomization is + disabled by default. + + See also :envvar:`PYTHONHASHSEED`. + + .. versionadded:: 3.1.5 + + .. cmdoption:: -s Don't add user site directory to sys.path @@ -314,6 +337,7 @@ Miscellaneous options .. note:: The line numbers in error messages will be off by one. + Options you shouldn't use ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -328,6 +352,7 @@ Options you shouldn't use Reserved for alternative implementations of Python to use for their own purposes. + .. _using-on-envvars: Environment variables @@ -435,6 +460,27 @@ These environment variables influence Python's behavior. import of source modules. +.. envvar:: PYTHONHASHSEED + + If this variable is set to ``random``, the effect is the same as specifying + the :option:`-R` option: a random value is used to seed the hashes of str, + bytes and datetime objects. + + If :envvar:`PYTHONHASHSEED` is set to an integer value, it is used as a fixed + seed for generating the hash() of the types covered by the hash + randomization. + + Its purpose is to allow repeatable hashing, such as for selftests for the + interpreter itself, or to allow a cluster of python processes to share hash + values. + + The integer must be a decimal number in the range [0,4294967295]. Specifying + the value 0 will lead to the same hash values as when hash randomization is + disabled. + + .. versionadded:: 3.1.5 + + .. envvar:: PYTHONIOENCODING Overrides the encoding used for stdin/stdout/stderr, in the syntax diff --git a/Include/object.h b/Include/object.h index ef73a21..7848cf4 100644 --- a/Include/object.h +++ b/Include/object.h @@ -473,6 +473,12 @@ PyAPI_FUNC(void) Py_ReprLeave(PyObject *); PyAPI_FUNC(long) _Py_HashDouble(double); PyAPI_FUNC(long) _Py_HashPointer(void*); +typedef struct { + long prefix; + long suffix; +} _Py_HashSecret_t; +PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret; + /* Helper for passing objects to printf and the like */ #define PyObject_REPR(obj) _PyUnicode_AsString(PyObject_Repr(obj)) diff --git a/Include/pydebug.h b/Include/pydebug.h index 0a31f5b..5969e44 100644 --- a/Include/pydebug.h +++ b/Include/pydebug.h @@ -19,6 +19,7 @@ PyAPI_DATA(int) Py_DivisionWarningFlag; PyAPI_DATA(int) Py_DontWriteBytecodeFlag; PyAPI_DATA(int) Py_NoUserSiteDirectory; PyAPI_DATA(int) Py_UnbufferedStdioFlag; +PyAPI_DATA(int) Py_HashRandomizationFlag; /* this is a wrapper around getenv() that pays attention to Py_IgnoreEnvironmentFlag. It should be used for getting variables like diff --git a/Include/pythonrun.h b/Include/pythonrun.h index 96a0e23..af4aa08 100644 --- a/Include/pythonrun.h +++ b/Include/pythonrun.h @@ -174,6 +174,8 @@ typedef void (*PyOS_sighandler_t)(int); PyAPI_FUNC(PyOS_sighandler_t) PyOS_getsig(int); PyAPI_FUNC(PyOS_sighandler_t) PyOS_setsig(int, PyOS_sighandler_t); +/* Random */ +PyAPI_FUNC(int) _PyOS_URandom (void *buffer, Py_ssize_t size); #ifdef __cplusplus } diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py index 6d88931..ba2bc1d 100644 --- a/Lib/json/__init__.py +++ b/Lib/json/__init__.py @@ -31,7 +31,9 @@ Encoding basic Python object hierarchies:: Compact encoding:: >>> import json - >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',', ':')) + >>> from collections import OrderedDict + >>> mydict = OrderedDict([('4', 5), ('6', 7)]) + >>> json.dumps([1,2,3,mydict], separators=(',', ':')) '[1,2,3,{"4":5,"6":7}]' Pretty printing:: diff --git a/Lib/os.py b/Lib/os.py index b46c02f..8f66472 100644 --- a/Lib/os.py +++ b/Lib/os.py @@ -611,23 +611,6 @@ try: except NameError: # statvfs_result may not exist pass -if not _exists("urandom"): - def urandom(n): - """urandom(n) -> str - - Return a string of n random bytes suitable for cryptographic use. - - """ - try: - _urandomfd = open("/dev/urandom", O_RDONLY) - except (OSError, IOError): - raise NotImplementedError("/dev/urandom (or equivalent) not found") - bs = b"" - while len(bs) < n: - bs += read(_urandomfd, n - len(bs)) - close(_urandomfd) - return bs - # Supply os.popen() def popen(cmd, mode="r", buffering=-1): if not isinstance(cmd, str): diff --git a/Lib/test/mapping_tests.py b/Lib/test/mapping_tests.py index c34bd59..592a78b 100644 --- a/Lib/test/mapping_tests.py +++ b/Lib/test/mapping_tests.py @@ -14,7 +14,7 @@ class BasicTestMappingProtocol(unittest.TestCase): def _reference(self): """Return a dictionary of values which are invariant by storage in the object under test.""" - return {1:2, "key1":"value1", "key2":(1,2,3)} + return {"1": "2", "key1":"value1", "key2":(1,2,3)} def _empty_mapping(self): """Return an empty mapping object""" return self.type2test() diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py index 98d68bd..d203600 100755 --- a/Lib/test/regrtest.py +++ b/Lib/test/regrtest.py @@ -428,6 +428,11 @@ def main(tests=None, testdir=None, verbose=0, quiet=False, generate=False, except ValueError: print("Couldn't find starting test (%s), using all tests" % start) if randomize: + hashseed = os.getenv('PYTHONHASHSEED') + if not hashseed: + os.environ['PYTHONHASHSEED'] = str(random_seed) + os.execv(sys.executable, [sys.executable] + sys.argv) + return random.seed(random_seed) print("Using random seed", random_seed) random.shuffle(tests) diff --git a/Lib/test/script_helper.py b/Lib/test/script_helper.py index 0699cf6..fca28d3 100644 --- a/Lib/test/script_helper.py +++ b/Lib/test/script_helper.py @@ -3,7 +3,6 @@ import sys import os -import re import os.path import tempfile import subprocess @@ -19,11 +18,15 @@ def _assert_python(expected_success, *args, **env_vars): cmd_line = [sys.executable] if not env_vars: cmd_line.append('-E') - cmd_line.extend(args) # Need to preserve the original environment, for in-place testing of # shared library builds. env = os.environ.copy() + # But a special flag that can be set to override -- in this case, the + # caller is responsible to pass the full environment. + if env_vars.pop('__cleanenv', None): + env = {} env.update(env_vars) + cmd_line.extend(args) p = subprocess.Popen(cmd_line, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env) diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index c4b8be5..eacd7a6 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -4,7 +4,6 @@ import os import test.support, unittest -import os import sys import subprocess @@ -190,6 +189,22 @@ sys.stdout.buffer.write(path)""" self.assertTrue(path1.encode('ascii') in stdout) self.assertTrue(path2.encode('ascii') in stdout) + def test_hash_randomization(self): + # Verify that -R enables hash randomization: + self.verify_valid_flag('-R') + hashes = [] + for i in range(2): + code = 'print(hash("spam"))' + data, rc = self.start_python_and_exit_code('-R', '-c', code) + self.assertEqual(rc, 0) + hashes.append(data) + self.assertNotEqual(hashes[0], hashes[1]) + + # Verify that sys.flags contains hash_randomization + code = 'import sys; print("random is", sys.flags.hash_randomization)' + data, rc = self.start_python_and_exit_code('-R', '-c', code) + self.assertEqual(rc, 0) + self.assertIn(b'random is 1', data) def test_main(): test.support.run_unittest(CmdLineTest) diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index 0ce85f0..077f5da 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -4300,8 +4300,18 @@ class DictProxyTests(unittest.TestCase): def test_repr(self): # Testing dict_proxy.__repr__ + def sorted_dict_repr(repr_): + # Given the repr of a dict, sort the keys + assert repr_.startswith('{') + assert repr_.endswith('}') + kvs = repr_[1:-1].split(', ') + return '{' + ', '.join(sorted(kvs)) + '}' dict_ = {k: v for k, v in self.C.__dict__.items()} - self.assertEqual(repr(self.C.__dict__), 'dict_proxy({!r})'.format(dict_)) + repr_ = repr(self.C.__dict__) + self.assert_(repr_.startswith('dict_proxy(')) + self.assert_(repr_.endswith(')')) + self.assertEqual(sorted_dict_repr(repr_[len('dict_proxy('):-len(')')]), + sorted_dict_repr('{!r}'.format(dict_))) class PTypesLongInitTest(unittest.TestCase): diff --git a/Lib/test/test_hash.py b/Lib/test/test_hash.py index 569e5e0..f5736b2 100644 --- a/Lib/test/test_hash.py +++ b/Lib/test/test_hash.py @@ -3,10 +3,16 @@ # # Also test that hash implementations are inherited as expected +import datetime +import os +import struct import unittest from test import support +from test.script_helper import assert_python_ok from collections import Hashable +IS_64BIT = (struct.calcsize('l') == 8) + class HashEqualityTestCase(unittest.TestCase): @@ -118,10 +124,92 @@ class HashBuiltinsTestCase(unittest.TestCase): for obj in self.hashes_to_check: self.assertEqual(hash(obj), _default_hash(obj)) +class HashRandomizationTests(unittest.TestCase): + + # Each subclass should define a field "repr_", containing the repr() of + # an object to be tested + + def get_hash_command(self, repr_): + return 'print(hash(%s))' % repr_ + + def get_hash(self, repr_, seed=None): + env = os.environ.copy() + env['__cleanenv'] = True # signal to assert_python not to do a copy + # of os.environ on its own + if seed is not None: + env['PYTHONHASHSEED'] = str(seed) + else: + env.pop('PYTHONHASHSEED', None) + out = assert_python_ok( + '-c', self.get_hash_command(repr_), + **env) + stdout = out[1].strip() + return int(stdout) + + def test_randomized_hash(self): + # two runs should return different hashes + run1 = self.get_hash(self.repr_, seed='random') + run2 = self.get_hash(self.repr_, seed='random') + self.assertNotEqual(run1, run2) + +class StringlikeHashRandomizationTests(HashRandomizationTests): + def test_null_hash(self): + # PYTHONHASHSEED=0 disables the randomized hash + if IS_64BIT: + known_hash_of_obj = 1453079729188098211 + else: + known_hash_of_obj = -1600925533 + + # Randomization is disabled by default: + self.assertEqual(self.get_hash(self.repr_), known_hash_of_obj) + + # It can also be disabled by setting the seed to 0: + self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj) + + def test_fixed_hash(self): + # test a fixed seed for the randomized hash + # Note that all types share the same values: + if IS_64BIT: + h = -4410911502303878509 + else: + h = -206076799 + self.assertEqual(self.get_hash(self.repr_, seed=42), h) + +class StrHashRandomizationTests(StringlikeHashRandomizationTests): + repr_ = repr('abc') + + def test_empty_string(self): + self.assertEqual(hash(""), 0) + +class BytesHashRandomizationTests(StringlikeHashRandomizationTests): + repr_ = repr(b'abc') + + def test_empty_string(self): + self.assertEqual(hash(b""), 0) + +class DatetimeTests(HashRandomizationTests): + def get_hash_command(self, repr_): + return 'import datetime; print(hash(%s))' % repr_ + +class DatetimeDateTests(DatetimeTests): + repr_ = repr(datetime.date(1066, 10, 14)) + +class DatetimeDatetimeTests(DatetimeTests): + repr_ = repr(datetime.datetime(1, 2, 3, 4, 5, 6, 7)) + +class DatetimeTimeTests(DatetimeTests): + repr_ = repr(datetime.time(0)) + + def test_main(): support.run_unittest(HashEqualityTestCase, - HashInheritanceTestCase, - HashBuiltinsTestCase) + HashInheritanceTestCase, + HashBuiltinsTestCase, + StrHashRandomizationTests, + BytesHashRandomizationTests, + DatetimeDateTests, + DatetimeDatetimeTests, + DatetimeTimeTests) if __name__ == "__main__": diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index cbf0c16..bff4f0b 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -9,6 +9,7 @@ import warnings import sys import shutil from test import support +from test.script_helper import assert_python_ok # Detect whether we're on a Linux system that uses the (now outdated # and unmaintained) linuxthreads threading library. There's an issue @@ -574,14 +575,33 @@ class DevNullTests(unittest.TestCase): f.close() class URandomTests(unittest.TestCase): - def test_urandom(self): - try: - self.assertEqual(len(os.urandom(1)), 1) - self.assertEqual(len(os.urandom(10)), 10) - self.assertEqual(len(os.urandom(100)), 100) - self.assertEqual(len(os.urandom(1000)), 1000) - except NotImplementedError: - pass + def test_urandom_length(self): + self.assertEqual(len(os.urandom(0)), 0) + self.assertEqual(len(os.urandom(1)), 1) + self.assertEqual(len(os.urandom(10)), 10) + self.assertEqual(len(os.urandom(100)), 100) + self.assertEqual(len(os.urandom(1000)), 1000) + + def test_urandom_value(self): + data1 = os.urandom(16) + data2 = os.urandom(16) + self.assertNotEqual(data1, data2) + + def get_urandom_subprocess(self, count): + code = '\n'.join(( + 'import os, sys', + 'data = os.urandom(%s)' % count, + 'sys.stdout.buffer.write(data)', + 'sys.stdout.buffer.flush()')) + out = assert_python_ok('-c', code) + stdout = out[1] + self.assertEqual(len(stdout), 16) + return stdout + + def test_urandom_subprocess(self): + data1 = self.get_urandom_subprocess(16) + data2 = self.get_urandom_subprocess(16) + self.assertNotEqual(data1, data2) class ExecTests(unittest.TestCase): @unittest.skipIf(USING_LINUXTHREADS, diff --git a/Lib/test/test_set.py b/Lib/test/test_set.py index 99d5c70..5d5e232 100644 --- a/Lib/test/test_set.py +++ b/Lib/test/test_set.py @@ -734,6 +734,17 @@ class TestBasicOps(unittest.TestCase): if self.repr is not None: self.assertEqual(repr(self.set), self.repr) + def check_repr_against_values(self): + text = repr(self.set) + self.assertTrue(text.startswith('{')) + self.assertTrue(text.endswith('}')) + + result = text[1:-1].split(', ') + result.sort() + sorted_repr_values = [repr(value) for value in self.values] + sorted_repr_values.sort() + self.assertEqual(result, sorted_repr_values) + def test_print(self): try: fo = open(support.TESTFN, "w") @@ -892,7 +903,9 @@ class TestBasicOpsString(TestBasicOps): self.set = set(self.values) self.dup = set(self.values) self.length = 3 - self.repr = "{'a', 'c', 'b'}" + + def test_repr(self): + self.check_repr_against_values() #------------------------------------------------------------------------------ @@ -903,7 +916,9 @@ class TestBasicOpsBytes(TestBasicOps): self.set = set(self.values) self.dup = set(self.values) self.length = 3 - self.repr = "{b'a', b'c', b'b'}" + + def test_repr(self): + self.check_repr_against_values() #------------------------------------------------------------------------------ @@ -916,11 +931,13 @@ class TestBasicOpsMixedStringBytes(TestBasicOps): self.set = set(self.values) self.dup = set(self.values) self.length = 4 - self.repr = "{'a', b'a', 'b', b'b'}" def tearDown(self): warnings.filters = self.warning_filters + def test_repr(self): + self.check_repr_against_values() + #============================================================================== def baditer(): diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 11685a4..7732c4c 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -446,7 +446,7 @@ class SysModuleTest(unittest.TestCase): attrs = ("debug", "division_warning", "inspect", "interactive", "optimize", "dont_write_bytecode", "no_user_site", "no_site", "ignore_environment", "verbose", - "bytes_warning") + "bytes_warning", "hash_randomization") for attr in attrs: self.assertTrue(hasattr(sys.flags, attr), attr) self.assertEqual(type(getattr(sys.flags, attr)), int, attr) diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 4d3509a..482acc1 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -12,6 +12,7 @@ import os import sys import tempfile import warnings +import collections def hexescape(char): """Escape char as RFC 2396 specifies""" @@ -840,8 +841,9 @@ class urlencode_Tests(unittest.TestCase): self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True)) self.assertEqual("a=None&a=a", urllib.parse.urlencode({"a": [None, "a"]}, True)) + data = collections.OrderedDict([("a", 1), ("b", 1)]) self.assertEqual("a=a&a=b", - urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True)) + urllib.parse.urlencode({"a": data}, True)) def test_urlencode_encoding(self): # ASCII encoding. Expect %3F with errors="replace' diff --git a/Lib/tkinter/test/test_ttk/test_functions.py b/Lib/tkinter/test/test_ttk/test_functions.py index df593cd..2303e4c 100644 --- a/Lib/tkinter/test/test_ttk/test_functions.py +++ b/Lib/tkinter/test/test_ttk/test_functions.py @@ -143,7 +143,7 @@ class InternalFunctionsTest(unittest.TestCase): ('a', 'b', 'c')), ("test {a b} c", ())) # state spec and options self.assertEqual(ttk._format_elemcreate('image', False, 'test', - ('a', 'b'), a='x', b='y'), ("test a b", ("-a", "x", "-b", "y"))) + ('a', 'b'), a='x'), ("test a b", ("-a", "x"))) # format returned values as a tcl script # state spec with multiple states and an option with a multivalue self.assertEqual(ttk._format_elemcreate('image', True, 'test', diff --git a/Makefile.pre.in b/Makefile.pre.in index ed1dc33..e4470bd 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -305,6 +305,7 @@ PYTHON_OBJS= \ Python/pymath.o \ Python/pystate.o \ Python/pythonrun.o \ + Python/random.o \ Python/structmember.o \ Python/symtable.o \ Python/sysmodule.o \ diff --git a/Misc/NEWS b/Misc/NEWS index 6e95697..486da13 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,11 @@ What's New in Python 3.1.5? Core and Builtins ----------------- +- Issue #13703: oCERT-2011-003: add -R command-line option and PYTHONHASHSEED + environment variables, to provide an opt-in way to protect against denial of + service attacks due to hash collisions within the dict and set types. Patch + by David Malcolm, based on work by Victor Stinner. + Library ------- diff --git a/Misc/python.man b/Misc/python.man index 411a43a..5b4eeef 100644 --- a/Misc/python.man +++ b/Misc/python.man @@ -34,6 +34,9 @@ python \- an interpreted, interactive, object-oriented programming language .B \-OO ] [ +.B \-R +] +[ .B -Q .I argument ] @@ -145,6 +148,18 @@ to \fI.pyo\fP. Given twice, causes docstrings to be discarded. .B \-OO Discard docstrings in addition to the \fB-O\fP optimizations. .TP +.B \-R +Turn on "hash randomization", so that the hash() values of str, bytes and +datetime objects are "salted" with an unpredictable pseudo-random value. +Although they remain constant within an individual Python process, they are +not predictable between repeated invocations of Python. +.IP +This is intended to provide protection against a denial of service +caused by carefully-chosen inputs that exploit the worst case performance +of a dict insertion, O(n^2) complexity. See +http://www.ocert.org/advisories/ocert-2011-003.html +for details. +.TP .BI "\-Q " argument Division control; see PEP 238. The argument must be one of "old" (the default, int/int and long/long return an int or long), "new" (new @@ -403,6 +418,20 @@ the \fB\-u\fP option. If this is set to a non-empty string it is equivalent to specifying the \fB\-v\fP option. If set to an integer, it is equivalent to specifying \fB\-v\fP multiple times. +.IP PYTHONHASHSEED +If this variable is set to "random", the effect is the same as specifying +the \fB-R\fP option: a random value is used to seed the hashes of str, +bytes and datetime objects. + +If PYTHONHASHSEED is set to an integer value, it is used as a fixed seed for +generating the hash() of the types covered by the hash randomization. Its +purpose is to allow repeatable hashing, such as for selftests for the +interpreter itself, or to allow a cluster of python processes to share hash +values. + +The integer must be a decimal number in the range [0,4294967295]. Specifying +the value 0 will lead to the same hash values as when hash randomization is +disabled. .SH AUTHOR The Python Software Foundation: http://www.python.org/psf .SH INTERNET RESOURCES diff --git a/Modules/datetimemodule.c b/Modules/datetimemodule.c index 0ac51aa..f3103ea 100644 --- a/Modules/datetimemodule.c +++ b/Modules/datetimemodule.c @@ -2566,10 +2566,12 @@ generic_hash(unsigned char *data, int len) register long x; p = (unsigned char *) data; - x = *p << 7; + x = _Py_HashSecret.prefix; + x ^= *p << 7; while (--len >= 0) x = (1000003*x) ^ *p++; x ^= len; + x ^= _Py_HashSecret.suffix; if (x == -1) x = -2; diff --git a/Modules/main.c b/Modules/main.c index eb9bb54..9607cb3 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -47,7 +47,7 @@ static wchar_t **orig_argv; static int orig_argc; /* command line options */ -#define BASE_OPTS L"bBc:dEhiJm:OsStuvVW:xX?" +#define BASE_OPTS L"bBc:dEhiJm:ORsStuvVW:xX?" #define PROGRAM_OPTS BASE_OPTS @@ -72,6 +72,9 @@ static char *usage_2 = "\ -m mod : run library module as a script (terminates option list)\n\ -O : optimize generated bytecode slightly; also PYTHONOPTIMIZE=x\n\ -OO : remove doc-strings in addition to the -O optimizations\n\ +-R : use a pseudo-random salt to make hash() values of various types be\n\ + unpredictable between separate invocations of the interpreter, as\n\ + a defence against denial-of-service attacks\n\ -s : don't add user site directory to sys.path; also PYTHONNOUSERSITE\n\ -S : don't imply 'import site' on initialization\n\ "; @@ -99,6 +102,12 @@ PYTHONHOME : alternate directory (or %c).\n\ PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\ PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\ "; +static char *usage_6 = "\ +PYTHONHASHSEED: if this variable is set to ``random``, the effect is the same \n\ + as specifying the :option:`-R` option: a random value is used to seed the\n\ + hashes of str, bytes and datetime objects. It can also be set to an integer\n\ + in the range [0,4294967295] to get hash values with a predictable seed.\n\ +"; #ifndef MS_WINDOWS static FILE* @@ -136,6 +145,7 @@ usage(int exitcode, wchar_t* program) fputs(usage_3, f); fprintf(f, usage_4, DELIM); fprintf(f, usage_5, DELIM, PYTHONHOMEHELP); + fputs(usage_6, f); } #if defined(__VMS) if (exitcode == 0) { @@ -373,6 +383,10 @@ Py_Main(int argc, wchar_t **argv) PySys_AddWarnOption(_PyOS_optarg); break; + case 'R': + Py_HashRandomizationFlag++; + break; + /* This space reserved for other options */ default: diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index a836af6..dbbc29f 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -4022,7 +4022,7 @@ posix_getgroups(PyObject *self, PyObject *noargs) #endif gid_t grouplist[MAX_GROUPS]; - /* On MacOSX getgroups(2) can return more than MAX_GROUPS results + /* On MacOSX getgroups(2) can return more than MAX_GROUPS results * This is a helper variable to store the intermediate result when * that happens. * @@ -6942,82 +6942,6 @@ posix_getloadavg(PyObject *self, PyObject *noargs) } #endif -#ifdef MS_WINDOWS - -PyDoc_STRVAR(win32_urandom__doc__, -"urandom(n) -> str\n\n\ -Return n random bytes suitable for cryptographic use."); - -typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\ - LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\ - DWORD dwFlags ); -typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\ - BYTE *pbBuffer ); - -static CRYPTGENRANDOM pCryptGenRandom = NULL; -/* This handle is never explicitly released. Instead, the operating - system will release it when the process terminates. */ -static HCRYPTPROV hCryptProv = 0; - -static PyObject* -win32_urandom(PyObject *self, PyObject *args) -{ - int howMany; - PyObject* result; - - /* Read arguments */ - if (! PyArg_ParseTuple(args, "i:urandom", &howMany)) - return NULL; - if (howMany < 0) - return PyErr_Format(PyExc_ValueError, - "negative argument not allowed"); - - if (hCryptProv == 0) { - HINSTANCE hAdvAPI32 = NULL; - CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL; - - /* Obtain handle to the DLL containing CryptoAPI - This should not fail */ - hAdvAPI32 = GetModuleHandle("advapi32.dll"); - if(hAdvAPI32 == NULL) - return win32_error("GetModuleHandle", NULL); - - /* Obtain pointers to the CryptoAPI functions - This will fail on some early versions of Win95 */ - pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress( - hAdvAPI32, - "CryptAcquireContextA"); - if (pCryptAcquireContext == NULL) - return PyErr_Format(PyExc_NotImplementedError, - "CryptAcquireContextA not found"); - - pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress( - hAdvAPI32, "CryptGenRandom"); - if (pCryptGenRandom == NULL) - return PyErr_Format(PyExc_NotImplementedError, - "CryptGenRandom not found"); - - /* Acquire context */ - if (! pCryptAcquireContext(&hCryptProv, NULL, NULL, - PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) - return win32_error("CryptAcquireContext", NULL); - } - - /* Allocate bytes */ - result = PyBytes_FromStringAndSize(NULL, howMany); - if (result != NULL) { - /* Get random data */ - memset(PyBytes_AS_STRING(result), 0, howMany); /* zero seed */ - if (! pCryptGenRandom(hCryptProv, howMany, (unsigned char*) - PyBytes_AS_STRING(result))) { - Py_DECREF(result); - return win32_error("CryptGenRandom", NULL); - } - } - return result; -} -#endif - PyDoc_STRVAR(device_encoding__doc__, "device_encoding(fd) -> str\n\n\ Return a string describing the encoding of the device\n\ @@ -7055,41 +6979,35 @@ device_encoding(PyObject *self, PyObject *args) return Py_None; } -#ifdef __VMS -/* Use openssl random routine */ -#include -PyDoc_STRVAR(vms_urandom__doc__, +PyDoc_STRVAR(posix_urandom__doc__, "urandom(n) -> str\n\n\ Return n random bytes suitable for cryptographic use."); -static PyObject* -vms_urandom(PyObject *self, PyObject *args) +static PyObject * +posix_urandom(PyObject *self, PyObject *args) { - int howMany; - PyObject* result; + Py_ssize_t size; + PyObject *result; + int ret; - /* Read arguments */ - if (! PyArg_ParseTuple(args, "i:urandom", &howMany)) + /* Read arguments */ + if (!PyArg_ParseTuple(args, "n:urandom", &size)) return NULL; - if (howMany < 0) + if (size < 0) return PyErr_Format(PyExc_ValueError, "negative argument not allowed"); + result = PyBytes_FromStringAndSize(NULL, size); + if (result == NULL) + return NULL; - /* Allocate bytes */ - result = PyBytes_FromStringAndSize(NULL, howMany); - if (result != NULL) { - /* Get random data */ - if (RAND_pseudo_bytes((unsigned char*) - PyBytes_AS_STRING(result), - howMany) < 0) { - Py_DECREF(result); - return PyErr_Format(PyExc_ValueError, - "RAND_pseudo_bytes"); - } + ret = _PyOS_URandom(PyBytes_AS_STRING(result), + PyBytes_GET_SIZE(result)); + if (ret == -1) { + Py_DECREF(result); + return NULL; } return result; } -#endif static PyMethodDef posix_methods[] = { {"access", posix_access, METH_VARARGS, posix_access__doc__}, @@ -7374,12 +7292,7 @@ static PyMethodDef posix_methods[] = { #ifdef HAVE_GETLOADAVG {"getloadavg", posix_getloadavg, METH_NOARGS, posix_getloadavg__doc__}, #endif - #ifdef MS_WINDOWS - {"urandom", win32_urandom, METH_VARARGS, win32_urandom__doc__}, - #endif - #ifdef __VMS - {"urandom", vms_urandom, METH_VARARGS, vms_urandom__doc__}, - #endif + {"urandom", posix_urandom, METH_VARARGS, posix_urandom__doc__}, {NULL, NULL} /* Sentinel */ }; diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index f2ee131..e6ab440 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -899,11 +899,21 @@ bytes_hash(PyBytesObject *a) if (a->ob_shash != -1) return a->ob_shash; len = Py_SIZE(a); + /* + We make the hash of the empty string be 0, rather than using + (prefix ^ suffix), since this slightly obfuscates the hash secret + */ + if (len == 0) { + a->ob_shash = 0; + return 0; + } p = (unsigned char *) a->ob_sval; - x = *p << 7; + x = _Py_HashSecret.prefix; + x ^= *p << 7; while (--len >= 0) x = (1000003*x) ^ *p++; x ^= Py_SIZE(a); + x ^= _Py_HashSecret.suffix; if (x == -1) x = -2; a->ob_shash = x; diff --git a/Objects/object.c b/Objects/object.c index ac57cd7..0b1c656 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -712,6 +712,8 @@ PyObject_HashNotImplemented(PyObject *v) return -1; } +_Py_HashSecret_t _Py_HashSecret; + long PyObject_Hash(PyObject *v) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2cdbc0e..5986fb8 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -7344,11 +7344,21 @@ unicode_hash(PyUnicodeObject *self) if (self->hash != -1) return self->hash; len = Py_SIZE(self); + /* + We make the hash of the empty string be 0, rather than using + (prefix ^ suffix), since this slightly obfuscates the hash secret + */ + if (len == 0) { + self->hash = 0; + return 0; + } p = self->str; - x = *p << 7; + x = _Py_HashSecret.prefix; + x ^= *p << 7; while (--len >= 0) x = (1000003*x) ^ *p++; x ^= Py_SIZE(self); + x ^= _Py_HashSecret.suffix; if (x == -1) x = -2; self->hash = x; diff --git a/PCbuild/pythoncore.vcproj b/PCbuild/pythoncore.vcproj index 11cd311..045300a 100644 --- a/PCbuild/pythoncore.vcproj +++ b/PCbuild/pythoncore.vcproj @@ -1778,6 +1778,10 @@ RelativePath="..\Python\pythonrun.c" > + + diff --git a/Python/pythonrun.c b/Python/pythonrun.c index c4ae921..4474e79 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -73,6 +73,7 @@ extern void _PyUnicode_Init(void); extern void _PyUnicode_Fini(void); extern int _PyLong_Init(void); extern void PyLong_Fini(void); +extern void _PyRandom_Init(void); #ifdef WITH_THREAD extern void _PyGILState_Init(PyInterpreterState *, PyThreadState *); @@ -91,6 +92,7 @@ int Py_FrozenFlag; /* Needed by getpath.c */ int Py_IgnoreEnvironmentFlag; /* e.g. PYTHONPATH, PYTHONHOME */ int Py_NoUserSiteDirectory = 0; /* for -s and site.py */ int Py_UnbufferedStdioFlag = 0; /* Unbuffered binary std{in,out,err} */ +int Py_HashRandomizationFlag = 0; /* for -R and PYTHONHASHSEED */ /* PyModule_GetWarningsModule is no longer necessary as of 2.6 since _warnings is builtin. This API should not be used. */ @@ -195,6 +197,12 @@ Py_InitializeEx(int install_sigs) Py_OptimizeFlag = add_flag(Py_OptimizeFlag, p); if ((p = Py_GETENV("PYTHONDONTWRITEBYTECODE")) && *p != '\0') Py_DontWriteBytecodeFlag = add_flag(Py_DontWriteBytecodeFlag, p); + /* The variable is only tested for existence here; _PyRandom_Init will + check its value further. */ + if ((p = Py_GETENV("PYTHONHASHSEED")) && *p != '\0') + Py_HashRandomizationFlag = add_flag(Py_HashRandomizationFlag, p); + + _PyRandom_Init(); interp = PyInterpreterState_New(); if (interp == NULL) diff --git a/Python/random.c b/Python/random.c new file mode 100644 index 0000000..327166e --- /dev/null +++ b/Python/random.c @@ -0,0 +1,302 @@ +#include "Python.h" +#ifdef MS_WINDOWS +#include +#else +#include +#endif + +static int random_initialized = 0; + +#ifdef MS_WINDOWS +typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\ + LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\ + DWORD dwFlags ); +typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\ + BYTE *pbBuffer ); + +static CRYPTGENRANDOM pCryptGenRandom = NULL; +/* This handle is never explicitly released. Instead, the operating + system will release it when the process terminates. */ +static HCRYPTPROV hCryptProv = 0; + +static int +win32_urandom_init(int raise) +{ + HINSTANCE hAdvAPI32 = NULL; + CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL; + + /* Obtain handle to the DLL containing CryptoAPI. This should not fail. */ + hAdvAPI32 = GetModuleHandle("advapi32.dll"); + if(hAdvAPI32 == NULL) + goto error; + + /* Obtain pointers to the CryptoAPI functions. This will fail on some early + versions of Win95. */ + pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress( + hAdvAPI32, "CryptAcquireContextA"); + if (pCryptAcquireContext == NULL) + goto error; + + pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(hAdvAPI32, + "CryptGenRandom"); + if (pCryptGenRandom == NULL) + goto error; + + /* Acquire context */ + if (! pCryptAcquireContext(&hCryptProv, NULL, NULL, + PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) + goto error; + + return 0; + +error: + if (raise) + PyErr_SetFromWindowsErr(0); + else + Py_FatalError("Failed to initialize Windows random API (CryptoGen)"); + return -1; +} + +/* Fill buffer with size pseudo-random bytes generated by the Windows CryptoGen + API. Return 0 on success, or -1 on error. */ +static int +win32_urandom(unsigned char *buffer, Py_ssize_t size, int raise) +{ + Py_ssize_t chunk; + + if (hCryptProv == 0) + { + if (win32_urandom_init(raise) == -1) + return -1; + } + + while (size > 0) + { + chunk = size > INT_MAX ? INT_MAX : size; + if (!pCryptGenRandom(hCryptProv, chunk, buffer)) + { + /* CryptGenRandom() failed */ + if (raise) + PyErr_SetFromWindowsErr(0); + else + Py_FatalError("Failed to initialized the randomized hash " + "secret using CryptoGen)"); + return -1; + } + buffer += chunk; + size -= chunk; + } + return 0; +} +#endif /* MS_WINDOWS */ + + +#ifdef __VMS +/* Use openssl random routine */ +#include +static int +vms_urandom(unsigned char *buffer, Py_ssize_t size, int raise) +{ + if (RAND_pseudo_bytes(buffer, size) < 0) { + if (raise) { + PyErr_Format(PyExc_ValueError, + "RAND_pseudo_bytes"); + } else { + Py_FatalError("Failed to initialize the randomized hash " + "secret using RAND_pseudo_bytes"); + } + return -1; + } + return 0; +} +#endif /* __VMS */ + + +#if !defined(MS_WINDOWS) && !defined(__VMS) + +/* Read size bytes from /dev/urandom into buffer. + Call Py_FatalError() on error. */ +static void +dev_urandom_noraise(char *buffer, Py_ssize_t size) +{ + int fd; + Py_ssize_t n; + + assert (0 < size); + + fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) + Py_FatalError("Failed to open /dev/urandom"); + + while (0 < size) + { + do { + n = read(fd, buffer, (size_t)size); + } while (n < 0 && errno == EINTR); + if (n <= 0) + { + /* stop on error or if read(size) returned 0 */ + Py_FatalError("Failed to read bytes from /dev/urandom"); + break; + } + buffer += n; + size -= (Py_ssize_t)n; + } + close(fd); +} + +/* Read size bytes from /dev/urandom into buffer. + Return 0 on success, raise an exception and return -1 on error. */ +static int +dev_urandom_python(char *buffer, Py_ssize_t size) +{ + int fd; + Py_ssize_t n; + + if (size <= 0) + return 0; + + Py_BEGIN_ALLOW_THREADS + fd = open("/dev/urandom", O_RDONLY); + Py_END_ALLOW_THREADS + if (fd < 0) + { + PyErr_SetFromErrnoWithFilename(PyExc_OSError, "/dev/urandom"); + return -1; + } + + Py_BEGIN_ALLOW_THREADS + do { + do { + n = read(fd, buffer, (size_t)size); + } while (n < 0 && errno == EINTR); + if (n <= 0) + break; + buffer += n; + size -= (Py_ssize_t)n; + } while (0 < size); + Py_END_ALLOW_THREADS + + if (n <= 0) + { + /* stop on error or if read(size) returned 0 */ + if (n < 0) + PyErr_SetFromErrno(PyExc_OSError); + else + PyErr_Format(PyExc_RuntimeError, + "Failed to read %zi bytes from /dev/urandom", + size); + close(fd); + return -1; + } + close(fd); + return 0; +} +#endif /* !defined(MS_WINDOWS) && !defined(__VMS) */ + +/* Fill buffer with pseudo-random bytes generated by a linear congruent + generator (LCG): + + x(n+1) = (x(n) * 214013 + 2531011) % 2^32 + + Use bits 23..16 of x(n) to generate a byte. */ +static void +lcg_urandom(unsigned int x0, unsigned char *buffer, size_t size) +{ + size_t index; + unsigned int x; + + x = x0; + for (index=0; index < size; index++) { + x *= 214013; + x += 2531011; + /* modulo 2 ^ (8 * sizeof(int)) */ + buffer[index] = (x >> 16) & 0xff; + } +} + +/* Fill buffer with size pseudo-random bytes, not suitable for cryptographic + use, from the operating random number generator (RNG). + + Return 0 on success, raise an exception and return -1 on error. */ +int +_PyOS_URandom(void *buffer, Py_ssize_t size) +{ + if (size < 0) { + PyErr_Format(PyExc_ValueError, + "negative argument not allowed"); + return -1; + } + if (size == 0) + return 0; + +#ifdef MS_WINDOWS + return win32_urandom((unsigned char *)buffer, size, 1); +#else +# ifdef __VMS + return vms_urandom((unsigned char *)buffer, size, 1); +# else + return dev_urandom_python((char*)buffer, size); +# endif +#endif +} + +void +_PyRandom_Init(void) +{ + char *env; + void *secret = &_Py_HashSecret; + Py_ssize_t secret_size = sizeof(_Py_HashSecret); + + if (random_initialized) + return; + random_initialized = 1; + + /* + By default, hash randomization is disabled, and only + enabled if PYTHONHASHSEED is set to non-empty or if + "-R" is provided at the command line: + */ + if (!Py_HashRandomizationFlag) { + /* Disable the randomized hash: */ + memset(secret, 0, secret_size); + return; + } + + /* + Hash randomization is enabled. Generate a per-process secret, + using PYTHONHASHSEED if provided. + */ + + env = Py_GETENV("PYTHONHASHSEED"); + if (env && *env != '\0' & strcmp(env, "random") != 0) { + char *endptr = env; + unsigned long seed; + seed = strtoul(env, &endptr, 10); + if (*endptr != '\0' + || seed > 4294967295UL + || (errno == ERANGE && seed == ULONG_MAX)) + { + Py_FatalError("PYTHONHASHSEED must be \"random\" or an integer " + "in range [0; 4294967295]"); + } + if (seed == 0) { + /* disable the randomized hash */ + memset(secret, 0, secret_size); + } + else { + lcg_urandom(seed, (unsigned char*)secret, secret_size); + } + } + else { +#ifdef MS_WINDOWS + (void)win32_urandom((unsigned char *)secret, secret_size, 0); +#else /* #ifdef MS_WINDOWS */ +# ifdef __VMS + vms_urandom((unsigned char *)secret, secret_size, 0); +# else + dev_urandom_noraise((char*)secret, secret_size); +# endif +#endif + } +} diff --git a/Python/sysmodule.c b/Python/sysmodule.c index c688172..6a7e914 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1126,6 +1126,7 @@ static PyStructSequence_Field flags_fields[] = { /* {"unbuffered", "-u"}, */ /* {"skip_first", "-x"}, */ {"bytes_warning", "-b"}, + {"hash_randomization", "-R"}, {0} }; @@ -1134,9 +1135,9 @@ static PyStructSequence_Desc flags_desc = { flags__doc__, /* doc */ flags_fields, /* fields */ #ifdef RISCOS - 12 + 13 #else - 11 + 12 #endif }; @@ -1169,6 +1170,7 @@ make_flags(void) /* SetFlag(saw_unbuffered_flag); */ /* SetFlag(skipfirstline); */ SetFlag(Py_BytesWarningFlag); + SetFlag(Py_HashRandomizationFlag); #undef SetFlag if (PyErr_Occurred()) { -- cgit v0.12 From a3ed11bd3409d61e6eb35995743e405790df2d3c Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Mon, 20 Feb 2012 15:20:37 -0500 Subject: don't rely on the order of module clearing --- Lib/test/test_module.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_module.py b/Lib/test/test_module.py index 7734fb0..15836ca 100644 --- a/Lib/test/test_module.py +++ b/Lib/test/test_module.py @@ -70,7 +70,7 @@ class ModuleTests(unittest.TestCase): m = ModuleType("foo") m.destroyed = destroyed s = """class A: - def __del__(self): + def __del__(self, destroyed=destroyed): destroyed.append(1) a = A()""" exec(s, m.__dict__) -- cgit v0.12 From 8848255d8a5b3ac711801466de7e19bae54246af Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 21:34:31 +0100 Subject: Run tests with -R on "make test" and the buildbots. --- Makefile.pre.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.pre.in b/Makefile.pre.in index 7ffc3ec..3008d6d 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -764,7 +764,7 @@ $(LIBRARY_OBJS) $(MODOBJS) Modules/python.o: $(PYTHON_HEADERS) TESTOPTS= -l $(EXTRATESTOPTS) TESTPROG= $(srcdir)/Lib/test/regrtest.py -TESTPYTHON= $(RUNSHARED) ./$(BUILDPYTHON) -Wd -E -bb $(TESTPYTHONOPTS) +TESTPYTHON= $(RUNSHARED) ./$(BUILDPYTHON) -Wd -E -R -bb $(TESTPYTHONOPTS) test: all platform -find $(srcdir)/Lib -name '*.py[co]' -print | xargs rm -f -$(TESTPYTHON) $(TESTPROG) $(TESTOPTS) -- cgit v0.12 From a86b262d1e8b98de5c8f6781cc30eef0b0c98ddc Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 21:34:57 +0100 Subject: Fix bad inheritance in test_subprocess that led to a number of tests being executed twice. --- Lib/test/test_subprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index 40d0fb4..fb0b834 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -1702,7 +1702,7 @@ class CommandsWithSpaces (BaseTestCase): self.with_spaces([sys.executable, self.fname, "ab cd"]) -class ContextManagerTests(ProcessTestCase): +class ContextManagerTests(BaseTestCase): def test_pipe(self): with subprocess.Popen([sys.executable, "-c", -- cgit v0.12 From 242631da860fc94eafa86ff50d219cfd9dfded3e Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 21:36:28 +0100 Subject: Fix "sys.path modified" warning in test_strlit, by not replacing sys.path itself, only its contents. --- Lib/test/test_strlit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_strlit.py b/Lib/test/test_strlit.py index 30475a4..6bdc6e4 100644 --- a/Lib/test/test_strlit.py +++ b/Lib/test/test_strlit.py @@ -65,7 +65,7 @@ class TestLiterals(unittest.TestCase): sys.path.insert(0, self.tmpdir) def tearDown(self): - sys.path = self.save_path + sys.path[:] = self.save_path shutil.rmtree(self.tmpdir, ignore_errors=True) def test_template(self): -- cgit v0.12 From c425a94899a10b565df060c6f943b5d8b4a85ac9 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 21:37:22 +0100 Subject: Fix use of deprecated assert_ method. --- Lib/test/test_descr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index 77cadc0..92304b4 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -4482,8 +4482,8 @@ class DictProxyTests(unittest.TestCase): return '{' + ', '.join(sorted(kvs)) + '}' dict_ = {k: v for k, v in self.C.__dict__.items()} repr_ = repr(self.C.__dict__) - self.assert_(repr_.startswith('dict_proxy(')) - self.assert_(repr_.endswith(')')) + self.assertTrue(repr_.startswith('dict_proxy(')) + self.assertTrue(repr_.endswith(')')) self.assertEqual(sorted_dict_repr(repr_[len('dict_proxy('):-len(')')]), sorted_dict_repr('{!r}'.format(dict_))) -- cgit v0.12 From a108227c47b27116267aa2a131624e29d23d768d Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 21:41:03 +0100 Subject: Fix test_dis dependency on dict order. --- Lib/test/test_dis.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 7a61493..4246618 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -268,12 +268,13 @@ Variable names: 6: args 7: kwds Cell variables: - 0: e - 1: d - 2: f - 3: y - 4: x - 5: z""" + 0: [edfxyz] + 1: [edfxyz] + 2: [edfxyz] + 3: [edfxyz] + 4: [edfxyz] + 5: [edfxyz]""" +# NOTE: the order of the cell variables above depends on dictionary order! co_tricky_nested_f = tricky.__func__.__code__.co_consts[1] -- cgit v0.12 From 27fe226eb13b1f7cedba838b3d388a645197d722 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 22:03:28 +0100 Subject: Another test_dis dict order dependency. --- Lib/test/test_dis.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 4246618..5c59eaa 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -293,12 +293,12 @@ Names: Variable names: 0: c Free variables: - 0: e - 1: d - 2: f - 3: y - 4: x - 5: z""" + 0: [edfxyz] + 1: [edfxyz] + 2: [edfxyz] + 3: [edfxyz] + 4: [edfxyz] + 5: [edfxyz]""" code_info_expr_str = """\ Name: -- cgit v0.12 From 61470246d086553fbd1de3c637ec17e9bb1ecb67 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 22:06:02 +0100 Subject: Remove setting hash seed to regrtest's random seed and re-execv()ing: this doesn't preserve Python flags and fails from a temp directory. --- Lib/test/regrtest.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py index 26ba982..135a90e 100755 --- a/Lib/test/regrtest.py +++ b/Lib/test/regrtest.py @@ -496,11 +496,6 @@ def main(tests=None, testdir=None, verbose=0, quiet=False, except ValueError: print("Couldn't find starting test (%s), using all tests" % start) if randomize: - hashseed = os.getenv('PYTHONHASHSEED') - if not hashseed: - os.environ['PYTHONHASHSEED'] = str(random_seed) - os.execv(sys.executable, [sys.executable] + sys.argv) - return random.seed(random_seed) print("Using random seed", random_seed) random.shuffle(selected) -- cgit v0.12 From 12897d7d395f5907e0f9a3694ba3c64c329db0dd Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 23:49:29 +0100 Subject: Fix typo in conditional. --- Python/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/random.c b/Python/random.c index 327166e..01cd83a 100644 --- a/Python/random.c +++ b/Python/random.c @@ -269,7 +269,7 @@ _PyRandom_Init(void) */ env = Py_GETENV("PYTHONHASHSEED"); - if (env && *env != '\0' & strcmp(env, "random") != 0) { + if (env && *env != '\0' && strcmp(env, "random") != 0) { char *endptr = env; unsigned long seed; seed = strtoul(env, &endptr, 10); -- cgit v0.12 From 06b1c4f68bca367bf2a2c8ce9bf36168ccb2f9ef Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 23:09:59 +0100 Subject: Fix typo. --- Misc/NEWS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS b/Misc/NEWS index 486da13..0d23663 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -11,7 +11,7 @@ Core and Builtins ----------------- - Issue #13703: oCERT-2011-003: add -R command-line option and PYTHONHASHSEED - environment variables, to provide an opt-in way to protect against denial of + environment variable, to provide an opt-in way to protect against denial of service attacks due to hash collisions within the dict and set types. Patch by David Malcolm, based on work by Victor Stinner. -- cgit v0.12 From 7c573f7a075fd8fc7bf90bdbcc4f163cda3b9acf Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 22:48:06 +0100 Subject: Fix dbm_gnu test relying on set order. --- Lib/test/test_dbm_gnu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_dbm_gnu.py b/Lib/test/test_dbm_gnu.py index 2173b92..f77dcfe 100755 --- a/Lib/test/test_dbm_gnu.py +++ b/Lib/test/test_dbm_gnu.py @@ -49,7 +49,7 @@ class TestGdbm(unittest.TestCase): all = set(gdbm.open_flags) # Test standard flags (presumably "crwn"). modes = all - set('fsu') - for mode in modes: + for mode in sorted(modes): # put "c" mode first self.g = gdbm.open(filename, mode) self.g.close() -- cgit v0.12 From 9571155ae4ff86b4275950fcdfadcc76475943e3 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 22:06:02 +0100 Subject: Remove setting hash seed to regrtest's random seed and re-execv()ing: this doesn't preserve Python flags and fails from a temp directory. --- Lib/test/regrtest.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py index d203600..98d68bd 100755 --- a/Lib/test/regrtest.py +++ b/Lib/test/regrtest.py @@ -428,11 +428,6 @@ def main(tests=None, testdir=None, verbose=0, quiet=False, generate=False, except ValueError: print("Couldn't find starting test (%s), using all tests" % start) if randomize: - hashseed = os.getenv('PYTHONHASHSEED') - if not hashseed: - os.environ['PYTHONHASHSEED'] = str(random_seed) - os.execv(sys.executable, [sys.executable] + sys.argv) - return random.seed(random_seed) print("Using random seed", random_seed) random.shuffle(tests) -- cgit v0.12 From f47b20f0b00452e00a014c6fb80b911899845e2f Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 22:08:27 +0100 Subject: Fix use of deprecated assertRegexpMatches method. --- Lib/lib2to3/tests/test_refactor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/lib2to3/tests/test_refactor.py b/Lib/lib2to3/tests/test_refactor.py index 4b87ed6..8bdebc1 100644 --- a/Lib/lib2to3/tests/test_refactor.py +++ b/Lib/lib2to3/tests/test_refactor.py @@ -230,7 +230,7 @@ from __future__ import print_function""" os.sep, os.path.basename(test_file)) for message in debug_messages: if "Not writing changes" in message: - self.assertRegexpMatches(message, message_regex) + self.assertRegex(message, message_regex) break else: self.fail("%r not matched in %r" % (message_regex, debug_messages)) -- cgit v0.12 From 16684eb62449922b8e066d8063cebc682c0ea5c7 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 22:48:06 +0100 Subject: Fix dbm_gnu test relying on set order. --- Lib/test/test_dbm_gnu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_dbm_gnu.py b/Lib/test/test_dbm_gnu.py index ce96ce4..30a39f7 100755 --- a/Lib/test/test_dbm_gnu.py +++ b/Lib/test/test_dbm_gnu.py @@ -53,7 +53,7 @@ class TestGdbm(unittest.TestCase): all = set(gdbm.open_flags) # Test standard flags (presumably "crwn"). modes = all - set('fsu') - for mode in modes: + for mode in sorted(modes): # put "c" mode first self.g = gdbm.open(filename, mode) self.g.close() -- cgit v0.12 From 09562b43304a91aa323418834e183e7f39101372 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 23:09:59 +0100 Subject: Fix typo. --- Misc/NEWS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS b/Misc/NEWS index 63281cf..0e3595a 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -11,7 +11,7 @@ Core and Builtins ----------------- - Issue #13703: oCERT-2011-003: add -R command-line option and PYTHONHASHSEED - environment variables, to provide an opt-in way to protect against denial of + environment variable, to provide an opt-in way to protect against denial of service attacks due to hash collisions within the dict and set types. Patch by David Malcolm, based on work by Victor Stinner. -- cgit v0.12 From e5a0e0a75f4aec8410092f53abad145565e76d3f Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 23:37:36 +0100 Subject: Fix obscure failures of datetime-related tests due to the datetime tests failing to restore the system state completely after testing the pure-Python versions. --- Lib/test/datetimetester.py | 2 -- Lib/test/test_datetime.py | 40 +++++++++++++++++++++++----------------- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 38f3b8f..3fd6799 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -1780,8 +1780,6 @@ class TestDateTime(TestDate): self.assertTrue(abs(from_timestamp - from_now) <= tolerance) def test_strptime(self): - import _strptime - string = '2004-12-01 13:02:47.197' format = '%Y-%m-%d %H:%M:%S.%f' expected = _strptime._strptime_datetime(self.theclass, string, format) diff --git a/Lib/test/test_datetime.py b/Lib/test/test_datetime.py index ded2aa9..d9ddb32 100644 --- a/Lib/test/test_datetime.py +++ b/Lib/test/test_datetime.py @@ -1,7 +1,9 @@ import unittest import sys from test.support import import_fresh_module, run_unittest + TESTS = 'test.datetimetester' + # XXX: import_fresh_module() is supposed to leave sys.module cache untouched, # XXX: but it does not, so we have to save and restore it ourselves. save_sys_modules = sys.modules.copy() @@ -15,28 +17,32 @@ finally: sys.modules.update(save_sys_modules) test_modules = [pure_tests, fast_tests] test_suffixes = ["_Pure", "_Fast"] +# XXX(gb) First run all the _Pure tests, then all the _Fast tests. You might +# not believe this, but in spite of all the sys.modules trickery running a _Pure +# test last will leave a mix of pure and native datetime stuff lying around. +test_classes = [] for module, suffix in zip(test_modules, test_suffixes): for name, cls in module.__dict__.items(): - if isinstance(cls, type) and issubclass(cls, unittest.TestCase): - name += suffix - cls.__name__ = name - globals()[name] = cls - def setUp(self, module=module, setup=cls.setUp): - self._save_sys_modules = sys.modules.copy() - sys.modules[TESTS] = module - sys.modules['datetime'] = module.datetime_module - sys.modules['_strptime'] = module._strptime - setup(self) - def tearDown(self, teardown=cls.tearDown): - teardown(self) - sys.modules.clear() - sys.modules.update(self._save_sys_modules) - cls.setUp = setUp - cls.tearDown = tearDown + if not (isinstance(cls, type) and issubclass(cls, unittest.TestCase)): + continue + cls.__name__ = name + suffix + @classmethod + def setUpClass(cls_, module=module): + cls_._save_sys_modules = sys.modules.copy() + sys.modules[TESTS] = module + sys.modules['datetime'] = module.datetime_module + sys.modules['_strptime'] = module._strptime + @classmethod + def tearDownClass(cls_): + sys.modules.clear() + sys.modules.update(cls_._save_sys_modules) + cls.setUpClass = setUpClass + cls.tearDownClass = tearDownClass + test_classes.append(cls) def test_main(): - run_unittest(__name__) + run_unittest(*test_classes) if __name__ == "__main__": test_main() -- cgit v0.12 From e9f637b062b3d23f284a88c45b3a8889eca673ca Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 20 Feb 2012 23:49:07 +0100 Subject: Make "regrtest -j" "-R"-aware --- Lib/test/support.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/support.py b/Lib/test/support.py index 51375d5..01cd203 100644 --- a/Lib/test/support.py +++ b/Lib/test/support.py @@ -1460,6 +1460,7 @@ def args_from_interpreter_flags(): flag_opt_map = { 'bytes_warning': 'b', 'dont_write_bytecode': 'B', + 'hash_randomization': 'R', 'ignore_environment': 'E', 'no_user_site': 's', 'no_site': 'S', -- cgit v0.12 From 91e5c08fe8a7b682099606ab48ef965833981a9a Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 20 Feb 2012 23:49:29 +0100 Subject: Fix typo in conditional. --- Python/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/random.c b/Python/random.c index 327166e..01cd83a 100644 --- a/Python/random.c +++ b/Python/random.c @@ -269,7 +269,7 @@ _PyRandom_Init(void) */ env = Py_GETENV("PYTHONHASHSEED"); - if (env && *env != '\0' & strcmp(env, "random") != 0) { + if (env && *env != '\0' && strcmp(env, "random") != 0) { char *endptr = env; unsigned long seed; seed = strtoul(env, &endptr, 10); -- cgit v0.12