summary refs log tree commit diff stats
path: root/Lib
diff options
context:
space:
mode:
author Georg Brandl <georg@python.org> 2012-02-20 18:54:16 (GMT)
committer Georg Brandl <georg@python.org> 2012-02-20 18:54:16 (GMT)
commit 2daf6ae2495c862adf8bc717bfe9964081ea0b10 (patch)
tree ebd7efe668e4f7842c6d51bdbde47b00f92a57db /Lib
parent ec1712a1662282c909b4cd4cc0c7486646bc9246 (diff)
download cpython-2daf6ae2495c862adf8bc717bfe9964081ea0b10.zip
cpython-2daf6ae2495c862adf8bc717bfe9964081ea0b10.tar.gz
cpython-2daf6ae2495c862adf8bc717bfe9964081ea0b10.tar.bz2
Issue #13703: add a way to randomize the hash values of basic types (str, bytes, datetime)
in order to make algorithmic complexity attacks on (e.g.) web apps much more complicated. The environment variable PYTHONHASHSEED and the new command line flag -R control this behavior.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/json/__init__.py 4
-rw-r--r-- Lib/os.py 17
-rw-r--r-- Lib/test/mapping_tests.py 2
-rwxr-xr-x Lib/test/regrtest.py 5
-rw-r--r-- Lib/test/script_helper.py 7
-rw-r--r-- Lib/test/test_cmd_line.py 17
-rw-r--r-- Lib/test/test_descr.py 12
-rw-r--r-- Lib/test/test_hash.py 92
-rw-r--r-- Lib/test/test_os.py 36
-rw-r--r-- Lib/test/test_set.py 23
-rw-r--r-- Lib/test/test_sys.py 2
-rw-r--r-- Lib/test/test_urllib.py 4
-rw-r--r-- Lib/tkinter/test/test_ttk/test_functions.py 2
13 files changed, 184 insertions, 39 deletions
diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py
index 6d88931..ba2bc1d 100644
--- a/Lib/json/__init__.py
+++ b/Lib/json/__init__.py
@@ -31,7 +31,9 @@ Encoding basic Python object hierarchies::
Compact encoding::
>>> import json
- >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',', ':'))
+ >>> from collections import OrderedDict
+ >>> mydict = OrderedDict([('4', 5), ('6', 7)])
+ >>> json.dumps([1,2,3,mydict], separators=(',', ':'))
'[1,2,3,{"4":5,"6":7}]'
Pretty printing::
diff --git a/Lib/os.py b/Lib/os.py
index b46c02f..8f66472 100644
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -611,23 +611,6 @@ try:
except NameError: # statvfs_result may not exist
pass
-if not _exists("urandom"):
- def urandom(n):
- """urandom(n) -> str
-
- Return a string of n random bytes suitable for cryptographic use.
-
- """
- try:
- _urandomfd = open("/dev/urandom", O_RDONLY)
- except (OSError, IOError):
- raise NotImplementedError("/dev/urandom (or equivalent) not found")
- bs = b""
- while len(bs) < n:
- bs += read(_urandomfd, n - len(bs))
- close(_urandomfd)
- return bs
-
# Supply os.popen()
def popen(cmd, mode="r", buffering=-1):
if not isinstance(cmd, str):
diff --git a/Lib/test/mapping_tests.py b/Lib/test/mapping_tests.py
index c34bd59..592a78b 100644
--- a/Lib/test/mapping_tests.py
+++ b/Lib/test/mapping_tests.py
@@ -14,7 +14,7 @@ class BasicTestMappingProtocol(unittest.TestCase):
def _reference(self):
"""Return a dictionary of values which are invariant by storage
in the object under test."""
- return {1:2, "key1":"value1", "key2":(1,2,3)}
+ return {"1": "2", "key1":"value1", "key2":(1,2,3)}
def _empty_mapping(self):
"""Return an empty mapping object"""
return self.type2test()
diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py
index 98d68bd..d203600 100755
--- a/Lib/test/regrtest.py
+++ b/Lib/test/regrtest.py
@@ -428,6 +428,11 @@ def main(tests=None, testdir=None, verbose=0, quiet=False, generate=False,
except ValueError:
print("Couldn't find starting test (%s), using all tests" % start)
if randomize:
+ hashseed = os.getenv('PYTHONHASHSEED')
+ if not hashseed:
+ os.environ['PYTHONHASHSEED'] = str(random_seed)
+ os.execv(sys.executable, [sys.executable] + sys.argv)
+ return
random.seed(random_seed)
print("Using random seed", random_seed)
random.shuffle(tests)
diff --git a/Lib/test/script_helper.py b/Lib/test/script_helper.py
index 0699cf6..fca28d3 100644
--- a/Lib/test/script_helper.py
+++ b/Lib/test/script_helper.py
@@ -3,7 +3,6 @@
import sys
import os
-import re
import os.path
import tempfile
import subprocess
@@ -19,11 +18,15 @@ def _assert_python(expected_success, *args, **env_vars):
cmd_line = [sys.executable]
if not env_vars:
cmd_line.append('-E')
- cmd_line.extend(args)
# Need to preserve the original environment, for in-place testing of
# shared library builds.
env = os.environ.copy()
+ # But a special flag can be set to override this -- in that case, the
+ # caller is responsible for passing the full environment.
+ if env_vars.pop('__cleanenv', None):
+ env = {}
env.update(env_vars)
+ cmd_line.extend(args)
p = subprocess.Popen(cmd_line, stdin=subprocess.PIPE,
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
env=env)
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index c4b8be5..eacd7a6 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -4,7 +4,6 @@
import os
import test.support, unittest
-import os
import sys
import subprocess
@@ -190,6 +189,22 @@ sys.stdout.buffer.write(path)"""
self.assertTrue(path1.encode('ascii') in stdout)
self.assertTrue(path2.encode('ascii') in stdout)
+ def test_hash_randomization(self):
+ # Verify that -R enables hash randomization:
+ self.verify_valid_flag('-R')
+ hashes = []
+ for i in range(2):
+ code = 'print(hash("spam"))'
+ data, rc = self.start_python_and_exit_code('-R', '-c', code)
+ self.assertEqual(rc, 0)
+ hashes.append(data)
+ self.assertNotEqual(hashes[0], hashes[1])
+
+ # Verify that sys.flags contains hash_randomization
+ code = 'import sys; print("random is", sys.flags.hash_randomization)'
+ data, rc = self.start_python_and_exit_code('-R', '-c', code)
+ self.assertEqual(rc, 0)
+ self.assertIn(b'random is 1', data)
def test_main():
test.support.run_unittest(CmdLineTest)
diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py
index 0ce85f0..077f5da 100644
--- a/Lib/test/test_descr.py
+++ b/Lib/test/test_descr.py
@@ -4300,8 +4300,18 @@ class DictProxyTests(unittest.TestCase):
def test_repr(self):
# Testing dict_proxy.__repr__
+ def sorted_dict_repr(repr_):
+ # Given the repr of a dict, sort the keys
+ assert repr_.startswith('{')
+ assert repr_.endswith('}')
+ kvs = repr_[1:-1].split(', ')
+ return '{' + ', '.join(sorted(kvs)) + '}'
dict_ = {k: v for k, v in self.C.__dict__.items()}
- self.assertEqual(repr(self.C.__dict__), 'dict_proxy({!r})'.format(dict_))
+ repr_ = repr(self.C.__dict__)
+ self.assert_(repr_.startswith('dict_proxy('))
+ self.assert_(repr_.endswith(')'))
+ self.assertEqual(sorted_dict_repr(repr_[len('dict_proxy('):-len(')')]),
+ sorted_dict_repr('{!r}'.format(dict_)))
class PTypesLongInitTest(unittest.TestCase):
diff --git a/Lib/test/test_hash.py b/Lib/test/test_hash.py
index 569e5e0..f5736b2 100644
--- a/Lib/test/test_hash.py
+++ b/Lib/test/test_hash.py
@@ -3,10 +3,16 @@
#
# Also test that hash implementations are inherited as expected
+import datetime
+import os
+import struct
import unittest
from test import support
+from test.script_helper import assert_python_ok
from collections import Hashable
+IS_64BIT = (struct.calcsize('l') == 8)
+
class HashEqualityTestCase(unittest.TestCase):
@@ -118,10 +124,92 @@ class HashBuiltinsTestCase(unittest.TestCase):
for obj in self.hashes_to_check:
self.assertEqual(hash(obj), _default_hash(obj))
+class HashRandomizationTests(unittest.TestCase):
+
+ # Each subclass should define a field "repr_", containing the repr() of
+ # an object to be tested
+
+ def get_hash_command(self, repr_):
+ return 'print(hash(%s))' % repr_
+
+ def get_hash(self, repr_, seed=None):
+ env = os.environ.copy()
+ env['__cleanenv'] = True # signal to assert_python not to do a copy
+ # of os.environ on its own
+ if seed is not None:
+ env['PYTHONHASHSEED'] = str(seed)
+ else:
+ env.pop('PYTHONHASHSEED', None)
+ out = assert_python_ok(
+ '-c', self.get_hash_command(repr_),
+ **env)
+ stdout = out[1].strip()
+ return int(stdout)
+
+ def test_randomized_hash(self):
+ # two runs should return different hashes
+ run1 = self.get_hash(self.repr_, seed='random')
+ run2 = self.get_hash(self.repr_, seed='random')
+ self.assertNotEqual(run1, run2)
+
+class StringlikeHashRandomizationTests(HashRandomizationTests):
+ def test_null_hash(self):
+ # PYTHONHASHSEED=0 disables the randomized hash
+ if IS_64BIT:
+ known_hash_of_obj = 1453079729188098211
+ else:
+ known_hash_of_obj = -1600925533
+
+ # Randomization is disabled by default:
+ self.assertEqual(self.get_hash(self.repr_), known_hash_of_obj)
+
+ # It can also be disabled by setting the seed to 0:
+ self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj)
+
+ def test_fixed_hash(self):
+ # test a fixed seed for the randomized hash
+ # Note that all types share the same values:
+ if IS_64BIT:
+ h = -4410911502303878509
+ else:
+ h = -206076799
+ self.assertEqual(self.get_hash(self.repr_, seed=42), h)
+
+class StrHashRandomizationTests(StringlikeHashRandomizationTests):
+ repr_ = repr('abc')
+
+ def test_empty_string(self):
+ self.assertEqual(hash(""), 0)
+
+class BytesHashRandomizationTests(StringlikeHashRandomizationTests):
+ repr_ = repr(b'abc')
+
+ def test_empty_string(self):
+ self.assertEqual(hash(b""), 0)
+
+class DatetimeTests(HashRandomizationTests):
+ def get_hash_command(self, repr_):
+ return 'import datetime; print(hash(%s))' % repr_
+
+class DatetimeDateTests(DatetimeTests):
+ repr_ = repr(datetime.date(1066, 10, 14))
+
+class DatetimeDatetimeTests(DatetimeTests):
+ repr_ = repr(datetime.datetime(1, 2, 3, 4, 5, 6, 7))
+
+class DatetimeTimeTests(DatetimeTests):
+ repr_ = repr(datetime.time(0))
+
+
def test_main():
support.run_unittest(HashEqualityTestCase,
- HashInheritanceTestCase,
- HashBuiltinsTestCase)
+ HashInheritanceTestCase,
+ HashBuiltinsTestCase,
+ StrHashRandomizationTests,
+ BytesHashRandomizationTests,
+ DatetimeDateTests,
+ DatetimeDatetimeTests,
+ DatetimeTimeTests)
if __name__ == "__main__":
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py
index cbf0c16..bff4f0b 100644
--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -9,6 +9,7 @@ import warnings
import sys
import shutil
from test import support
+from test.script_helper import assert_python_ok
# Detect whether we're on a Linux system that uses the (now outdated
# and unmaintained) linuxthreads threading library. There's an issue
@@ -574,14 +575,33 @@ class DevNullTests(unittest.TestCase):
f.close()
class URandomTests(unittest.TestCase):
- def test_urandom(self):
- try:
- self.assertEqual(len(os.urandom(1)), 1)
- self.assertEqual(len(os.urandom(10)), 10)
- self.assertEqual(len(os.urandom(100)), 100)
- self.assertEqual(len(os.urandom(1000)), 1000)
- except NotImplementedError:
- pass
+ def test_urandom_length(self):
+ self.assertEqual(len(os.urandom(0)), 0)
+ self.assertEqual(len(os.urandom(1)), 1)
+ self.assertEqual(len(os.urandom(10)), 10)
+ self.assertEqual(len(os.urandom(100)), 100)
+ self.assertEqual(len(os.urandom(1000)), 1000)
+
+ def test_urandom_value(self):
+ data1 = os.urandom(16)
+ data2 = os.urandom(16)
+ self.assertNotEqual(data1, data2)
+
+ def get_urandom_subprocess(self, count):
+ code = '\n'.join((
+ 'import os, sys',
+ 'data = os.urandom(%s)' % count,
+ 'sys.stdout.buffer.write(data)',
+ 'sys.stdout.buffer.flush()'))
+ out = assert_python_ok('-c', code)
+ stdout = out[1]
+ self.assertEqual(len(stdout), 16)
+ return stdout
+
+ def test_urandom_subprocess(self):
+ data1 = self.get_urandom_subprocess(16)
+ data2 = self.get_urandom_subprocess(16)
+ self.assertNotEqual(data1, data2)
class ExecTests(unittest.TestCase):
@unittest.skipIf(USING_LINUXTHREADS,
diff --git a/Lib/test/test_set.py b/Lib/test/test_set.py
index 99d5c70..5d5e232 100644
--- a/Lib/test/test_set.py
+++ b/Lib/test/test_set.py
@@ -734,6 +734,17 @@ class TestBasicOps(unittest.TestCase):
if self.repr is not None:
self.assertEqual(repr(self.set), self.repr)
+ def check_repr_against_values(self):
+ text = repr(self.set)
+ self.assertTrue(text.startswith('{'))
+ self.assertTrue(text.endswith('}'))
+
+ result = text[1:-1].split(', ')
+ result.sort()
+ sorted_repr_values = [repr(value) for value in self.values]
+ sorted_repr_values.sort()
+ self.assertEqual(result, sorted_repr_values)
+
def test_print(self):
try:
fo = open(support.TESTFN, "w")
@@ -892,7 +903,9 @@ class TestBasicOpsString(TestBasicOps):
self.set = set(self.values)
self.dup = set(self.values)
self.length = 3
- self.repr = "{'a', 'c', 'b'}"
+
+ def test_repr(self):
+ self.check_repr_against_values()
#------------------------------------------------------------------------------
@@ -903,7 +916,9 @@ class TestBasicOpsBytes(TestBasicOps):
self.set = set(self.values)
self.dup = set(self.values)
self.length = 3
- self.repr = "{b'a', b'c', b'b'}"
+
+ def test_repr(self):
+ self.check_repr_against_values()
#------------------------------------------------------------------------------
@@ -916,11 +931,13 @@ class TestBasicOpsMixedStringBytes(TestBasicOps):
self.set = set(self.values)
self.dup = set(self.values)
self.length = 4
- self.repr = "{'a', b'a', 'b', b'b'}"
def tearDown(self):
warnings.filters = self.warning_filters
+ def test_repr(self):
+ self.check_repr_against_values()
+
#==============================================================================
def baditer():
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 11685a4..7732c4c 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -446,7 +446,7 @@ class SysModuleTest(unittest.TestCase):
attrs = ("debug", "division_warning",
"inspect", "interactive", "optimize", "dont_write_bytecode",
"no_user_site", "no_site", "ignore_environment", "verbose",
- "bytes_warning")
+ "bytes_warning", "hash_randomization")
for attr in attrs:
self.assertTrue(hasattr(sys.flags, attr), attr)
self.assertEqual(type(getattr(sys.flags, attr)), int, attr)
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 4d3509a..482acc1 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -12,6 +12,7 @@ import os
import sys
import tempfile
import warnings
+import collections
def hexescape(char):
"""Escape char as RFC 2396 specifies"""
@@ -840,8 +841,9 @@ class urlencode_Tests(unittest.TestCase):
self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
self.assertEqual("a=None&a=a",
urllib.parse.urlencode({"a": [None, "a"]}, True))
+ data = collections.OrderedDict([("a", 1), ("b", 1)])
self.assertEqual("a=a&a=b",
- urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))
+ urllib.parse.urlencode({"a": data}, True))
def test_urlencode_encoding(self):
# ASCII encoding. Expect %3F with errors="replace"
diff --git a/Lib/tkinter/test/test_ttk/test_functions.py b/Lib/tkinter/test/test_ttk/test_functions.py
index df593cd..2303e4c 100644
--- a/Lib/tkinter/test/test_ttk/test_functions.py
+++ b/Lib/tkinter/test/test_ttk/test_functions.py
@@ -143,7 +143,7 @@ class InternalFunctionsTest(unittest.TestCase):
('a', 'b', 'c')), ("test {a b} c", ()))
# state spec and options
self.assertEqual(ttk._format_elemcreate('image', False, 'test',
- ('a', 'b'), a='x', b='y'), ("test a b", ("-a", "x", "-b", "y")))
+ ('a', 'b'), a='x'), ("test a b", ("-a", "x")))
# format returned values as a tcl script
# state spec with multiple states and an option with a multivalue
self.assertEqual(ttk._format_elemcreate('image', True, 'test',