summaryrefslogtreecommitdiffstats
path: root/Lib
diff options
context:
space:
mode:
authorChristian Heimes <christian@cheimes.de>2013-11-20 10:46:18 (GMT)
committerChristian Heimes <christian@cheimes.de>2013-11-20 10:46:18 (GMT)
commit985ecdcfc29adfc36ce2339acf03f819ad414869 (patch)
tree06a11f82271e768dbe49469c8736b65b083f671c /Lib
parentfe32aec25a8b36498d840bd69485e9bc94195b9c (diff)
downloadcpython-985ecdcfc29adfc36ce2339acf03f819ad414869.zip
cpython-985ecdcfc29adfc36ce2339acf03f819ad414869.tar.gz
cpython-985ecdcfc29adfc36ce2339acf03f819ad414869.tar.bz2
ssue #19183: Implement PEP 456 'secure and interchangeable hash algorithm'.
Python now uses SipHash24 on all major platforms.
Diffstat (limited to 'Lib')
-rwxr-xr-xLib/test/regrtest.py2
-rw-r--r--Lib/test/test_hash.py150
-rw-r--r--Lib/test/test_sys.py23
3 files changed, 159 insertions, 16 deletions
diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py
index a5d707e..c1c831f 100755
--- a/Lib/test/regrtest.py
+++ b/Lib/test/regrtest.py
@@ -601,6 +601,8 @@ def main(tests=None, **kwargs):
print("==", platform.python_implementation(), *sys.version.split())
print("== ", platform.platform(aliased=True),
"%s-endian" % sys.byteorder)
+ print("== ", "hash algorithm:", sys.hash_info.algorithm,
+ "64bit" if sys.maxsize > 2**32 else "32bit")
print("== ", os.getcwd())
print("Testing with flags:", sys.flags)
diff --git a/Lib/test/test_hash.py b/Lib/test/test_hash.py
index e3ab6e4..66e4155 100644
--- a/Lib/test/test_hash.py
+++ b/Lib/test/test_hash.py
@@ -12,6 +12,40 @@ from collections import Hashable
IS_64BIT = sys.maxsize > 2**32
+def lcg(x, length=16):
+ """Linear congruential generator"""
+ if x == 0:
+ return bytes(length)
+ out = bytearray(length)
+ for i in range(length):
+ x = (214013 * x + 2531011) & 0x7fffffff
+ out[i] = (x >> 16) & 0xff
+ return bytes(out)
+
+def pysiphash(uint64):
+ """Convert SipHash24 output to Py_hash_t
+ """
+ assert 0 <= uint64 < (1 << 64)
+ # simple unsigned to signed int64
+ if uint64 > (1 << 63) - 1:
+ int64 = uint64 - (1 << 64)
+ else:
+ int64 = uint64
+ # mangle uint64 to uint32
+ uint32 = (uint64 ^ uint64 >> 32) & 0xffffffff
+ # simple unsigned to signed int32
+ if uint32 > (1 << 31) - 1:
+ int32 = uint32 - (1 << 32)
+ else:
+ int32 = uint32
+ return int32, int64
+
+def skip_unless_internalhash(test):
+ """Skip decorator for tests that depend on SipHash24 or FNV"""
+ ok = sys.hash_info.algorithm in {"fnv", "siphash24"}
+ msg = "Requires SipHash24 or FNV"
+ return test if ok else unittest.skip(msg)(test)
+
class HashEqualityTestCase(unittest.TestCase):
@@ -138,7 +172,7 @@ class HashRandomizationTests:
# an object to be tested
def get_hash_command(self, repr_):
- return 'print(hash(%s))' % repr_
+ return 'print(hash(eval(%s.decode("utf-8"))))' % repr_.encode("utf-8")
def get_hash(self, repr_, seed=None):
env = os.environ.copy()
@@ -161,12 +195,67 @@ class HashRandomizationTests:
self.assertNotEqual(run1, run2)
class StringlikeHashRandomizationTests(HashRandomizationTests):
+ repr_ = None
+ repr_long = None
+
+ # 32bit little, 64bit little, 32bit big, 64bit big
+ known_hashes = {
+ 'djba33x': [ # only used for small strings
+ # seed 0, 'abc'
+ [193485960, 193485960, 193485960, 193485960],
+ # seed 42, 'abc'
+ [-678966196, 573763426263223372, -820489388, -4282905804826039665],
+ ],
+ 'siphash24': [
+ # seed 0, 'abc'
+ [2025351752, 4596069200710135518, 1433332804,
+ -3481057401533226760],
+ # seed 42, 'abc'
+ [-774632014, -4501618152524544106, 1054608210,
+ -1493500025205289231],
+ # seed 42, 'abcdefghijk'
+ [-1436007334, 4436719588892876975, -1436007334,
+ 4436719588892876975],
+ # seed 0, 'äú∑ℇ', PyUCS2 layout depends on endianess
+ [1386693832, 5749986484189612790, 1776982909,
+ -5915111450199468540],
+ # seed 42, 'äú∑ℇ'
+ [1260387190, -2947981342227738144, 1430287772,
+ -4296699217652516017],
+ ],
+ 'fnv': [
+ # seed 0, 'abc'
+ [-1600925533, 1453079729188098211, -1600925533,
+ 1453079729188098211],
+ # seed 42, 'abc'
+ [-206076799, -4410911502303878509, -1024014457,
+ -3570150969479994130],
+ # seed 42, 'abcdefghijk'
+ [811136751, -5046230049376118746, -77208053 ,
+ -4779029615281019666],
+ # seed 0, 'äú∑ℇ'
+ [44402817, 8998297579845987431, -1956240331,
+ -782697888614047887],
+ # seed 42, 'äú∑ℇ'
+ [-283066365, -4576729883824601543, -271871407, None],
+ ]
+ }
+
+ def get_expected_hash(self, position, length):
+ if length < sys.hash_info.cutoff:
+ algorithm = "djba33x"
+ else:
+ algorithm = sys.hash_info.algorithm
+ if sys.byteorder == 'little':
+ platform = 1 if IS_64BIT else 0
+ else:
+ assert(sys.byteorder == 'big')
+ platform = 3 if IS_64BIT else 2
+ return self.known_hashes[algorithm][position][platform]
+
def test_null_hash(self):
# PYTHONHASHSEED=0 disables the randomized hash
- if IS_64BIT:
- known_hash_of_obj = 1453079729188098211
- else:
- known_hash_of_obj = -1600925533
+ known_hash_of_obj = self.get_expected_hash(0, 3)
# Randomization is enabled by default:
self.assertNotEqual(self.get_hash(self.repr_), known_hash_of_obj)
@@ -174,39 +263,53 @@ class StringlikeHashRandomizationTests(HashRandomizationTests):
# It can also be disabled by setting the seed to 0:
self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj)
+ @skip_unless_internalhash
def test_fixed_hash(self):
# test a fixed seed for the randomized hash
# Note that all types share the same values:
- if IS_64BIT:
- if sys.byteorder == 'little':
- h = -4410911502303878509
- else:
- h = -3570150969479994130
- else:
- if sys.byteorder == 'little':
- h = -206076799
- else:
- h = -1024014457
+ h = self.get_expected_hash(1, 3)
self.assertEqual(self.get_hash(self.repr_, seed=42), h)
+ @skip_unless_internalhash
+ def test_long_fixed_hash(self):
+ if self.repr_long is None:
+ return
+ h = self.get_expected_hash(2, 11)
+ self.assertEqual(self.get_hash(self.repr_long, seed=42), h)
+
+
class StrHashRandomizationTests(StringlikeHashRandomizationTests,
unittest.TestCase):
repr_ = repr('abc')
+ repr_long = repr('abcdefghijk')
+ repr_ucs2 = repr('äú∑ℇ')
+ @skip_unless_internalhash
def test_empty_string(self):
self.assertEqual(hash(""), 0)
+ @skip_unless_internalhash
+ def test_ucs2_string(self):
+ h = self.get_expected_hash(3, 6)
+ self.assertEqual(self.get_hash(self.repr_ucs2, seed=0), h)
+ h = self.get_expected_hash(4, 6)
+ self.assertEqual(self.get_hash(self.repr_ucs2, seed=42), h)
+
class BytesHashRandomizationTests(StringlikeHashRandomizationTests,
unittest.TestCase):
repr_ = repr(b'abc')
+ repr_long = repr(b'abcdefghijk')
+ @skip_unless_internalhash
def test_empty_string(self):
self.assertEqual(hash(b""), 0)
class MemoryviewHashRandomizationTests(StringlikeHashRandomizationTests,
unittest.TestCase):
repr_ = "memoryview(b'abc')"
+ repr_long = "memoryview(b'abcdefghijk')"
+ @skip_unless_internalhash
def test_empty_string(self):
self.assertEqual(hash(memoryview(b"")), 0)
@@ -224,5 +327,22 @@ class DatetimeTimeTests(DatetimeTests, unittest.TestCase):
repr_ = repr(datetime.time(0))
+class HashDistributionTestCase(unittest.TestCase):
+
+ def test_hash_distribution(self):
+ # check for hash collision
+ base = "abcdefghabcdefg"
+ for i in range(1, len(base)):
+ prefix = base[:i]
+ s15 = set()
+ s255 = set()
+ for c in range(256):
+ h = hash(prefix + chr(c))
+ s15.add(h & 0xf)
+ s255.add(h & 0xff)
+ # SipHash24 distribution depends on key, usually > 60%
+ self.assertGreater(len(s15), 8, prefix)
+ self.assertGreater(len(s255), 128, prefix)
+
if __name__ == "__main__":
unittest.main()
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 0565f39..f0c7148 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -8,6 +8,7 @@ import operator
import codecs
import gc
import sysconfig
+import platform
# count the number of test runs, used to create unique
# strings to intern in test_intern()
@@ -431,7 +432,7 @@ class SysModuleTest(unittest.TestCase):
self.assertEqual(type(sys.int_info.sizeof_digit), int)
self.assertIsInstance(sys.hexversion, int)
- self.assertEqual(len(sys.hash_info), 5)
+ self.assertEqual(len(sys.hash_info), 9)
self.assertLess(sys.hash_info.modulus, 2**sys.hash_info.width)
# sys.hash_info.modulus should be a prime; we do a quick
# probable primality test (doesn't exclude the possibility of
@@ -446,6 +447,26 @@ class SysModuleTest(unittest.TestCase):
self.assertIsInstance(sys.hash_info.inf, int)
self.assertIsInstance(sys.hash_info.nan, int)
self.assertIsInstance(sys.hash_info.imag, int)
+ algo = sysconfig.get_config_var("PY_HASH_ALGORITHM")
+ if sys.hash_info.algorithm in {"fnv", "siphash24"}:
+ self.assertIn(sys.hash_info.hash_bits, {32, 64})
+ self.assertIn(sys.hash_info.seed_bits, {32, 64, 128})
+
+ if algo == 1:
+ self.assertEqual(sys.hash_info.algorithm, "siphash24")
+ elif algo == 2:
+ self.assertEqual(sys.hash_info.algorithm, "fnv")
+ else:
+ processor = platform.processor().lower()
+ if processor in {"sparc", "mips"}:
+ self.assertEqual(sys.hash_info.algorithm, "fnv")
+ else:
+ self.assertEqual(sys.hash_info.algorithm, "siphash24")
+ else:
+ # PY_HASH_EXTERNAL
+ self.assertEqual(algo, 0)
+ self.assertGreaterEqual(sys.hash_info.cutoff, 0)
+ self.assertLess(sys.hash_info.cutoff, 8)
self.assertIsInstance(sys.maxsize, int)
self.assertIsInstance(sys.maxunicode, int)