diff options
author | Inada Naoki <songofacandy@gmail.com> | 2021-10-10 08:29:46 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-10-10 08:29:46 (GMT) |
commit | ad970e8623523a8656e8c1ff4e1dff3423498a5a (patch) | |
tree | ddd33e1925765792c97a500b6d8bf88ca899d53c | |
parent | a1c3c9e8245a88cf37b084869b3559638116daf7 (diff) | |
download | cpython-ad970e8623523a8656e8c1ff4e1dff3423498a5a.zip cpython-ad970e8623523a8656e8c1ff4e1dff3423498a5a.tar.gz cpython-ad970e8623523a8656e8c1ff4e1dff3423498a5a.tar.bz2 |
bpo-29410: Change the default hash algorithm to SipHash13. (GH-28752)
Co-authored-by: Erlend Egeberg Aasland <erlend.aasland@innova.no>
Co-authored-by: Christian Heimes <christian@python.org>
-rw-r--r-- | Doc/using/configure.rst | 10 | ||||
-rw-r--r-- | Doc/whatsnew/3.11.rst | 5 | ||||
-rw-r--r-- | Include/pyhash.h | 8 | ||||
-rw-r--r-- | Lib/test/test_hash.py | 17 | ||||
-rw-r--r-- | Lib/test/test_imp.py | 4 | ||||
-rw-r--r-- | Lib/test/test_sys.py | 6 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2021-10-07-19-09-12.bpo-29410.bg5SYp.rst | 1 | ||||
-rw-r--r-- | Python/pyhash.c | 79 | ||||
-rwxr-xr-x | configure | 8 | ||||
-rw-r--r-- | configure.ac | 9 | ||||
-rw-r--r-- | pyconfig.h.in | 2 |
11 files changed, 123 insertions, 26 deletions
diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index a59bee4..75f572c 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -416,15 +416,19 @@ Libraries options Security Options ---------------- -.. cmdoption:: --with-hash-algorithm=[fnv|siphash24] +.. cmdoption:: --with-hash-algorithm=[fnv|siphash13|siphash24] Select hash algorithm for use in ``Python/pyhash.c``: - * ``siphash24`` (default). - * ``fnv``; + * ``siphash13`` (default); + * ``siphash24``; + * ``fnv``. .. versionadded:: 3.4 + .. versionadded:: 3.11 + ``siphash13`` is added and it is the new default. + .. cmdoption:: --with-builtin-hashlib-hashes=md5,sha1,sha256,sha512,sha3,blake2 Built-in hash modules: diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index ff376d2..e3650a6 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -175,6 +175,11 @@ Other CPython Implementation Changes support :class:`typing.SupportsComplex` and :class:`typing.SupportsBytes` protocols. (Contributed by Mark Dickinson and Dong-hee Na in :issue:`24234`.) +* ``siphash13`` is added as a new internal hashing algorithms. It's has similar security + properties as ``siphash24`` but it is slightly faster for long inputs. ``str``, ``bytes``, + and some other types now use it as default algorithm for ``hash()``. :pep:`552` + hash-based pyc files now use ``siphash13``, too. + (Contributed by Inada Naoki in :issue:`29410`.) New Modules =========== diff --git a/Include/pyhash.h b/Include/pyhash.h index a314ea9..182d223 100644 --- a/Include/pyhash.h +++ b/Include/pyhash.h @@ -114,11 +114,10 @@ PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void); /* hash algorithm selection * - * The values for Py_HASH_SIPHASH24 and Py_HASH_FNV are hard-coded in the + * The values for Py_HASH_* are hard-coded in the * configure script. * - * - FNV is available on all platforms and architectures. - * - SIPHASH24 only works on platforms that don't require aligned memory for integers. + * - FNV and SIPHASH* are available on all platforms and architectures. * - With EXTERNAL embedders can provide an alternative implementation with:: * * PyHash_FuncDef PyHash_Func = {...}; @@ -128,10 +127,11 @@ PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void); #define Py_HASH_EXTERNAL 0 #define Py_HASH_SIPHASH24 1 #define Py_HASH_FNV 2 +#define Py_HASH_SIPHASH13 3 #ifndef Py_HASH_ALGORITHM # ifndef HAVE_ALIGNED_REQUIRED -# define Py_HASH_ALGORITHM Py_HASH_SIPHASH24 +# define Py_HASH_ALGORITHM Py_HASH_SIPHASH13 # else # define Py_HASH_ALGORITHM Py_HASH_FNV # endif /* uint64_t && uint32_t && aligned */ diff --git a/Lib/test/test_hash.py b/Lib/test/test_hash.py index c68e0d8..cf9db66 100644 --- a/Lib/test/test_hash.py +++ b/Lib/test/test_hash.py @@ -42,8 +42,8 @@ def pysiphash(uint64): def skip_unless_internalhash(test): """Skip decorator for tests that depend on SipHash24 or FNV""" - ok = sys.hash_info.algorithm in {"fnv", "siphash24"} - msg = "Requires SipHash24 or FNV" + ok = sys.hash_info.algorithm in {"fnv", "siphash13", "siphash24"} + msg = "Requires SipHash13, SipHash24 or FNV" return test if ok else unittest.skip(msg)(test) @@ -206,6 +206,19 @@ class StringlikeHashRandomizationTests(HashRandomizationTests): # seed 42, 'abc' [-678966196, 573763426263223372, -820489388, -4282905804826039665], ], + 'siphash13': [ + # NOTE: PyUCS2 layout depends on endianness + # seed 0, 'abc' + [69611762, -4594863902769663758, 69611762, -4594863902769663758], + # seed 42, 'abc' + [-975800855, 3869580338025362921, -975800855, 3869580338025362921], + # seed 42, 'abcdefghijk' + [-595844228, 7764564197781545852, -595844228, 7764564197781545852], + # seed 0, 'äú∑ℇ' + [-1093288643, -2810468059467891395, -1041341092, 4925090034378237276], + # seed 42, 'äú∑ℇ' + [-585999602, -2845126246016066802, -817336969, -2219421378907968137], + ], 'siphash24': [ # NOTE: PyUCS2 layout depends on endianness # seed 0, 'abc' diff --git a/Lib/test/test_imp.py b/Lib/test/test_imp.py index 99312cc..1a21025 100644 --- a/Lib/test/test_imp.py +++ b/Lib/test/test_imp.py @@ -351,8 +351,8 @@ class ImportTests(unittest.TestCase): self.assertEqual(_frozen_importlib.__spec__.origin, "frozen") def test_source_hash(self): - self.assertEqual(_imp.source_hash(42, b'hi'), b'\xc6\xe7Z\r\x03:}\xab') - self.assertEqual(_imp.source_hash(43, b'hi'), b'\x85\x9765\xf8\x9a\x8b9') + self.assertEqual(_imp.source_hash(42, b'hi'), b'\xfb\xd9G\x05\xaf$\x9b~') + self.assertEqual(_imp.source_hash(43, b'hi'), b'\xd0/\x87C\xccC\xff\xe2') def test_pyc_invalidation_mode_from_cmdline(self): cases = [ diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 6720cd6..93e39bc 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -508,7 +508,7 @@ class SysModuleTest(unittest.TestCase): self.assertIsInstance(sys.hash_info.nan, int) self.assertIsInstance(sys.hash_info.imag, int) algo = sysconfig.get_config_var("Py_HASH_ALGORITHM") - if sys.hash_info.algorithm in {"fnv", "siphash24"}: + if sys.hash_info.algorithm in {"fnv", "siphash13", "siphash24"}: self.assertIn(sys.hash_info.hash_bits, {32, 64}) self.assertIn(sys.hash_info.seed_bits, {32, 64, 128}) @@ -516,8 +516,10 @@ class SysModuleTest(unittest.TestCase): self.assertEqual(sys.hash_info.algorithm, "siphash24") elif algo == 2: self.assertEqual(sys.hash_info.algorithm, "fnv") + elif algo == 3: + self.assertEqual(sys.hash_info.algorithm, "siphash13") else: - self.assertIn(sys.hash_info.algorithm, {"fnv", "siphash24"}) + self.assertIn(sys.hash_info.algorithm, {"fnv", "siphash13", "siphash24"}) else: # PY_HASH_EXTERNAL self.assertEqual(algo, 0) diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-10-07-19-09-12.bpo-29410.bg5SYp.rst b/Misc/NEWS.d/next/Core and Builtins/2021-10-07-19-09-12.bpo-29410.bg5SYp.rst new file mode 100644 index 0000000..b08999e --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-10-07-19-09-12.bpo-29410.bg5SYp.rst @@ -0,0 +1 @@ +Add SipHash13 for string hash algorithm and use it by default. diff --git a/Python/pyhash.c b/Python/pyhash.c index f0c8235..d5ac9f8 100644 --- a/Python/pyhash.c +++ b/Python/pyhash.c @@ -358,20 +358,73 @@ static PyHash_FuncDef PyHash_Func = {fnv, "fnv", 8 * SIZEOF_PY_HASH_T, # define ROTATE(x, b) (uint64_t)( ((x) << (b)) | ( (x) >> (64 - (b))) ) #endif -#define HALF_ROUND(a,b,c,d,s,t) \ - a += b; c += d; \ +#define HALF_ROUND(a,b,c,d,s,t) \ + a += b; c += d; \ b = ROTATE(b, s) ^ a; \ d = ROTATE(d, t) ^ c; \ a = ROTATE(a, 32); -#define DOUBLE_ROUND(v0,v1,v2,v3) \ - HALF_ROUND(v0,v1,v2,v3,13,16); \ - HALF_ROUND(v2,v1,v0,v3,17,21); \ - HALF_ROUND(v0,v1,v2,v3,13,16); \ +#define SINGLE_ROUND(v0,v1,v2,v3) \ + HALF_ROUND(v0,v1,v2,v3,13,16); \ HALF_ROUND(v2,v1,v0,v3,17,21); +#define DOUBLE_ROUND(v0,v1,v2,v3) \ + SINGLE_ROUND(v0,v1,v2,v3); \ + SINGLE_ROUND(v0,v1,v2,v3); + static uint64_t +siphash13(uint64_t k0, uint64_t k1, const void *src, Py_ssize_t src_sz) { + uint64_t b = (uint64_t)src_sz << 56; + const uint8_t *in = (const uint8_t*)src; + + uint64_t v0 = k0 ^ 0x736f6d6570736575ULL; + uint64_t v1 = k1 ^ 0x646f72616e646f6dULL; + uint64_t v2 = k0 ^ 0x6c7967656e657261ULL; + uint64_t v3 = k1 ^ 0x7465646279746573ULL; + + uint64_t t; + uint8_t *pt; + + while (src_sz >= 8) { + uint64_t mi; + memcpy(&mi, in, sizeof(mi)); + mi = _le64toh(mi); + in += sizeof(mi); + src_sz -= sizeof(mi); + v3 ^= mi; + SINGLE_ROUND(v0,v1,v2,v3); + v0 ^= mi; + } + + t = 0; + pt = (uint8_t *)&t; + switch (src_sz) { + case 7: pt[6] = in[6]; /* fall through */ + case 6: pt[5] = in[5]; /* fall through */ + case 5: pt[4] = in[4]; /* fall through */ + case 4: memcpy(pt, in, sizeof(uint32_t)); break; + case 3: pt[2] = in[2]; /* fall through */ + case 2: pt[1] = in[1]; /* fall through */ + case 1: pt[0] = in[0]; /* fall through */ + } + b |= _le64toh(t); + + v3 ^= b; + SINGLE_ROUND(v0,v1,v2,v3); + v0 ^= b; + v2 ^= 0xff; + SINGLE_ROUND(v0,v1,v2,v3); + SINGLE_ROUND(v0,v1,v2,v3); + SINGLE_ROUND(v0,v1,v2,v3); + + /* modified */ + t = (v0 ^ v1) ^ (v2 ^ v3); + return t; +} + +#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24 +static uint64_t siphash24(uint64_t k0, uint64_t k1, const void *src, Py_ssize_t src_sz) { uint64_t b = (uint64_t)src_sz << 56; const uint8_t *in = (const uint8_t*)src; @@ -419,14 +472,26 @@ siphash24(uint64_t k0, uint64_t k1, const void *src, Py_ssize_t src_sz) { t = (v0 ^ v1) ^ (v2 ^ v3); return t; } +#endif uint64_t _Py_KeyedHash(uint64_t key, const void *src, Py_ssize_t src_sz) { - return siphash24(key, 0, src, src_sz); + return siphash13(key, 0, src, src_sz); } +#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH13 +static Py_hash_t +pysiphash(const void *src, Py_ssize_t src_sz) { + return (Py_hash_t)siphash13( + _le64toh(_Py_HashSecret.siphash.k0), _le64toh(_Py_HashSecret.siphash.k1), + src, src_sz); +} + +static PyHash_FuncDef PyHash_Func = {pysiphash, "siphash13", 64, 128}; +#endif + #if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24 static Py_hash_t pysiphash(const void *src, Py_ssize_t src_sz) { @@ -1561,9 +1561,9 @@ Optional Packages: --with-undefined-behavior-sanitizer enable UndefinedBehaviorSanitizer undefined behaviour detector, 'ubsan' (default is no) - --with-hash-algorithm=[fnv|siphash24] + --with-hash-algorithm=[fnv|siphash13|siphash24] select hash algorithm for use in Python/pyhash.c - (default is SipHash24) + (default is SipHash13) --with-tzpath=<list of absolute paths separated by pathsep> Select the default time zone search path for zoneinfo.TZPATH @@ -10431,6 +10431,10 @@ if test "${with_hash_algorithm+set}" = set; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: $withval" >&5 $as_echo "$withval" >&6; } case "$withval" in + siphash13) + $as_echo "#define Py_HASH_ALGORITHM 3" >>confdefs.h + + ;; siphash24) $as_echo "#define Py_HASH_ALGORITHM 1" >>confdefs.h diff --git a/configure.ac b/configure.ac index c7cb797..6c65b29 100644 --- a/configure.ac +++ b/configure.ac @@ -3036,16 +3036,19 @@ fi # str, bytes and memoryview hash algorithm AH_TEMPLATE(Py_HASH_ALGORITHM, [Define hash algorithm for str, bytes and memoryview. - SipHash24: 1, FNV: 2, externally defined: 0]) + SipHash24: 1, FNV: 2, SipHash13: 3, externally defined: 0]) AC_MSG_CHECKING(for --with-hash-algorithm) dnl quadrigraphs "@<:@" and "@:>@" produce "[" and "]" in the output AC_ARG_WITH(hash_algorithm, - AS_HELP_STRING([--with-hash-algorithm=@<:@fnv|siphash24@:>@], - [select hash algorithm for use in Python/pyhash.c (default is SipHash24)]), + AS_HELP_STRING([--with-hash-algorithm=@<:@fnv|siphash13|siphash24@:>@], + [select hash algorithm for use in Python/pyhash.c (default is SipHash13)]), [ AC_MSG_RESULT($withval) case "$withval" in + siphash13) + AC_DEFINE(Py_HASH_ALGORITHM, 3) + ;; siphash24) AC_DEFINE(Py_HASH_ALGORITHM, 1) ;; diff --git a/pyconfig.h.in b/pyconfig.h.in index 862c083..3231cd6 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -1439,7 +1439,7 @@ #undef Py_ENABLE_SHARED /* Define hash algorithm for str, bytes and memoryview. SipHash24: 1, FNV: 2, - externally defined: 0 */ + SipHash13: 3, externally defined: 0 */ #undef Py_HASH_ALGORITHM /* Define if you want to enable tracing references for debugging purpose */ |