summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorInada Naoki <songofacandy@gmail.com>2021-10-10 08:29:46 (GMT)
committerGitHub <noreply@github.com>2021-10-10 08:29:46 (GMT)
commitad970e8623523a8656e8c1ff4e1dff3423498a5a (patch)
treeddd33e1925765792c97a500b6d8bf88ca899d53c
parenta1c3c9e8245a88cf37b084869b3559638116daf7 (diff)
downloadcpython-ad970e8623523a8656e8c1ff4e1dff3423498a5a.zip
cpython-ad970e8623523a8656e8c1ff4e1dff3423498a5a.tar.gz
cpython-ad970e8623523a8656e8c1ff4e1dff3423498a5a.tar.bz2
bpo-29410: Change the default hash algorithm to SipHash13. (GH-28752)
Co-authored-by: Erlend Egeberg Aasland <erlend.aasland@innova.no> Co-authored-by: Christian Heimes <christian@python.org>
-rw-r--r--Doc/using/configure.rst10
-rw-r--r--Doc/whatsnew/3.11.rst5
-rw-r--r--Include/pyhash.h8
-rw-r--r--Lib/test/test_hash.py17
-rw-r--r--Lib/test/test_imp.py4
-rw-r--r--Lib/test/test_sys.py6
-rw-r--r--Misc/NEWS.d/next/Core and Builtins/2021-10-07-19-09-12.bpo-29410.bg5SYp.rst1
-rw-r--r--Python/pyhash.c79
-rwxr-xr-xconfigure8
-rw-r--r--configure.ac9
-rw-r--r--pyconfig.h.in2
11 files changed, 123 insertions, 26 deletions
diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst
index a59bee4..75f572c 100644
--- a/Doc/using/configure.rst
+++ b/Doc/using/configure.rst
@@ -416,15 +416,19 @@ Libraries options
Security Options
----------------
-.. cmdoption:: --with-hash-algorithm=[fnv|siphash24]
+.. cmdoption:: --with-hash-algorithm=[fnv|siphash13|siphash24]
Select hash algorithm for use in ``Python/pyhash.c``:
- * ``siphash24`` (default).
- * ``fnv``;
+ * ``siphash13`` (default);
+ * ``siphash24``;
+ * ``fnv``.
.. versionadded:: 3.4
+ .. versionadded:: 3.11
+ ``siphash13`` is added and it is the new default.
+
.. cmdoption:: --with-builtin-hashlib-hashes=md5,sha1,sha256,sha512,sha3,blake2
Built-in hash modules:
diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst
index ff376d2..e3650a6 100644
--- a/Doc/whatsnew/3.11.rst
+++ b/Doc/whatsnew/3.11.rst
@@ -175,6 +175,11 @@ Other CPython Implementation Changes
support :class:`typing.SupportsComplex` and :class:`typing.SupportsBytes` protocols.
(Contributed by Mark Dickinson and Dong-hee Na in :issue:`24234`.)
+* ``siphash13`` is added as a new internal hashing algorithms. It's has similar security
+ properties as ``siphash24`` but it is slightly faster for long inputs. ``str``, ``bytes``,
+ and some other types now use it as default algorithm for ``hash()``. :pep:`552`
+ hash-based pyc files now use ``siphash13``, too.
+ (Contributed by Inada Naoki in :issue:`29410`.)
New Modules
===========
diff --git a/Include/pyhash.h b/Include/pyhash.h
index a314ea9..182d223 100644
--- a/Include/pyhash.h
+++ b/Include/pyhash.h
@@ -114,11 +114,10 @@ PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
/* hash algorithm selection
*
- * The values for Py_HASH_SIPHASH24 and Py_HASH_FNV are hard-coded in the
+ * The values for Py_HASH_* are hard-coded in the
* configure script.
*
- * - FNV is available on all platforms and architectures.
- * - SIPHASH24 only works on platforms that don't require aligned memory for integers.
+ * - FNV and SIPHASH* are available on all platforms and architectures.
* - With EXTERNAL embedders can provide an alternative implementation with::
*
* PyHash_FuncDef PyHash_Func = {...};
@@ -128,10 +127,11 @@ PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
#define Py_HASH_EXTERNAL 0
#define Py_HASH_SIPHASH24 1
#define Py_HASH_FNV 2
+#define Py_HASH_SIPHASH13 3
#ifndef Py_HASH_ALGORITHM
# ifndef HAVE_ALIGNED_REQUIRED
-# define Py_HASH_ALGORITHM Py_HASH_SIPHASH24
+# define Py_HASH_ALGORITHM Py_HASH_SIPHASH13
# else
# define Py_HASH_ALGORITHM Py_HASH_FNV
# endif /* uint64_t && uint32_t && aligned */
diff --git a/Lib/test/test_hash.py b/Lib/test/test_hash.py
index c68e0d8..cf9db66 100644
--- a/Lib/test/test_hash.py
+++ b/Lib/test/test_hash.py
@@ -42,8 +42,8 @@ def pysiphash(uint64):
def skip_unless_internalhash(test):
"""Skip decorator for tests that depend on SipHash24 or FNV"""
- ok = sys.hash_info.algorithm in {"fnv", "siphash24"}
- msg = "Requires SipHash24 or FNV"
+ ok = sys.hash_info.algorithm in {"fnv", "siphash13", "siphash24"}
+ msg = "Requires SipHash13, SipHash24 or FNV"
return test if ok else unittest.skip(msg)(test)
@@ -206,6 +206,19 @@ class StringlikeHashRandomizationTests(HashRandomizationTests):
# seed 42, 'abc'
[-678966196, 573763426263223372, -820489388, -4282905804826039665],
],
+ 'siphash13': [
+ # NOTE: PyUCS2 layout depends on endianness
+ # seed 0, 'abc'
+ [69611762, -4594863902769663758, 69611762, -4594863902769663758],
+ # seed 42, 'abc'
+ [-975800855, 3869580338025362921, -975800855, 3869580338025362921],
+ # seed 42, 'abcdefghijk'
+ [-595844228, 7764564197781545852, -595844228, 7764564197781545852],
+ # seed 0, 'äú∑ℇ'
+ [-1093288643, -2810468059467891395, -1041341092, 4925090034378237276],
+ # seed 42, 'äú∑ℇ'
+ [-585999602, -2845126246016066802, -817336969, -2219421378907968137],
+ ],
'siphash24': [
# NOTE: PyUCS2 layout depends on endianness
# seed 0, 'abc'
diff --git a/Lib/test/test_imp.py b/Lib/test/test_imp.py
index 99312cc..1a21025 100644
--- a/Lib/test/test_imp.py
+++ b/Lib/test/test_imp.py
@@ -351,8 +351,8 @@ class ImportTests(unittest.TestCase):
self.assertEqual(_frozen_importlib.__spec__.origin, "frozen")
def test_source_hash(self):
- self.assertEqual(_imp.source_hash(42, b'hi'), b'\xc6\xe7Z\r\x03:}\xab')
- self.assertEqual(_imp.source_hash(43, b'hi'), b'\x85\x9765\xf8\x9a\x8b9')
+ self.assertEqual(_imp.source_hash(42, b'hi'), b'\xfb\xd9G\x05\xaf$\x9b~')
+ self.assertEqual(_imp.source_hash(43, b'hi'), b'\xd0/\x87C\xccC\xff\xe2')
def test_pyc_invalidation_mode_from_cmdline(self):
cases = [
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 6720cd6..93e39bc 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -508,7 +508,7 @@ class SysModuleTest(unittest.TestCase):
self.assertIsInstance(sys.hash_info.nan, int)
self.assertIsInstance(sys.hash_info.imag, int)
algo = sysconfig.get_config_var("Py_HASH_ALGORITHM")
- if sys.hash_info.algorithm in {"fnv", "siphash24"}:
+ if sys.hash_info.algorithm in {"fnv", "siphash13", "siphash24"}:
self.assertIn(sys.hash_info.hash_bits, {32, 64})
self.assertIn(sys.hash_info.seed_bits, {32, 64, 128})
@@ -516,8 +516,10 @@ class SysModuleTest(unittest.TestCase):
self.assertEqual(sys.hash_info.algorithm, "siphash24")
elif algo == 2:
self.assertEqual(sys.hash_info.algorithm, "fnv")
+ elif algo == 3:
+ self.assertEqual(sys.hash_info.algorithm, "siphash13")
else:
- self.assertIn(sys.hash_info.algorithm, {"fnv", "siphash24"})
+ self.assertIn(sys.hash_info.algorithm, {"fnv", "siphash13", "siphash24"})
else:
# PY_HASH_EXTERNAL
self.assertEqual(algo, 0)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-10-07-19-09-12.bpo-29410.bg5SYp.rst b/Misc/NEWS.d/next/Core and Builtins/2021-10-07-19-09-12.bpo-29410.bg5SYp.rst
new file mode 100644
index 0000000..b08999e
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-10-07-19-09-12.bpo-29410.bg5SYp.rst
@@ -0,0 +1 @@
+Add SipHash13 for string hash algorithm and use it by default.
diff --git a/Python/pyhash.c b/Python/pyhash.c
index f0c8235..d5ac9f8 100644
--- a/Python/pyhash.c
+++ b/Python/pyhash.c
@@ -358,20 +358,73 @@ static PyHash_FuncDef PyHash_Func = {fnv, "fnv", 8 * SIZEOF_PY_HASH_T,
# define ROTATE(x, b) (uint64_t)( ((x) << (b)) | ( (x) >> (64 - (b))) )
#endif
-#define HALF_ROUND(a,b,c,d,s,t) \
- a += b; c += d; \
+#define HALF_ROUND(a,b,c,d,s,t) \
+ a += b; c += d; \
b = ROTATE(b, s) ^ a; \
d = ROTATE(d, t) ^ c; \
a = ROTATE(a, 32);
-#define DOUBLE_ROUND(v0,v1,v2,v3) \
- HALF_ROUND(v0,v1,v2,v3,13,16); \
- HALF_ROUND(v2,v1,v0,v3,17,21); \
- HALF_ROUND(v0,v1,v2,v3,13,16); \
+#define SINGLE_ROUND(v0,v1,v2,v3) \
+ HALF_ROUND(v0,v1,v2,v3,13,16); \
HALF_ROUND(v2,v1,v0,v3,17,21);
+#define DOUBLE_ROUND(v0,v1,v2,v3) \
+ SINGLE_ROUND(v0,v1,v2,v3); \
+ SINGLE_ROUND(v0,v1,v2,v3);
+
static uint64_t
+siphash13(uint64_t k0, uint64_t k1, const void *src, Py_ssize_t src_sz) {
+ uint64_t b = (uint64_t)src_sz << 56;
+ const uint8_t *in = (const uint8_t*)src;
+
+ uint64_t v0 = k0 ^ 0x736f6d6570736575ULL;
+ uint64_t v1 = k1 ^ 0x646f72616e646f6dULL;
+ uint64_t v2 = k0 ^ 0x6c7967656e657261ULL;
+ uint64_t v3 = k1 ^ 0x7465646279746573ULL;
+
+ uint64_t t;
+ uint8_t *pt;
+
+ while (src_sz >= 8) {
+ uint64_t mi;
+ memcpy(&mi, in, sizeof(mi));
+ mi = _le64toh(mi);
+ in += sizeof(mi);
+ src_sz -= sizeof(mi);
+ v3 ^= mi;
+ SINGLE_ROUND(v0,v1,v2,v3);
+ v0 ^= mi;
+ }
+
+ t = 0;
+ pt = (uint8_t *)&t;
+ switch (src_sz) {
+ case 7: pt[6] = in[6]; /* fall through */
+ case 6: pt[5] = in[5]; /* fall through */
+ case 5: pt[4] = in[4]; /* fall through */
+ case 4: memcpy(pt, in, sizeof(uint32_t)); break;
+ case 3: pt[2] = in[2]; /* fall through */
+ case 2: pt[1] = in[1]; /* fall through */
+ case 1: pt[0] = in[0]; /* fall through */
+ }
+ b |= _le64toh(t);
+
+ v3 ^= b;
+ SINGLE_ROUND(v0,v1,v2,v3);
+ v0 ^= b;
+ v2 ^= 0xff;
+ SINGLE_ROUND(v0,v1,v2,v3);
+ SINGLE_ROUND(v0,v1,v2,v3);
+ SINGLE_ROUND(v0,v1,v2,v3);
+
+ /* modified */
+ t = (v0 ^ v1) ^ (v2 ^ v3);
+ return t;
+}
+
+#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24
+static uint64_t
siphash24(uint64_t k0, uint64_t k1, const void *src, Py_ssize_t src_sz) {
uint64_t b = (uint64_t)src_sz << 56;
const uint8_t *in = (const uint8_t*)src;
@@ -419,14 +472,26 @@ siphash24(uint64_t k0, uint64_t k1, const void *src, Py_ssize_t src_sz) {
t = (v0 ^ v1) ^ (v2 ^ v3);
return t;
}
+#endif
uint64_t
_Py_KeyedHash(uint64_t key, const void *src, Py_ssize_t src_sz)
{
- return siphash24(key, 0, src, src_sz);
+ return siphash13(key, 0, src, src_sz);
}
+#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH13
+static Py_hash_t
+pysiphash(const void *src, Py_ssize_t src_sz) {
+ return (Py_hash_t)siphash13(
+ _le64toh(_Py_HashSecret.siphash.k0), _le64toh(_Py_HashSecret.siphash.k1),
+ src, src_sz);
+}
+
+static PyHash_FuncDef PyHash_Func = {pysiphash, "siphash13", 64, 128};
+#endif
+
#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24
static Py_hash_t
pysiphash(const void *src, Py_ssize_t src_sz) {
diff --git a/configure b/configure
index 3a6cf30..15c7c54 100755
--- a/configure
+++ b/configure
@@ -1561,9 +1561,9 @@ Optional Packages:
--with-undefined-behavior-sanitizer
enable UndefinedBehaviorSanitizer undefined
behaviour detector, 'ubsan' (default is no)
- --with-hash-algorithm=[fnv|siphash24]
+ --with-hash-algorithm=[fnv|siphash13|siphash24]
select hash algorithm for use in Python/pyhash.c
- (default is SipHash24)
+ (default is SipHash13)
--with-tzpath=<list of absolute paths separated by pathsep>
Select the default time zone search path for zoneinfo.TZPATH
@@ -10431,6 +10431,10 @@ if test "${with_hash_algorithm+set}" = set; then :
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $withval" >&5
$as_echo "$withval" >&6; }
case "$withval" in
+ siphash13)
+ $as_echo "#define Py_HASH_ALGORITHM 3" >>confdefs.h
+
+ ;;
siphash24)
$as_echo "#define Py_HASH_ALGORITHM 1" >>confdefs.h
diff --git a/configure.ac b/configure.ac
index c7cb797..6c65b29 100644
--- a/configure.ac
+++ b/configure.ac
@@ -3036,16 +3036,19 @@ fi
# str, bytes and memoryview hash algorithm
AH_TEMPLATE(Py_HASH_ALGORITHM,
[Define hash algorithm for str, bytes and memoryview.
- SipHash24: 1, FNV: 2, externally defined: 0])
+ SipHash24: 1, FNV: 2, SipHash13: 3, externally defined: 0])
AC_MSG_CHECKING(for --with-hash-algorithm)
dnl quadrigraphs "@<:@" and "@:>@" produce "[" and "]" in the output
AC_ARG_WITH(hash_algorithm,
- AS_HELP_STRING([--with-hash-algorithm=@<:@fnv|siphash24@:>@],
- [select hash algorithm for use in Python/pyhash.c (default is SipHash24)]),
+ AS_HELP_STRING([--with-hash-algorithm=@<:@fnv|siphash13|siphash24@:>@],
+ [select hash algorithm for use in Python/pyhash.c (default is SipHash13)]),
[
AC_MSG_RESULT($withval)
case "$withval" in
+ siphash13)
+ AC_DEFINE(Py_HASH_ALGORITHM, 3)
+ ;;
siphash24)
AC_DEFINE(Py_HASH_ALGORITHM, 1)
;;
diff --git a/pyconfig.h.in b/pyconfig.h.in
index 862c083..3231cd6 100644
--- a/pyconfig.h.in
+++ b/pyconfig.h.in
@@ -1439,7 +1439,7 @@
#undef Py_ENABLE_SHARED
/* Define hash algorithm for str, bytes and memoryview. SipHash24: 1, FNV: 2,
- externally defined: 0 */
+ SipHash13: 3, externally defined: 0 */
#undef Py_HASH_ALGORITHM
/* Define if you want to enable tracing references for debugging purpose */