summaryrefslogtreecommitdiffstats
path: root/Include
diff options
context:
space:
mode:
authorChristian Heimes <christian@cheimes.de>2013-11-20 10:46:18 (GMT)
committerChristian Heimes <christian@cheimes.de>2013-11-20 10:46:18 (GMT)
commit985ecdcfc29adfc36ce2339acf03f819ad414869 (patch)
tree06a11f82271e768dbe49469c8736b65b083f671c /Include
parentfe32aec25a8b36498d840bd69485e9bc94195b9c (diff)
downloadcpython-985ecdcfc29adfc36ce2339acf03f819ad414869.zip
cpython-985ecdcfc29adfc36ce2339acf03f819ad414869.tar.gz
cpython-985ecdcfc29adfc36ce2339acf03f819ad414869.tar.bz2
ssue #19183: Implement PEP 456 'secure and interchangeable hash algorithm'.
Python now uses SipHash24 on all major platforms.
Diffstat (limited to 'Include')
-rw-r--r--Include/Python.h1
-rw-r--r--Include/object.h17
-rw-r--r--Include/pyhash.h147
-rw-r--r--Include/pyport.h19
4 files changed, 150 insertions, 34 deletions
diff --git a/Include/Python.h b/Include/Python.h
index a78a721..2dd8290 100644
--- a/Include/Python.h
+++ b/Include/Python.h
@@ -68,6 +68,7 @@
#include "object.h"
#include "objimpl.h"
#include "typeslots.h"
+#include "pyhash.h"
#include "pydebug.h"
diff --git a/Include/object.h b/Include/object.h
index a6130fe..8de2208 100644
--- a/Include/object.h
+++ b/Include/object.h
@@ -562,23 +562,6 @@ PyAPI_FUNC(PyObject *) PyObject_Dir(PyObject *);
PyAPI_FUNC(int) Py_ReprEnter(PyObject *);
PyAPI_FUNC(void) Py_ReprLeave(PyObject *);
-/* Helpers for hash functions */
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double);
-PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*);
-PyAPI_FUNC(Py_hash_t) _Py_HashBytes(unsigned char*, Py_ssize_t);
-#endif
-
-typedef struct {
- Py_hash_t prefix;
- Py_hash_t suffix;
-} _Py_HashSecret_t;
-PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;
-
-#ifdef Py_DEBUG
-PyAPI_DATA(int) _Py_HashSecret_Initialized;
-#endif
-
/* Helper for passing objects to printf and the like */
#define PyObject_REPR(obj) _PyUnicode_AsString(PyObject_Repr(obj))
diff --git a/Include/pyhash.h b/Include/pyhash.h
new file mode 100644
index 0000000..83c9281
--- /dev/null
+++ b/Include/pyhash.h
@@ -0,0 +1,147 @@
+#ifndef Py_HASH_H
+
+#define Py_HASH_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Helpers for hash functions */
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double);
+PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*);
+PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t);
+#endif
+
+/* Prime multiplier used in string and various other hashes. */
+#define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */
+
+/* Parameters used for the numeric hash implementation. See notes for
+ _Py_HashDouble in Objects/object.c. Numeric hashes are based on
+ reduction modulo the prime 2**_PyHASH_BITS - 1. */
+
+#if SIZEOF_VOID_P >= 8
+# define _PyHASH_BITS 61
+#else
+# define _PyHASH_BITS 31
+#endif
+
+#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1)
+#define _PyHASH_INF 314159
+#define _PyHASH_NAN 0
+#define _PyHASH_IMAG _PyHASH_MULTIPLIER
+
+
+/* hash secret
+ *
+ * memory layout on 64 bit systems
+ * cccccccc cccccccc cccccccc uc -- unsigned char[24]
+ * pppppppp ssssssss ........ fnv -- two Py_hash_t
+ * k0k0k0k0 k1k1k1k1 ........ siphash -- two PY_UINT64_T
+ * ........ ........ ssssssss djbx33a -- 16 bytes padding + one Py_hash_t
+ * ........ ........ eeeeeeee pyexpat XML hash salt
+ *
+ * memory layout on 32 bit systems
+ * cccccccc cccccccc cccccccc uc
+ * ppppssss ........ ........ fnv -- two Py_hash_t
+ * k0k0k0k0 k1k1k1k1 ........ siphash -- two PY_UINT64_T (*)
+ * ........ ........ ssss.... djbx33a -- 16 bytes padding + one Py_hash_t
+ * ........ ........ eeee.... pyexpat XML hash salt
+ *
+ * (*) The siphash member may not be available on 32 bit platforms without
+ * an unsigned int64 data type.
+ */
+typedef union {
+ /* ensure 24 bytes */
+ unsigned char uc[24];
+ /* two Py_hash_t for FNV */
+ struct {
+ Py_hash_t prefix;
+ Py_hash_t suffix;
+ } fnv;
+#ifdef PY_UINT64_T
+ /* two uint64 for SipHash24 */
+ struct {
+ PY_UINT64_T k0;
+ PY_UINT64_T k1;
+ } siphash;
+#endif
+ /* a different (!) Py_hash_t for small string optimization */
+ struct {
+ unsigned char padding[16];
+ Py_hash_t suffix;
+ } djbx33a;
+ struct {
+ unsigned char padding[16];
+ Py_hash_t hashsalt;
+ } expat;
+} _Py_HashSecret_t;
+PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;
+
+#ifdef Py_DEBUG
+PyAPI_DATA(int) _Py_HashSecret_Initialized;
+#endif
+
+
+/* hash function definition */
+#ifndef Py_LIMITED_API
+typedef struct {
+ Py_hash_t (*const hash)(const void *, Py_ssize_t);
+ const char *name;
+ const int hash_bits;
+ const int seed_bits;
+} PyHash_FuncDef;
+
+PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
+#endif
+
+
+/* cutoff for small string DJBX33A optimization in range [1, cutoff).
+ *
+ * About 50% of the strings in a typical Python application are smaller than
+ * 6 to 7 chars. However DJBX33A is vulnerable to hash collision attacks.
+ * NEVER use DJBX33A for long strings!
+ *
+ * A Py_HASH_CUTOFF of 0 disables small string optimization. 32 bit platforms
+ * should use a smaller cutoff because it is easier to create colliding
+ * strings. A cutoff of 7 on 64bit platforms and 5 on 32bit platforms should
+ * provide a decent safety margin.
+ */
+#ifndef Py_HASH_CUTOFF
+# define Py_HASH_CUTOFF 0
+#elif (Py_HASH_CUTOFF > 7 || Py_HASH_CUTOFF < 0)
+# error Py_HASH_CUTOFF must in range 0...7.
+#endif /* Py_HASH_CUTOFF */
+
+
+/* hash algorithm selection
+ *
+ * The values for Py_HASH_SIPHASH24 and Py_HASH_FNV are hard-coded in the
+ * configure script.
+ *
+ * - FNV is available on all platforms and architectures.
+ * - SIPHASH24 only works on plaforms that provide PY_UINT64_T and doesn't
+ * require aligned memory for integers.
+ * - With EXTERNAL embedders can provide an alternative implementation with::
+ *
+ * PyHash_FuncDef PyHash_Func = {...};
+ *
+ * XXX: Figure out __declspec() for extern PyHash_FuncDef.
+ */
+#define Py_HASH_EXTERNAL 0
+#define Py_HASH_SIPHASH24 1
+#define Py_HASH_FNV 2
+
+#ifndef Py_HASH_ALGORITHM
+# if (defined(PY_UINT64_T) && defined(PY_UINT32_T) \
+ && !defined(HAVE_ALIGNED_REQUIRED))
+# define Py_HASH_ALGORITHM Py_HASH_SIPHASH24
+# else
+# define Py_HASH_ALGORITHM Py_HASH_FNV
+# endif /* uint64_t && uint32_t && aligned */
+#endif /* Py_HASH_ALGORITHM */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !Py_HASH_H */
diff --git a/Include/pyport.h b/Include/pyport.h
index ca20b22..b6b426a 100644
--- a/Include/pyport.h
+++ b/Include/pyport.h
@@ -144,23 +144,6 @@ Used in: PY_LONG_LONG
#endif
#endif
-/* Prime multiplier used in string and various other hashes. */
-#define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */
-
-/* Parameters used for the numeric hash implementation. See notes for
- _Py_HashDouble in Objects/object.c. Numeric hashes are based on
- reduction modulo the prime 2**_PyHASH_BITS - 1. */
-
-#if SIZEOF_VOID_P >= 8
-#define _PyHASH_BITS 61
-#else
-#define _PyHASH_BITS 31
-#endif
-#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1)
-#define _PyHASH_INF 314159
-#define _PyHASH_NAN 0
-#define _PyHASH_IMAG _PyHASH_MULTIPLIER
-
/* uintptr_t is the C9X name for an unsigned integral type such that a
* legitimate void* can be cast to uintptr_t and then back to void* again
* without loss of information. Similarly for intptr_t, wrt a signed
@@ -199,8 +182,10 @@ typedef Py_intptr_t Py_ssize_t;
#endif
/* Py_hash_t is the same size as a pointer. */
+#define SIZEOF_PY_HASH_T SIZEOF_SIZE_T
typedef Py_ssize_t Py_hash_t;
/* Py_uhash_t is the unsigned equivalent needed to calculate numeric hash. */
+#define SIZEOF_PY_UHASH_T SIZEOF_SIZE_T
typedef size_t Py_uhash_t;
/* Largest possible value of size_t.