diff options
author | Christian Heimes <christian@cheimes.de> | 2013-11-20 10:46:18 (GMT) |
---|---|---|
committer | Christian Heimes <christian@cheimes.de> | 2013-11-20 10:46:18 (GMT) |
commit | 985ecdcfc29adfc36ce2339acf03f819ad414869 (patch) | |
tree | 06a11f82271e768dbe49469c8736b65b083f671c /Include | |
parent | fe32aec25a8b36498d840bd69485e9bc94195b9c (diff) | |
download | cpython-985ecdcfc29adfc36ce2339acf03f819ad414869.zip cpython-985ecdcfc29adfc36ce2339acf03f819ad414869.tar.gz cpython-985ecdcfc29adfc36ce2339acf03f819ad414869.tar.bz2 |
ssue #19183: Implement PEP 456 'secure and interchangeable hash algorithm'.
Python now uses SipHash24 on all major platforms.
Diffstat (limited to 'Include')
-rw-r--r-- | Include/Python.h | 1 | ||||
-rw-r--r-- | Include/object.h | 17 | ||||
-rw-r--r-- | Include/pyhash.h | 147 | ||||
-rw-r--r-- | Include/pyport.h | 19 |
4 files changed, 150 insertions, 34 deletions
diff --git a/Include/Python.h b/Include/Python.h index a78a721..2dd8290 100644 --- a/Include/Python.h +++ b/Include/Python.h @@ -68,6 +68,7 @@ #include "object.h" #include "objimpl.h" #include "typeslots.h" +#include "pyhash.h" #include "pydebug.h" diff --git a/Include/object.h b/Include/object.h index a6130fe..8de2208 100644 --- a/Include/object.h +++ b/Include/object.h @@ -562,23 +562,6 @@ PyAPI_FUNC(PyObject *) PyObject_Dir(PyObject *); PyAPI_FUNC(int) Py_ReprEnter(PyObject *); PyAPI_FUNC(void) Py_ReprLeave(PyObject *); -/* Helpers for hash functions */ -#ifndef Py_LIMITED_API -PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double); -PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*); -PyAPI_FUNC(Py_hash_t) _Py_HashBytes(unsigned char*, Py_ssize_t); -#endif - -typedef struct { - Py_hash_t prefix; - Py_hash_t suffix; -} _Py_HashSecret_t; -PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret; - -#ifdef Py_DEBUG -PyAPI_DATA(int) _Py_HashSecret_Initialized; -#endif - /* Helper for passing objects to printf and the like */ #define PyObject_REPR(obj) _PyUnicode_AsString(PyObject_Repr(obj)) diff --git a/Include/pyhash.h b/Include/pyhash.h new file mode 100644 index 0000000..83c9281 --- /dev/null +++ b/Include/pyhash.h @@ -0,0 +1,147 @@ +#ifndef Py_HASH_H + +#define Py_HASH_H +#ifdef __cplusplus +extern "C" { +#endif + +/* Helpers for hash functions */ +#ifndef Py_LIMITED_API +PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double); +PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*); +PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t); +#endif + +/* Prime multiplier used in string and various other hashes. */ +#define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */ + +/* Parameters used for the numeric hash implementation. See notes for + _Py_HashDouble in Objects/object.c. Numeric hashes are based on + reduction modulo the prime 2**_PyHASH_BITS - 1. */ + +#if SIZEOF_VOID_P >= 8 +# define _PyHASH_BITS 61 +#else +# define _PyHASH_BITS 31 +#endif + +#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1) +#define _PyHASH_INF 314159 +#define _PyHASH_NAN 0 +#define _PyHASH_IMAG _PyHASH_MULTIPLIER + + +/* hash secret + * + * memory layout on 64 bit systems + * cccccccc cccccccc cccccccc uc -- unsigned char[24] + * pppppppp ssssssss ........ fnv -- two Py_hash_t + * k0k0k0k0 k1k1k1k1 ........ siphash -- two PY_UINT64_T + * ........ ........ ssssssss djbx33a -- 16 bytes padding + one Py_hash_t + * ........ ........ eeeeeeee pyexpat XML hash salt + * + * memory layout on 32 bit systems + * cccccccc cccccccc cccccccc uc + * ppppssss ........ ........ fnv -- two Py_hash_t + * k0k0k0k0 k1k1k1k1 ........ siphash -- two PY_UINT64_T (*) + * ........ ........ ssss.... djbx33a -- 16 bytes padding + one Py_hash_t + * ........ ........ eeee.... pyexpat XML hash salt + * + * (*) The siphash member may not be available on 32 bit platforms without + * an unsigned int64 data type. + */ +typedef union { + /* ensure 24 bytes */ + unsigned char uc[24]; + /* two Py_hash_t for FNV */ + struct { + Py_hash_t prefix; + Py_hash_t suffix; + } fnv; +#ifdef PY_UINT64_T + /* two uint64 for SipHash24 */ + struct { + PY_UINT64_T k0; + PY_UINT64_T k1; + } siphash; +#endif + /* a different (!) Py_hash_t for small string optimization */ + struct { + unsigned char padding[16]; + Py_hash_t suffix; + } djbx33a; + struct { + unsigned char padding[16]; + Py_hash_t hashsalt; + } expat; +} _Py_HashSecret_t; +PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret; + +#ifdef Py_DEBUG +PyAPI_DATA(int) _Py_HashSecret_Initialized; +#endif + + +/* hash function definition */ +#ifndef Py_LIMITED_API +typedef struct { + Py_hash_t (*const hash)(const void *, Py_ssize_t); + const char *name; + const int hash_bits; + const int seed_bits; +} PyHash_FuncDef; + +PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void); +#endif + + +/* cutoff for small string DJBX33A optimization in range [1, cutoff). + * + * About 50% of the strings in a typical Python application are smaller than + * 6 to 7 chars. However DJBX33A is vulnerable to hash collision attacks. + * NEVER use DJBX33A for long strings! + * + * A Py_HASH_CUTOFF of 0 disables small string optimization. 32 bit platforms + * should use a smaller cutoff because it is easier to create colliding + * strings. A cutoff of 7 on 64bit platforms and 5 on 32bit platforms should + * provide a decent safety margin. + */ +#ifndef Py_HASH_CUTOFF +# define Py_HASH_CUTOFF 0 +#elif (Py_HASH_CUTOFF > 7 || Py_HASH_CUTOFF < 0) +# error Py_HASH_CUTOFF must in range 0...7. +#endif /* Py_HASH_CUTOFF */ + + +/* hash algorithm selection + * + * The values for Py_HASH_SIPHASH24 and Py_HASH_FNV are hard-coded in the + * configure script. + * + * - FNV is available on all platforms and architectures. + * - SIPHASH24 only works on plaforms that provide PY_UINT64_T and doesn't + * require aligned memory for integers. + * - With EXTERNAL embedders can provide an alternative implementation with:: + * + * PyHash_FuncDef PyHash_Func = {...}; + * + * XXX: Figure out __declspec() for extern PyHash_FuncDef. + */ +#define Py_HASH_EXTERNAL 0 +#define Py_HASH_SIPHASH24 1 +#define Py_HASH_FNV 2 + +#ifndef Py_HASH_ALGORITHM +# if (defined(PY_UINT64_T) && defined(PY_UINT32_T) \ + && !defined(HAVE_ALIGNED_REQUIRED)) +# define Py_HASH_ALGORITHM Py_HASH_SIPHASH24 +# else +# define Py_HASH_ALGORITHM Py_HASH_FNV +# endif /* uint64_t && uint32_t && aligned */ +#endif /* Py_HASH_ALGORITHM */ + +#ifdef __cplusplus +} +#endif + +#endif /* !Py_HASH_H */ diff --git a/Include/pyport.h b/Include/pyport.h index ca20b22..b6b426a 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -144,23 +144,6 @@ Used in: PY_LONG_LONG #endif #endif -/* Prime multiplier used in string and various other hashes. */ -#define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */ - -/* Parameters used for the numeric hash implementation. See notes for - _Py_HashDouble in Objects/object.c. Numeric hashes are based on - reduction modulo the prime 2**_PyHASH_BITS - 1. */ - -#if SIZEOF_VOID_P >= 8 -#define _PyHASH_BITS 61 -#else -#define _PyHASH_BITS 31 -#endif -#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1) -#define _PyHASH_INF 314159 -#define _PyHASH_NAN 0 -#define _PyHASH_IMAG _PyHASH_MULTIPLIER - /* uintptr_t is the C9X name for an unsigned integral type such that a * legitimate void* can be cast to uintptr_t and then back to void* again * without loss of information. Similarly for intptr_t, wrt a signed @@ -199,8 +182,10 @@ typedef Py_intptr_t Py_ssize_t; #endif /* Py_hash_t is the same size as a pointer. */ +#define SIZEOF_PY_HASH_T SIZEOF_SIZE_T typedef Py_ssize_t Py_hash_t; /* Py_uhash_t is the unsigned equivalent needed to calculate numeric hash. */ +#define SIZEOF_PY_UHASH_T SIZEOF_SIZE_T typedef size_t Py_uhash_t; /* Largest possible value of size_t. |