From c3397520a1955d418be2421e33d1b618ea049cb7 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 18 Sep 2018 12:14:26 -0700 Subject: updated xxhash to latest version --- lib/xxhash.c | 614 ++++++++++++++++++++++++++++++++++++----------------------- lib/xxhash.h | 221 ++++++++++++--------- 2 files changed, 503 insertions(+), 332 deletions(-) diff --git a/lib/xxhash.c b/lib/xxhash.c index 3fc97fd..ff28749 100644 --- a/lib/xxhash.c +++ b/lib/xxhash.c @@ -50,20 +50,26 @@ * Prefer these methods in priority order (0 > 1 > 2) */ #ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ + || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \ + || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) # define XXH_FORCE_MEMORY_ACCESS 2 # elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \ - (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ + || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ + || defined(__ARM_ARCH_7S__) )) # define XXH_FORCE_MEMORY_ACCESS 1 # endif #endif /*!XXH_ACCEPT_NULL_INPUT_POINTER : - * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. - * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. - * By default, this option is disabled. To enable it, uncomment below define : + * If input pointer is NULL, xxHash default behavior is to dereference it, triggering a segfault. + * When this macro is enabled, xxHash actively checks input for null pointer. + * It it is, result for null input pointers is the same as a null-length input. */ -/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ +#ifndef XXH_ACCEPT_NULL_INPUT_POINTER /* can be defined externally */ +# define XXH_ACCEPT_NULL_INPUT_POINTER 0 +#endif /*!XXH_FORCE_NATIVE_FORMAT : * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. @@ -80,8 +86,9 @@ /*!XXH_FORCE_ALIGN_CHECK : * This is a minor performance trick, only useful with lots of very small keys. * It means : check for aligned/unaligned input. - * The check costs one initial branch per hash; set to 0 when the input data - * is guaranteed to be aligned. + * The check costs one initial branch per hash; + * set it to 0 when the input is guaranteed to be aligned, + * or when alignment doesn't matter for performance. */ #ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ # if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) @@ -104,6 +111,8 @@ static void XXH_free (void* p) { free(p); } #include static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } +#include /* assert */ + #define XXH_STATIC_LINKING_ONLY #include "xxhash.h" @@ -113,40 +122,35 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp ***************************************/ #ifdef _MSC_VER /* Visual Studio */ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -#endif - -#ifndef XXH_FORCE_INLINE -# ifdef _MSC_VER /* Visual Studio */ -# define XXH_FORCE_INLINE static __forceinline -# else -# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# ifdef __GNUC__ -# define XXH_FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define XXH_FORCE_INLINE static inline -# endif +# define FORCE_INLINE static __forceinline +#else +# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) # else -# define XXH_FORCE_INLINE static -# endif /* __STDC_VERSION__ */ -# endif /* _MSC_VER */ -#endif /* XXH_FORCE_INLINE */ +# define FORCE_INLINE static inline +# endif +# else +# define FORCE_INLINE static +# endif /* __STDC_VERSION__ */ +#endif /* ************************************* * Basic Types ***************************************/ #ifndef MEM_MODULE -# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) # include typedef uint8_t BYTE; typedef uint16_t U16; typedef uint32_t U32; - typedef int32_t S32; # else typedef unsigned char BYTE; typedef unsigned short U16; typedef unsigned int U32; - typedef signed int S32; # endif #endif @@ -213,8 +217,12 @@ typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; /* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ #ifndef XXH_CPU_LITTLE_ENDIAN - static const int g_one = 1; -# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one)) +static int XXH_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} +# define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian() #endif @@ -223,7 +231,7 @@ typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; *****************************/ typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; -XXH_FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) { if (align==XXH_unaligned) return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); @@ -231,7 +239,7 @@ XXH_FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, X return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); } -XXH_FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) +FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); } @@ -245,12 +253,12 @@ static U32 XXH_readBE32(const void* ptr) /* ************************************* * Macros ***************************************/ -#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ +#define XXH_STATIC_ASSERT(c) { enum { XXH_sa = 1/(int)(!!(c)) }; } /* use after variable declarations */ XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } /* ******************************************************************* -* 32-bits hash functions +* 32-bit hash functions *********************************************************************/ static const U32 PRIME32_1 = 2654435761U; static const U32 PRIME32_2 = 2246822519U; @@ -266,14 +274,89 @@ static U32 XXH32_round(U32 seed, U32 input) return seed; } -XXH_FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) +/* mix all bits */ +static U32 XXH32_avalanche(U32 h32) +{ + h32 ^= h32 >> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + return(h32); +} + +#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) + +static U32 +XXH32_finalize(U32 h32, const void* ptr, size_t len, + XXH_endianess endian, XXH_alignment align) + +{ + const BYTE* p = (const BYTE*)ptr; + +#define PROCESS1 \ + h32 += (*p++) * PRIME32_5; \ + h32 = XXH_rotl32(h32, 11) * PRIME32_1 ; + +#define PROCESS4 \ + h32 += XXH_get32bits(p) * PRIME32_3; \ + p+=4; \ + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + + switch(len&15) /* or switch(bEnd - p) */ + { + case 12: PROCESS4; + /* fallthrough */ + case 8: PROCESS4; + /* fallthrough */ + case 4: PROCESS4; + return XXH32_avalanche(h32); + + case 13: PROCESS4; + /* fallthrough */ + case 9: PROCESS4; + /* fallthrough */ + case 5: PROCESS4; + PROCESS1; + return XXH32_avalanche(h32); + + case 14: PROCESS4; + /* fallthrough */ + case 10: PROCESS4; + /* fallthrough */ + case 6: PROCESS4; + PROCESS1; + PROCESS1; + return XXH32_avalanche(h32); + + case 15: PROCESS4; + /* fallthrough */ + case 11: PROCESS4; + /* fallthrough */ + case 7: PROCESS4; + /* fallthrough */ + case 3: PROCESS1; + /* fallthrough */ + case 2: PROCESS1; + /* fallthrough */ + case 1: PROCESS1; + /* fallthrough */ + case 0: return XXH32_avalanche(h32); + } + assert(0); + return h32; /* reaching this point is deemed impossible */ +} + + +FORCE_INLINE U32 +XXH32_endian_align(const void* input, size_t len, U32 seed, + XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)input; const BYTE* bEnd = p + len; U32 h32; -#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)16; @@ -281,7 +364,7 @@ XXH_FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, #endif if (len>=16) { - const BYTE* const limit = bEnd - 16; + const BYTE* const limit = bEnd - 15; U32 v1 = seed + PRIME32_1 + PRIME32_2; U32 v2 = seed + PRIME32_2; U32 v3 = seed + 0; @@ -292,34 +375,17 @@ XXH_FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; - } while (p<=limit); + } while (p < limit); - h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); } else { h32 = seed + PRIME32_5; } - h32 += (U32) len; - - while (p+4<=bEnd) { - h32 += XXH_get32bits(p) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; - p+=4; - } - - while (p> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; + h32 += (U32)len; - return h32; + return XXH32_finalize(h32, p, len&15, endian, align); } @@ -371,74 +437,81 @@ XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) { XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ - memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */ + memset(&state, 0, sizeof(state)); state.v1 = seed + PRIME32_1 + PRIME32_2; state.v2 = seed + PRIME32_2; state.v3 = seed + 0; state.v4 = seed - PRIME32_1; - memcpy(statePtr, &state, sizeof(state)); + /* do not write into reserved, planned to be removed in a future version */ + memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); return XXH_OK; } -XXH_FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) +FORCE_INLINE XXH_errorcode +XXH32_update_endian(XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) { - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; - -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (input==NULL) return XXH_ERROR; + if (input==NULL) +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + return XXH_OK; +#else + return XXH_ERROR; #endif - state->total_len_32 += (unsigned)len; - state->large_len |= (len>=16) | (state->total_len_32>=16); + { const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; - if (state->memsize + len < 16) { /* fill in tmp buffer */ - XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); - state->memsize += (unsigned)len; - return XXH_OK; - } + state->total_len_32 += (unsigned)len; + state->large_len |= (len>=16) | (state->total_len_32>=16); - if (state->memsize) { /* some data left from previous update */ - XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); - { const U32* p32 = state->mem32; - state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; - state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; - state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; - state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++; + if (state->memsize + len < 16) { /* fill in tmp buffer */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); + state->memsize += (unsigned)len; + return XXH_OK; } - p += 16-state->memsize; - state->memsize = 0; - } - - if (p <= bEnd-16) { - const BYTE* const limit = bEnd - 16; - U32 v1 = state->v1; - U32 v2 = state->v2; - U32 v3 = state->v3; - U32 v4 = state->v4; - do { - v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; - v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; - v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; - v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; - } while (p<=limit); + if (state->memsize) { /* some data left from previous update */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); + { const U32* p32 = state->mem32; + state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; + state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; + state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; + state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); + } + p += 16-state->memsize; + state->memsize = 0; + } - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } + if (p <= bEnd-16) { + const BYTE* const limit = bEnd - 16; + U32 v1 = state->v1; + U32 v2 = state->v2; + U32 v3 = state->v3; + U32 v4 = state->v4; + + do { + v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; + v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; + v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; + v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } - if (p < bEnd) { - XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); - state->memsize = (unsigned)(bEnd-p); + if (p < bEnd) { + XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } } return XXH_OK; } + XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; @@ -450,40 +523,23 @@ XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* } - -XXH_FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) +FORCE_INLINE U32 +XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) { - const BYTE * p = (const BYTE*)state->mem32; - const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; U32 h32; if (state->large_len) { - h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); + h32 = XXH_rotl32(state->v1, 1) + + XXH_rotl32(state->v2, 7) + + XXH_rotl32(state->v3, 12) + + XXH_rotl32(state->v4, 18); } else { h32 = state->v3 /* == seed */ + PRIME32_5; } h32 += state->total_len_32; - while (p+4<=bEnd) { - h32 += XXH_readLE32(p, endian) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4; - p+=4; - } - - while (p> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - - return h32; + return XXH32_finalize(h32, state->mem32, state->memsize, endian, XXH_aligned); } @@ -503,7 +559,7 @@ XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) /*! Default XXH result types are basic unsigned 32 and 64 bits. * The canonical representation follows human-readable write convention, aka big-endian (large digits first). * These functions allow transformation of hash result into and from its canonical format. -* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs. +* This way, hash values can be written into a file or buffer, remaining comparable across different systems. */ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) @@ -522,18 +578,21 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src #ifndef XXH_NO_LONG_LONG /* ******************************************************************* -* 64-bits hash functions +* 64-bit hash functions *********************************************************************/ /*====== Memory access ======*/ #ifndef MEM_MODULE # define MEM_MODULE -# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) # include typedef uint64_t U64; # else - typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */ + /* if compiler doesn't support unsigned long long, replace by another 64-bit type */ + typedef unsigned long long U64; # endif #endif @@ -583,7 +642,7 @@ static U64 XXH_swap64 (U64 x) } #endif -XXH_FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) { if (align==XXH_unaligned) return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); @@ -591,7 +650,7 @@ XXH_FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, X return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); } -XXH_FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) +FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) { return XXH_readLE64_align(ptr, endian, XXH_unaligned); } @@ -626,14 +685,137 @@ static U64 XXH64_mergeRound(U64 acc, U64 val) return acc; } -XXH_FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) +static U64 XXH64_avalanche(U64 h64) +{ + h64 ^= h64 >> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + return h64; +} + + +#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) + +static U64 +XXH64_finalize(U64 h64, const void* ptr, size_t len, + XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)ptr; + +#define PROCESS1_64 \ + h64 ^= (*p++) * PRIME64_5; \ + h64 = XXH_rotl64(h64, 11) * PRIME64_1; + +#define PROCESS4_64 \ + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; \ + p+=4; \ + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + +#define PROCESS8_64 { \ + U64 const k1 = XXH64_round(0, XXH_get64bits(p)); \ + p+=8; \ + h64 ^= k1; \ + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; \ +} + + switch(len&31) { + case 24: PROCESS8_64; + /* fallthrough */ + case 16: PROCESS8_64; + /* fallthrough */ + case 8: PROCESS8_64; + return XXH64_avalanche(h64); + + case 28: PROCESS8_64; + /* fallthrough */ + case 20: PROCESS8_64; + /* fallthrough */ + case 12: PROCESS8_64; + /* fallthrough */ + case 4: PROCESS4_64; + return XXH64_avalanche(h64); + + case 25: PROCESS8_64; + /* fallthrough */ + case 17: PROCESS8_64; + /* fallthrough */ + case 9: PROCESS8_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 29: PROCESS8_64; + /* fallthrough */ + case 21: PROCESS8_64; + /* fallthrough */ + case 13: PROCESS8_64; + /* fallthrough */ + case 5: PROCESS4_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 26: PROCESS8_64; + /* fallthrough */ + case 18: PROCESS8_64; + /* fallthrough */ + case 10: PROCESS8_64; + PROCESS1_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 30: PROCESS8_64; + /* fallthrough */ + case 22: PROCESS8_64; + /* fallthrough */ + case 14: PROCESS8_64; + /* fallthrough */ + case 6: PROCESS4_64; + PROCESS1_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 27: PROCESS8_64; + /* fallthrough */ + case 19: PROCESS8_64; + /* fallthrough */ + case 11: PROCESS8_64; + PROCESS1_64; + PROCESS1_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 31: PROCESS8_64; + /* fallthrough */ + case 23: PROCESS8_64; + /* fallthrough */ + case 15: PROCESS8_64; + /* fallthrough */ + case 7: PROCESS4_64; + /* fallthrough */ + case 3: PROCESS1_64; + /* fallthrough */ + case 2: PROCESS1_64; + /* fallthrough */ + case 1: PROCESS1_64; + /* fallthrough */ + case 0: return XXH64_avalanche(h64); + } + + /* impossible to reach */ + assert(0); + return 0; /* unreachable, but some compilers complain without it */ +} + +FORCE_INLINE U64 +XXH64_endian_align(const void* input, size_t len, U64 seed, + XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; + const BYTE* bEnd = p + len; U64 h64; -#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)32; @@ -666,32 +848,7 @@ XXH_FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, h64 += (U64) len; - while (p+8<=bEnd) { - U64 const k1 = XXH64_round(0, XXH_get64bits(p)); - h64 ^= k1; - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; - p+=8; - } - - if (p+4<=bEnd) { - h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; - h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; - p+=4; - } - - while (p> 33; - h64 *= PRIME64_2; - h64 ^= h64 >> 29; - h64 *= PRIME64_3; - h64 ^= h64 >> 32; - - return h64; + return XXH64_finalize(h64, p, len, endian, align); } @@ -741,65 +898,71 @@ XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) { XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ - memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */ + memset(&state, 0, sizeof(state)); state.v1 = seed + PRIME64_1 + PRIME64_2; state.v2 = seed + PRIME64_2; state.v3 = seed + 0; state.v4 = seed - PRIME64_1; - memcpy(statePtr, &state, sizeof(state)); + /* do not write into reserved, planned to be removed in a future version */ + memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); return XXH_OK; } -XXH_FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) +FORCE_INLINE XXH_errorcode +XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) { - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; - -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (input==NULL) return XXH_ERROR; + if (input==NULL) +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + return XXH_OK; +#else + return XXH_ERROR; #endif - state->total_len += len; - - if (state->memsize + len < 32) { /* fill in tmp buffer */ - XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); - state->memsize += (U32)len; - return XXH_OK; - } + { const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; - if (state->memsize) { /* tmp buffer is full */ - XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); - state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); - state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); - state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); - state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); - p += 32-state->memsize; - state->memsize = 0; - } + state->total_len += len; - if (p+32 <= bEnd) { - const BYTE* const limit = bEnd - 32; - U64 v1 = state->v1; - U64 v2 = state->v2; - U64 v3 = state->v3; - U64 v4 = state->v4; + if (state->memsize + len < 32) { /* fill in tmp buffer */ + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); + state->memsize += (U32)len; + return XXH_OK; + } - do { - v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; - v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; - v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; - v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; - } while (p<=limit); + if (state->memsize) { /* tmp buffer is full */ + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); + state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); + state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); + state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); + state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); + p += 32-state->memsize; + state->memsize = 0; + } - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } + if (p+32 <= bEnd) { + const BYTE* const limit = bEnd - 32; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + do { + v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; + v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; + v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; + v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } - if (p < bEnd) { - XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); - state->memsize = (unsigned)(bEnd-p); + if (p < bEnd) { + XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } } return XXH_OK; @@ -815,10 +978,8 @@ XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* return XXH64_update_endian(state_in, input, len, XXH_bigEndian); } -XXH_FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) +FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) { - const BYTE * p = (const BYTE*)state->mem64; - const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; U64 h64; if (state->total_len >= 32) { @@ -833,37 +994,12 @@ XXH_FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endian h64 = XXH64_mergeRound(h64, v3); h64 = XXH64_mergeRound(h64, v4); } else { - h64 = state->v3 + PRIME64_5; + h64 = state->v3 /*seed*/ + PRIME64_5; } h64 += (U64) state->total_len; - while (p+8<=bEnd) { - U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian)); - h64 ^= k1; - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; - p+=8; - } - - if (p+4<=bEnd) { - h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; - h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; - p+=4; - } - - while (p> 33; - h64 *= PRIME64_2; - h64 ^= h64 >> 29; - h64 *= PRIME64_3; - h64 ^= h64 >> 32; - - return h64; + return XXH64_finalize(h64, state->mem64, (size_t)state->total_len, endian, XXH_aligned); } XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) diff --git a/lib/xxhash.h b/lib/xxhash.h index 870a6d9..d6bad94 100644 --- a/lib/xxhash.h +++ b/lib/xxhash.h @@ -57,8 +57,8 @@ Q.Score is a measure of quality of the hash function. It depends on successfully passing SMHasher test set. 10 is a perfect score. -A 64-bits version, named XXH64, is available since r35. -It offers much better speed, but for 64-bits applications only. +A 64-bit version, named XXH64, is available since r35. +It offers much better speed, but for 64-bit applications only. Name Speed on 64 bits Speed on 32 bits XXH64 13.8 GB/s 1.9 GB/s XXH32 6.8 GB/s 6.0 GB/s @@ -80,18 +80,19 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; /* **************************** -* API modifier -******************************/ -/** XXH_PRIVATE_API -* This is useful to include xxhash functions in `static` mode -* in order to inline them, and remove their symbol from the public list. -* Methodology : -* #define XXH_PRIVATE_API -* #include "xxhash.h" -* `xxhash.c` is automatically included. -* It's not useful to compile and link it as a separate module. -*/ -#ifdef XXH_PRIVATE_API + * API modifier + ******************************/ +/** XXH_INLINE_ALL (and XXH_PRIVATE_API) + * This is useful to include xxhash functions in `static` mode + * in order to inline them, and remove their symbol from the public list. + * Inlining can offer dramatic performance improvement on small keys. + * Methodology : + * #define XXH_INLINE_ALL + * #include "xxhash.h" + * `xxhash.c` is automatically included. + * It's not useful to compile and link it as a separate module. + */ +#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) # ifndef XXH_STATIC_LINKING_ONLY # define XXH_STATIC_LINKING_ONLY # endif @@ -102,23 +103,24 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; # elif defined(_MSC_VER) # define XXH_PUBLIC_API static __inline # else -# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ + /* this version may generate warnings for unused static functions */ +# define XXH_PUBLIC_API static # endif #else # define XXH_PUBLIC_API /* do nothing */ -#endif /* XXH_PRIVATE_API */ - -/*!XXH_NAMESPACE, aka Namespace Emulation : - -If you want to include _and expose_ xxHash functions from within your own library, -but also want to avoid symbol collisions with other libraries which may also include xxHash, - -you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library -with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values). - -Note that no change is required within the calling program as long as it includes `xxhash.h` : -regular symbol name will be automatically translated by this header. -*/ +#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */ + +/*! XXH_NAMESPACE, aka Namespace Emulation : + * + * If you want to include _and expose_ xxHash functions from within your own library, + * but also want to avoid symbol collisions with other libraries which may also include xxHash, + * + * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library + * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values). + * + * Note that no change is required within the calling program as long as it includes `xxhash.h` : + * regular symbol name will be automatically translated by this header. + */ #ifdef XXH_NAMESPACE # define XXH_CAT(A,B) A##B # define XXH_NAME2(A,B) XXH_CAT(A,B) @@ -149,18 +151,18 @@ regular symbol name will be automatically translated by this header. ***************************************/ #define XXH_VERSION_MAJOR 0 #define XXH_VERSION_MINOR 6 -#define XXH_VERSION_RELEASE 2 +#define XXH_VERSION_RELEASE 5 #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) XXH_PUBLIC_API unsigned XXH_versionNumber (void); /*-********************************************************************** -* 32-bits hash +* 32-bit hash ************************************************************************/ -typedef unsigned int XXH32_hash_t; +typedef unsigned int XXH32_hash_t; /*! XXH32() : - Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". + Calculate the 32-bit hash of sequence "length" bytes stored at memory address "input". The memory between input & input+length must be valid (allocated and read-accessible). "seed" can be used to alter the result predictably. Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */ @@ -177,26 +179,25 @@ XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); /* -These functions generate the xxHash of an input provided in multiple segments. -Note that, for small input, they are slower than single-call functions, due to state management. -For small input, prefer `XXH32()` and `XXH64()` . - -XXH state must first be allocated, using XXH*_createState() . - -Start a new hash by initializing state with a seed, using XXH*_reset(). - -Then, feed the hash state by calling XXH*_update() as many times as necessary. -Obviously, input must be allocated and read accessible. -The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. - -Finally, a hash value can be produced anytime, by using XXH*_digest(). -This function returns the nn-bits hash as an int or long long. - -It's still possible to continue inserting input into the hash state after a digest, -and generate some new hashes later on, by calling again XXH*_digest(). - -When done, free XXH state space if it was allocated dynamically. -*/ + * Streaming functions generate the xxHash of an input provided in multiple segments. + * Note that, for small input, they are slower than single-call functions, due to state management. + * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized. + * + * XXH state must first be allocated, using XXH*_createState() . + * + * Start a new hash by initializing state with a seed, using XXH*_reset(). + * + * Then, feed the hash state by calling XXH*_update() as many times as necessary. + * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. + * + * Finally, a hash value can be produced anytime, by using XXH*_digest(). + * This function returns the nn-bits hash as an int or long long. + * + * It's still possible to continue inserting input into the hash state after a digest, + * and generate some new hashes later on, by calling again XXH*_digest(). + * + * When done, free XXH state space if it was allocated dynamically. + */ /*====== Canonical representation ======*/ @@ -205,22 +206,22 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); /* Default result type for XXH functions are primitive unsigned 32 and 64 bits. -* The canonical representation uses human-readable write convention, aka big-endian (large digits first). -* These functions allow transformation of hash result into and from its canonical format. -* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. -*/ + * The canonical representation uses human-readable write convention, aka big-endian (large digits first). + * These functions allow transformation of hash result into and from its canonical format. + * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. + */ #ifndef XXH_NO_LONG_LONG /*-********************************************************************** -* 64-bits hash +* 64-bit hash ************************************************************************/ typedef unsigned long long XXH64_hash_t; /*! XXH64() : - Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". + Calculate the 64-bit hash of sequence of length "len" stored at memory address "input". "seed" can be used to alter the result predictably. - This function runs faster on 64-bits systems, but slower on 32-bits systems (see benchmark). + This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark). */ XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); @@ -241,48 +242,82 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src #endif /* XXH_NO_LONG_LONG */ + #ifdef XXH_STATIC_LINKING_ONLY /* ================================================================================================ - This section contains definitions which are not guaranteed to remain stable. + This section contains declarations which are not guaranteed to remain stable. They may change in future versions, becoming incompatible with a different version of the library. - They shall only be used with static linking. - Never use these definitions in association with dynamic linking ! + These declarations should only be used with static linking. + Never use them in association with dynamic linking ! =================================================================================================== */ -/* These definitions are only meant to allow allocation of XXH state - statically, on stack, or in a struct for example. - Do not use members directly. */ - - struct XXH32_state_s { - unsigned total_len_32; - unsigned large_len; - unsigned v1; - unsigned v2; - unsigned v3; - unsigned v4; - unsigned mem32[4]; /* buffer defined as U32 for alignment */ - unsigned memsize; - unsigned reserved; /* never read nor write, will be removed in a future version */ - }; /* typedef'd to XXH32_state_t */ - -#ifndef XXH_NO_LONG_LONG - struct XXH64_state_s { - unsigned long long total_len; - unsigned long long v1; - unsigned long long v2; - unsigned long long v3; - unsigned long long v4; - unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ - unsigned memsize; - unsigned reserved[2]; /* never read nor write, will be removed in a future version */ - }; /* typedef'd to XXH64_state_t */ +/* These definitions are only present to allow + * static allocation of XXH state, on stack or in a struct for example. + * Never **ever** use members directly. */ + +#if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include + +struct XXH32_state_s { + uint32_t total_len_32; + uint32_t large_len; + uint32_t v1; + uint32_t v2; + uint32_t v3; + uint32_t v4; + uint32_t mem32[4]; + uint32_t memsize; + uint32_t reserved; /* never read nor write, might be removed in a future version */ +}; /* typedef'd to XXH32_state_t */ + +struct XXH64_state_s { + uint64_t total_len; + uint64_t v1; + uint64_t v2; + uint64_t v3; + uint64_t v4; + uint64_t mem64[4]; + uint32_t memsize; + uint32_t reserved[2]; /* never read nor write, might be removed in a future version */ +}; /* typedef'd to XXH64_state_t */ + +# else + +struct XXH32_state_s { + unsigned total_len_32; + unsigned large_len; + unsigned v1; + unsigned v2; + unsigned v3; + unsigned v4; + unsigned mem32[4]; + unsigned memsize; + unsigned reserved; /* never read nor write, might be removed in a future version */ +}; /* typedef'd to XXH32_state_t */ + +# ifndef XXH_NO_LONG_LONG /* remove 64-bit support */ +struct XXH64_state_s { + unsigned long long total_len; + unsigned long long v1; + unsigned long long v2; + unsigned long long v3; + unsigned long long v4; + unsigned long long mem64[4]; + unsigned memsize; + unsigned reserved[2]; /* never read nor write, might be removed in a future version */ +}; /* typedef'd to XXH64_state_t */ +# endif + +# endif + + +#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) +# include "xxhash.c" /* include xxhash function bodies as `static`, for inlining */ #endif -# ifdef XXH_PRIVATE_API -# include "xxhash.c" /* include xxhash function bodies as `static`, for inlining */ -# endif - #endif /* XXH_STATIC_LINKING_ONLY */ -- cgit v0.12