From 2ab6f9a387de9d507e38ae63f6da0f9b9b9a58d8 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 6 Oct 2014 11:13:56 +0100 Subject: Updated : xxHash to r37 --- NEWS | 4 + programs/lz4io.c | 28 ++-- xxhash.c | 470 +++++++++++++++++++++++++++++++++++-------------------- xxhash.h | 98 +++++------- 4 files changed, 360 insertions(+), 240 deletions(-) diff --git a/NEWS b/NEWS index 41d5762..7edee54 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,7 @@ +r124: +Fix : LZ4F_compressBound() using NULL preferencesPtr +Updated : xxHash, to r37 + r123: Added : experimental lz4frame API, thanks to Takayuki Matsuoka and Christopher Jackson for testings Fix : s390x support, thanks to Nobuhiro Iwamatsu diff --git a/programs/lz4io.c b/programs/lz4io.c index 20520d6..6de5206 100644 --- a/programs/lz4io.c +++ b/programs/lz4io.c @@ -409,7 +409,7 @@ static int compress_file_blockDependency(char* input_filename, char* output_file clock_t start, end; unsigned int blockSize, inputBufferSize; size_t sizeCheck, header_size; - void* streamChecksumState=NULL; + XXH32_state_t streamCRC; // Init start = clock(); @@ -440,7 +440,7 @@ static int compress_file_blockDependency(char* input_filename, char* output_file if (!in_buff || !out_buff) EXM_THROW(31, "Allocation error : not enough memory"); in_blockStart = in_buff + 64 KB; if (compressionlevel>=3) in_blockStart = in_buff; - if (streamChecksum) streamChecksumState = XXH32_init(LZ4S_CHECKSUM_SEED); + if (streamChecksum) XXH32_reset(&streamCRC, LZ4S_CHECKSUM_SEED); ctx = initFunction(in_buff); // Write Archive Header @@ -469,7 +469,7 @@ static int compress_file_blockDependency(char* input_filename, char* output_file if( inSize==0 ) break; // No more input : end of compression filesize += inSize; DISPLAYLEVEL(3, "\rRead : %i MB ", (int)(filesize>>20)); - if (streamChecksum) XXH32_update(streamChecksumState, in_blockStart, inSize); + if (streamChecksum) XXH32_update(&streamCRC, in_blockStart, inSize); // Compress Block outSize = compressionFunction(ctx, in_blockStart, out_buff+4, inSize, inSize-1, compressionlevel); @@ -521,7 +521,7 @@ static int compress_file_blockDependency(char* input_filename, char* output_file compressedfilesize += 4; if (streamChecksum) { - unsigned int checksum = XXH32_digest(streamChecksumState); + unsigned int checksum = XXH32_digest(&streamCRC); * (unsigned int*) out_buff = LITTLE_ENDIAN_32(checksum); sizeCheck = fwrite(out_buff, 1, 4, foutput); if (sizeCheck!=(size_t)(4)) EXM_THROW(37, "Write error : cannot write stream checksum"); @@ -566,7 +566,7 @@ int LZ4IO_compressFilename(char* input_filename, char* output_filename, int comp clock_t start, end; int blockSize; size_t sizeCheck, header_size, readSize; - void* streamChecksumState=NULL; + XXH32_state_t streamCRC; // Branch out if (blockIndependence==0) return compress_file_blockDependency(input_filename, output_filename, compressionLevel); @@ -584,7 +584,7 @@ int LZ4IO_compressFilename(char* input_filename, char* output_filename, int comp out_buff = (char*)malloc(blockSize+CACHELINE); headerBuffer = (char*)malloc(LZ4S_MAXHEADERSIZE); if (!in_buff || !out_buff || !(headerBuffer)) EXM_THROW(31, "Allocation error : not enough memory"); - if (streamChecksum) streamChecksumState = XXH32_init(LZ4S_CHECKSUM_SEED); + if (streamChecksum) XXH32_reset(&streamCRC, LZ4S_CHECKSUM_SEED); // Write Archive Header *(unsigned int*)headerBuffer = LITTLE_ENDIAN_32(LZ4S_MAGICNUMBER); // Magic Number, in Little Endian convention @@ -613,7 +613,7 @@ int LZ4IO_compressFilename(char* input_filename, char* output_filename, int comp filesize += readSize; DISPLAYLEVEL(3, "\rRead : %i MB ", (int)(filesize>>20)); - if (streamChecksum) XXH32_update(streamChecksumState, in_buff, (int)readSize); + if (streamChecksum) XXH32_update(&streamCRC, in_buff, (int)readSize); // Compress Block outSize = compressionFunction(in_buff, out_buff+4, (int)readSize, (int)readSize-1, compressionLevel); @@ -662,8 +662,8 @@ int LZ4IO_compressFilename(char* input_filename, char* output_filename, int comp compressedfilesize += 4; if (streamChecksum) { - unsigned int checksum = XXH32_digest(streamChecksumState); - * (unsigned int*) out_buff = LITTLE_ENDIAN_32(checksum); + unsigned int checksum = XXH32_digest(&streamCRC); + *(unsigned int*) out_buff = LITTLE_ENDIAN_32(checksum); sizeCheck = fwrite(out_buff, 1, 4, foutput); if (sizeCheck!=(size_t)(4)) EXM_THROW(37, "Write error : cannot write stream checksum"); compressedfilesize += 4; @@ -755,7 +755,7 @@ static unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) unsigned int maxBlockSize; size_t sizeCheck; int blockChecksumFlag, streamChecksumFlag, blockIndependenceFlag; - void* streamChecksumState=NULL; + XXH32_state_t streamCRC; int (*decompressionFunction)(LZ4_streamDecode_t* ctx, const char* src, char* dst, int cSize, int maxOSize) = LZ4_decompress_safe_continue; LZ4_streamDecode_t ctx; @@ -805,7 +805,7 @@ static unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) out_start = out_buff; out_end = out_start + outBuffSize; if (!in_buff || !out_buff) EXM_THROW(70, "Allocation error : not enough memory"); - if (streamChecksumFlag) streamChecksumState = XXH32_init(LZ4S_CHECKSUM_SEED); + if (streamChecksumFlag) XXH32_reset(&streamCRC, LZ4S_CHECKSUM_SEED); } // Main Loop @@ -843,7 +843,7 @@ static unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) sizeCheck = fwrite(in_buff, 1, blockSize, foutput); if (sizeCheck != (size_t)blockSize) EXM_THROW(76, "Write error : cannot write data block"); filesize += blockSize; - if (streamChecksumFlag) XXH32_update(streamChecksumState, in_buff, blockSize); + if (streamChecksumFlag) XXH32_update(&streamCRC, in_buff, blockSize); if (!blockIndependenceFlag) { // handle dictionary for streaming @@ -859,7 +859,7 @@ static unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) decodedBytes = decompressionFunction(&ctx, in_buff, out_start, blockSize, maxBlockSize); if (decodedBytes < 0) EXM_THROW(77, "Decoding Failed ! Corrupted input detected !"); filesize += decodedBytes; - if (streamChecksumFlag) XXH32_update(streamChecksumState, out_start, decodedBytes); + if (streamChecksumFlag) XXH32_update(&streamCRC, out_start, decodedBytes); // Write Block sizeCheck = fwrite(out_start, 1, decodedBytes, foutput); @@ -872,7 +872,7 @@ static unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) // Stream Checksum if (streamChecksumFlag) { - unsigned int checksum = XXH32_digest(streamChecksumState); + unsigned int checksum = XXH32_digest(&streamCRC); unsigned int readChecksum; sizeCheck = fread(&readChecksum, 1, 4, finput); if (sizeCheck != 4) EXM_THROW(74, "Read error : cannot read stream checksum"); diff --git a/xxhash.c b/xxhash.c index be48370..529d69c 100644 --- a/xxhash.c +++ b/xxhash.c @@ -28,6 +28,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : - xxHash source repository : http://code.google.com/p/xxhash/ +- public discussion board : https://groups.google.com/forum/#!forum/lz4c */ @@ -80,14 +81,23 @@ You can contact the author at : // Includes & Memory related functions //************************************** #include "xxhash.h" -// Modify the local functions below should you wish to use some other memory related routines +// Modify the local functions below should you wish to use some other memory routines // for malloc(), free() #include -FORCE_INLINE void* XXH_malloc(size_t s) { return malloc(s); } -FORCE_INLINE void XXH_free (void* p) { free(p); } +FORCE_INLINE void* XXH_malloc(size_t s) +{ + return malloc(s); +} +FORCE_INLINE void XXH_free (void* p) +{ + free(p); +} // for memcpy() #include -FORCE_INLINE void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } +FORCE_INLINE void* XXH_memcpy(void* dest, const void* src, size_t size) +{ + return memcpy(dest,src,size); +} //************************************** @@ -95,17 +105,17 @@ FORCE_INLINE void* XXH_memcpy(void* dest, const void* src, size_t size) { return //************************************** #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 # include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; +typedef uint8_t BYTE; +typedef uint16_t U16; +typedef uint32_t U32; +typedef int32_t S32; +typedef uint64_t U64; #else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; +typedef unsigned char BYTE; +typedef unsigned short U16; +typedef unsigned int U32; +typedef signed int S32; +typedef unsigned long long U64; #endif #if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS) @@ -122,8 +132,14 @@ FORCE_INLINE void* XXH_memcpy(void* dest, const void* src, size_t size) { return # endif #endif -typedef struct _U32_S { U32 v; } _PACKED U32_S; -typedef struct _U64_S { U64 v; } _PACKED U64_S; +typedef struct _U32_S +{ + U32 v; +} _PACKED U32_S; +typedef struct _U64_S +{ + U64 v; +} _PACKED U64_S; #if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) # pragma pack(pop) @@ -154,12 +170,15 @@ typedef struct _U64_S { U64 v; } _PACKED U64_S; # define XXH_swap32 __builtin_bswap32 # define XXH_swap64 __builtin_bswap64 #else -static inline U32 XXH_swap32 (U32 x) { +static inline U32 XXH_swap32 (U32 x) +{ return ((x << 24) & 0xff000000 ) | - ((x << 8) & 0x00ff0000 ) | - ((x >> 8) & 0x0000ff00 ) | - ((x >> 24) & 0x000000ff );} -static inline U64 XXH_swap64 (U64 x) { + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +static inline U64 XXH_swap64 (U64 x) +{ return ((x << 56) & 0xff00000000000000ULL) | ((x << 40) & 0x00ff000000000000ULL) | ((x << 24) & 0x0000ff0000000000ULL) | @@ -167,7 +186,8 @@ static inline U64 XXH_swap64 (U64 x) { ((x >> 8) & 0x00000000ff000000ULL) | ((x >> 24) & 0x0000000000ff0000ULL) | ((x >> 40) & 0x000000000000ff00ULL) | - ((x >> 56) & 0x00000000000000ffULL);} + ((x >> 56) & 0x00000000000000ffULL); +} #endif @@ -191,7 +211,7 @@ static inline U64 XXH_swap64 (U64 x) { //************************************** typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; #ifndef XXH_CPU_LITTLE_ENDIAN // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch - static const int one = 1; +static const int one = 1; # define XXH_CPU_LITTLE_ENDIAN (*(char*)(&one)) #endif @@ -215,7 +235,10 @@ FORCE_INLINE U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_al return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr); } -FORCE_INLINE U32 XXH_readLE32(const U32* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); } +FORCE_INLINE U32 XXH_readLE32(const U32* ptr, XXH_endianess endian) +{ + return XXH_readLE32_align(ptr, endian, XXH_unaligned); +} FORCE_INLINE U64 XXH_readLE64_align(const U64* ptr, XXH_endianess endian, XXH_alignment align) { @@ -225,13 +248,16 @@ FORCE_INLINE U64 XXH_readLE64_align(const U64* ptr, XXH_endianess endian, XXH_al return endian==XXH_littleEndian ? *ptr : XXH_swap64(*ptr); } -FORCE_INLINE U64 XXH_readLE64(const U64* ptr, XXH_endianess endian) { return XXH_readLE64_align(ptr, endian, XXH_unaligned); } +FORCE_INLINE U64 XXH_readLE64(const U64* ptr, XXH_endianess endian) +{ + return XXH_readLE64_align(ptr, endian, XXH_unaligned); +} //**************************** // Simple Hash Functions //**************************** -FORCE_INLINE U32 XXH32_endian_align(const void* input, unsigned int len, U32 seed, XXH_endianess endian, XXH_alignment align) +FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)input; const BYTE* bEnd = p + len; @@ -239,7 +265,11 @@ FORCE_INLINE U32 XXH32_endian_align(const void* input, unsigned int len, U32 see #define XXH_get32bits(p) XXH_readLE32_align((const U32*)p, endian, align) #ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)16; } + if (p==NULL) + { + len=0; + bEnd=p=(const BYTE*)(size_t)16; + } #endif if (len>=16) @@ -252,11 +282,24 @@ FORCE_INLINE U32 XXH32_endian_align(const void* input, unsigned int len, U32 see do { - v1 += XXH_get32bits(p) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; - v2 += XXH_get32bits(p) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; - v3 += XXH_get32bits(p) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; - v4 += XXH_get32bits(p) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; - } while (p<=limit); + v1 += XXH_get32bits(p) * PRIME32_2; + v1 = XXH_rotl32(v1, 13); + v1 *= PRIME32_1; + p+=4; + v2 += XXH_get32bits(p) * PRIME32_2; + v2 = XXH_rotl32(v2, 13); + v2 *= PRIME32_1; + p+=4; + v3 += XXH_get32bits(p) * PRIME32_2; + v3 = XXH_rotl32(v3, 13); + v3 *= PRIME32_1; + p+=4; + v4 += XXH_get32bits(p) * PRIME32_2; + v4 = XXH_rotl32(v4, 13); + v4 *= PRIME32_1; + p+=4; + } + while (p<=limit); h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); } @@ -291,13 +334,14 @@ FORCE_INLINE U32 XXH32_endian_align(const void* input, unsigned int len, U32 see } -U32 XXH32(const void* input, unsigned int len, U32 seed) +unsigned int XXH32 (const void* input, size_t len, unsigned seed) { #if 0 // Simple version, good for code maintenance, but unfortunately slow for small inputs - void* state = XXH32_init(seed); - XXH32_update(state, input, len); - return XXH32_digest(state); + XXH32_state_t state; + XXH32_reset(&state, seed); + XXH32_update(&state, input, len); + return XXH32_digest(&state); #else XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; @@ -318,7 +362,7 @@ U32 XXH32(const void* input, unsigned int len, U32 seed) #endif } -FORCE_INLINE U64 XXH64_endian_align(const void* input, unsigned int len, U64 seed, XXH_endianess endian, XXH_alignment align) +FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)input; const BYTE* bEnd = p + len; @@ -326,7 +370,11 @@ FORCE_INLINE U64 XXH64_endian_align(const void* input, unsigned int len, U64 see #define XXH_get64bits(p) XXH_readLE64_align((const U64*)p, endian, align) #ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)32; } + if (p==NULL) + { + len=0; + bEnd=p=(const BYTE*)(size_t)32; + } #endif if (len>=32) @@ -339,25 +387,50 @@ FORCE_INLINE U64 XXH64_endian_align(const void* input, unsigned int len, U64 see do { - v1 += XXH_get64bits(p) * PRIME64_2; p+=8; v1 = XXH_rotl64(v1, 31); v1 *= PRIME64_1; - v2 += XXH_get64bits(p) * PRIME64_2; p+=8; v2 = XXH_rotl64(v2, 31); v2 *= PRIME64_1; - v3 += XXH_get64bits(p) * PRIME64_2; p+=8; v3 = XXH_rotl64(v3, 31); v3 *= PRIME64_1; - v4 += XXH_get64bits(p) * PRIME64_2; p+=8; v4 = XXH_rotl64(v4, 31); v4 *= PRIME64_1; - } while (p<=limit); + v1 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + v2 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + v3 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + v4 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + } + while (p<=limit); h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); - v1 *= PRIME64_2; v1 = XXH_rotl64(v1, 31); v1 *= PRIME64_1; h64 ^= v1; - h64 = h64 * PRIME64_1 + PRIME64_4; - - v2 *= PRIME64_2; v2 = XXH_rotl64(v2, 31); v2 *= PRIME64_1; h64 ^= v2; - h64 = h64 * PRIME64_1 + PRIME64_4; - - v3 *= PRIME64_2; v3 = XXH_rotl64(v3, 31); v3 *= PRIME64_1; h64 ^= v3; - h64 = h64 * PRIME64_1 + PRIME64_4; - - v4 *= PRIME64_2; v4 = XXH_rotl64(v4, 31); v4 *= PRIME64_1; h64 ^= v4; - h64 = h64 * PRIME64_1 + PRIME64_4; + v1 *= PRIME64_2; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + h64 ^= v1; + h64 = h64 * PRIME64_1 + PRIME64_4; + + v2 *= PRIME64_2; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + h64 ^= v2; + h64 = h64 * PRIME64_1 + PRIME64_4; + + v3 *= PRIME64_2; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + h64 ^= v3; + h64 = h64 * PRIME64_1 + PRIME64_4; + + v4 *= PRIME64_2; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + h64 ^= v4; + h64 = h64 * PRIME64_1 + PRIME64_4; } else { @@ -368,22 +441,25 @@ FORCE_INLINE U64 XXH64_endian_align(const void* input, unsigned int len, U64 see while (p+8<=bEnd) { - U64 k1 = XXH_get64bits(p); - k1 *= PRIME64_2; k1 = XXH_rotl64(k1,31); k1 *= PRIME64_1; h64 ^= k1; - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; - p+=8; + U64 k1 = XXH_get64bits(p); + k1 *= PRIME64_2; + k1 = XXH_rotl64(k1,31); + k1 *= PRIME64_1; + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; } if (p+4<=bEnd) { - h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; p+=4; } while (p= sizeof(struct XXH_state32_t)); // A compilation error here means XXH32_SIZEOFSTATE is not large enough - return sizeof(struct XXH_state32_t); + XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t)); // A compilation error here means XXH32_state_t is not large enough + return (XXH32_state_t*)malloc(sizeof(XXH32_state_t)); } +XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +{ + free(statePtr); + return XXH_OK; +}; -int XXH64_sizeofState(void) +XXH64_state_t* XXH64_createState(void) { - XXH_STATIC_ASSERT(XXH64_SIZEOFSTATE >= sizeof(struct XXH_state64_t)); // A compilation error here means XXH64_SIZEOFSTATE is not large enough - return sizeof(struct XXH_state64_t); + XXH_STATIC_ASSERT(sizeof(XXH64_state_t) >= sizeof(XXH_istate64_t)); // A compilation error here means XXH64_state_t is not large enough + return (XXH64_state_t*)malloc(sizeof(XXH64_state_t)); } +XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + free(statePtr); + return XXH_OK; +}; + +/*** Hash feed ***/ -XXH_errorcode XXH32_resetState(void* state_in, U32 seed) +XXH_errorcode XXH32_reset(XXH32_state_t* state_in, U32 seed) { - struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; + XXH_istate32_t* state = (XXH_istate32_t*) state_in; state->seed = seed; state->v1 = seed + PRIME32_1 + PRIME32_2; state->v2 = seed + PRIME32_2; @@ -473,9 +570,9 @@ XXH_errorcode XXH32_resetState(void* state_in, U32 seed) return XXH_OK; } -XXH_errorcode XXH64_resetState(void* state_in, unsigned long long seed) +XXH_errorcode XXH64_reset(XXH64_state_t* state_in, unsigned long long seed) { - struct XXH_state64_t * state = (struct XXH_state64_t *) state_in; + XXH_istate64_t* state = (XXH_istate64_t*) state_in; state->seed = seed; state->v1 = seed + PRIME64_1 + PRIME64_2; state->v2 = seed + PRIME64_2; @@ -487,24 +584,9 @@ XXH_errorcode XXH64_resetState(void* state_in, unsigned long long seed) } -void* XXH32_init (U32 seed) +FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian) { - void* state = XXH_malloc (sizeof(struct XXH_state32_t)); - if (state != NULL) XXH32_resetState(state, seed); - return state; -} - -void* XXH64_init (unsigned long long seed) -{ - void* state = XXH_malloc (sizeof(struct XXH_state64_t)); - if (state != NULL) XXH64_resetState(state, seed); - return state; -} - - -FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* input, int len, XXH_endianess endian) -{ - struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; + XXH_istate32_t* state = (XXH_istate32_t *) state_in; const BYTE* p = (const BYTE*)input; const BYTE* const bEnd = p + len; @@ -517,7 +599,7 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* inpu if (state->memsize + len < 16) // fill in tmp buffer { XXH_memcpy(state->memory + state->memsize, input, len); - state->memsize += len; + state->memsize += (U32)len; return XXH_OK; } @@ -526,10 +608,22 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* inpu XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize); { const U32* p32 = (const U32*)state->memory; - state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++; - state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++; - state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++; - state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++; + state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; + state->v1 = XXH_rotl32(state->v1, 13); + state->v1 *= PRIME32_1; + p32++; + state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; + state->v2 = XXH_rotl32(state->v2, 13); + state->v2 *= PRIME32_1; + p32++; + state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; + state->v3 = XXH_rotl32(state->v3, 13); + state->v3 *= PRIME32_1; + p32++; + state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; + state->v4 = XXH_rotl32(state->v4, 13); + state->v4 *= PRIME32_1; + p32++; } p += 16-state->memsize; state->memsize = 0; @@ -545,11 +639,24 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* inpu do { - v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; - v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; - v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; - v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; - } while (p<=limit); + v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; + v1 = XXH_rotl32(v1, 13); + v1 *= PRIME32_1; + p+=4; + v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; + v2 = XXH_rotl32(v2, 13); + v2 *= PRIME32_1; + p+=4; + v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; + v3 = XXH_rotl32(v3, 13); + v3 *= PRIME32_1; + p+=4; + v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; + v4 = XXH_rotl32(v4, 13); + v4 *= PRIME32_1; + p+=4; + } + while (p<=limit); state->v1 = v1; state->v2 = v2; @@ -566,7 +673,7 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* inpu return XXH_OK; } -XXH_errorcode XXH32_update (void* state_in, const void* input, unsigned int len) +XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; @@ -578,9 +685,9 @@ XXH_errorcode XXH32_update (void* state_in, const void* input, unsigned int len) -FORCE_INLINE U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess endian) +FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianess endian) { - struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; + XXH_istate32_t* state = (XXH_istate32_t*) state_in; const BYTE * p = (const BYTE*)state->memory; BYTE* bEnd = (BYTE*)state->memory + state->memsize; U32 h32; @@ -620,30 +727,20 @@ FORCE_INLINE U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess } -U32 XXH32_intermediateDigest (void* state_in) +U32 XXH32_digest (const XXH32_state_t* state_in) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_intermediateDigest_endian(state_in, XXH_littleEndian); + return XXH32_digest_endian(state_in, XXH_littleEndian); else - return XXH32_intermediateDigest_endian(state_in, XXH_bigEndian); -} - - -U32 XXH32_digest (void* state_in) -{ - U32 h32 = XXH32_intermediateDigest(state_in); - - XXH_free(state_in); - - return h32; + return XXH32_digest_endian(state_in, XXH_bigEndian); } -FORCE_INLINE XXH_errorcode XXH64_update_endian (void* state_in, const void* input, int len, XXH_endianess endian) +FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianess endian) { - struct XXH_state64_t * state = (struct XXH_state64_t *) state_in; + XXH_istate64_t * state = (XXH_istate64_t *) state_in; const BYTE* p = (const BYTE*)input; const BYTE* const bEnd = p + len; @@ -656,7 +753,7 @@ FORCE_INLINE XXH_errorcode XXH64_update_endian (void* state_in, const void* inpu if (state->memsize + len < 32) // fill in tmp buffer { XXH_memcpy(state->memory + state->memsize, input, len); - state->memsize += len; + state->memsize += (U32)len; return XXH_OK; } @@ -665,10 +762,22 @@ FORCE_INLINE XXH_errorcode XXH64_update_endian (void* state_in, const void* inpu XXH_memcpy(state->memory + state->memsize, input, 32-state->memsize); { const U64* p64 = (const U64*)state->memory; - state->v1 += XXH_readLE64(p64, endian) * PRIME64_2; state->v1 = XXH_rotl64(state->v1, 31); state->v1 *= PRIME64_1; p64++; - state->v2 += XXH_readLE64(p64, endian) * PRIME64_2; state->v2 = XXH_rotl64(state->v2, 31); state->v2 *= PRIME64_1; p64++; - state->v3 += XXH_readLE64(p64, endian) * PRIME64_2; state->v3 = XXH_rotl64(state->v3, 31); state->v3 *= PRIME64_1; p64++; - state->v4 += XXH_readLE64(p64, endian) * PRIME64_2; state->v4 = XXH_rotl64(state->v4, 31); state->v4 *= PRIME64_1; p64++; + state->v1 += XXH_readLE64(p64, endian) * PRIME64_2; + state->v1 = XXH_rotl64(state->v1, 31); + state->v1 *= PRIME64_1; + p64++; + state->v2 += XXH_readLE64(p64, endian) * PRIME64_2; + state->v2 = XXH_rotl64(state->v2, 31); + state->v2 *= PRIME64_1; + p64++; + state->v3 += XXH_readLE64(p64, endian) * PRIME64_2; + state->v3 = XXH_rotl64(state->v3, 31); + state->v3 *= PRIME64_1; + p64++; + state->v4 += XXH_readLE64(p64, endian) * PRIME64_2; + state->v4 = XXH_rotl64(state->v4, 31); + state->v4 *= PRIME64_1; + p64++; } p += 32-state->memsize; state->memsize = 0; @@ -684,11 +793,24 @@ FORCE_INLINE XXH_errorcode XXH64_update_endian (void* state_in, const void* inpu do { - v1 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; v1 = XXH_rotl64(v1, 31); v1 *= PRIME64_1; p+=8; - v2 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; v2 = XXH_rotl64(v2, 31); v2 *= PRIME64_1; p+=8; - v3 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; v3 = XXH_rotl64(v3, 31); v3 *= PRIME64_1; p+=8; - v4 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; v4 = XXH_rotl64(v4, 31); v4 *= PRIME64_1; p+=8; - } while (p<=limit); + v1 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + p+=8; + v2 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + p+=8; + v3 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + p+=8; + v4 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + p+=8; + } + while (p<=limit); state->v1 = v1; state->v2 = v2; @@ -705,7 +827,7 @@ FORCE_INLINE XXH_errorcode XXH64_update_endian (void* state_in, const void* inpu return XXH_OK; } -XXH_errorcode XXH64_update (void* state_in, const void* input, unsigned int len) +XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; @@ -717,33 +839,45 @@ XXH_errorcode XXH64_update (void* state_in, const void* input, unsigned int len) -FORCE_INLINE U64 XXH64_intermediateDigest_endian (void* state_in, XXH_endianess endian) +FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian) { - struct XXH_state64_t * state = (struct XXH_state64_t *) state_in; + XXH_istate64_t * state = (XXH_istate64_t *) state_in; const BYTE * p = (const BYTE*)state->memory; BYTE* bEnd = (BYTE*)state->memory + state->memsize; U64 h64; if (state->total_len >= 32) { - U64 v1 = state->v1; - U64 v2 = state->v2; - U64 v3 = state->v3; - U64 v4 = state->v4; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); - v1 *= PRIME64_2; v1 = XXH_rotl64(v1, 31); v1 *= PRIME64_1; h64 ^= v1; - h64 = h64*PRIME64_1 + PRIME64_4; - - v2 *= PRIME64_2; v2 = XXH_rotl64(v2, 31); v2 *= PRIME64_1; h64 ^= v2; - h64 = h64*PRIME64_1 + PRIME64_4; - - v3 *= PRIME64_2; v3 = XXH_rotl64(v3, 31); v3 *= PRIME64_1; h64 ^= v3; - h64 = h64*PRIME64_1 + PRIME64_4; - - v4 *= PRIME64_2; v4 = XXH_rotl64(v4, 31); v4 *= PRIME64_1; h64 ^= v4; - h64 = h64*PRIME64_1 + PRIME64_4; + v1 *= PRIME64_2; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + h64 ^= v1; + h64 = h64*PRIME64_1 + PRIME64_4; + + v2 *= PRIME64_2; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + h64 ^= v2; + h64 = h64*PRIME64_1 + PRIME64_4; + + v3 *= PRIME64_2; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + h64 ^= v3; + h64 = h64*PRIME64_1 + PRIME64_4; + + v4 *= PRIME64_2; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + h64 ^= v4; + h64 = h64*PRIME64_1 + PRIME64_4; } else { @@ -754,22 +888,25 @@ FORCE_INLINE U64 XXH64_intermediateDigest_endian (void* state_in, XXH_endianess while (p+8<=bEnd) { - U64 k1 = XXH_readLE64((const U64*)p, endian); - k1 *= PRIME64_2; k1 = XXH_rotl64(k1,31); k1 *= PRIME64_1; h64 ^= k1; - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; - p+=8; + U64 k1 = XXH_readLE64((const U64*)p, endian); + k1 *= PRIME64_2; + k1 = XXH_rotl64(k1,31); + k1 *= PRIME64_1; + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; } if (p+4<=bEnd) { - h64 ^= (U64)(XXH_readLE32((const U32*)p, endian)) * PRIME64_1; + h64 ^= (U64)(XXH_readLE32((const U32*)p, endian)) * PRIME64_1; h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; p+=4; } while (p /* size_t */ + + +/***************************** Type *****************************/ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; @@ -75,18 +81,16 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; Simple Hash Functions *****************************/ -unsigned int XXH32 (const void* input, unsigned int len, unsigned int seed); -unsigned long long XXH64 (const void* input, unsigned int len, unsigned long long seed); +unsigned int XXH32 (const void* input, size_t length, unsigned seed); +unsigned long long XXH64 (const void* input, size_t length, unsigned long long seed); /* XXH32() : - Calculate the 32-bits hash of sequence of length "len" stored at memory address "input". - The memory between input & input+len must be valid (allocated and read-accessible). + Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". + The memory between input & input+length must be valid (allocated and read-accessible). "seed" can be used to alter the result predictably. This function successfully passes all SMHasher tests. Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s - Note that "len" is type "int", which means it is limited to 2^31-1. - If your data is larger, use the advanced functions below. XXH64() : Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". */ @@ -96,70 +100,54 @@ XXH64() : /***************************** Advanced Hash Functions *****************************/ - -void* XXH32_init (unsigned int seed); -XXH_errorcode XXH32_update (void* state, const void* input, unsigned int len); -unsigned int XXH32_digest (void* state); - -void* XXH64_init (unsigned long long seed); -XXH_errorcode XXH64_update (void* state, const void* input, unsigned int len); -unsigned long long XXH64_digest (void* state); +typedef struct { long long ll[ 6]; } XXH32_state_t; +typedef struct { long long ll[11]; } XXH64_state_t; /* -These functions calculate the xxhash of an input provided in several small packets, -as opposed to an input provided as a single block. - -It must be started with : -void* XXHnn_init() -The function returns a pointer which holds the state of calculation. -If the pointer is NULL, allocation has failed, so no state can be tracked. - -The state pointer must be provided as "void* state" parameter for XXHnn_update(). -XXHnn_update() can be called as many times as necessary. -The user must provide a valid (allocated) input. -The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. -Note that "len" is type "int", which means it is limited to 2^31-1. -If your data is larger, it is recommended to chunk your data into blocks -of size for example 2^30 (1GB) to avoid any "int" overflow issue. +These structures allow static allocation of XXH states. +States must then be initialized using XXHnn_reset() before first use. -Finally, you can end the calculation anytime, by using XXHnn_digest(). -This function returns the final nn-bits hash. -You must provide the same "void* state" parameter created by XXHnn_init(). -Memory will be freed by XXHnn_digest(). +If you prefer dynamic allocation, please refer to functions below. */ +XXH32_state_t* XXH32_createState(void); +XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); -int XXH32_sizeofState(void); -XXH_errorcode XXH32_resetState(void* state, unsigned int seed); +XXH64_state_t* XXH64_createState(void); +XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); -#define XXH32_SIZEOFSTATE 48 -typedef struct { long long ll[(XXH32_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH32_stateSpace_t; +/* +These functions create and release memory for XXH state. +States must then be initialized using XXHnn_reset() before first use. +*/ -int XXH64_sizeofState(void); -XXH_errorcode XXH64_resetState(void* state, unsigned long long seed); -#define XXH64_SIZEOFSTATE 88 -typedef struct { long long ll[(XXH64_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH64_stateSpace_t; +XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned seed); +XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); +unsigned int XXH32_digest (const XXH32_state_t* statePtr); + +XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); +XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +unsigned long long XXH64_digest (const XXH64_state_t* statePtr); /* -These functions allow user application to make its own allocation for state. +These functions calculate the xxHash of an input provided in multiple smaller packets, +as opposed to an input provided as a single block. -XXHnn_sizeofState() is used to know how much space must be allocated for the xxHash nn-bits state. -Note that the state must be aligned to access 'long long' fields. Memory must be allocated and referenced by a pointer. -This pointer must then be provided as 'state' into XXHnn_resetState(), which initializes the state. +XXH state space must first be allocated, using either static or dynamic method provided above. -For static allocation purposes (such as allocation on stack, or freestanding systems without malloc()), -use the structure XXHnn_stateSpace_t, which will ensure that memory space is large enough and correctly aligned to access 'long long' fields. -*/ +Start a new hash by initializing state with a seed, using XXHnn_reset(). +Then, feed the hash state by calling XXHnn_update() as many times as necessary. +Obviously, input must be valid, meaning allocated and read accessible. +The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. -unsigned int XXH32_intermediateDigest (void* state); -unsigned long long XXH64_intermediateDigest (void* state); -/* -These functions do the same as XXHnn_digest(), generating a nn-bit hash, -but preserve memory context. -This way, it becomes possible to generate intermediate hashes, and then continue feeding data with XXHnn_update(). -To free memory context, use XXHnn_digest(), or free(). +Finally, you can produce a hash anytime, by using XXHnn_digest(). +This function returns the final nn-bits hash. +You can nonetheless continue feeding the hash state with more input, +and therefore get some new hashes, by calling again XXHnn_digest(). + +When you are done, don't forget to free XXH state space, using typically XXHnn_freeState(). */ -- cgit v0.12