From b3b207c4db9378f3eb3518c0778ff03a9b5cdbb3 Mon Sep 17 00:00:00 2001
From: Yann Collet
Date: Sun, 23 Nov 2014 00:46:15 +0100
Subject: New endian & alignment code

---
 NEWS     |  10 +-
 lz4.c    | 420 +++++++++++++++++++++++++++++++++------------------------------
 lz4.h    |  12 +-
 lz4hc.h  |   6 +-
 xxhash.c |  72 +++++------
 5 files changed, 275 insertions(+), 245 deletions(-)

diff --git a/NEWS b/NEWS
index 7edee54..763fa5a 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,12 @@
+r125:
+Changed : endian and alignment code
+Fixed : some alignment warnings under clang
+
 r124:
-Fix : LZ4F_compressBound() using NULL preferencesPtr
-Updated : xxHash, to r37
+New : LZ4 HC streaming mode
+Fixed : LZ4F_compressBound() using null preferencesPtr
+Updated : xxHash to r38
+Updated library number, to 1.4.0
 
 r123:
 Added : experimental lz4frame API, thanks to Takayuki Matsuoka and Christopher Jackson for testings

diff --git a/lz4.c b/lz4.c
index 198b581..579a4fc 100644
--- a/lz4.c
+++ b/lz4.c
@@ -31,63 +31,37 @@
    - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
 */
 
+
 /**************************************
    Tuning parameters
 **************************************/
 /*
  * HEAPMODE :
  * Select how default compression functions will allocate memory for their hash table,
- * in memory stack (0:default, fastest), or in memory heap (1:requires memory allocation (malloc)).
+ * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()).
 */
 #define HEAPMODE 0
 
+/*
+ * CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS :
+ * You can force the code to use unaligned memory access if you know your CPU can handle it.
+ */
+/* #define CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS 1 */
+
 
 /**************************************
    CPU Feature Detection
 **************************************/
-/* 32 or 64 bits ? */
-#if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \
-  || defined(__64BIT__) || defined(__mips64) \
-  || defined(__powerpc64__) || defined(__powerpc64le__) \
-  || defined(__ppc64__) || defined(__ppc64le__) \
-  || defined(__PPC64__) || defined(__PPC64LE__) \
-  || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) \
-  || defined(__s390x__) )   /* Detects 64 bits mode */
-#  define LZ4_ARCH64 1
-#else
-#  define LZ4_ARCH64 0
-#endif
-#define LZ4_32BITS (sizeof(void*)==4)
-#define LZ4_64BITS (sizeof(void*)==8)
-
 /*
- * Little Endian or Big Endian ?
- * Overwrite the #define below if you know your architecture endianess
+ * Unaligned memory access detection
 */
-#include <stdlib.h>   /* Apparently required to detect endianess */
-#if defined (__GLIBC__)
-#  include <endian.h>
-#  if (__BYTE_ORDER == __BIG_ENDIAN)
-#     define LZ4_BIG_ENDIAN 1
-#  endif
-#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN))
-#  define LZ4_BIG_ENDIAN 1
-#elif defined(__sparc) || defined(__sparc__) \
-   || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \
-   || defined(__hpux) || defined(__hppa) \
-   || defined(_MIPSEB) || defined(__s390__)
-#  define LZ4_BIG_ENDIAN 1
+#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
+  || defined(__ARM_FEATURE_UNALIGNED) \
+  || defined(__i386__) || defined(__x86_64__) \
+  || defined(_M_IX86) || defined(_M_X64)
+#  define LZ4_UNALIGNED_ACCESS 1
 #else
-/* Little Endian assumed. PDP Endian and other very rare endian format are unsupported. */
-#endif
-
-/*
- * Unaligned memory access is automatically enabled for "common" CPU, such as x86.
- * For others CPU, such as ARM, the compiler may be more cautious, inserting unnecessary extra code to ensure aligned access property
- * If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance
- */
-#if defined(__ARM_FEATURE_UNALIGNED)
-#  define LZ4_FORCE_UNALIGNED_ACCESS 1
+#  define LZ4_UNALIGNED_ACCESS 0
 #endif
 
 /* Define this parameter if your target system or compiler does not support hardware bit count */
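The whitelist above enables direct unaligned loads only on CPUs known to handle them well (x86/x64, and ARM when __ARM_FEATURE_UNALIGNED is set); every other target takes the byte-by-byte paths introduced below. As a point of comparison, a minimal sketch assuming any C99 compiler (the function name is invented for this note, it is not part of lz4.c): a fixed-size memcpy achieves the same effect without pointer casts, because compilers collapse it into a single load on targets that permit it.

    #include <string.h>   /* memcpy */
    #include <stdint.h>

    /* illustrative only, not from the patch */
    static uint32_t read32_any_align(const void* ptr)
    {
        uint32_t value;
        memcpy(&value, ptr, sizeof(value));   /* typically one MOV on x86, byte loads on strict-alignment CPUs */
        return value;
    }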
@@ -95,18 +69,9 @@
 #  define LZ4_FORCE_SW_BITCOUNT
 #endif
 
-/*
- * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
- * This option may provide a small boost to performance for some big endian cpu, although probably modest.
- * You may set this option to 1 if data will remain within closed environment.
- * This option is useless on Little_Endian CPU (such as x86)
- */
-
-/* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
-
 
 /**************************************
-   Compiler Options
+  Compiler Options
 **************************************/
 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
 /* "restrict" is a known keyword */
 
 #ifdef _MSC_VER    /* Visual Studio */
 #  define FORCE_INLINE static __forceinline
 #  include <intrin.h>   /* For Visual 2005 */
-#  if LZ4_ARCH64   /* 64-bits */
-#    pragma intrinsic(_BitScanForward64) /* For Visual 2005 */
-#    pragma intrinsic(_BitScanReverse64) /* For Visual 2005 */
-#  else            /* 32-bits */
-#    pragma intrinsic(_BitScanForward)   /* For Visual 2005 */
-#    pragma intrinsic(_BitScanReverse)   /* For Visual 2005 */
-#  endif
 #  pragma warning(disable : 4127)   /* disable: C4127: conditional expression is constant */
 #else
 #  ifdef __GNUC__
 #  endif
 #endif
 
-#ifdef _MSC_VER  /* Visual Studio */
-#  define lz4_bswap16(x) _byteswap_ushort(x)
-#else
-#  define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
-#endif
-
 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
 
 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
 
@@ -185,37 +137,167 @@ typedef unsigned long long U64;
 #endif
 
-#if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS)
-#  define _PACKED __attribute__ ((packed))
-#else
-#  define _PACKED
-#endif
 
-#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
-#  if defined(__IBMC__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC)
-#    pragma pack(1)
-#  else
-#    pragma pack(push, 1)
-#  endif
-#endif
+/**************************************
+   Reading and writing into memory
+**************************************/
+static unsigned LZ4_64bits(void) { return sizeof(void*)==8; }
 
-typedef struct { U16 v; }  _PACKED U16_S;
-typedef struct { U32 v; }  _PACKED U32_S;
-typedef struct { U64 v; }  _PACKED U64_S;
-typedef struct {size_t v;} _PACKED size_t_S;
+static unsigned LZ4_isLittleEndian(void)
+{
+    const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
+    return one.c[0];
+}
 
-#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
-#  if defined(__SUNPRO_C) || defined(__SUNPRO_CC)
-#    pragma pack(0)
-#  else
-#    pragma pack(pop)
-#  endif
-#endif
+static U16 LZ4_readLE16(const void* memPtr)
+{
+    if ((LZ4_UNALIGNED_ACCESS) && (LZ4_isLittleEndian()))
+        return *(U16*)memPtr;
+    {
+        const BYTE* p = memPtr;
+        return (U16)((U16)p[0] + (p[1]<<8));
+    }
+}
+
+static void LZ4_writeLE16(void* memPtr, U16 value)
+{
+    if ((LZ4_UNALIGNED_ACCESS) && (LZ4_isLittleEndian()))
+    {
+        *(U16*)memPtr = value;
+        return;
+    }
+    {
+        BYTE* p = memPtr;
+        p[0] = (BYTE) value;
+        p[1] = (BYTE)(value>>8);
+    }
+}
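LZ4_isLittleEndian() trades the removed compile-time endianness lists for a one-byte probe of a union at run time, and LZ4_readLE16()/LZ4_writeLE16() branch on it together with LZ4_UNALIGNED_ACCESS. A standalone sketch of both ideas, with names local to this example rather than the library's:

    #include <stdio.h>

    static unsigned isLittleEndian(void)
    {
        const union { unsigned u; unsigned char c[4]; } one = { 1 };
        return one.c[0];   /* 1 on little-endian hosts, 0 on big-endian */
    }

    int main(void)
    {
        const unsigned char buf[2] = { 0x34, 0x12 };   /* 0x1234 stored little-endian */
        printf("little-endian host : %u\n", isLittleEndian());
        printf("value : 0x%x\n", buf[0] + (buf[1] << 8));   /* byte-wise read works on any host */
        return 0;
    }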
+static U32 LZ4_readLE32(const void* memPtr)
+{
+    if ((LZ4_UNALIGNED_ACCESS) && (LZ4_isLittleEndian()))
+        return *(U32*)memPtr;
+    {
+        const BYTE* p = memPtr;
+        U32 result = (U32)((U32)p[0] + (p[1]<<8) + (p[2]<<16) + ((U32)p[3]<<24));
+        return result;
+    }
+}
+
+/*
+static void LZ4_writeLE32(void* memPtr, U32 value)
+{
+    BYTE* p = memPtr;
+    p[0] = (BYTE) value;
+    p[1] = (BYTE)(value>>8);
+    p[2] = (BYTE)(value>>16);
+    p[3] = (BYTE)(value>>24);
+}
+*/
+
+static void LZ4_copy4(void* dstPtr, const void* srcPtr)
+{
+    if (LZ4_UNALIGNED_ACCESS)
+    {
+        *(U32*)dstPtr = *(U32*)srcPtr;
+        return;
+    }
+    {
+        BYTE* d = dstPtr;
+        const BYTE* s = srcPtr;
+        d[0] = s[0];
+        d[1] = s[1];
+        d[2] = s[2];
+        d[3] = s[3];
+    }
+}
+
+static U64 LZ4_readLE64(const void* memPtr)
+{
+    if ((LZ4_UNALIGNED_ACCESS) && (LZ4_isLittleEndian()))
+        return *(U64*)memPtr;
+    {
+        const BYTE* p = memPtr;
+        return (U64)((U64)p[0] + (p[1]<<8) + (p[2]<<16) + ((U64)p[3]<<24)
+                    + (((U64)p[4])<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
+    }
+}
+
+/*
+static void LZ4_writeLE64(void* memPtr, U64 value)
+{
+    BYTE* p = memPtr;
+    p[0] = (BYTE) value;
+    p[1] = (BYTE)(value>>8);
+    p[2] = (BYTE)(value>>16);
+    p[3] = (BYTE)(value>>24);
+    p[4] = (BYTE)(value>>32);
+    p[5] = (BYTE)(value>>40);
+    p[6] = (BYTE)(value>>48);
+    p[7] = (BYTE)(value>>56);
+}
+*/
+
+static void LZ4_copy8(void* dstPtr, const void* srcPtr)
+{
+    if (LZ4_UNALIGNED_ACCESS)
+    {
+        if (LZ4_64bits())
+            *(U64*)dstPtr = *(U64*)srcPtr;
+        else
+            ((U32*)dstPtr)[0] = ((U32*)srcPtr)[0],
+            ((U32*)dstPtr)[1] = ((U32*)srcPtr)[1];
+        return;
+    }
+    {
+        BYTE* d = dstPtr;
+        const BYTE* s = srcPtr;
+        d[0] = s[0];
+        d[1] = s[1];
+        d[2] = s[2];
+        d[3] = s[3];
+        d[4] = s[4];
+        d[5] = s[5];
+        d[6] = s[6];
+        d[7] = s[7];
+    }
+}
 
-#define A16(x)   (((U16_S *)(x))->v)
-#define A32(x)   (((U32_S *)(x))->v)
-#define A64(x)   (((U64_S *)(x))->v)
-#define AARCH(x) (((size_t_S *)(x))->v)
+#define STEPSIZE sizeof(size_t)
+
+static size_t LZ4_readLE_ARCH(const void* p)
+{
+    if (LZ4_64bits())
+        return (size_t)LZ4_readLE64(p);
+    else
+        return (size_t)LZ4_readLE32(p);
+}
+
+/*
+static void LZ4_writeLE_ARCH(void* p, size_t value)
+{
+    if (LZ4_64BITS)
+        LZ4_writeLE64(p, (U64)value);
+    else
+        LZ4_writeLE32(p, (U32)value);
+}
+
+static void LZ4_copyARCH(void* dstPtr, const void* srcPtr)
+{
+    if (LZ4_64BITS)
+        LZ4_copy8(dstPtr, srcPtr);
+    else
+        LZ4_copy4(dstPtr, srcPtr);
+}
+*/
+
+#if !defined(__GNUC__)
+#  define LZ4_WILDCOPY(d,s,e)   { do { LZ4_copy8(d,s); d+=8; s+=8; } while (d<e); }   /* at the end, d>=e; */
+#else
+#  define LZ4_WILDCOPY64(d,s,e) { do { LZ4_copy8(d,s); d+=8; s+=8; } while (d<e); }   /* at the end, d>=e; */
+#  define LZ4_WILDCOPY32(d,s,e) { if (likely(e-d <= 8)) { LZ4_copy8(d,s); d+=8; s+=8; } else do { LZ4_copy8(d,s); d+=8; s+=8; } while (d<e); }   /* at the end, d>=e; */
-#else
-#  define LZ4_WILDCOPY(d,s,e) { if (likely(e-d <= 8)) LZ4_COPY8(d,s) else do { LZ4_COPY8(d,s) } while (d<e); }
-    return (int)(r>>3);
-#  elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    return (__builtin_clzll(val) >> 3);
-#  else
-    int r;
-    if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
-    if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
-    r += (!val);
-    return r;
-#  endif
-# else
-#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+    if (LZ4_64bits())
+    {
+#  if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
     unsigned long r = 0;
-    _BitScanForward64( &r, val );
+    _BitScanForward64( &r, (U64)val );
     return (int)(r>>3);
 #  elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    return (__builtin_ctzll(val) >> 3);
+    return (__builtin_ctzll((U64)val) >> 3);
 #  else
     static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
     return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
 #  endif
-#  endif
-}
-
-#else
-
-static int LZ4_NbCommonBytes (register U32 val)
-{
-# if defined(LZ4_BIG_ENDIAN)
-#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    unsigned long r = 0;
-    _BitScanReverse( &r, val );
-    return (int)(r>>3);
-#  elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    return (__builtin_clz(val) >> 3);
-#  else
-    int r;
-    if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
-    r += (!val);
-    return r;
-#  endif
-# else
+    }
+    /* 32 bits */
+    {
 #  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
     unsigned long r;
-    _BitScanForward( &r, val );
+    _BitScanForward( &r, (U32)val );
     return (int)(r>>3);
 #  elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    return (__builtin_ctz(val) >> 3);
+    return (__builtin_ctz((U32)val) >> 3);
 #  else
     static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
     return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
 #  endif
-# endif
+    }
 }
-#endif
-
-
-/********************************
-   Compression functions
-********************************/
-int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; }
-int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); }
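The #else fallbacks of LZ4_NbCommonBytes count trailing zero bytes without a hardware instruction: val & -val isolates the lowest set bit, and multiplying by a De Bruijn constant turns that power of two into a unique index in the top bits. A self-contained sketch of the 32-bit variant (the function name is this example's own):

    #include <stdio.h>
    #include <stdint.h>

    static const int DeBruijnBytePos[32] = {
        0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1,
        3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };

    static int nbCommonBytes32(uint32_t v)   /* v = xor of two byte sequences, v != 0 */
    {
        return DeBruijnBytePos[(uint32_t)((v & -(int32_t)v) * 0x077CB531U) >> 27];
    }

    int main(void)
    {
        printf("%d\n", nbCommonBytes32(0x00110000));   /* first difference in byte 2 : prints 2 */
        return 0;
    }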
-static int LZ4_hashSequence(U32 sequence, tableType_t tableType)
+static U32 LZ4_hashSequence(U32 sequence, tableType_t tableType)
 {
     if (tableType == byU16)
         return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
@@ -380,15 +403,15 @@ static int LZ4_hashSequence(U32 sequence, tableType_t tableType)
     return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
 }
 
-static int LZ4_hashPosition(const BYTE* p, tableType_t tableType) { return LZ4_hashSequence(A32(p), tableType); }
+static U32 LZ4_hashPosition(const BYTE* p, tableType_t tableType) { return LZ4_hashSequence(LZ4_readLE32(p), tableType); }
 
 static void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase)
 {
     switch (tableType)
     {
-    case byPtr: { const BYTE** hashTable = (const BYTE**) tableBase; hashTable[h] = p; break; }
-    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); break; }
-    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); break; }
+    case byPtr: { const BYTE** hashTable = (const BYTE**) tableBase; hashTable[h] = p; return; }
+    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; }
+    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; }
     }
 }
 
@@ -417,13 +440,13 @@ static unsigned LZ4_count(const BYTE* pIn, const BYTE* pRef, const BYTE* pInLimit)
     while (likely(pIn<pInLimit-(STEPSIZE-1)))
         if ( ((dictIssue==dictSmall) ? (ref>=lowRefLimit) : 1) && (ref+MAX_DISTANCE>=ip)
-            && (A32(ref+refDelta)==A32(ip)) )
+            && (LZ4_readLE32(ref+refDelta)==LZ4_readLE32(ip)) )
         { token=op++; *token=0; goto _next_match; }
 
         /* Prepare next loop */
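LZ4_hashSequence() is a Fibonacci-style multiplicative hash: the four bytes at a position are multiplied by Knuth's constant 2654435761 and only the top bits are kept as the table index, so similar sequences still spread across the table. A sketch with concrete numbers, assuming the byU32 layout and the default table size (hash log 12, hence a 32-12 = 20 bit shift; the name is a stand-in):

    #include <stdio.h>
    #include <stdint.h>

    /* illustrative stand-in for LZ4_hashSequence with LZ4_HASHLOG == 12 */
    static uint32_t hashSequence(uint32_t sequence)
    {
        return (sequence * 2654435761U) >> 20;   /* index into a 4096-entry table */
    }

    int main(void)
    {
        printf("%u\n", hashSequence(0x64636261));   /* bytes "abcd" read little-endian */
        return 0;
    }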
@@ -646,16 +669,16 @@ _last_literals:
 int LZ4_compress(const char* source, char* dest, int inputSize)
 {
 #if (HEAPMODE)
-    void* ctx = ALLOCATOR(LZ4_STREAMSIZE_U32, 4);   /* Aligned on 4-bytes boundaries */
+    void* ctx = ALLOCATOR(LZ4_STREAMSIZE_U64, 8);   /* Aligned on 8-bytes boundaries */
 #else
-    U32 ctx[LZ4_STREAMSIZE_U32] = {0};      /* Ensure data is aligned on 4-bytes boundaries */
+    U64 ctx[LZ4_STREAMSIZE_U64] = {0};      /* Ensure data is aligned on 8-bytes boundaries */
 #endif
     int result;
 
     if (inputSize < (int)LZ4_64KLIMIT)
         result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, byU16, noDict, noDictIssue);
     else
-        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, LZ4_64BITS ? byU32 : byPtr, noDict, noDictIssue);
+        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue);
 
 #if (HEAPMODE)
     FREEMEM(ctx);
@@ -666,16 +689,16 @@ int LZ4_compress(const char* source, char* dest, int inputSize)
 int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
 {
 #if (HEAPMODE)
-    void* ctx = ALLOCATOR(LZ4_STREAMSIZE_U32, 4);   /* Aligned on 4-bytes boundaries */
+    void* ctx = ALLOCATOR(LZ4_STREAMSIZE_U64, 8);   /* Aligned on 8-bytes boundaries */
 #else
-    U32 ctx[LZ4_STREAMSIZE_U32] = {0};      /* Ensure data is aligned on 4-bytes boundaries */
+    U64 ctx[LZ4_STREAMSIZE_U64] = {0};      /* Ensure data is aligned on 8-bytes boundaries */
 #endif
     int result;
 
     if (inputSize < (int)LZ4_64KLIMIT)
         result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue);
     else
-        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64BITS ? byU32 : byPtr, noDict, noDictIssue);
+        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue);
 
 #if (HEAPMODE)
     FREEMEM(ctx);
@@ -700,7 +723,7 @@ void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
 
 LZ4_stream_t* LZ4_createStream(void)
 {
-    LZ4_stream_t* lz4s = (LZ4_stream_t*)ALLOCATOR(4, LZ4_STREAMSIZE_U32);
+    LZ4_stream_t* lz4s = (LZ4_stream_t*)ALLOCATOR(8, LZ4_STREAMSIZE_U64);
     LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal));   /* A compilation error here means LZ4_STREAMSIZE is not large enough */
     LZ4_resetStream(lz4s);
     return lz4s;
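The contexts above now live in a U64 array (or an 8-byte heap block) instead of a U32 one. The point is alignment rather than size: a local array is aligned for its element type, so unsigned long long elements let the table be read safely through the 64-bit paths. A minimal sketch, assuming the default LZ4_MEMORY_USAGE of 14 (the array length mirrors LZ4_STREAMSIZE_U64):

    #include <stdio.h>
    #include <stddef.h>

    int main(void)
    {
        unsigned long long ctx[(1 << (14-3)) + 4] = {0};   /* 2052 x 8 bytes */
        printf("misalignment : %u\n", (unsigned)((size_t)ctx % 8));   /* 0 on common ABIs */
        return 0;
    }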
@@ -959,11 +982,12 @@ FORCE_INLINE int LZ4_decompress_generic(
         LZ4_WILDCOPY(op, ip, cpy); ip -= (op-cpy); op = cpy;
 
         /* get offset */
-        LZ4_READ_LITTLEENDIAN_16(match,cpy,ip); ip+=2;
+        match = cpy - LZ4_readLE16(ip); ip+=2;
         if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error;   /* Error : offset outside destination buffer */
 
         /* get matchlength */
-        if ((length=(token&ML_MASK)) == ML_MASK)
+        length = token & ML_MASK;
+        if (length == ML_MASK)
         {
             unsigned s;
             do
@@ -1020,9 +1044,9 @@ FORCE_INLINE int LZ4_decompress_generic(
             op[2] = match[2];
             op[3] = match[3];
             match += dec32table[op-match];
-            A32(op+4) = A32(match);
+            LZ4_copy4(op+4, match);
             op += 8; match -= dec64;
-        } else { LZ4_COPY8(op,match); }
+        } else { LZ4_copy8(op, match); op+=8; match+=8; }
 
         if (unlikely(cpy>oend-12))
         {
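The dec32table/LZ4_copy4 dance above exists because a match may overlap the bytes it is currently producing: with offset 1 the copy must behave as if done byte by byte, otherwise a wide load would read bytes that have not been written yet. A tiny illustration of the semantics the decoder has to preserve (plain C, names invented here):

    #include <stdio.h>

    static void overlapCopy(char* dst, const char* src, int len)
    {
        int i;
        for (i = 0; i < len; i++) dst[i] = src[i];   /* strictly forward, one byte at a time */
    }

    int main(void)
    {
        char buf[16] = "a";
        overlapCopy(buf + 1, buf, 7);   /* offset 1 : replicates the previous byte */
        printf("%s\n", buf);            /* prints "aaaaaaaa" */
        return 0;
    }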
@@ -1079,7 +1103,7 @@ typedef struct
 */
 LZ4_streamDecode_t* LZ4_createStreamDecode(void)
 {
-    LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOCATOR(sizeof(U32), LZ4_STREAMDECODESIZE_U32);
+    LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOCATOR(sizeof(U64), LZ4_STREAMDECODESIZE_U64);
     return lz4s;
 }
 
@@ -1241,7 +1265,7 @@ int LZ4_resetStreamState(void* state, const char* inputBuffer)
 
 void* LZ4_create (const char* inputBuffer)
 {
-    void* lz4ds = ALLOCATOR(4, LZ4_STREAMSIZE_U32);
+    void* lz4ds = ALLOCATOR(8, LZ4_STREAMSIZE_U64);
     LZ4_init ((LZ4_stream_t_internal*)lz4ds, (const BYTE*)inputBuffer);
     return lz4ds;
 }
 
@@ -1267,7 +1291,7 @@ int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize)
     if (inputSize < (int)LZ4_64KLIMIT)
         return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, byU16, noDict, noDictIssue);
     else
-        return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, LZ4_64BITS ? byU32 : byPtr, noDict, noDictIssue);
+        return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue);
 }
 
 int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize)
@@ -1278,7 +1302,7 @@ int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize)
     if (inputSize < (int)LZ4_64KLIMIT)
         return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue);
     else
-        return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64BITS ? byU32 : byPtr, noDict, noDictIssue);
+        return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue);
 }
 
 /* Obsolete streaming decompression functions */

diff --git a/lz4.h b/lz4.h
index 0350f6a..f995b05 100644
--- a/lz4.h
+++ b/lz4.h
@@ -172,14 +172,14 @@ int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize);
    Experimental Streaming Compression Functions
 ***********************************************/
 
-#define LZ4_STREAMSIZE_U32 ((1 << (LZ4_MEMORY_USAGE-2)) + 8)
-#define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U32 * sizeof(unsigned int))
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+#define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
 /*
  * LZ4_stream_t
  * information structure to track an LZ4 stream.
  * important : init this structure content before first use !
 */
-typedef struct { unsigned int table[LZ4_STREAMSIZE_U32]; } LZ4_stream_t;
+typedef struct { unsigned long long table[LZ4_STREAMSIZE_U64]; } LZ4_stream_t;
 
 /*
  * LZ4_resetStream
@@ -234,14 +234,14 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_streamPtr, char* safeBuffer, int dictSize);
    Experimental Streaming Decompression Functions
 ************************************************/
 
-#define LZ4_STREAMDECODESIZE_U32 8
-#define LZ4_STREAMDECODESIZE    (LZ4_STREAMDECODESIZE_U32 * sizeof(unsigned int))
+#define LZ4_STREAMDECODESIZE_U64 4
+#define LZ4_STREAMDECODESIZE    (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
 /*
  * LZ4_streamDecode_t
  * information structure to track an LZ4 stream.
 * important : init this structure content using LZ4_setStreamDecode or memset() before first use !
 */
-typedef struct { unsigned int table[LZ4_STREAMDECODESIZE_U32]; } LZ4_streamDecode_t;
+typedef struct { unsigned long long table[LZ4_STREAMDECODESIZE_U64]; } LZ4_streamDecode_t;
 
 /*
  * If you prefer dynamic allocation methods,

diff --git a/lz4hc.h b/lz4hc.h
index 23ad7c1..26ba6d3 100644
--- a/lz4hc.h
+++ b/lz4hc.h
@@ -105,9 +105,9 @@ They just use the externally allocated memory for state instead of allocating their own (on stack or heap).
 /**************************************
    Experimental Streaming Functions
 **************************************/
-#define LZ4_STREAMHCSIZE_U32 65548
-#define LZ4_STREAMHCSIZE     (LZ4_STREAMHCSIZE_U32 * sizeof(unsigned int))
-typedef struct { unsigned int table[LZ4_STREAMHCSIZE_U32]; } LZ4_streamHC_t;
+#define LZ4_STREAMHCSIZE_U64 32774
+#define LZ4_STREAMHCSIZE     (LZ4_STREAMHCSIZE_U64 * sizeof(unsigned long long))
+typedef struct { unsigned long long table[LZ4_STREAMHCSIZE_U64]; } LZ4_streamHC_t;
 
 /*
 This structure allows static allocation of LZ4 HC streaming state.
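All of the renamed size constants keep the state sizes byte-identical; only the element type, and therefore the guaranteed alignment, changes. A compile-time check of that arithmetic, assuming a 4-byte unsigned int, an 8-byte unsigned long long and the default LZ4_MEMORY_USAGE of 14:

    /* a negative array size would fail compilation if the sizes differed */
    typedef int lz4_stream_size_check  [(((1 << (14-2)) + 8) * 4 == ((1 << (14-3)) + 4) * 8) ? 1 : -1];   /* 16416 bytes */
    typedef int lz4hc_stream_size_check[(65548 * 4 == 32774 * 8) ? 1 : -1];                               /* 262192 bytes */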
diff --git a/xxhash.c b/xxhash.c
index e6c2f31..24a64b5 100644
--- a/xxhash.c
+++ b/xxhash.c
@@ -84,11 +84,11 @@ You can contact the author at :
 // Modify the local functions below should you wish to use some other memory routines
 // for malloc(), free()
 #include <stdlib.h>
-FORCE_INLINE void* XXH_malloc(size_t s) { return malloc(s); }
-FORCE_INLINE void  XXH_free  (void* p)  { free(p); }
+static void* XXH_malloc(size_t s) { return malloc(s); }
+static void  XXH_free  (void* p)  { free(p); }
 // for memcpy()
 #include <string.h>
-FORCE_INLINE void* XXH_memcpy(void* dest, const void* src, size_t size)
+static void* XXH_memcpy(void* dest, const void* src, size_t size)
 {
     return memcpy(dest,src,size);
 }
@@ -221,28 +221,28 @@ static const int one = 1;
 //****************************
 typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
 
-FORCE_INLINE U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align)
+FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
 {
     if (align==XXH_unaligned)
         return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr));
     else
-        return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr);
+        return endian==XXH_littleEndian ? *(U32*)ptr : XXH_swap32(*(U32*)ptr);
 }
 
-FORCE_INLINE U32 XXH_readLE32(const U32* ptr, XXH_endianess endian)
+FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
 {
     return XXH_readLE32_align(ptr, endian, XXH_unaligned);
 }
 
-FORCE_INLINE U64 XXH_readLE64_align(const U64* ptr, XXH_endianess endian, XXH_alignment align)
+FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
 {
     if (align==XXH_unaligned)
         return endian==XXH_littleEndian ? A64(ptr) : XXH_swap64(A64(ptr));
     else
-        return endian==XXH_littleEndian ? *ptr : XXH_swap64(*ptr);
+        return endian==XXH_littleEndian ? *(U64*)ptr : XXH_swap64(*(U64*)ptr);
 }
 
-FORCE_INLINE U64 XXH_readLE64(const U64* ptr, XXH_endianess endian)
+FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
 {
     return XXH_readLE64_align(ptr, endian, XXH_unaligned);
 }
@@ -256,7 +256,7 @@ FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
     const BYTE* p = (const BYTE*)input;
     const BYTE* bEnd = p + len;
     U32 h32;
-#define XXH_get32bits(p) XXH_readLE32_align((const U32*)p, endian, align)
+#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
 
 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
     if (p==NULL)
@@ -361,7 +361,7 @@ FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
     const BYTE* p = (const BYTE*)input;
     const BYTE* bEnd = p + len;
     U64 h64;
-#define XXH_get64bits(p) XXH_readLE64_align((const U64*)p, endian, align)
+#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
 
 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
     if (p==NULL)
@@ -509,8 +509,8 @@ typedef struct
     U32 v2;
     U32 v3;
     U32 v4;
+    U32 mem32[4];   /* defined as U32 for alignment */
     U32 memsize;
-    char memory[16];
 } XXH_istate32_t;
 
 typedef struct
@@ -521,8 +521,8 @@ typedef struct
     U64 v2;
     U64 v3;
     U64 v4;
+    U64 mem64[4];   /* defined as U64 for alignment */
     U32 memsize;
-    char memory[32];
 } XXH_istate64_t;
 
@@ -592,16 +592,16 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
 
     if (state->memsize + len < 16)   // fill in tmp buffer
     {
-        XXH_memcpy(state->memory + state->memsize, input, len);
+        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
         state->memsize += (U32)len;
         return XXH_OK;
     }
 
     if (state->memsize)   // some data left from previous update
     {
-        XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize);
+        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
         {
-            const U32* p32 = (const U32*)state->memory;
+            const U32* p32 = state->mem32;
             state->v1 += XXH_readLE32(p32, endian) * PRIME32_2;
             state->v1 = XXH_rotl32(state->v1, 13);
             state->v1 *= PRIME32_1;
@@ -633,19 +633,19 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
 
         do
         {
-            v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2;
+            v1 += XXH_readLE32(p, endian) * PRIME32_2;
             v1 = XXH_rotl32(v1, 13);
             v1 *= PRIME32_1;
             p+=4;
-            v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2;
+            v2 += XXH_readLE32(p, endian) * PRIME32_2;
             v2 = XXH_rotl32(v2, 13);
             v2 *= PRIME32_1;
             p+=4;
-            v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2;
+            v3 += XXH_readLE32(p, endian) * PRIME32_2;
             v3 = XXH_rotl32(v3, 13);
             v3 *= PRIME32_1;
             p+=4;
-            v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2;
+            v4 += XXH_readLE32(p, endian) * PRIME32_2;
             v4 = XXH_rotl32(v4, 13);
             v4 *= PRIME32_1;
             p+=4;
@@ -660,7 +660,7 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
 
     if (p < bEnd)
     {
-        XXH_memcpy(state->memory, p, bEnd-p);
+        XXH_memcpy(state->mem32, p, bEnd-p);
         state->memsize = (int)(bEnd-p);
     }
 
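Replacing char memory[16] / char memory[32] with U32 mem32[4] / U64 mem64[4] is what fixes the alignment warnings mentioned in the NEWS entry: the buffered bytes are later re-read as U32/U64 words, and an array field is only guaranteed to be aligned for its declared element type. A reduced sketch of the idea (not the real xxHash state):

    typedef struct {
        unsigned v1, v2, v3, v4;
        unsigned mem32[4];   /* 4-byte aligned : reading it back as U32 is well-defined */
        unsigned memsize;    /* placed after the buffer, matching the new layout */
    } istate32_demo;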
@@ -682,8 +682,8 @@ XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
 FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianess endian)
 {
     XXH_istate32_t* state = (XXH_istate32_t*) state_in;
-    const BYTE * p = (const BYTE*)state->memory;
-    BYTE* bEnd = (BYTE*)state->memory + state->memsize;
+    const BYTE * p = (const BYTE*)state->mem32;
+    BYTE* bEnd = (BYTE*)(state->mem32) + state->memsize;
     U32 h32;
 
     if (state->total_len >= 16)
@@ -699,7 +699,7 @@ FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianess endian)
 
     while (p+4<=bEnd)
     {
-        h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3;
+        h32 += XXH_readLE32(p, endian) * PRIME32_3;
         h32 = XXH_rotl32(h32, 17) * PRIME32_4;
         p+=4;
     }
@@ -746,16 +746,16 @@ FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
 
     if (state->memsize + len < 32)   // fill in tmp buffer
     {
-        XXH_memcpy(state->memory + state->memsize, input, len);
+        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
         state->memsize += (U32)len;
         return XXH_OK;
     }
 
     if (state->memsize)   // some data left from previous update
     {
-        XXH_memcpy(state->memory + state->memsize, input, 32-state->memsize);
+        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
         {
-            const U64* p64 = (const U64*)state->memory;
+            const U64* p64 = state->mem64;
             state->v1 += XXH_readLE64(p64, endian) * PRIME64_2;
             state->v1 = XXH_rotl64(state->v1, 31);
             state->v1 *= PRIME64_1;
@@ -787,19 +787,19 @@ FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
 
         do
         {
-            v1 += XXH_readLE64((const U64*)p, endian) * PRIME64_2;
+            v1 += XXH_readLE64(p, endian) * PRIME64_2;
             v1 = XXH_rotl64(v1, 31);
             v1 *= PRIME64_1;
             p+=8;
-            v2 += XXH_readLE64((const U64*)p, endian) * PRIME64_2;
+            v2 += XXH_readLE64(p, endian) * PRIME64_2;
             v2 = XXH_rotl64(v2, 31);
             v2 *= PRIME64_1;
             p+=8;
-            v3 += XXH_readLE64((const U64*)p, endian) * PRIME64_2;
+            v3 += XXH_readLE64(p, endian) * PRIME64_2;
             v3 = XXH_rotl64(v3, 31);
             v3 *= PRIME64_1;
             p+=8;
-            v4 += XXH_readLE64((const U64*)p, endian) * PRIME64_2;
+            v4 += XXH_readLE64(p, endian) * PRIME64_2;
             v4 = XXH_rotl64(v4, 31);
             v4 *= PRIME64_1;
             p+=8;
@@ -814,7 +814,7 @@ FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
 
     if (p < bEnd)
     {
-        XXH_memcpy(state->memory, p, bEnd-p);
+        XXH_memcpy(state->mem64, p, bEnd-p);
         state->memsize = (int)(bEnd-p);
     }
 
@@ -836,8 +836,8 @@ XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
 FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian)
 {
     XXH_istate64_t * state = (XXH_istate64_t *) state_in;
-    const BYTE * p = (const BYTE*)state->memory;
-    BYTE* bEnd = (BYTE*)state->memory + state->memsize;
+    const BYTE * p = (const BYTE*)state->mem64;
+    BYTE* bEnd = (BYTE*)state->mem64 + state->memsize;
     U64 h64;
 
     if (state->total_len >= 32)
@@ -882,7 +882,7 @@ FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian)
 
     while (p+8<=bEnd)
     {
-        U64 k1 = XXH_readLE64((const U64*)p, endian);
+        U64 k1 = XXH_readLE64(p, endian);
         k1 *= PRIME64_2;
         k1 = XXH_rotl64(k1,31);
         k1 *= PRIME64_1;
@@ -893,7 +893,7 @@ FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian)
 
     if (p+4<=bEnd)
    {
-        h64 ^= (U64)(XXH_readLE32((const U32*)p, endian)) * PRIME64_1;
+        h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
         h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
         p+=4;
     }
--
cgit v0.12