From 633c51904ebc1e3d3f51d4793d24f4f39ce579cf Mon Sep 17 00:00:00 2001 From: "yann.collet.73@gmail.com" Date: Sat, 2 Mar 2013 23:34:21 +0000 Subject: - New cmake file, by Nobuhiro Iwamatsu, which can also produce shared and static libraries. - Improved decoding speed, even more for 64-bits, and "safe" version - Slight speed increase for LZ4-HC - Pushed a useless parameter down the list in lz4.c git-svn-id: https://lz4.googlecode.com/svn/trunk@90 650e7d94-2a16-8b24-b05c-7c0b3f6821cd --- bench.c | 3 +- cmake/CMakeLists.txt | 72 ++++++++++++++++++++++++++++------ cmake/release_COPYING.txt | 21 ++++++++++ fuzzer.c | 7 ++-- lz4.c | 98 +++++++++++++++++++++++++++++++---------------- lz4hc.c | 53 +++++++++++++++---------- 6 files changed, 184 insertions(+), 70 deletions(-) create mode 100644 cmake/release_COPYING.txt diff --git a/bench.c b/bench.c index 2758007..d185811 100644 --- a/bench.c +++ b/bench.c @@ -433,7 +433,8 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel) while(BMK_GetMilliSpan(milliTime) < TIMELOOP) { for (chunkNb=0; chunkNb> 16; - for (i = 0; i < NB_ATTEMPTS; i++) { + for (i = 0; i < NB_ATTEMPTS; i++) + { printf("\r%7i /%7i\r", i, NB_ATTEMPTS); FUZ_rand(&seed); @@ -171,7 +172,7 @@ int main() { // Test decoding with output size being exactly what's necessary => must work ret = LZ4_uncompress((char*)&cbuf[off_full], (char*)testOut, LEN); - if (ret<0) { printf("decompression failed despite sufficient space: seed %u, len %d\n", seed, LEN); goto _output_error; } + if (ret<0) { printf("decompression failed despite correct space: seed %u, len %d\n", seed, LEN); goto _output_error; } // Test decoding with one byte missing => must fail ret = LZ4_uncompress((char*)&cbuf[off_full], (char*)testOut, LEN-1); @@ -185,7 +186,7 @@ int main() { ret = LZ4_uncompress_unknownOutputSize((char*)&cbuf[off_full], (char*)testOut, len, LEN+1); if (ret<0) { printf("decompression failed despite sufficient space: seed %u, len %d\n", seed, LEN); goto _output_error; } - // Test decoding with output size being exactly what's necessary => should work + // Test decoding with output size being exactly what's necessary => must work ret = LZ4_uncompress_unknownOutputSize((char*)&cbuf[off_full], (char*)testOut, len, LEN); if (ret<0) { printf("decompression failed despite sufficient space: seed %u, len %d\n", seed, LEN); goto _output_error; } diff --git a/lz4.c b/lz4.c index a35f12b..1f2eafd 100644 --- a/lz4.c +++ b/lz4.c @@ -41,14 +41,6 @@ // Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache #define MEMORY_USAGE 14 -// NOTCOMPRESSIBLE_DETECTIONLEVEL : -// Decreasing this value will make the algorithm skip faster data segments considered "incompressible" -// This may decrease compression ratio dramatically, but will be faster on incompressible data -// Increasing this value will make the algorithm search more before declaring a segment "incompressible" -// This could improve compression a bit, but will be slower on incompressible data -// The default value (6) is recommended -#define NOTCOMPRESSIBLE_DETECTIONLEVEL 6 - // BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE : // This will provide a small boost to performance for big endian cpu, but the resulting compressed stream will be incompatible with little-endian CPU. // You can set this option to 1 in situations where data will remain within closed environment @@ -188,6 +180,13 @@ typedef struct _U64_S { U64 v; } U64_S; #define HASHTABLESIZE (1 << HASH_LOG) #define HASH_MASK (HASHTABLESIZE - 1) +// NOTCOMPRESSIBLE_DETECTIONLEVEL : +// Decreasing this value will make the algorithm skip faster data segments considered "incompressible" +// This may decrease compression ratio dramatically, but will be faster on incompressible data +// Increasing this value will make the algorithm search more before declaring a segment "incompressible" +// This could improve compression a bit, but will be slower on incompressible data +// The default value (6) is recommended +#define NOTCOMPRESSIBLE_DETECTIONLEVEL 6 #define SKIPSTRENGTH (NOTCOMPRESSIBLE_DETECTIONLEVEL>2?NOTCOMPRESSIBLE_DETECTIONLEVEL:2) #define STACKLIMIT 13 #define HEAPMODE (HASH_LOG>STACKLIMIT) // Defines if memory is allocated into the stack (local variable), or into the heap (malloc()). @@ -309,7 +308,7 @@ static inline int LZ4_NbCommonBytes (register U32 val) #endif #else #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; + unsigned long r; _BitScanForward( &r, val ); return (int)(r>>3); #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) @@ -358,7 +357,7 @@ static inline int LZ4_compressCtx(void** ctx, BYTE* op = (BYTE*) dest; BYTE* const oend = op + maxOutputSize; - int len, length; + int length; const int skipStrength = SKIPSTRENGTH; U32 forwardH; @@ -430,7 +429,14 @@ static inline int LZ4_compressCtx(void** ctx, } else *token = (length<=(int)RUN_MASK) { *token=(RUN_MASK< 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } + if (length>=(int)RUN_MASK) + { + int len; + *token=(RUN_MASK< 254 ; len-=255) *op++ = 255; + *op++ = (BYTE)len; + } else *token = (length<>8) > oend) return 0; // Check output limit - if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; } - else *token += len; + length = (int)(ip - anchor); + if unlikely(op + (1 + LASTLITERALS) + (length>>8) > oend) return 0; // Check output limit + if (length>=(int)ML_MASK) + { + *token += ML_MASK; + length -= ML_MASK; + for (; length > 509 ; length-=510) { *op++ = 255; *op++ = 255; } + if (length > 254) { length-=255; *op++ = 255; } + *op++ = (BYTE)length; + } + else *token += length; // Test end of chunk if (ip > mflimit) { anchor = ip; break; } @@ -713,7 +726,6 @@ int LZ4_uncompress(const char* source, unsigned token; - size_t length; size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; #if LZ4_ARCH64 size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; @@ -723,13 +735,15 @@ int LZ4_uncompress(const char* source, // Main Loop while (1) { + size_t length; + // get runlength token = *ip++; if ((length=(token>>ML_BITS)) == RUN_MASK) { size_t len; for (;(len=*ip++)==255;length+=255){} length += len; } // copy literals cpy = op+length; - if unlikely(cpy>oend-COPYLENGTH) + if (cpy>oend-COPYLENGTH) { if (cpy != oend) goto _output_error; // Error : not enough place for another match (min 4) + 5 literals memcpy(op, ip, length); @@ -740,7 +754,7 @@ int LZ4_uncompress(const char* source, // get offset LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; - if unlikely(ref < (BYTE* const)dest) goto _output_error; // Error : offset create reference outside destination buffer + if unlikely(ref < (BYTE* const)dest) goto _output_error; // Error : offset outside destination buffer // get matchlength if ((length=(token&ML_MASK)) == ML_MASK) { for (;*ip==255;length+=255) {ip++;} length += *ip++; } @@ -762,16 +776,17 @@ int LZ4_uncompress(const char* source, op += STEPSIZE-4; ref -= dec64; } else { LZ4_COPYSTEP(ref,op); } cpy = op + length - (STEPSIZE-4); - if (cpy>oend-COPYLENGTH) + + if unlikely(cpy>oend-(COPYLENGTH)-(STEPSIZE-4)) { - if (cpy > oend) goto _output_error; // Error : request to write beyond destination buffer + if (cpy > oend-LASTLITERALS) goto _output_error; // Error : last 5 bytes must be literals LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH)); while(op>ML_BITS)) == RUN_MASK) { int s=255; while ((ip>ML_BITS)) == RUN_MASK) + { + int s=255; + while (likely(ipoend-COPYLENGTH) || (ip+length>iend-COPYLENGTH)) + if ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) { if (cpy > oend) goto _output_error; // Error : writes beyond output buffer - if (ip+length != iend) goto _output_error; // Error : LZ4 format requires to consume all input at this stage + if (ip+length != iend) goto _output_error; // Error : LZ4 format requires to consume all input at this stage (no match within the last 11 bytes, and at least 8 remaining input bytes for another match+literals) memcpy(op, ip, length); op += length; break; // Necessarily EOF, due to parsing restrictions @@ -829,10 +851,19 @@ int LZ4_uncompress_unknownOutputSize( // get offset LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; - if (ref < (BYTE* const)dest) goto _output_error; // Error : offset creates reference outside of destination buffer + if unlikely(ref < (BYTE* const)dest) goto _output_error; // Error : offset outside of destination buffer // get matchlength - if ((length=(token&ML_MASK)) == ML_MASK) { while (ipoend-COPYLENGTH) + + if unlikely(cpy>oend-(COPYLENGTH+(STEPSIZE-4))) { - if (cpy > oend) goto _output_error; // Error : request to write outside of destination buffer + if (cpy > oend-LASTLITERALS) goto _output_error; // Error : last 5 bytes must be literals LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH)); while(opbase); int nbAttempts=MAX_NB_ATTEMPTS; - size_t ml=0; + size_t repl=0, ml=0; + U16 delta; // HC4 match finder LZ4HC_Insert(hc4, ip); ref = HASH_POINTER(ip); -#if 1 +#define REPEAT_OPTIMIZATION +#ifdef REPEAT_OPTIMIZATION + // Detect repetitive sequences of length <= 4 if (ref >= ip-4) // potential repetition { if (A32(ref) == A32(ip)) // confirmed { - const U16 delta = (U16)(ip-ref); - const BYTE* ptr = ip; - const BYTE* end; - ml = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH; - end = ip + ml - (MINMATCH-1); - while(ptr < end-delta) - { - DELTANEXT(ptr) = delta; // Pre-Load - ptr++; - } - do - { - DELTANEXT(ptr) = delta; - HashTable[HASH_VALUE(ptr)] = (ptr) - base; // Head of chain - ptr++; - } while(ptr < end); - hc4->nextToUpdate = end; + delta = (U16)(ip-ref); + repl = ml = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH; *matchpos = ref; } ref = GETNEXT(ref); } #endif - while ((ref >= ip-MAX_DISTANCE) && (ref >= hc4->base) && (nbAttempts)) + while ((ref >= ip-MAX_DISTANCE) && (nbAttempts)) { nbAttempts--; if (*(ref+ml) == *(ip+ml)) @@ -423,6 +411,29 @@ forceinline static int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, ref = GETNEXT(ref); } +#ifdef REPEAT_OPTIMIZATION + // Complete table + if (repl) + { + const BYTE* ptr = ip; + const BYTE* end; + + end = ip + repl - (MINMATCH-1); + while(ptr < end-delta) + { + DELTANEXT(ptr) = delta; // Pre-Load + ptr++; + } + do + { + DELTANEXT(ptr) = delta; + HashTable[HASH_VALUE(ptr)] = (ptr) - base; // Head of chain + ptr++; + } while(ptr < end); + hc4->nextToUpdate = end; + } +#endif + return (int)ml; } @@ -440,7 +451,7 @@ forceinline static int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, LZ4HC_Insert(hc4, ip); ref = HASH_POINTER(ip); - while ((ref >= ip-MAX_DISTANCE) && (ref >= hc4->base) && (nbAttempts)) + while ((ref >= ip-MAX_DISTANCE) && (nbAttempts)) { nbAttempts--; if (*(startLimit + longest) == *(ref - delta + longest)) -- cgit v0.12