From 2f0a717a35abbc2117c446272c4c96b892e991fc Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 14 Jun 2014 16:56:24 +0100 Subject: LZ4 Streaming : check overlapping input/dictionary --- Makefile | 5 ++-- lz4.c | 88 +++++++++++++++++++++++++++++++++++++------------------ lz4.h | 23 +++++++++------ programs/Makefile | 2 +- programs/lz4cli.c | 2 +- 5 files changed, 79 insertions(+), 41 deletions(-) diff --git a/Makefile b/Makefile index ede6844..6baf5af 100644 --- a/Makefile +++ b/Makefile @@ -30,9 +30,10 @@ # - LZ4 forum froup : https://groups.google.com/forum/#!forum/lz4c # ################################################################ -export RELEASE=rc118 +# Version numbers +export RELEASE=r118 LIBVER_MAJOR=1 -LIBVER_MINOR=0 +LIBVER_MINOR=2 LIBVER_PATCH=0 LIBVER=$(LIBVER_MAJOR).$(LIBVER_MINOR).$(LIBVER_PATCH) diff --git a/lz4.c b/lz4.c index 9922f85..1977ef0 100644 --- a/lz4.c +++ b/lz4.c @@ -254,7 +254,7 @@ typedef struct { const BYTE* dictionary; const BYTE* bufferStart; U32 dictSize; -} LZ4_dict_t_internal; +} LZ4_stream_t_internal; typedef enum { notLimited = 0, limitedOutput = 1 } limitedOutput_directive; typedef enum { byPtr, byU32, byU16 } tableType_t; @@ -416,7 +416,7 @@ static unsigned LZ4_count(const BYTE* pIn, const BYTE* pRef, const BYTE* pInLimi pIn += LZ4_NbCommonBytes(diff); return (unsigned)(pIn - pStart); } - if (LZ4_ARCH64) if ((pIn<(pInLimit-3)) && (A32(pRef) == A32(pIn))) { pIn+=4; pRef+=4; } + if (sizeof(void*)==8) if ((pIn<(pInLimit-3)) && (A32(pRef) == A32(pIn))) { pIn+=4; pRef+=4; } if ((pIn<(pInLimit-1)) && (A16(pRef) == A16(pIn))) { pIn+=2; pRef+=2; } if ((pIncurrentOffset; lowLimit = (const BYTE*)source - dictPtr->dictSize; - if (lowLimit < base) lowLimit = base; break; case usingExtDict: base = (const BYTE*)source - dictPtr->currentOffset; @@ -484,12 +483,12 @@ static int LZ4_compress_generic( /* Main Loop */ for ( ; ; ) { - const BYTE* forwardIp = ip; const BYTE* ref; BYTE* token; { - int step=1; - int searchMatchNb = (1U << skipStrength) + 3; + const BYTE* forwardIp = ip; + unsigned step=1; + unsigned searchMatchNb = (1U << skipStrength); /* Find a match */ do { @@ -497,9 +496,9 @@ static int LZ4_compress_generic( ip = forwardIp; forwardIp += step; step = searchMatchNb++ >> skipStrength; - if (unlikely (step>8)) step=8; // slows down uncompressible data; required for valid forwardIp + //if (step>8) step=8; // required for valid forwardIp ; slows down uncompressible data a bit - if (unlikely(ip > mflimit)) goto _last_literals; + if (unlikely(forwardIp > mflimit)) goto _last_literals; ref = LZ4_getPositionOnHash(h, ctx, tableType, base); if (dict==usingExtDict) @@ -547,13 +546,14 @@ _next_match: LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref)); /* Encode MatchLength */ - ref += refDelta; { unsigned matchLength; if ((dict==usingExtDict) && (lowLimit==dictionary)) { - const BYTE* limit = ip + (dictEnd-ref); + const BYTE* limit; + ref += refDelta; + limit = ip + (dictEnd-ref); if (limit > matchlimit) limit = matchlimit; matchLength = LZ4_count(ip+MINMATCH, ref+MINMATCH, limit); ip += MINMATCH + matchLength; @@ -689,13 +689,13 @@ int LZ4_free (void* LZ4_stream) int LZ4_loadDict (void* LZ4_dict, const char* dictionary, int dictSize) { - LZ4_dict_t_internal* dict = (LZ4_dict_t_internal*) LZ4_dict; + LZ4_stream_t_internal* dict = (LZ4_stream_t_internal*) LZ4_dict; const BYTE* p = (const BYTE*)dictionary; const BYTE* const dictEnd = p + dictSize; const BYTE* base; - LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_dict_t_internal)); /* A compilation error here means LZ4_STREAMSIZE is not large enough */ - if (dict->initCheck) MEM_INIT(dict, 0, sizeof(LZ4_dict_t_internal)); + LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal)); /* A compilation error here means LZ4_STREAMSIZE is not large enough */ + if (dict->initCheck) MEM_INIT(dict, 0, sizeof(LZ4_stream_t_internal)); /* Uninitialized structure detected */ if (dictSize < MINMATCH) { @@ -720,7 +720,7 @@ int LZ4_loadDict (void* LZ4_dict, const char* dictionary, int dictSize) } -void LZ4_renormDictT(LZ4_dict_t_internal* LZ4_dict, const BYTE* src) +void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, const BYTE* src) { if ((LZ4_dict->currentOffset > 0x80000000) || ((size_t)LZ4_dict->currentOffset > (size_t)src)) /* address space overflow */ @@ -742,13 +742,26 @@ void LZ4_renormDictT(LZ4_dict_t_internal* LZ4_dict, const BYTE* src) int LZ4_compress_continue (void* LZ4_stream, const char* source, char* dest, int inputSize) { - LZ4_dict_t_internal* streamPtr = (LZ4_dict_t_internal*)LZ4_stream; + LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_stream; const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize; const BYTE* smallest = (const BYTE*) source; + if (streamPtr->initCheck) return 0; /* Uninitialized structure detected */ if ((streamPtr->dictSize>0) && (smallest > dictEnd)) smallest = dictEnd; LZ4_renormDictT(streamPtr, smallest); + /* Check overlapping input/dictionary space */ + { + const BYTE* sourceEnd = (const BYTE*) source + inputSize; + if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) + { + streamPtr->dictionary = sourceEnd; + streamPtr->dictSize = dictEnd - sourceEnd; + if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB; + if (streamPtr->dictSize < 4) streamPtr->dictSize = 0; + } + } + if (dictEnd == (const BYTE*)source) { int result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, 0, notLimited, byU32, withPrefix64k); @@ -768,13 +781,26 @@ int LZ4_compress_continue (void* LZ4_stream, const char* source, char* dest, int int LZ4_compress_limitedOutput_continue (void* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize) { - LZ4_dict_t_internal* streamPtr = (LZ4_dict_t_internal*)LZ4_stream; + LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_stream; const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize; const BYTE* smallest = (const BYTE*) source; + if (streamPtr->initCheck) return 0; /* Uninitialized structure detected */ if ((streamPtr->dictSize>0) && (smallest>dictEnd)) smallest = dictEnd; LZ4_renormDictT(streamPtr, smallest); + /* Check overlapping input/dictionary space */ + { + const BYTE* sourceEnd = (const BYTE*) source + inputSize; + if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) + { + streamPtr->dictionary = sourceEnd; + streamPtr->dictSize = dictEnd - sourceEnd; + if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB; + if (streamPtr->dictSize < 4) streamPtr->dictSize = 0; + } + } + if (dictEnd == (const BYTE*)source) { int result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k); @@ -796,13 +822,13 @@ int LZ4_compress_limitedOutput_continue (void* LZ4_stream, const char* source, c // Hidden debug function, to force separate dictionary mode int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int inputSize) { - LZ4_dict_t_internal* streamPtr = (LZ4_dict_t_internal*)LZ4_dict; + LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_dict; int result; const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize; const BYTE* smallest = dictEnd; if (smallest > (const BYTE*) source) smallest = (const BYTE*) source; - LZ4_renormDictT((LZ4_dict_t_internal*)LZ4_dict, smallest); + LZ4_renormDictT((LZ4_stream_t_internal*)LZ4_dict, smallest); result = LZ4_compress_generic(LZ4_dict, source, dest, inputSize, 0, notLimited, byU32, usingExtDict); @@ -816,7 +842,7 @@ int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* int LZ4_moveDict (void* LZ4_dict, char* safeBuffer, int dictSize) { - LZ4_dict_t_internal* dict = (LZ4_dict_t_internal*) LZ4_dict; + LZ4_stream_t_internal* dict = (LZ4_stream_t_internal*) LZ4_dict; const BYTE* previousDictEnd = dict->dictionary + dict->dictSize; if ((U32)dictSize > 64 KB) dictSize = 64 KB; /* useless to define a dictionary > 64 KB */ @@ -866,9 +892,12 @@ FORCE_INLINE int LZ4_decompress_generic( BYTE* oexit = op + targetOutputSize; const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize; - - /*const size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; / static reduces speed for LZ4_decompress_safe() on GCC64 */ +#define OLD +#ifdef OLD + const size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; /* static reduces speed for LZ4_decompress_safe() on GCC64 */ +#else const size_t dec32table[] = {4-0, 4-3, 4-2, 4-3, 4-0, 4-0, 4-0, 4-0}; /* static reduces speed for LZ4_decompress_safe() on GCC64 */ +#endif static const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3}; @@ -974,12 +1003,15 @@ FORCE_INLINE int LZ4_decompress_generic( op[1] = ref[1]; op[2] = ref[2]; op[3] = ref[3]; - /*op += 4, ref += 4; ref -= dec32table[op-ref]; +#ifdef OLD + op += 4, ref += 4; ref -= dec32table[op-ref]; A32(op) = A32(ref); - op += STEPSIZE-4; ref -= dec64;*/ + op += STEPSIZE-4; ref -= dec64; +#else ref += dec32table[op-ref]; A32(op+4) = A32(ref); op += STEPSIZE; ref -= dec64; +#endif } else { LZ4_COPYSTEP(op,ref); } cpy = op + length - (STEPSIZE-4); @@ -1060,7 +1092,7 @@ int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int LZ4_sizeofStreamState() { return LZ4_STREAMSIZE; } -void LZ4_init(LZ4_dict_t_internal* lz4ds, const BYTE* base) +void LZ4_init(LZ4_stream_t_internal* lz4ds, const BYTE* base) { MEM_INIT(lz4ds->hashTable, 0, LZ4_STREAMSIZE); lz4ds->bufferStart = base; @@ -1069,20 +1101,20 @@ void LZ4_init(LZ4_dict_t_internal* lz4ds, const BYTE* base) int LZ4_resetStreamState(void* state, const char* inputBuffer) { if ((((size_t)state) & 3) != 0) return 1; /* Error : pointer is not aligned on 4-bytes boundary */ - LZ4_init((LZ4_dict_t_internal*)state, (const BYTE*)inputBuffer); + LZ4_init((LZ4_stream_t_internal*)state, (const BYTE*)inputBuffer); return 0; } void* LZ4_create (const char* inputBuffer) { void* lz4ds = ALLOCATOR(4, LZ4_STREAMSIZE_U32); - LZ4_init ((LZ4_dict_t_internal*)lz4ds, (const BYTE*)inputBuffer); + LZ4_init ((LZ4_stream_t_internal*)lz4ds, (const BYTE*)inputBuffer); return lz4ds; } char* LZ4_slideInputBuffer (void* LZ4_Data) { - LZ4_dict_t_internal* lz4ds = (LZ4_dict_t_internal*)LZ4_Data; + LZ4_stream_t_internal* lz4ds = (LZ4_stream_t_internal*)LZ4_Data; LZ4_moveDict((LZ4_stream_t*)LZ4_Data, (char*)lz4ds->bufferStart, 64 KB); diff --git a/lz4.h b/lz4.h index f2975d4..60928cd 100644 --- a/lz4.h +++ b/lz4.h @@ -157,20 +157,19 @@ LZ4_decompress_safe_partial() : int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxOutputSize); -/************************************** - Experimental Streaming Functions -**************************************/ +/*********************************************** + Experimental Streaming Compression Functions +***********************************************/ #define LZ4_STREAMSIZE_U32 ((1 << (LZ4_MEMORY_USAGE-2)) + 8) #define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U32 * sizeof(unsigned int)) /* * LZ4_stream_t * information structure to track an LZ4 stream. - * set it to zero, or use LZ4_loadDict() to init it before first use. + * important : set this structure content to zero before first use ! */ typedef struct { unsigned int table[LZ4_STREAMSIZE_U32]; } LZ4_stream_t; - /* * If you prefer dynamic allocation methods, * LZ4_createStream @@ -180,10 +179,12 @@ typedef struct { unsigned int table[LZ4_STREAMSIZE_U32]; } LZ4_stream_t; void* LZ4_createStream(); int LZ4_free (void* LZ4_stream); + /* * LZ4_loadDict * Use this function to load a static dictionary into LZ4_stream. - * Loading a size of 0 is allowed and init the LZ4_stream_t structure. + * Any previous data will be forgotten, only 'dictionary' will remain in memory. + * Loading a size of 0 is allowed (same effect as init). * Return : 1 if OK, 0 if error */ int LZ4_loadDict (void* LZ4_stream, const char* dictionary, int dictSize); @@ -198,7 +199,7 @@ int LZ4_compress_continue (void* LZ4_stream, const char* source, char* dest, int /* * LZ4_compress_limitedOutput_continue * Same as before, but also specify a maximum target compressed size (maxOutputSize) - * If it cannot be met, compression exits, and return a zero. + * If objective cannot be met, compression exits, and returns a zero. */ int LZ4_compress_limitedOutput_continue (void* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize); @@ -213,6 +214,10 @@ int LZ4_compress_limitedOutput_continue (void* LZ4_stream, const char* source, c int LZ4_moveDict (void* LZ4_stream, char* safeBuffer, int dictSize); +/************************************************ + Experimental Streaming Decompression Functions +************************************************/ + /* *_usingDict() : These decoding functions work the same as their "normal" versions, @@ -247,12 +252,12 @@ They are only provided here for compatibility with older user programs. int LZ4_uncompress (const char* source, char* dest, int outputSize); int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); -/* Obsolete external allocation functions */ +/* Obsolete functions for externally allocated state; use streaming interface instead */ int LZ4_sizeofState(void); int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); -/* Obsolete streaming functions */ +/* Obsolete streaming functions; use new streaming interface whenever possible */ void* LZ4_create (const char* inputBuffer); int LZ4_sizeofStreamState(void); int LZ4_resetStreamState(void* state, const char* inputBuffer); diff --git a/programs/Makefile b/programs/Makefile index 811dda2..6ec2788 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -30,7 +30,7 @@ # fullbench32: Same as fullbench, but forced to compile in 32-bits mode # ################################################################ -RELEASE=rc118 +RELEASE=r118 DESTDIR= PREFIX=/usr CC:=$(CC) diff --git a/programs/lz4cli.c b/programs/lz4cli.c index fd2721d..e6e6740 100644 --- a/programs/lz4cli.c +++ b/programs/lz4cli.c @@ -109,7 +109,7 @@ //**************************** #define COMPRESSOR_NAME "LZ4 Compression CLI" #ifndef LZ4_VERSION -# define LZ4_VERSION "v1.1.8" +# define LZ4_VERSION "v1.2.0" #endif #define AUTHOR "Yann Collet" #define WELCOME_MESSAGE "*** %s %i-bits %s, by %s (%s) ***\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), LZ4_VERSION, AUTHOR, __DATE__ -- cgit v0.12