From d239a23337e5eba41f557b48eb26f0db81b28f26 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 18 Oct 2014 11:18:14 +0100 Subject: updated LZ4HC API --- lz4.h | 10 +- lz4hc.c | 368 +++++++++++++++++++++++++-------------------------- lz4hc.h | 58 +++++++- programs/fullbench.c | 4 +- programs/fuzzer.c | 90 ++++++++++++- 5 files changed, 333 insertions(+), 197 deletions(-) diff --git a/lz4.h b/lz4.h index 44ada14..7ad736f 100644 --- a/lz4.h +++ b/lz4.h @@ -193,7 +193,7 @@ void LZ4_resetStream (LZ4_stream_t* LZ4_streamPtr); * LZ4_freeStream releases its memory. */ LZ4_stream_t* LZ4_createStream(void); -int LZ4_freeStream (LZ4_stream_t* LZ4_stream); +int LZ4_freeStream (LZ4_stream_t* LZ4_streamPtr); /* * LZ4_loadDict @@ -202,21 +202,21 @@ int LZ4_freeStream (LZ4_stream_t* LZ4_stream); * Loading a size of 0 is allowed. * Return : 1 if OK, 0 if error */ -int LZ4_loadDict (LZ4_stream_t* LZ4_stream, const char* dictionary, int dictSize); +int LZ4_loadDict (LZ4_stream_t* LZ4_streamPtr, const char* dictionary, int dictSize); /* * LZ4_compress_continue * Compress data block 'source', using blocks compressed before as dictionary to improve compression ratio * Previous data blocks are assumed to still be present at their previous location. */ -int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize); +int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize); /* * LZ4_compress_limitedOutput_continue * Same as before, but also specify a maximum target compressed size (maxOutputSize) * If objective cannot be met, compression exits, and returns a zero. */ -int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize); +int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize); /* * LZ4_saveDict @@ -227,7 +227,7 @@ int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* s * Return : dictionary size in bytes, or 0 if error * Note : any dictSize > 64 KB will be interpreted as 64KB. */ -int LZ4_saveDict (LZ4_stream_t* LZ4_stream, char* safeBuffer, int dictSize); +int LZ4_saveDict (LZ4_stream_t* LZ4_streamPtr, char* safeBuffer, int dictSize); /************************************************ diff --git a/lz4hc.c b/lz4hc.c index 34a6173..2cf494e 100644 --- a/lz4hc.c +++ b/lz4hc.c @@ -214,8 +214,8 @@ typedef struct _U64_S { U64 v; } _PACKED U64_S; #define MINLENGTH (MFLIMIT+1) #define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) -#define KB *(1U<<10) -#define MB *(1U<<20) +#define KB *(1<<10) +#define MB *(1<<20) #define GB *(1U<<30) @@ -226,16 +226,12 @@ typedef struct _U64_S { U64 v; } _PACKED U64_S; # define STEPSIZE 8 # define LZ4_COPYSTEP(s,d) A64(d) = A64(s); d+=8; s+=8; # define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d) -# define AARCH A64 -# define HTYPE U32 -# define INITBASE(b,s) const BYTE* const b = s +# define AARCH A64 #else /* 32-bit */ # define STEPSIZE 4 # define LZ4_COPYSTEP(s,d) A32(d) = A32(s); d+=4; s+=4; # define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d); -# define AARCH A32 -# define HTYPE U32 -# define INITBASE(b,s) const BYTE* const b = s +# define AARCH A32 #endif #if defined(LZ4_BIG_ENDIAN) @@ -252,18 +248,20 @@ typedef struct _U64_S { U64 v; } _PACKED U64_S; **************************************/ typedef struct { + U32 hashTable[HASHTABLESIZE]; + U16 chainTable[MAXD]; const BYTE* inputBuffer; const BYTE* base; const BYTE* end; - HTYPE hashTable[HASHTABLESIZE]; - U16 chainTable[MAXD]; - const BYTE* nextToUpdate; + U32 nextToUpdate; + U32 compressionLevel; } LZ4HC_Data_Structure; /************************************** Macros **************************************/ +#define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(!!(c)) }; } /* Visual : use only *after* variable declarations */ #define LZ4_WILDCOPY(s,d,e) do { LZ4_COPYPACKET(s,d) } while (d> ((MINMATCH*8)-HASH_LOG)) @@ -342,87 +340,41 @@ FORCE_INLINE int LZ4_NbCommonBytes (register U32 val) #endif -int LZ4_sizeofStreamStateHC() -{ - return sizeof(LZ4HC_Data_Structure); -} - -FORCE_INLINE void LZ4_initHC (LZ4HC_Data_Structure* hc4, const BYTE* base) +FORCE_INLINE void LZ4HC_init (LZ4HC_Data_Structure* hc4, const BYTE* base) { MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable)); MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); - hc4->nextToUpdate = base + 1; + hc4->nextToUpdate = 1; hc4->base = base; hc4->inputBuffer = base; hc4->end = base; } -int LZ4_resetStreamStateHC(void* state, const char* inputBuffer) -{ - if ((((size_t)state) & (sizeof(void*)-1)) != 0) return 1; /* Error : pointer is not aligned for pointer (32 or 64 bits) */ - LZ4_initHC((LZ4HC_Data_Structure*)state, (const BYTE*)inputBuffer); - return 0; -} - - -void* LZ4_createHC (const char* inputBuffer) -{ - void* hc4 = ALLOCATOR(sizeof(LZ4HC_Data_Structure)); - LZ4_initHC ((LZ4HC_Data_Structure*)hc4, (const BYTE*)inputBuffer); - return hc4; -} - - -int LZ4_freeHC (void* LZ4HC_Data) -{ - FREEMEM(LZ4HC_Data); - return (0); -} - /* Update chains up to ip (excluded) */ FORCE_INLINE void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip) { - U16* chainTable = hc4->chainTable; - HTYPE* HashTable = hc4->hashTable; - INITBASE(base,hc4->base); + U16* chainTable = hc4->chainTable; + U32* HashTable = hc4->hashTable; + const BYTE* const base = hc4->base; + const U32 target = ip - base; + U32 idx = hc4->nextToUpdate; - while(hc4->nextToUpdate < ip) + while(idx < target) { - const BYTE* const p = hc4->nextToUpdate; - size_t delta = (p) - HASH_POINTER(p); + U32 h = HASH_VALUE(base+idx); + size_t delta = idx - HashTable[h]; if (delta>MAX_DISTANCE) delta = MAX_DISTANCE; - DELTANEXT(p) = (U16)delta; - HashTable[HASH_VALUE(p)] = (HTYPE)((p) - base); - hc4->nextToUpdate++; + chainTable[idx & 0xFFFF] = (U16)delta; + HashTable[h] = idx; + idx++; } -} - - -char* LZ4_slideInputBufferHC(void* LZ4HC_Data) -{ - LZ4HC_Data_Structure* hc4 = (LZ4HC_Data_Structure*)LZ4HC_Data; - size_t distance = (hc4->end - 64 KB) - hc4->inputBuffer; - - if (hc4->end <= hc4->inputBuffer + 64 KB) return (char*)(hc4->end); /* no update : less than 64KB within buffer */ - distance = (distance >> 16) << 16; /* Must be a multiple of 64 KB */ - LZ4HC_Insert(hc4, hc4->end - MINMATCH); - memcpy((void*)(hc4->end - 64 KB - distance), (const void*)(hc4->end - 64 KB), 64 KB); - hc4->nextToUpdate -= distance; - hc4->base -= distance; - if ((U32)(hc4->inputBuffer - hc4->base) > 1 GB + 64 KB) /* Avoid overflow */ - { - int i; - hc4->base += 1 GB; - for (i=0; ihashTable[i] -= 1 GB; - } - hc4->end -= distance; - return (char*)(hc4->end); + hc4->nextToUpdate = target; } -FORCE_INLINE size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit) +static size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit) { const BYTE* p1t = p1; @@ -443,124 +395,79 @@ FORCE_INLINE size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BY FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* const matchlimit, const BYTE** matchpos, const int maxNbAttempts) { U16* const chainTable = hc4->chainTable; - HTYPE* const HashTable = hc4->hashTable; - const BYTE* ref; - INITBASE(base,hc4->base); + U32* const HashTable = hc4->hashTable; + const BYTE* const base = hc4->base; + int matchIndex; + const int idxLow = (ip-base) > 64 KB ? (ip - base) - 64 KB : 0; + const BYTE* match; int nbAttempts=maxNbAttempts; - size_t repl=0, ml=0; - U16 delta=0; /* useless assignment, to remove an uninitialization warning */ + size_t ml=0; /* HC4 match finder */ LZ4HC_Insert(hc4, ip); - ref = HASH_POINTER(ip); - -#define REPEAT_OPTIMIZATION -#ifdef REPEAT_OPTIMIZATION - /* Detect repetitive sequences of length <= 4 */ - if ((U32)(ip-ref) <= 4) /* potential repetition */ - { - if (A32(ref) == A32(ip)) /* confirmed */ - { - delta = (U16)(ip-ref); - repl = ml = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH; - *matchpos = ref; - } - ref = GETNEXT(ref); - } -#endif + matchIndex = HashTable[HASH_VALUE(ip)]; - while (((U32)(ip-ref) <= MAX_DISTANCE) && (nbAttempts)) + while ((matchIndex>idxLow) && (nbAttempts)) { nbAttempts--; - if (*(ref+ml) == *(ip+ml)) - if (A32(ref) == A32(ip)) - { - size_t mlt = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH; - if (mlt > ml) { ml = mlt; *matchpos = ref; } - } - ref = GETNEXT(ref); - } - -#ifdef REPEAT_OPTIMIZATION - /* Complete table */ - if (repl) - { - const BYTE* ptr = ip; - const BYTE* end; - - end = ip + repl - (MINMATCH-1); - while(ptr < end-delta) + match = base + matchIndex; + if (*(match+ml) == *(ip+ml)) + if (A32(match) == A32(ip)) { - DELTANEXT(ptr) = delta; /* Pre-Load */ - ptr++; + size_t mlt = LZ4HC_CommonLength(ip+MINMATCH, match+MINMATCH, matchlimit) + MINMATCH; + if (mlt > ml) { ml = mlt; *matchpos = match; } } - do - { - DELTANEXT(ptr) = delta; - HashTable[HASH_VALUE(ptr)] = (HTYPE)((ptr) - base); /* Head of chain */ - ptr++; - } while(ptr < end); - hc4->nextToUpdate = end; + matchIndex -= chainTable[matchIndex & 0xFFFF]; } -#endif return (int)ml; } -FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos, const int maxNbAttempts) +FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( + LZ4HC_Data_Structure* hc4, + const BYTE* ip, + const BYTE* startLimit, + const BYTE* matchlimit, + int longest, + const BYTE** matchpos, + const BYTE** startpos, + const int maxNbAttempts) { - U16* const chainTable = hc4->chainTable; - HTYPE* const HashTable = hc4->hashTable; - INITBASE(base,hc4->base); - const BYTE* ref; + U16* const chainTable = hc4->chainTable; + U32* const HashTable = hc4->hashTable; + const BYTE* const base = hc4->base; + const BYTE* match; + int matchIndex; + const int idxLow = (ip-base) > 64 KB ? (ip-base) - 64 KB : 0; int nbAttempts = maxNbAttempts; int delta = (int)(ip-startLimit); /* First Match */ LZ4HC_Insert(hc4, ip); - ref = HASH_POINTER(ip); + matchIndex = HashTable[HASH_VALUE(ip)]; - while (((U32)(ip-ref) <= MAX_DISTANCE) && (nbAttempts)) + while ((matchIndex>idxLow) && (nbAttempts)) { nbAttempts--; - if (*(startLimit + longest) == *(ref - delta + longest)) - if (A32(ref) == A32(ip)) + match = base + matchIndex; + if (*(startLimit + longest) == *(match - delta + longest)) + if (A32(match) == A32(ip)) { -#if 1 - const BYTE* reft = ref+MINMATCH; - const BYTE* ipt = ip+MINMATCH; - const BYTE* startt = ip; - - while (iptstartLimit) && (reft > hc4->inputBuffer) && (startt[-1] == reft[-1])) {startt--; reft--;} + while ((startt>startLimit) && (tmpMatch > hc4->inputBuffer) && (startt[-1] == tmpMatch[-1])) {startt--; tmpMatch--;} if ((ipt-startt) > longest) { longest = (int)(ipt-startt); - *matchpos = reft; + *matchpos = tmpMatch; *startpos = startt; } } - ref = GETNEXT(ref); + matchIndex -= chainTable[matchIndex & 0xFFFF]; } return longest; @@ -574,7 +481,7 @@ FORCE_INLINE int LZ4HC_encodeSequence ( BYTE** op, const BYTE** anchor, int matchLength, - const BYTE* ref, + const BYTE* match, limitedOutput_directive limitedOutputBuffer, BYTE* oend) { @@ -592,7 +499,7 @@ FORCE_INLINE int LZ4HC_encodeSequence ( LZ4_BLINDCOPY(*anchor, *op, length); /* Encode Offset */ - LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref)); + LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-match)); /* Encode MatchLength */ length = (int)(matchLength-MINMATCH); @@ -629,7 +536,7 @@ static int LZ4HC_compress_generic ( BYTE* op = (BYTE*) dest; BYTE* const oend = op + maxOutputSize; - const int maxNbAttempts = compressionLevel > MAX_COMPRESSION_LEVEL ? 1 << MAX_COMPRESSION_LEVEL : compressionLevel ? 1<<(compressionLevel-1) : 1< MAX_COMPRESSION_LEVEL) compressionLevel = MAX_COMPRESSION_LEVEL; + if (compressionLevel == 0) compressionLevel = LZ4HC_DEFAULT_COMPRESSIONLEVEL; + maxNbAttempts = 1 << compressionLevel; /* Ensure blocks follow each other */ if (ip != ctx->end) return 0; ctx->end += inputSize; @@ -794,7 +705,6 @@ _Search3: ml2 = ml3; goto _Search3; - } /* Encode Last Literals */ @@ -814,28 +724,18 @@ _Search3: int LZ4_compressHC2(const char* source, char* dest, int inputSize, int compressionLevel) { - void* ctx = LZ4_createHC(source); - int result; - if (ctx==NULL) return 0; - - result = LZ4HC_compress_generic (ctx, source, dest, inputSize, 0, compressionLevel, noLimit); - - LZ4_freeHC(ctx); - return result; + LZ4HC_Data_Structure ctx; + LZ4HC_init(&ctx, (const BYTE*)source); + return LZ4HC_compress_generic (&ctx, source, dest, inputSize, 0, compressionLevel, noLimit); } int LZ4_compressHC(const char* source, char* dest, int inputSize) { return LZ4_compressHC2(source, dest, inputSize, 0); } int LZ4_compressHC2_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel) { - void* ctx = LZ4_createHC(source); - int result; - if (ctx==NULL) return 0; - - result = LZ4HC_compress_generic (ctx, source, dest, inputSize, maxOutputSize, compressionLevel, limitedOutput); - - LZ4_freeHC(ctx); - return result; + LZ4HC_Data_Structure ctx; + LZ4HC_init(&ctx, (const BYTE*)source); + return LZ4HC_compress_generic (&ctx, source, dest, inputSize, maxOutputSize, compressionLevel, limitedOutput); } int LZ4_compressHC_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) @@ -853,7 +753,7 @@ int LZ4_sizeofStateHC() { return sizeof(LZ4HC_Data_Structure); } int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel) { if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0; /* Error : state is not aligned for pointers (32 or 64 bits) */ - LZ4_initHC ((LZ4HC_Data_Structure*)state, (const BYTE*)source); + LZ4HC_init ((LZ4HC_Data_Structure*)state, (const BYTE*)source); return LZ4HC_compress_generic (state, source, dest, inputSize, 0, compressionLevel, noLimit); } @@ -864,7 +764,7 @@ int LZ4_compressHC_withStateHC (void* state, const char* source, char* dest, int int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel) { if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0; /* Error : state is not aligned for pointers (32 or 64 bits) */ - LZ4_initHC ((LZ4HC_Data_Structure*)state, (const BYTE*)source); + LZ4HC_init ((LZ4HC_Data_Structure*)state, (const BYTE*)source); return LZ4HC_compress_generic (state, source, dest, inputSize, maxOutputSize, compressionLevel, limitedOutput); } @@ -872,26 +772,124 @@ int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, c { return LZ4_compressHC2_limitedOutput_withStateHC (state, source, dest, inputSize, maxOutputSize, 0); } -/**************************** - Stream functions -****************************/ +/************************************** + Experimental Streaming Functions +**************************************/ +/* allocation */ +LZ4_streamHC_t* LZ4_createStreamHC(void) { return (LZ4_streamHC_t*)malloc(sizeof(LZ4_streamHC_t)); } +int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) { free(LZ4_streamHCPtr); return 0; }; -int LZ4_compressHC_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize) + +/* initialization */ +void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) { - return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, 0, 0, noLimit); + LZ4_STATIC_ASSERT(sizeof(LZ4HC_Data_Structure) <= LZ4_STREAMHCSIZE); /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */ + ((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->base = NULL; + ((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->compressionLevel = (unsigned)compressionLevel; } -int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel) +int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int dictSize) { - return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, 0, compressionLevel, noLimit); + LZ4HC_init ((LZ4HC_Data_Structure*) LZ4_streamHCPtr, (const BYTE*) dictionary); + LZ4HC_Insert ((LZ4HC_Data_Structure*) LZ4_streamHCPtr, (const BYTE*)dictionary +(dictSize-3)); + ((LZ4HC_Data_Structure*) LZ4_streamHCPtr)->end = (const BYTE*)dictionary + dictSize; + return 1; +} + + +/* compression */ + +int LZ4_compressHC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize) +{ + if (((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->base == NULL) LZ4HC_init ((LZ4HC_Data_Structure*) LZ4_streamHCPtr, (const BYTE*) source); + return LZ4HC_compress_generic (LZ4_streamHCPtr, source, dest, inputSize, 0, ((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->compressionLevel, noLimit); +} + +int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize) +{ + if (((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->base == NULL) LZ4HC_init ((LZ4HC_Data_Structure*) LZ4_streamHCPtr, (const BYTE*) source); + return LZ4HC_compress_generic (LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, ((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->compressionLevel, limitedOutput); +} + + +/* dictionary saving */ + +int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize) +{ + LZ4HC_Data_Structure* sp = (LZ4HC_Data_Structure*)LZ4_streamHCPtr; + if (dictSize > 64 KB) dictSize = 64 KB; + if (dictSize < 0) dictSize = 0; + if (dictSize > (sp->end - sp->base)) dictSize = sp->end - sp->base; + memcpy(safeBuffer, sp->end - dictSize, dictSize); + LZ4_loadDictHC(LZ4_streamHCPtr, safeBuffer, dictSize); + return dictSize; } + + +/*********************************** + Deprecated Streaming functions +***********************************/ +int LZ4_sizeofStreamStateHC(void) { return LZ4_STREAMHCSIZE; } + +int LZ4_resetStreamStateHC(void* state, const char* inputBuffer) +{ + if ((((size_t)state) & (sizeof(void*)-1)) != 0) return 1; /* Error : pointer is not aligned for pointer (32 or 64 bits) */ + LZ4HC_init((LZ4HC_Data_Structure*)state, (const BYTE*)inputBuffer); + return 0; +} + +void* LZ4_createHC (const char* inputBuffer) +{ + void* hc4 = ALLOCATOR(sizeof(LZ4HC_Data_Structure)); + LZ4HC_init ((LZ4HC_Data_Structure*)hc4, (const BYTE*)inputBuffer); + return hc4; +} + +int LZ4_freeHC (void* LZ4HC_Data) +{ + FREEMEM(LZ4HC_Data); + return (0); +} + +/* +int LZ4_compressHC_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize) +{ + return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, 0, 0, noLimit); +} int LZ4_compressHC_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize) { return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, maxOutputSize, 0, limitedOutput); } +*/ + +int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel) +{ + return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, 0, compressionLevel, noLimit); +} int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel) { return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, maxOutputSize, compressionLevel, limitedOutput); } + +char* LZ4_slideInputBufferHC(void* LZ4HC_Data) +{ + LZ4HC_Data_Structure* hc4 = (LZ4HC_Data_Structure*)LZ4HC_Data; + size_t distance = (hc4->end - 64 KB) - hc4->inputBuffer; + + if (hc4->end <= hc4->inputBuffer + 64 KB) return (char*)(hc4->end); /* no update : less than 64KB within buffer */ + + distance = (distance >> 16) << 16; /* Must be a multiple of 64 KB */ + LZ4HC_Insert(hc4, hc4->end - MINMATCH); + memcpy((void*)(hc4->end - 64 KB - distance), (const void*)(hc4->end - 64 KB), 64 KB); + hc4->base -= distance; + if ((U32)(hc4->inputBuffer - hc4->base) > 1 GB + 64 KB) /* Avoid overflow */ + { + int i; + hc4->base += 1 GB; + for (i=0; ihashTable[i] -= 1 GB; + } + hc4->end -= distance; + return (char*)(hc4->end); +} diff --git a/lz4hc.h b/lz4hc.h index deb2394..29a05f5 100644 --- a/lz4hc.h +++ b/lz4hc.h @@ -74,7 +74,7 @@ int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize */ /* Note : -Decompression functions are provided within LZ4 source code (see "lz4.h") (BSD license) + Decompression functions are provided within LZ4 source code (see "lz4.h") (BSD license) */ @@ -101,13 +101,63 @@ They just use the externally allocated memory area instead of allocating their o */ + + +/************************************** + Experimental Streaming Functions +**************************************/ +#define LZ4_STREAMHCSIZE_U32 65546 +#define LZ4_STREAMHCSIZE (LZ4_STREAMHCSIZE_U32 * sizeof(unsigned int)) +typedef struct { unsigned int table[LZ4_STREAMHCSIZE_U32]; } LZ4_streamHC_t; + +/* +This structure allows static allocation of LZ4 HC streaming state. +State must then be initialized using LZ4_resetStreamHC() before first use. + +If you prefer dynamic allocation, please refer to functions below. +*/ + +LZ4_streamHC_t* LZ4_createStreamHC(void); +int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr); + +/* +These functions create and release memory for LZ4 HC streaming state. +Newly created states are already initialized. +Existing state space can be re-used anytime using LZ4_resetStreamHC(). +*/ + +void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel); +int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int dictSize); + +int LZ4_compressHC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize); +int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize); + +int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int maxDictSize); + +/* +These functions compress data in successive blocks of any size, using previous blocks as dictionary. + +Before starting compression, state must be properly initialized, using LZ4_resetStreamHC(). +A first "fictional block" can then be designated as initial dictionary, using LZ4_loadDictHC() (Optional). + +Then, use LZ4_compressHC_continue() or LZ4_compressHC_limitedOutput_continue() to compress each successive block. +They work like usual LZ4_compressHC() or LZ4_compressHC_limitedOutput(), but use previous memory blocks to improve compression. +Previous memory blocks (including initial dictionary when present) must remain accessible and unmodified during compression. + +If, for any reason, previous data block can't be preserved in memory for next block compression, +you can still preserve it by moving it to a safer place, +using LZ4_saveDictHC(). +*/ + + + /************************************** - Streaming Functions + Deprecated Streaming Functions **************************************/ /* Note : these streaming functions still follows the older model */ void* LZ4_createHC (const char* inputBuffer); -int LZ4_compressHC_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize); -int LZ4_compressHC_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize); +//int LZ4_compressHC_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize); +//int LZ4_compressHC_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize); char* LZ4_slideInputBufferHC (void* LZ4HC_Data); int LZ4_freeHC (void* LZ4HC_Data); diff --git a/programs/fullbench.c b/programs/fullbench.c index f34c68c..4caea16 100644 --- a/programs/fullbench.c +++ b/programs/fullbench.c @@ -313,12 +313,12 @@ static int local_LZ4_compressHC_limitedOutput(const char* in, char* out, int inS static int local_LZ4_compressHC_continue(const char* in, char* out, int inSize) { - return LZ4_compressHC_continue(ctx, in, out, inSize); + return LZ4_compressHC_continue((LZ4_streamHC_t*)ctx, in, out, inSize); } static int local_LZ4_compressHC_limitedOutput_continue(const char* in, char* out, int inSize) { - return LZ4_compressHC_limitedOutput_continue(ctx, in, out, inSize, LZ4_compressBound(inSize)); + return LZ4_compressHC_limitedOutput_continue((LZ4_streamHC_t*)ctx, in, out, inSize, LZ4_compressBound(inSize)); } static int local_LZ4F_compressFrame(const char* in, char* out, int inSize) diff --git a/programs/fuzzer.c b/programs/fuzzer.c index 283352c..d736b63 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -621,14 +621,102 @@ _output_error: } } +static const unsigned testInputSize = 128 KB; +static const unsigned testCompressedSize = 64 KB; static void FUZ_unitTests(void) { + const unsigned testNb = 0; + const unsigned seed = 0; + const unsigned cycleNb= 0; + char testInput[testInputSize]; + char testCompressed[testCompressedSize]; + char testVerify[testCompressedSize]; + U32 randState = 0; + + // Init + FUZ_fillCompressibleNoiseBuffer(testInput, testInputSize, 0.50, &randState); + + // 32-bits address space overflow test FUZ_AddressOverflow(); + + // LZ4 HC streaming tests + { + LZ4_streamHC_t* sp; + LZ4_streamHC_t sHC; + //XXH64_state_t xxh; + U64 crcOrig; + U64 crcNew; + int result; + + // Allocation test + sp = LZ4_createStreamHC(); + FUZ_CHECKTEST(sp==NULL, "LZ4_createStreamHC() allocation failed"); + LZ4_freeStreamHC(sp); + + // simple compression test + crcOrig = XXH64(testInput, testCompressedSize, 0); + LZ4_resetStreamHC(&sHC, 0); + result = LZ4_compressHC_limitedOutput_continue(&sHC, testInput, testCompressed, testCompressedSize, testCompressedSize-1); + FUZ_CHECKTEST(result==0, "LZ4_compressHC_limitedOutput_continue() compression failed"); + + result = LZ4_decompress_safe(testCompressed, testVerify, result, testCompressedSize); + FUZ_CHECKTEST(result!=(int)testCompressedSize, "LZ4_decompress_safe() decompression failed"); + crcNew = XXH64(testVerify, testCompressedSize, 0); + FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() decompression corruption"); + + // simple dictionary compression test + crcOrig = XXH64(testInput + 64 KB, testCompressedSize, 0); + LZ4_resetStreamHC(&sHC, 0); + LZ4_loadDictHC(&sHC, testInput, 64 KB); + result = LZ4_compressHC_limitedOutput_continue(&sHC, testInput + 64 KB, testCompressed, testCompressedSize, testCompressedSize-1); + FUZ_CHECKTEST(result==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result); + + result = LZ4_decompress_safe_usingDict(testCompressed, testVerify, result, testCompressedSize, testInput, 64 KB); + FUZ_CHECKTEST(result!=(int)testCompressedSize, "LZ4_decompress_safe() dictionary decompression failed"); + crcNew = XXH64(testVerify, testCompressedSize, 0); + FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() dictionary decompression corruption"); + + // dictionary multi compression test + { + int result1, result2; + int segSize = testCompressedSize / 2; + crcOrig = XXH64(testInput + segSize, 64 KB, 0); + LZ4_resetStreamHC(&sHC, 0); + LZ4_loadDictHC(&sHC, testInput, segSize); + result1 = LZ4_compressHC_limitedOutput_continue(&sHC, testInput + segSize, testCompressed, segSize, segSize -1); + FUZ_CHECKTEST(result1==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result); + result2 = LZ4_compressHC_limitedOutput_continue(&sHC, testInput + 2*segSize, testCompressed+result1, segSize, segSize -1); + FUZ_CHECKTEST(result2==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result); + + result = LZ4_decompress_safe_usingDict(testCompressed, testVerify, result1, segSize, testInput, segSize); + FUZ_CHECKTEST(result!=segSize, "LZ4_decompress_safe() dictionary decompression part 1 failed"); + result = LZ4_decompress_safe_usingDict(testCompressed+result1, testVerify+segSize, result2, segSize, testInput, 2*segSize); + FUZ_CHECKTEST(result!=segSize, "LZ4_decompress_safe() dictionary decompression part 2 failed"); + crcNew = XXH64(testVerify, testCompressedSize, 0); + FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() dictionary decompression corruption"); + } + + // remote dictionary compression test + crcOrig = XXH64(testInput + 64 KB, testCompressedSize, 0); + LZ4_resetStreamHC(&sHC, 0); + LZ4_loadDictHC(&sHC, testInput, 32 KB); + result = LZ4_compressHC_limitedOutput_continue(&sHC, testInput + 64 KB, testCompressed, testCompressedSize, testCompressedSize-1); + //FUZ_CHECKTEST(result==0, "LZ4_compressHC_limitedOutput_continue() remote dictionary failed : result = %i", result); + + result = LZ4_decompress_safe_usingDict(testCompressed, testVerify, result, testCompressedSize, testInput, 32 KB); + //FUZ_CHECKTEST(result!=(int)testCompressedSize, "LZ4_decompress_safe() dictionary decompression failed"); + crcNew = XXH64(testVerify, testCompressedSize, 0); + //FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() dictionary decompression corruption"); + } + + return; +_output_error: + exit(1); } -int FUZ_usage(void) +static int FUZ_usage(void) { DISPLAY( "Usage :\n"); DISPLAY( " %s [args]\n", programName); -- cgit v0.12