From e450018588560537c2c4b4b2dd3515a9ef3a83f7 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 21 Oct 2014 00:12:55 +0100 Subject: LZ4 HC : fixed small dictionary streaming compression --- lz4.c | 6 +++--- lz4hc.c | 26 ++++++++++++------------ programs/fuzzer.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 70 insertions(+), 21 deletions(-) mode change 100644 => 100755 lz4.c mode change 100644 => 100755 lz4hc.c mode change 100644 => 100755 programs/fuzzer.c diff --git a/lz4.c b/lz4.c old mode 100644 new mode 100755 index 39f176f..8adf96a --- a/lz4.c +++ b/lz4.c @@ -876,8 +876,8 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize) * in order to remove useless branches during compilation optimization. */ FORCE_INLINE int LZ4_decompress_generic( - const char* source, - char* dest, + const char* const source, + char* const dest, int inputSize, int outputSize, /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer. */ @@ -885,7 +885,7 @@ FORCE_INLINE int LZ4_decompress_generic( int partialDecoding, /* full, partial */ int targetOutputSize, /* only used if partialDecoding==partial */ int dict, /* noDict, withPrefix64k, usingExtDict */ - const char* dictStart, /* only if dict==usingExtDict */ + const char* const dictStart, /* only if dict==usingExtDict */ int dictSize /* note : = 0 if noDict */ ) { diff --git a/lz4hc.c b/lz4hc.c old mode 100644 new mode 100755 index 72739a7..38e0ce4 --- a/lz4hc.c +++ b/lz4hc.c @@ -346,13 +346,13 @@ FORCE_INLINE void LZ4HC_init (LZ4HC_Data_Structure* hc4, const BYTE* base) { MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable)); MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); - hc4->nextToUpdate = 1; - hc4->base = base; + hc4->nextToUpdate = 64 KB; + hc4->base = base - 64 KB; hc4->inputBuffer = base; hc4->end = base; - hc4->dictBase = base; - hc4->dictLimit = 0; - hc4->lowLimit = 0; + hc4->dictBase = base - 64 KB; + hc4->dictLimit = 64 KB; + hc4->lowLimit = 64 KB; } @@ -422,17 +422,17 @@ FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, // I const BYTE* const base = hc4->base; const BYTE* const dictBase = hc4->dictBase; const U32 dictLimit = hc4->dictLimit; + const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1); U32 matchIndex; - const U32 idxLow = (ip-base) > 64 KB ? (U32)(ip - base) - 64 KB : 0; const BYTE* match; int nbAttempts=maxNbAttempts; size_t ml=0; - /* HC4 match finder */ + /* HC4 match finder */ LZ4HC_Insert(hc4, ip); matchIndex = HashTable[LZ4HC_hashPtr(ip)]; - while ((matchIndex>idxLow) && (nbAttempts)) + while ((matchIndex>=lowLimit) && (nbAttempts)) { nbAttempts--; if (matchIndex >= dictLimit) @@ -480,19 +480,19 @@ FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( U32* const HashTable = hc4->hashTable; const BYTE* const base = hc4->base; const U32 dictLimit = hc4->dictLimit; - const U32 dictLowLimit = hc4->lowLimit; + const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1); const BYTE* const dictBase = hc4->dictBase; const BYTE* match; U32 matchIndex; - const U32 idxLow = (ip-base) > 64 KB ? (U32)(ip-base) - 64 KB : 0; int nbAttempts = maxNbAttempts; int delta = (int)(ip-iLowLimit); - /* First Match */ + + /* First Match */ LZ4HC_Insert(hc4, ip); matchIndex = HashTable[LZ4HC_hashPtr(ip)]; - while ((matchIndex>idxLow) && (nbAttempts)) + while ((matchIndex>=lowLimit) && (nbAttempts)) { nbAttempts--; if (matchIndex >= dictLimit) @@ -527,7 +527,7 @@ FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( mlt = LZ4HC_CommonLength(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; if ((ip+mlt == vLimit) && (vLimit < iHighLimit)) mlt += LZ4HC_CommonLength(ip+mlt, base+dictLimit, iHighLimit); - while ((ip+back > iLowLimit) && (matchIndex+back > dictLowLimit) && (ip[back-1] == match[back-1])) back--; + while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == match[back-1])) back--; mlt -= back; if ((int)mlt > longest) { longest = (int)mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; } } diff --git a/programs/fuzzer.c b/programs/fuzzer.c old mode 100644 new mode 100755 index cf56251..225361b --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -673,7 +673,7 @@ static void FUZ_unitTests(void) const unsigned cycleNb= 0; char testInput[testInputSize]; char testCompressed[testCompressedSize]; - char testVerify[testCompressedSize]; + char testVerify[testInputSize]; U32 randState = 0; // Init @@ -719,7 +719,7 @@ static void FUZ_unitTests(void) crcNew = XXH64(testVerify, testCompressedSize, 0); FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() dictionary decompression corruption"); - // dictionary multi compression test + // multiple HC compression test with dictionary { int result1, result2; int segSize = testCompressedSize / 2; @@ -727,9 +727,9 @@ static void FUZ_unitTests(void) LZ4_resetStreamHC(&sHC, 0); LZ4_loadDictHC(&sHC, testInput, segSize); result1 = LZ4_compressHC_limitedOutput_continue(&sHC, testInput + segSize, testCompressed, segSize, segSize -1); - FUZ_CHECKTEST(result1==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result); - result2 = LZ4_compressHC_limitedOutput_continue(&sHC, testInput + 2*segSize, testCompressed+result1, segSize, segSize -1); - FUZ_CHECKTEST(result2==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result); + FUZ_CHECKTEST(result1==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result1); + result2 = LZ4_compressHC_limitedOutput_continue(&sHC, testInput + 2*segSize, testCompressed+result1, segSize, segSize-1); + FUZ_CHECKTEST(result2==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result2); result = LZ4_decompress_safe_usingDict(testCompressed, testVerify, result1, segSize, testInput, segSize); FUZ_CHECKTEST(result!=segSize, "LZ4_decompress_safe() dictionary decompression part 1 failed"); @@ -750,6 +750,55 @@ static void FUZ_unitTests(void) FUZ_CHECKTEST(result!=(int)testCompressedSize, "LZ4_decompress_safe_usingDict() decompression failed following remote dictionary HC compression test"); crcNew = XXH64(testVerify, testCompressedSize, 0); FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_usingDict() decompression corruption"); + + // multiple HC compression with ext. dictionary + { + XXH64_state_t crcOrigState; + XXH64_state_t crcNewState; + const char* dict = testInput + 3; + int dictSize = (FUZ_rand(&randState) & 8191); + char* dst = testVerify; + + size_t segStart = dictSize + 7; + int segSize = (FUZ_rand(&randState) & 8191); + int segNb = 1; + + LZ4_resetStreamHC(&sHC, 0); + LZ4_loadDictHC(&sHC, dict, dictSize); + + XXH64_reset(&crcOrigState, 0); + XXH64_reset(&crcNewState, 0); + + while (segStart + segSize < testInputSize) + { + XXH64_update(&crcOrigState, testInput + segStart, segSize); + crcOrig = XXH64_digest(&crcOrigState); + result = LZ4_compressHC_limitedOutput_continue(&sHC, testInput + segStart, testCompressed, segSize, LZ4_compressBound(segSize)); + FUZ_CHECKTEST(result==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result); + + result = LZ4_decompress_safe_usingDict(testCompressed, dst, result, segSize, dict, dictSize); + FUZ_CHECKTEST(result!=segSize, "LZ4_decompress_safe_usingDict() dictionary decompression part %i failed", segNb); + XXH64_update(&crcNewState, dst, segSize); + crcNew = XXH64_digest(&crcNewState); + if (crcOrig!=crcNew) + { + size_t c=0; + while (dst[c] == testInput[segStart+c]) c++; + DISPLAY("Bad decompression at %u / %u \n", (U32)c, (U32)segSize); + } + FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_usingDict() part %i corruption", segNb); + + dict = dst; + //dict = testInput + segStart; + dictSize = segSize; + + dst += segSize + 1; + segNb ++; + + segStart += segSize + (FUZ_rand(&randState) & 0xF) + 1; + segSize = (FUZ_rand(&randState) & 8191); + } + } } printf("All unit tests completed succesfully \n"); -- cgit v0.12