From 56c2b79ed015d4b154d4bd3a9cab27e7c613ba51 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 13 Sep 2014 19:49:01 +0100 Subject: Frame decompression speed optimization --- lz4.c | 19 ++++++++++++++-- lz4frame.c | 64 ++++++++++++++++++++++++++++++++++++++++------------ programs/frametest.c | 9 ++++---- programs/fullbench.c | 14 ++++++++++-- 4 files changed, 83 insertions(+), 23 deletions(-) diff --git a/lz4.c b/lz4.c index 4e2026e..b39a91d 100644 --- a/lz4.c +++ b/lz4.c @@ -1153,14 +1153,29 @@ Advanced decoding functions : the dictionary must be explicitly provided within parameters */ +FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char* dest, int compressedSize, int maxOutputSize, int safe, const char* dictStart, int dictSize) +{ + if ((dictStart+dictSize == source) && (dictSize >= (int)(64 KB - 1))) + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, withPrefix64k, NULL, 64 KB); + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, usingExtDict, dictStart, dictSize); +} + int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) { - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, dictStart, dictSize); + //return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, dictStart, dictSize); + return LZ4_decompress_usingDict_generic(source, dest, compressedSize, maxOutputSize, 1, dictStart, dictSize); } int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize) { - return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, usingExtDict, dictStart, dictSize); + //return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, usingExtDict, dictStart, 
dictSize); + return LZ4_decompress_usingDict_generic(source, dest, 0, originalSize, 0, dictStart, dictSize); +} + +/* debug function */ +int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, dictStart, dictSize); } diff --git a/lz4frame.c b/lz4frame.c index 0b54840..390af0e 100644 --- a/lz4frame.c +++ b/lz4frame.c @@ -407,7 +407,6 @@ size_t LZ4F_compressUpdate(LZ4F_compressionContext_t compressionContext, void* d int (*compress)(void*, const char*, char*, int, int); - if (cctxPtr->cStage != 1) return -ERROR_GENERIC; if (dstMaxSize < LZ4F_compressBound(srcSize, &(cctxPtr->prefs))) return -ERROR_dstMaxSize_tooSmall; if (compressOptionsPtr == NULL) compressOptionsPtr = &cOptionsNull; @@ -681,7 +680,6 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx_internal_t* dctxPtr, const BYTE* srcPt /* validate */ if (version != 1) return -ERROR_GENERIC; /* Version Number, only supported value */ - //if (blockMode != blockIndependent) return -ERROR_GENERIC; /* Only supported blockMode for the time being */ if (blockChecksumFlag != 0) return -ERROR_GENERIC; /* Only supported value for the time being */ if (contentSizeFlag != 0) return -ERROR_GENERIC; /* Only supported value for the time being */ if (((FLG>>1)&_1BIT) != 0) return -ERROR_GENERIC; /* Reserved bit */ @@ -700,7 +698,7 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx_internal_t* dctxPtr, const BYTE* srcPt if (contentChecksumFlag) XXH32_resetState(&(dctxPtr->xxh), 0); /* alloc */ - bufferNeeded = dctxPtr->maxBlockSize + ((dctxPtr->frameInfo.blockMode==blockLinked) * 64 KB); + bufferNeeded = dctxPtr->maxBlockSize + ((dctxPtr->frameInfo.blockMode==blockLinked) * 128 KB); if (bufferNeeded > dctxPtr->maxBufferSize) /* tmp buffers too small */ { FREEMEM(dctxPtr->tmpIn); @@ -711,10 +709,9 @@ static size_t 
LZ4F_decodeHeader(LZ4F_dctx_internal_t* dctxPtr, const BYTE* srcPt dctxPtr->tmpOutBuffer= ALLOCATOR(dctxPtr->maxBufferSize); if (dctxPtr->tmpOutBuffer== NULL) return -ERROR_GENERIC; } - dctxPtr->tmpOut = dctxPtr->tmpOutBuffer; - if (dctxPtr->frameInfo.blockMode==blockLinked) dctxPtr->tmpOut += 64 KB; + dctxPtr->dict = dctxPtr->tmpOutBuffer; dctxPtr->dictSize = 0; - dctxPtr->dict = dctxPtr->tmpOut; + dctxPtr->tmpOut = dctxPtr->tmpOutBuffer; return 7; } @@ -765,11 +762,46 @@ static void LZ4F_saveDict(LZ4F_dctx_internal_t* dctxPtr, const BYTE* decoded, si size_t preserveDictSize; if (newDictSize > 64 KB) newDictSize = 64 KB; preserveDictSize = 64 KB - newDictSize; - memmove(dctxPtr->tmpOutBuffer, dctxPtr->tmpOutBuffer + newDictSize, preserveDictSize); - memcpy(dctxPtr->tmpOutBuffer + preserveDictSize, decoded + decodedSize - newDictSize, newDictSize); - dctxPtr->dictSize += newDictSize; - if (dctxPtr->dictSize > 64 KB) dctxPtr->dictSize = 64 KB; - dctxPtr->dict = dctxPtr->tmpOut - dctxPtr->dictSize; + if (preserveDictSize > dctxPtr->dictSize) preserveDictSize = dctxPtr->dictSize; + + memmove(dctxPtr->tmpOutBuffer, dctxPtr->dict + dctxPtr->dictSize - preserveDictSize, preserveDictSize); + memmove(dctxPtr->tmpOutBuffer + preserveDictSize, decoded + decodedSize - newDictSize, newDictSize); + + dctxPtr->dict = dctxPtr->tmpOutBuffer; + dctxPtr->dictSize = preserveDictSize + newDictSize; + dctxPtr->tmpOut = dctxPtr->tmpOutBuffer + dctxPtr->dictSize; +} + + +static void LZ4F_pointDict(LZ4F_dctx_internal_t* dctxPtr, const BYTE* decoded, size_t decodedSize) +{ + /* large decoded block */ + if (decodedSize >= (64 KB - 1)) + { + dctxPtr->dict = (BYTE*)decoded; + dctxPtr->dictSize = decodedSize; + dctxPtr->tmpOut = dctxPtr->tmpOutBuffer + 64 KB; + return; + } + + /* decoded block in the continuity of dictionary */ + if (dctxPtr->dict + dctxPtr->dictSize == decoded) + { + dctxPtr->dictSize += decodedSize; + if (dctxPtr->dict == dctxPtr->tmpOutBuffer) /* extended tmp buffer, don't 
go beyond 128 KB == maxDictSize */ + { + if (dctxPtr->dictSize > 128 KB) + { + memcpy(dctxPtr->tmpOutBuffer, dctxPtr->tmpOutBuffer + dctxPtr->dictSize - 64 KB, 64 KB); + dctxPtr->dictSize = 64 KB; + } + dctxPtr->tmpOut = dctxPtr->tmpOutBuffer + dctxPtr->dictSize; + } + return; + } + + /* small block, and not contiguous : let's save that */ + LZ4F_saveDict(dctxPtr, decoded, decodedSize); } @@ -931,7 +963,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, if ((size_t)(dstEnd-dstPtr) < sizeToCopy) sizeToCopy = dstEnd - dstPtr; memcpy(dstPtr, srcPtr, sizeToCopy); if (dctxPtr->frameInfo.contentChecksumFlag) XXH32_update(&(dctxPtr->xxh), srcPtr, sizeToCopy); - if (dctxPtr->frameInfo.blockMode==blockLinked) LZ4F_saveDict(dctxPtr, srcPtr, sizeToCopy); + if (dctxPtr->frameInfo.blockMode==blockLinked) LZ4F_pointDict(dctxPtr, srcPtr, sizeToCopy); srcPtr += sizeToCopy; dstPtr += sizeToCopy; if (sizeToCopy == dctxPtr->tmpInTarget) /* all copied */ @@ -987,12 +1019,11 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, else decoder = LZ4F_decompress_safe; - if ((size_t)(dstEnd-dstPtr) < dctxPtr->maxBlockSize) /* not enough dst room : decode into tmpOut */ + if ((size_t)(dstEnd-dstPtr) < dctxPtr->maxBlockSize) /* not enough place into dst : decode into tmpOut */ { decodedSize = decoder((const char*)selectedIn, (char*)dctxPtr->tmpOut, (int)dctxPtr->tmpInTarget, (int)dctxPtr->maxBlockSize, (const char*)dctxPtr->dict, (int)dctxPtr->dictSize); if (decodedSize < 0) return -ERROR_GENERIC; /* decompression failed */ if (dctxPtr->frameInfo.contentChecksumFlag) XXH32_update(&(dctxPtr->xxh), dctxPtr->tmpOut, decodedSize); - if (dctxPtr->frameInfo.blockMode==blockLinked) LZ4F_saveDict(dctxPtr, dctxPtr->tmpOut, decodedSize); dctxPtr->tmpOutSize = decodedSize; dctxPtr->tmpOutStart = 0; dctxPtr->dStage = dstage_flushOut; @@ -1001,7 +1032,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, decodedSize = decoder((const 
char*)selectedIn, (char*)dstPtr, (int)dctxPtr->tmpInTarget, (int)dctxPtr->maxBlockSize, (const char*)dctxPtr->dict, (int)dctxPtr->dictSize); if (decodedSize < 0) return -ERROR_GENERIC; /* decompression failed */ if (dctxPtr->frameInfo.contentChecksumFlag) XXH32_update(&(dctxPtr->xxh), dstPtr, decodedSize); - if (dctxPtr->frameInfo.blockMode==blockLinked) LZ4F_saveDict(dctxPtr, dstPtr, decodedSize); + if (dctxPtr->frameInfo.blockMode==blockLinked) LZ4F_pointDict(dctxPtr, dstPtr, decodedSize); dstPtr += decodedSize; dctxPtr->dStage = dstage_getCBlockSize; break; @@ -1016,6 +1047,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, dstPtr += sizeToCopy; if (dctxPtr->tmpOutStart == dctxPtr->tmpOutSize) { + if (dctxPtr->frameInfo.blockMode==blockLinked) LZ4F_pointDict(dctxPtr, dctxPtr->tmpOut, dctxPtr->tmpOutSize); dctxPtr->dStage = dstage_getCBlockSize; break; } @@ -1077,6 +1109,8 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, } } + if ((dctxPtr->frameInfo.blockMode==blockLinked) && (dctxPtr->dict != dctxPtr->tmpOutBuffer)) + LZ4F_saveDict(dctxPtr, NULL, 0); if (srcPtr<srcEnd) dctxPtr->srcExpect = srcPtr; diff --git a/programs/frametest.c b/programs/frametest.c index caa4956..8be7752 100644 --- a/programs/frametest.c +++ b/programs/frametest.c @@ -355,13 +355,13 @@ _output_error: } -static void locateBuffDiff(const void* buff1, const void* buff2) +static void locateBuffDiff(const void* buff1, const void* buff2, size_t size) { int p=0; BYTE* b1=(BYTE*)buff1; BYTE* b2=(BYTE*)buff2; while (b1[p]==b2[p]) p++; - printf("Error at pos %i : %02X != %02X \n", p, b1[p], b2[p]); + printf("Error at pos %i/%i : %02X != %02X \n", p, (int)size, b1[p], b2[p]); } @@ -473,13 +473,14 @@ int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, double compressi if (oSize > (size_t)(oend-op)) oSize = oend-op; oSize = oend-op; result = LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL); - if (result == (size_t)-ERROR_checksum_invalid) 
locateBuffDiff((BYTE*)srcBuffer+srcStart, decodedBuffer); + if (result == (size_t)-ERROR_checksum_invalid) locateBuffDiff((BYTE*)srcBuffer+srcStart, decodedBuffer, srcSize); CHECK(LZ4F_isError(result), "Decompression failed (error %i)", (int)result); op += oSize; ip += iSize; } CHECK(result != 0, "Frame decompression failed (error %i)", (int)result); crcDecoded = XXH64(decodedBuffer, op-(BYTE*)decodedBuffer, 1); + if (crcDecoded != crcOrig) locateBuffDiff((BYTE*)srcBuffer+srcStart, decodedBuffer, srcSize); CHECK(crcDecoded != crcOrig, "Decompression corruption"); } @@ -613,7 +614,7 @@ int main(int argc, char** argv) if (nbTests<=0) nbTests=1; - if (testNb==0) result = basicTests(seed, ((double)proba) / 100); + //if (testNb==0) result = basicTests(seed, ((double)proba) / 100); if (result) return 1; return fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100); } diff --git a/programs/fullbench.c b/programs/fullbench.c index f87b857..9292f20 100644 --- a/programs/fullbench.c +++ b/programs/fullbench.c @@ -354,6 +354,15 @@ static int local_LZ4_decompress_safe_usingDict(const char* in, char* out, int in return outSize; } +extern int LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize, const char* dict, int dictSize); + +static int local_LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize) +{ + (void)inSize; + LZ4_decompress_safe_forceExtDict(in, out, inSize, outSize, in - 65536, 65536); + return outSize; +} + static int local_LZ4_decompress_safe_partial(const char* in, char* out, int inSize, int outSize) { return LZ4_decompress_safe_partial(in, out, inSize, outSize - 5, outSize); @@ -380,7 +389,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) # define NB_COMPRESSION_ALGORITHMS 14 double totalCTime[NB_COMPRESSION_ALGORITHMS+1] = {0}; double totalCSize[NB_COMPRESSION_ALGORITHMS+1] = {0}; -# define NB_DECOMPRESSION_ALGORITHMS 8 +# define NB_DECOMPRESSION_ALGORITHMS 9 double 
totalDTime[NB_DECOMPRESSION_ALGORITHMS+1] = {0}; size_t errorCode; @@ -589,7 +598,8 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) case 5: decompressionFunction = LZ4_decompress_safe_withPrefix64k; dName = "LZ4_decompress_safe_withPrefix64k"; break; case 6: decompressionFunction = local_LZ4_decompress_safe_usingDict; dName = "LZ4_decompress_safe_usingDict"; break; case 7: decompressionFunction = local_LZ4_decompress_safe_partial; dName = "LZ4_decompress_safe_partial"; break; - case 8: decompressionFunction = local_LZ4F_decompress; dName = "LZ4F_decompress"; + case 8: decompressionFunction = local_LZ4_decompress_safe_forceExtDict; dName = "LZ4_decompress_safe_forceExtDict"; break; + case 9: decompressionFunction = local_LZ4F_decompress; dName = "LZ4F_decompress"; errorCode = LZ4F_compressFrame(compressed_buff, compressedBuffSize, orig_buff, benchedSize, NULL); if (LZ4F_isError(errorCode)) { DISPLAY("Preparation error compressing frame\n"); return 1; } chunkP[0].origSize = benchedSize; -- cgit v0.12