From b1d022fa72b75af8fb373e26ac8d2f0f14a9e6fe Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 11 Sep 2014 22:27:14 +0100 Subject: slightly improved frame compression speed --- lz4frame.c | 24 +++++++++++------------- programs/frametest.c | 30 ++++++++++++++++-------------- programs/fullbench.c | 2 +- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/lz4frame.c b/lz4frame.c index 55da791..728e070 100644 --- a/lz4frame.c +++ b/lz4frame.c @@ -318,6 +318,7 @@ size_t LZ4F_compressBegin(LZ4F_compressionContext_t compressionContext, void* ds BYTE* const dstStart = (BYTE*)dstBuffer; BYTE* dstPtr = dstStart; BYTE* headerStart; + size_t requiredBuffSize; if (dstMaxSize < LZ4F_MAXHEADERFRAME_SIZE) return -ERROR_dstMaxSize_tooSmall; if (cctxPtr->cStage != 0) return -ERROR_GENERIC; @@ -327,15 +328,15 @@ size_t LZ4F_compressBegin(LZ4F_compressionContext_t compressionContext, void* ds cctxPtr->prefs = *preferencesPtr; if (cctxPtr->prefs.frameInfo.blockSizeID == 0) cctxPtr->prefs.frameInfo.blockSizeID = LZ4F_BLOCKSIZEID_DEFAULT; cctxPtr->maxBlockSize = LZ4F_getBlockSize(cctxPtr->prefs.frameInfo.blockSizeID); - if (cctxPtr->maxBufferSize < cctxPtr->maxBlockSize + (cctxPtr->prefs.frameInfo.blockMode == blockLinked)) + requiredBuffSize = cctxPtr->maxBlockSize + ((cctxPtr->prefs.frameInfo.blockMode == blockLinked) * 128 KB); + if (cctxPtr->maxBufferSize < requiredBuffSize) { - cctxPtr->maxBufferSize = cctxPtr->maxBlockSize; - if (cctxPtr->prefs.frameInfo.blockMode == blockLinked) cctxPtr->maxBufferSize += 128 KB; + cctxPtr->maxBufferSize = requiredBuffSize; FREEMEM(cctxPtr->tmpBuff); cctxPtr->tmpBuff = ALLOCATOR(cctxPtr->maxBufferSize); if (cctxPtr->tmpBuff == NULL) return -ERROR_allocation_failed; - cctxPtr->tmpIn = cctxPtr->tmpBuff; } + cctxPtr->tmpIn = cctxPtr->tmpBuff; cctxPtr->tmpInSize = 0; XXH32_resetState(&(cctxPtr->xxh), 0); LZ4_resetStream(&(cctxPtr->lz4ctx)); @@ -444,6 +445,7 @@ size_t LZ4F_compress(LZ4F_compressionContext_t compressionContext, void* dstBuff memcpy(dstPtr, cctxPtr->tmpIn, blockSize); dstPtr += blockSize; } + if (cctxPtr->prefs.frameInfo.blockMode==blockLinked) cctxPtr->tmpIn += blockSize; cctxPtr->tmpInSize = 0; } } @@ -471,13 +473,8 @@ size_t LZ4F_compress(LZ4F_compressionContext_t compressionContext, void* dstBuff if ((cctxPtr->prefs.frameInfo.blockMode == blockLinked) && (lastBlockCompressed)) { /* last compressed input up to 64 KB become dictionary */ - if (0 && (lastBlockCompressed==1) && - (cctxPtr->tmpBuff + cctxPtr->maxBufferSize > cctxPtr->tmpIn + cctxPtr->tmpInSize + cctxPtr->maxBlockSize)) - { - /* in theory, no need to "save", everything is properly stacked and tracked, so where is the problem ? */ - cctxPtr->tmpIn += cctxPtr->tmpInSize; - } - else + if ((lastBlockCompressed==2) || + ((cctxPtr->tmpBuff + cctxPtr->maxBufferSize) < (cctxPtr->tmpIn + cctxPtr->maxBlockSize))) { int result; result = LZ4_saveDict (&(cctxPtr->lz4ctx), (char*)(cctxPtr->tmpBuff), 64 KB); @@ -486,7 +483,7 @@ size_t LZ4F_compress(LZ4F_compressionContext_t compressionContext, void* dstBuff } } - if (srcPtr < srcEnd) /* some input data left */ + if (srcPtr < srcEnd) /* some input data left, necessarily < blockSize */ { /* fill tmp buffer */ size_t sizeToCopy = srcEnd - srcPtr; @@ -542,11 +539,12 @@ size_t LZ4F_flush(LZ4F_compressionContext_t compressionContext, void* dstBuffer, memcpy(dstPtr, cctxPtr->tmpIn, cctxPtr->tmpInSize); dstPtr += cctxPtr->tmpInSize; } + if (cctxPtr->prefs.frameInfo.blockMode==blockLinked) cctxPtr->tmpIn += cctxPtr->tmpInSize; cctxPtr->tmpInSize = 0; } if ((cctxPtr->prefs.frameInfo.blockMode == blockLinked) - )//&& (cctxPtr->maxBufferSize < (cctxPtr->tmpIn - cctxPtr->tmpDict) + cctxPtr->tmpInSize + cctxPtr->maxBlockSize )) + && ((cctxPtr->tmpBuff + cctxPtr->maxBufferSize) < (cctxPtr->tmpIn + cctxPtr->maxBlockSize))) { /* last 64 KB of input become dictionary */ int result = LZ4_saveDict (&(cctxPtr->lz4ctx), (char*)(cctxPtr->tmpBuff), 64 KB); diff --git a/programs/frametest.c b/programs/frametest.c index 9c332d3..c70cc2f 100644 --- a/programs/frametest.c +++ b/programs/frametest.c @@ -191,7 +191,7 @@ static unsigned FUZ_highbit(U32 v32) } -int basicTests(U32 seed, int nbCycles, int startCycle, double compressibility) +int basicTests(U32 seed, double compressibility) { int testResult = 0; void* CNBuffer; @@ -203,7 +203,6 @@ int basicTests(U32 seed, int nbCycles, int startCycle, double compressibility) LZ4F_decompressionContext_t dCtx; U64 crcOrig; - (void)nbCycles; (void)startCycle; // Create compressible test buffer CNBuffer = malloc(COMPRESSIBLE_NOISE_LENGTH); compressedBuffer = malloc(LZ4F_compressFrameBound(COMPRESSIBLE_NOISE_LENGTH, NULL)); @@ -356,6 +355,16 @@ _output_error: } +static void locateBuffDiff(const void* buff1, const void* buff2) +{ + int p=0; + BYTE* b1=(BYTE*)buff1; + BYTE* b2=(BYTE*)buff2; + while (b1[p]==b2[p]) p++; + printf("Error at pos %i : %02X != %02X \n", p, b1[p], b2[p]); + } + + static const U32 srcDataLength = 9 MB; /* needs to be > 2x4MB to test large blocks */ int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, double compressibility) @@ -403,10 +412,10 @@ int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, double compressi U64 crcOrig, crcDecoded; DISPLAYUPDATE(2, "\r%5i ", testNb); - crcOrig = XXH64(srcBuffer+srcStart, srcSize, 1); + crcOrig = XXH64((BYTE*)srcBuffer+srcStart, srcSize, 1); { - const BYTE* ip = srcBuffer + srcStart; + const BYTE* ip = (const BYTE*)srcBuffer + srcStart; const BYTE* const iend = ip + srcSize; BYTE* op = compressedBuffer; BYTE* const oend = op + LZ4F_compressFrameBound(srcDataLength, NULL); @@ -454,14 +463,7 @@ int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, double compressi if (oSize > (size_t)(oend-op)) oSize = oend-op; oSize = oend-op; result = LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL); - if (result == (size_t)-ERROR_checksum_invalid) - { - int p=0; - BYTE* b1=(BYTE*)srcBuffer+srcStart; - BYTE* b2=(BYTE*)decodedBuffer; - while (b1[p]==b2[p]) p++; - printf("Error at pos %i : %02X != %02X \n", p, b1[p], b2[p]); - } + if (result == (size_t)-ERROR_checksum_invalid) locateBuffDiff((BYTE*)srcBuffer+srcStart, decodedBuffer); CHECK(LZ4F_isError(result), "Decompression failed (error %i)", (int)result); op += oSize; ip += iSize; @@ -514,7 +516,7 @@ int main(int argc, char** argv) int nbTests = nbTestsDefault; int testNb = 0; int proba = FUZ_COMPRESSIBILITY_DEFAULT; - int result; + int result=0; // Check command line programName = argv[0]; @@ -601,7 +603,7 @@ int main(int argc, char** argv) if (nbTests<=0) nbTests=1; - result = basicTests(seed, nbTests, testNb, ((double)proba) / 100); + if (testNb==0) result = basicTests(seed, ((double)proba) / 100); if (result) return 1; return fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100); } diff --git a/programs/fullbench.c b/programs/fullbench.c index b6a1c02..96120e3 100644 --- a/programs/fullbench.c +++ b/programs/fullbench.c @@ -323,7 +323,7 @@ static int local_LZ4_compressHC_limitedOutput_continue(const char* in, char* out static int local_LZ4F_compressFrame(const char* in, char* out, int inSize) { - return LZ4F_compressFrame(out, 2*inSize, in, inSize, NULL); + return LZ4F_compressFrame(out, 2*inSize + (4<<20), in, inSize, NULL); } static int local_LZ4_decompress_fast(const char* in, char* out, int inSize, int outSize) -- cgit v0.12