summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Yann Collet <yann.collet.73@gmail.com> 2014-09-13 18:49:01 (GMT)
committer Yann Collet <yann.collet.73@gmail.com> 2014-09-13 18:49:01 (GMT)
commit 56c2b79ed015d4b154d4bd3a9cab27e7c613ba51 (patch)
tree c2bf30ba320863f53e583ce4326b231dc331ecf7
parent 38912f55e3c3b782529c2dd1e682d6af7c8bd052 (diff)
download lz4-56c2b79ed015d4b154d4bd3a9cab27e7c613ba51.zip
lz4-56c2b79ed015d4b154d4bd3a9cab27e7c613ba51.tar.gz
lz4-56c2b79ed015d4b154d4bd3a9cab27e7c613ba51.tar.bz2
Frame decompression speed optimization
-rw-r--r-- lz4.c 19
-rw-r--r-- lz4frame.c 64
-rw-r--r-- programs/frametest.c 9
-rw-r--r-- programs/fullbench.c 14
4 files changed, 83 insertions, 23 deletions
diff --git a/lz4.c b/lz4.c
index 4e2026e..b39a91d 100644
--- a/lz4.c
+++ b/lz4.c
@@ -1153,14 +1153,29 @@ Advanced decoding functions :
the dictionary must be explicitly provided within parameters
*/
+FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char* dest, int compressedSize, int maxOutputSize, int safe, const char* dictStart, int dictSize)
+{
+ if ((dictStart+dictSize == source) && (dictSize >= (int)(64 KB - 1)))
+ return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, withPrefix64k, NULL, 64 KB);
+ return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, usingExtDict, dictStart, dictSize);
+}
+
int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
{
- return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, dictStart, dictSize);
+ //return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, dictStart, dictSize);
+ return LZ4_decompress_usingDict_generic(source, dest, compressedSize, maxOutputSize, 1, dictStart, dictSize);
}
int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
{
- return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, usingExtDict, dictStart, dictSize);
+ //return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, usingExtDict, dictStart, dictSize);
+ return LZ4_decompress_usingDict_generic(source, dest, 0, originalSize, 0, dictStart, dictSize);
+}
+
+/* debug function */
+int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
+{
+ return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, dictStart, dictSize);
}
diff --git a/lz4frame.c b/lz4frame.c
index 0b54840..390af0e 100644
--- a/lz4frame.c
+++ b/lz4frame.c
@@ -407,7 +407,6 @@ size_t LZ4F_compressUpdate(LZ4F_compressionContext_t compressionContext, void* d
int (*compress)(void*, const char*, char*, int, int);
-
if (cctxPtr->cStage != 1) return -ERROR_GENERIC;
if (dstMaxSize < LZ4F_compressBound(srcSize, &(cctxPtr->prefs))) return -ERROR_dstMaxSize_tooSmall;
if (compressOptionsPtr == NULL) compressOptionsPtr = &cOptionsNull;
@@ -681,7 +680,6 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx_internal_t* dctxPtr, const BYTE* srcPt
/* validate */
if (version != 1) return -ERROR_GENERIC; /* Version Number, only supported value */
- //if (blockMode != blockIndependent) return -ERROR_GENERIC; /* Only supported blockMode for the time being */
if (blockChecksumFlag != 0) return -ERROR_GENERIC; /* Only supported value for the time being */
if (contentSizeFlag != 0) return -ERROR_GENERIC; /* Only supported value for the time being */
if (((FLG>>1)&_1BIT) != 0) return -ERROR_GENERIC; /* Reserved bit */
@@ -700,7 +698,7 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx_internal_t* dctxPtr, const BYTE* srcPt
if (contentChecksumFlag) XXH32_resetState(&(dctxPtr->xxh), 0);
/* alloc */
- bufferNeeded = dctxPtr->maxBlockSize + ((dctxPtr->frameInfo.blockMode==blockLinked) * 64 KB);
+ bufferNeeded = dctxPtr->maxBlockSize + ((dctxPtr->frameInfo.blockMode==blockLinked) * 128 KB);
if (bufferNeeded > dctxPtr->maxBufferSize) /* tmp buffers too small */
{
FREEMEM(dctxPtr->tmpIn);
@@ -711,10 +709,9 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx_internal_t* dctxPtr, const BYTE* srcPt
dctxPtr->tmpOutBuffer= ALLOCATOR(dctxPtr->maxBufferSize);
if (dctxPtr->tmpOutBuffer== NULL) return -ERROR_GENERIC;
}
- dctxPtr->tmpOut = dctxPtr->tmpOutBuffer;
- if (dctxPtr->frameInfo.blockMode==blockLinked) dctxPtr->tmpOut += 64 KB;
+ dctxPtr->dict = dctxPtr->tmpOutBuffer;
dctxPtr->dictSize = 0;
- dctxPtr->dict = dctxPtr->tmpOut;
+ dctxPtr->tmpOut = dctxPtr->tmpOutBuffer;
return 7;
}
@@ -765,11 +762,46 @@ static void LZ4F_saveDict(LZ4F_dctx_internal_t* dctxPtr, const BYTE* decoded, si
size_t preserveDictSize;
if (newDictSize > 64 KB) newDictSize = 64 KB;
preserveDictSize = 64 KB - newDictSize;
- memmove(dctxPtr->tmpOutBuffer, dctxPtr->tmpOutBuffer + newDictSize, preserveDictSize);
- memcpy(dctxPtr->tmpOutBuffer + preserveDictSize, decoded + decodedSize - newDictSize, newDictSize);
- dctxPtr->dictSize += newDictSize;
- if (dctxPtr->dictSize > 64 KB) dctxPtr->dictSize = 64 KB;
- dctxPtr->dict = dctxPtr->tmpOut - dctxPtr->dictSize;
+ if (preserveDictSize > dctxPtr->dictSize) preserveDictSize = dctxPtr->dictSize;
+
+ memmove(dctxPtr->tmpOutBuffer, dctxPtr->dict + dctxPtr->dictSize - preserveDictSize, preserveDictSize);
+ memmove(dctxPtr->tmpOutBuffer + preserveDictSize, decoded + decodedSize - newDictSize, newDictSize);
+
+ dctxPtr->dict = dctxPtr->tmpOutBuffer;
+ dctxPtr->dictSize = preserveDictSize + newDictSize;
+ dctxPtr->tmpOut = dctxPtr->tmpOutBuffer + dctxPtr->dictSize;
+}
+
+
+static void LZ4F_pointDict(LZ4F_dctx_internal_t* dctxPtr, const BYTE* decoded, size_t decodedSize)
+{
+ /* large decoded block */
+ if (decodedSize >= (64 KB - 1))
+ {
+ dctxPtr->dict = (BYTE*)decoded;
+ dctxPtr->dictSize = decodedSize;
+ dctxPtr->tmpOut = dctxPtr->tmpOutBuffer + 64 KB;
+ return;
+ }
+
+ /* decoded block in the continuity of dictionary */
+ if (dctxPtr->dict + dctxPtr->dictSize == decoded)
+ {
+ dctxPtr->dictSize += decodedSize;
+ if (dctxPtr->dict == dctxPtr->tmpOutBuffer) /* extended tmp buffer, don't go beyond 128 KB == maxDictSize */
+ {
+ if (dctxPtr->dictSize > 128 KB)
+ {
+ memcpy(dctxPtr->tmpOutBuffer, dctxPtr->tmpOutBuffer + dctxPtr->dictSize - 64 KB, 64 KB);
+ dctxPtr->dictSize = 64 KB;
+ }
+ dctxPtr->tmpOut = dctxPtr->tmpOutBuffer + dctxPtr->dictSize;
+ }
+ return;
+ }
+
+ /* small block, and not contiguous : let's save that */
+ LZ4F_saveDict(dctxPtr, decoded, decodedSize);
}
@@ -931,7 +963,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext,
if ((size_t)(dstEnd-dstPtr) < sizeToCopy) sizeToCopy = dstEnd - dstPtr;
memcpy(dstPtr, srcPtr, sizeToCopy);
if (dctxPtr->frameInfo.contentChecksumFlag) XXH32_update(&(dctxPtr->xxh), srcPtr, sizeToCopy);
- if (dctxPtr->frameInfo.blockMode==blockLinked) LZ4F_saveDict(dctxPtr, srcPtr, sizeToCopy);
+ if (dctxPtr->frameInfo.blockMode==blockLinked) LZ4F_pointDict(dctxPtr, srcPtr, sizeToCopy);
srcPtr += sizeToCopy;
dstPtr += sizeToCopy;
if (sizeToCopy == dctxPtr->tmpInTarget) /* all copied */
@@ -987,12 +1019,11 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext,
else
decoder = LZ4F_decompress_safe;
- if ((size_t)(dstEnd-dstPtr) < dctxPtr->maxBlockSize) /* not enough dst room : decode into tmpOut */
+ if ((size_t)(dstEnd-dstPtr) < dctxPtr->maxBlockSize) /* not enough place into dst : decode into tmpOut */
{
decodedSize = decoder((const char*)selectedIn, (char*)dctxPtr->tmpOut, (int)dctxPtr->tmpInTarget, (int)dctxPtr->maxBlockSize, (const char*)dctxPtr->dict, (int)dctxPtr->dictSize);
if (decodedSize < 0) return -ERROR_GENERIC; /* decompression failed */
if (dctxPtr->frameInfo.contentChecksumFlag) XXH32_update(&(dctxPtr->xxh), dctxPtr->tmpOut, decodedSize);
- if (dctxPtr->frameInfo.blockMode==blockLinked) LZ4F_saveDict(dctxPtr, dctxPtr->tmpOut, decodedSize);
dctxPtr->tmpOutSize = decodedSize;
dctxPtr->tmpOutStart = 0;
dctxPtr->dStage = dstage_flushOut;
@@ -1001,7 +1032,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext,
decodedSize = decoder((const char*)selectedIn, (char*)dstPtr, (int)dctxPtr->tmpInTarget, (int)dctxPtr->maxBlockSize, (const char*)dctxPtr->dict, (int)dctxPtr->dictSize);
if (decodedSize < 0) return -ERROR_GENERIC; /* decompression failed */
if (dctxPtr->frameInfo.contentChecksumFlag) XXH32_update(&(dctxPtr->xxh), dstPtr, decodedSize);
- if (dctxPtr->frameInfo.blockMode==blockLinked) LZ4F_saveDict(dctxPtr, dstPtr, decodedSize);
+ if (dctxPtr->frameInfo.blockMode==blockLinked) LZ4F_pointDict(dctxPtr, dstPtr, decodedSize);
dstPtr += decodedSize;
dctxPtr->dStage = dstage_getCBlockSize;
break;
@@ -1016,6 +1047,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext,
dstPtr += sizeToCopy;
if (dctxPtr->tmpOutStart == dctxPtr->tmpOutSize)
{
+ if (dctxPtr->frameInfo.blockMode==blockLinked) LZ4F_pointDict(dctxPtr, dctxPtr->tmpOut, dctxPtr->tmpOutSize);
dctxPtr->dStage = dstage_getCBlockSize;
break;
}
@@ -1077,6 +1109,8 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext,
}
}
+ if ((dctxPtr->frameInfo.blockMode==blockLinked) && (dctxPtr->dict != dctxPtr->tmpOutBuffer))
+ LZ4F_saveDict(dctxPtr, NULL, 0);
if (srcPtr<srcEnd) /* function must be called again with following source data */
dctxPtr->srcExpect = srcPtr;
diff --git a/programs/frametest.c b/programs/frametest.c
index caa4956..8be7752 100644
--- a/programs/frametest.c
+++ b/programs/frametest.c
@@ -355,13 +355,13 @@ _output_error:
}
-static void locateBuffDiff(const void* buff1, const void* buff2)
+static void locateBuffDiff(const void* buff1, const void* buff2, size_t size)
{
int p=0;
BYTE* b1=(BYTE*)buff1;
BYTE* b2=(BYTE*)buff2;
while (b1[p]==b2[p]) p++;
- printf("Error at pos %i : %02X != %02X \n", p, b1[p], b2[p]);
+ printf("Error at pos %i/%i : %02X != %02X \n", p, (int)size, b1[p], b2[p]);
}
@@ -473,13 +473,14 @@ int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, double compressi
if (oSize > (size_t)(oend-op)) oSize = oend-op;
oSize = oend-op;
result = LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL);
- if (result == (size_t)-ERROR_checksum_invalid) locateBuffDiff((BYTE*)srcBuffer+srcStart, decodedBuffer);
+ if (result == (size_t)-ERROR_checksum_invalid) locateBuffDiff((BYTE*)srcBuffer+srcStart, decodedBuffer, srcSize);
CHECK(LZ4F_isError(result), "Decompression failed (error %i)", (int)result);
op += oSize;
ip += iSize;
}
CHECK(result != 0, "Frame decompression failed (error %i)", (int)result);
crcDecoded = XXH64(decodedBuffer, op-(BYTE*)decodedBuffer, 1);
+ if (crcDecoded != crcOrig) locateBuffDiff((BYTE*)srcBuffer+srcStart, decodedBuffer, srcSize);
CHECK(crcDecoded != crcOrig, "Decompression corruption");
}
@@ -613,7 +614,7 @@ int main(int argc, char** argv)
if (nbTests<=0) nbTests=1;
- if (testNb==0) result = basicTests(seed, ((double)proba) / 100);
+ //if (testNb==0) result = basicTests(seed, ((double)proba) / 100);
if (result) return 1;
return fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100);
}
diff --git a/programs/fullbench.c b/programs/fullbench.c
index f87b857..9292f20 100644
--- a/programs/fullbench.c
+++ b/programs/fullbench.c
@@ -354,6 +354,15 @@ static int local_LZ4_decompress_safe_usingDict(const char* in, char* out, int in
return outSize;
}
+extern int LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize, const char* dict, int dictSize);
+
+static int local_LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize)
+{
+ (void)inSize;
+ LZ4_decompress_safe_forceExtDict(in, out, inSize, outSize, in - 65536, 65536);
+ return outSize;
+}
+
static int local_LZ4_decompress_safe_partial(const char* in, char* out, int inSize, int outSize)
{
return LZ4_decompress_safe_partial(in, out, inSize, outSize - 5, outSize);
@@ -380,7 +389,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles)
# define NB_COMPRESSION_ALGORITHMS 14
double totalCTime[NB_COMPRESSION_ALGORITHMS+1] = {0};
double totalCSize[NB_COMPRESSION_ALGORITHMS+1] = {0};
-# define NB_DECOMPRESSION_ALGORITHMS 8
+# define NB_DECOMPRESSION_ALGORITHMS 9
double totalDTime[NB_DECOMPRESSION_ALGORITHMS+1] = {0};
size_t errorCode;
@@ -589,7 +598,8 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles)
case 5: decompressionFunction = LZ4_decompress_safe_withPrefix64k; dName = "LZ4_decompress_safe_withPrefix64k"; break;
case 6: decompressionFunction = local_LZ4_decompress_safe_usingDict; dName = "LZ4_decompress_safe_usingDict"; break;
case 7: decompressionFunction = local_LZ4_decompress_safe_partial; dName = "LZ4_decompress_safe_partial"; break;
- case 8: decompressionFunction = local_LZ4F_decompress; dName = "LZ4F_decompress";
+ case 8: decompressionFunction = local_LZ4_decompress_safe_forceExtDict; dName = "LZ4_decompress_safe_forceExtDict"; break;
+ case 9: decompressionFunction = local_LZ4F_decompress; dName = "LZ4F_decompress";
errorCode = LZ4F_compressFrame(compressed_buff, compressedBuffSize, orig_buff, benchedSize, NULL);
if (LZ4F_isError(errorCode)) { DISPLAY("Preparation error compressing frame\n"); return 1; }
chunkP[0].origSize = benchedSize;