summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYann Collet <Cyan4973@users.noreply.github.com>2018-05-03 19:55:04 (GMT)
committerGitHub <noreply@github.com>2018-05-03 19:55:04 (GMT)
commit2b6c4f3d6367bf3f8bc38358109ab0a1b1d7340b (patch)
tree69ae53c32b8decdc9e41d75922877fc30bc15664
parent95607a749b8bbe6f9323408ddd740ef4ff248794 (diff)
parentffbff1f3602d53a82f1b80b3669fe24219bde544 (diff)
downloadlz4-2b6c4f3d6367bf3f8bc38358109ab0a1b1d7340b.zip
lz4-2b6c4f3d6367bf3f8bc38358109ab0a1b1d7340b.tar.gz
lz4-2b6c4f3d6367bf3f8bc38358109ab0a1b1d7340b.tar.bz2
Merge pull request #530 from lz4/lz4fRingBuffer
Random lz4f clarifications
-rw-r--r--NEWS4
-rw-r--r--README.md33
-rw-r--r--doc/lz4_manual.html54
-rw-r--r--lib/lz4frame.c76
4 files changed, 103 insertions, 64 deletions
diff --git a/NEWS b/NEWS
index 90cafc6..0139e61 100644
--- a/NEWS
+++ b/NEWS
@@ -1,13 +1,13 @@
v1.8.2
perf: *much* faster dictionary compression on small files, by @felixhandte
+perf: improved decompression speed and binary size, by Alexey Tourbin (@svpv)
perf: slightly faster HC compression and decompression speed
perf: very small compression ratio improvement
-perf: improved decompression binary size and speed, by Alexey Tourbin (@svpv)
fix : compression compatible with low memory addresses (< 0xFFFF)
fix : decompression segfault when provided with NULL input, by @terrelln
cli : new command --favor-decSpeed
cli : benchmark mode more accurate for small inputs
-fullbench : can measure _destSize() variants, by @felixhandte
+fullbench : can bench _destSize() variants, by @felixhandte
doc : clarified block format parsing restrictions, by Alexey Tourbin (@svpv)
v1.8.1
diff --git a/README.md b/README.md
index 596fdac..406792a 100644
--- a/README.md
+++ b/README.md
@@ -43,33 +43,32 @@ Benchmarks
-------------------------
The benchmark uses [lzbench], from @inikep
-compiled with GCC v6.2.0 on Linux 64-bits.
-The reference system uses a Core i7-3930K CPU @ 4.5GHz.
+compiled with GCC v7.3.0 on Linux 64-bits (Debian 4.15.17-1).
+The reference system uses a Core i7-6700K CPU @ 4.0GHz.
Benchmark evaluates the compression of reference [Silesia Corpus]
in single-thread mode.
[lzbench]: https://github.com/inikep/lzbench
[Silesia Corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia
-| Compressor | Ratio | Compression | Decompression |
-| ---------- | ----- | ----------- | ------------- |
-| memcpy | 1.000 | 7300 MB/s | 7300 MB/s |
-|**LZ4 fast 8 (v1.7.3)**| 1.799 |**911 MB/s** | **3360 MB/s** |
-|**LZ4 default (v1.7.3)**|**2.101**|**625 MB/s** | **3220 MB/s** |
-| LZO 2.09 | 2.108 | 620 MB/s | 845 MB/s |
-| QuickLZ 1.5.0 | 2.238 | 510 MB/s | 600 MB/s |
-| Snappy 1.1.3 | 2.091 | 450 MB/s | 1550 MB/s |
-| LZF v3.6 | 2.073 | 365 MB/s | 820 MB/s |
-| [Zstandard] 1.1.1 -1 | 2.876 | 330 MB/s | 930 MB/s |
-| [Zstandard] 1.1.1 -3 | 3.164 | 200 MB/s | 810 MB/s |
-| [zlib] deflate 1.2.8 -1| 2.730 | 100 MB/s | 370 MB/s |
-|**LZ4 HC -9 (v1.7.3)** |**2.720**| 34 MB/s | **3240 MB/s** |
-| [zlib] deflate 1.2.8 -6| 3.099 | 33 MB/s | 390 MB/s |
+| Compressor | Ratio | Compression | Decompression |
+| ---------- | ----- | ----------- | ------------- |
+| memcpy | 1.000 |13100 MB/s | 13100 MB/s |
+|**LZ4 default (v1.8.2)** |**2.101**|**730 MB/s** | **3900 MB/s** |
+| LZO 2.09 | 2.108 | 630 MB/s | 800 MB/s |
+| QuickLZ 1.5.0 | 2.238 | 530 MB/s | 720 MB/s |
+| Snappy 1.1.4 | 2.091 | 525 MB/s | 1750 MB/s |
+| [Zstandard] 1.3.4 -1 | 2.877 | 470 MB/s | 1380 MB/s |
+| LZF v3.6 | 2.073 | 380 MB/s | 840 MB/s |
+| [zlib] deflate 1.2.11 -1| 2.730 | 100 MB/s | 380 MB/s |
+|**LZ4 HC -9 (v1.8.2)** |**2.721**| 40 MB/s | **3920 MB/s** |
+| [zlib] deflate 1.2.11 -6| 3.099 | 34 MB/s | 410 MB/s |
[zlib]: http://www.zlib.net/
[Zstandard]: http://www.zstd.net/
-LZ4 is also compatible and well optimized for x32 mode, for which it provides an additional +10% speed performance.
+LZ4 is also compatible and well optimized for x32 mode,
+for which it provides some additional speed performance.
Installation
diff --git a/doc/lz4_manual.html b/doc/lz4_manual.html
index ddd2724..e079db2 100644
--- a/doc/lz4_manual.html
+++ b/doc/lz4_manual.html
@@ -206,38 +206,59 @@ int LZ4_freeStream (LZ4_stream_t* streamPtr);
<pre><b>LZ4_streamDecode_t* LZ4_createStreamDecode(void);
int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
-</b><p> creation / destruction of streaming decompression tracking structure.
- A tracking structure can be re-used multiple times sequentially.
+</b><p> creation / destruction of streaming decompression tracking context.
+ A tracking context can be re-used multiple times.
+
</p></pre><BR>
<pre><b>int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
-</b><p> An LZ4_streamDecode_t structure can be allocated once and re-used multiple times.
+</b><p> An LZ4_streamDecode_t context can be allocated once and re-used multiple times.
Use this function to start decompression of a new stream of blocks.
A dictionary can optionally be set. Use NULL or size 0 for a reset order.
+ Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
@return : 1 if OK, 0 if error
</p></pre><BR>
+<pre><b>int LZ4_decoderRingBufferSize(int maxBlockSize);
+#define LZ4_DECODER_RING_BUFFER_SIZE(mbs) (65536 + 14 + (mbs)) </b>/* for static allocation; mbs presumed valid */<b>
+</b><p> Note : in a ring buffer scenario (optional),
+ blocks are presumed decompressed next to each other
+ up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize),
+ at which stage it resumes from beginning of ring buffer.
+ When setting such a ring buffer for streaming decompression,
+ this function provides the minimum size of this ring buffer
+ to be compatible with any source respecting maxBlockSize condition.
+ @return : minimum ring buffer size,
+ or 0 if there is an error (invalid maxBlockSize).
+
+</p></pre><BR>
+
<pre><b>int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity);
int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
</b><p> These decoding functions allow decompression of consecutive blocks in "streaming" mode.
A block is an unsplittable entity, it must be presented entirely to a decompression function.
- Decompression functions only accept one block at a time.
+ Decompression functions only accept one block at a time.
The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded.
- If less than 64KB of data has been decoded all the data must be present.
-
- Special : if application sets a ring buffer for decompression, it must respect one of the following conditions :
- - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions)
- In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB).
- - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
- maxBlockSize is implementation dependent. It's the maximum size of any single block.
+ If less than 64KB of data has been decoded, all the data must be present.
+
+ Special : if decompression side sets a ring buffer, it must respect one of the following conditions :
+ - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize).
+ maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes.
+ In which case, encoding and decoding buffers do not need to be synchronized.
+ Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize.
+ - Synchronized mode :
+ Decompression buffer size is _exactly_ the same as compression buffer size,
+ and follows exactly same update rule (block boundaries at same positions),
+ and decoding function is provided with exact decompressed size of each block (exception for last block of the stream),
+ _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB).
+ - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes.
In which case, encoding and decoding buffers do not need to be synchronized,
and encoding ring buffer can have any size, including small ones ( < 64 KB).
- - _At least_ 64 KB + 8 bytes + maxBlockSize.
- In which case, encoding and decoding buffers do not need to be synchronized,
- and encoding ring buffer can have any size, including larger than decoding buffer.
- Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer,
- and indicate where it is saved using LZ4_setStreamDecode() before decompressing next block.
+
+ Whenever these conditions are not possible,
+ save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression,
+ then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block.
</p></pre><BR>
<pre><b>int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapacity, const char* dictStart, int dictSize);
@@ -245,6 +266,7 @@ int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize,
</b><p> These decoding functions work the same as
a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
They are stand-alone, and don't need an LZ4_streamDecode_t structure.
+ Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
</p></pre><BR>
diff --git a/lib/lz4frame.c b/lib/lz4frame.c
index 6cf2a06..f57db24 100644
--- a/lib/lz4frame.c
+++ b/lib/lz4frame.c
@@ -96,6 +96,19 @@ You can contact the author at :
#define LZ4F_STATIC_ASSERT(c) { enum { LZ4F_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2) && !defined(DEBUGLOG)
+# include <stdio.h>
+static int g_debuglog_enable = 1;
+# define DEBUGLOG(l, ...) { \
+ if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \
+ fprintf(stderr, __FILE__ ": "); \
+ fprintf(stderr, __VA_ARGS__); \
+ fprintf(stderr, " \n"); \
+ } }
+#else
+# define DEBUGLOG(l, ...) {} /* disabled */
+#endif
+
/*-************************************
* Basic Types
@@ -408,6 +421,7 @@ size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity,
LZ4_stream_t lz4ctx;
LZ4F_cctx_t *cctxPtr = &cctx;
+ DEBUGLOG(4, "LZ4F_compressFrame");
MEM_INIT(&cctx, 0, sizeof(cctx));
cctx.version = LZ4F_VERSION;
cctx.maxBufferSize = 5 MB; /* mess with real buffer size to prevent dynamic allocation; works only because autoflush==1 & stableSrc==1 */
@@ -1198,24 +1212,31 @@ LZ4F_errorCode_t LZ4F_getFrameInfo(LZ4F_dctx* dctx, LZ4F_frameInfo_t* frameInfoP
/* LZ4F_updateDict() :
* only used for LZ4F_blockLinked mode */
-static void LZ4F_updateDict(LZ4F_dctx* dctx, const BYTE* dstPtr, size_t dstSize, const BYTE* dstPtr0, unsigned withinTmp)
+static void LZ4F_updateDict(LZ4F_dctx* dctx,
+ const BYTE* dstPtr, size_t dstSize, const BYTE* dstBufferStart,
+ unsigned withinTmp)
{
if (dctx->dictSize==0)
dctx->dict = (const BYTE*)dstPtr; /* priority to dictionary continuity */
- if (dctx->dict + dctx->dictSize == dstPtr) { /* dictionary continuity */
+ if (dctx->dict + dctx->dictSize == dstPtr) { /* dictionary continuity, directly within dstBuffer */
dctx->dictSize += dstSize;
return;
}
- if (dstPtr - dstPtr0 + dstSize >= 64 KB) { /* dstBuffer large enough to become dictionary */
- dctx->dict = (const BYTE*)dstPtr0;
- dctx->dictSize = dstPtr - dstPtr0 + dstSize;
+ if (dstPtr - dstBufferStart + dstSize >= 64 KB) { /* history in dstBuffer becomes large enough to become dictionary */
+ dctx->dict = (const BYTE*)dstBufferStart;
+ dctx->dictSize = dstPtr - dstBufferStart + dstSize;
return;
}
- if ((withinTmp) && (dctx->dict == dctx->tmpOutBuffer)) {
- /* assumption : dctx->dict + dctx->dictSize == dctx->tmpOut + dctx->tmpOutStart */
+ assert(dstSize < 64 KB); /* if dstSize >= 64 KB, dictionary would be set into dstBuffer directly */
+
+ /* dstBuffer does not contain whole useful history (64 KB), so it must be saved within tmpOut */
+
+ if ((withinTmp) && (dctx->dict == dctx->tmpOutBuffer)) { /* continue history within tmpOutBuffer */
+ /* withinTmp expectation : content of [dstPtr,dstSize] is same as [dict+dictSize,dstSize], so we just extend it */
+ assert(dctx->dict + dctx->dictSize == dctx->tmpOut + dctx->tmpOutStart);
dctx->dictSize += dstSize;
return;
}
@@ -1236,7 +1257,7 @@ static void LZ4F_updateDict(LZ4F_dctx* dctx, const BYTE* dstPtr, size_t dstSize,
if (dctx->dict == dctx->tmpOutBuffer) { /* copy dst into tmp to complete dict */
if (dctx->dictSize + dstSize > dctx->maxBufferSize) { /* tmp buffer not large enough */
- size_t const preserveSize = 64 KB - dstSize; /* note : dstSize < 64 KB */
+ size_t const preserveSize = 64 KB - dstSize;
memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - preserveSize, preserveSize);
dctx->dictSize = preserveSize;
}
@@ -1246,7 +1267,7 @@ static void LZ4F_updateDict(LZ4F_dctx* dctx, const BYTE* dstPtr, size_t dstSize,
}
/* join dict & dest into tmp */
- { size_t preserveSize = 64 KB - dstSize; /* note : dstSize < 64 KB */
+ { size_t preserveSize = 64 KB - dstSize;
if (preserveSize > dctx->dictSize) preserveSize = dctx->dictSize;
memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - preserveSize, preserveSize);
memcpy(dctx->tmpOutBuffer + preserveSize, dstPtr, dstSize);
@@ -1313,7 +1334,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
}
dctx->tmpInSize = 0;
if (srcEnd-srcPtr == 0) return minFHSize; /* 0-size input */
- dctx->tmpInTarget = minFHSize; /* minimum to attempt decode */
+ dctx->tmpInTarget = minFHSize; /* minimum size to decode header */
dctx->dStage = dstage_storeFrameHeader;
/* fall-through */
@@ -1470,8 +1491,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
U32 const calcCRC = XXH32_digest(&dctx->blockChecksum);
if (readCRC != calcCRC)
return err0r(LZ4F_ERROR_blockChecksum_invalid);
- }
- }
+ } }
dctx->dStage = dstage_getBlockHeader; /* new block */
break;
@@ -1512,13 +1532,13 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
} }
if ((size_t)(dstEnd-dstPtr) >= dctx->maxBlockSize) {
- const char *dict = (const char *)dctx->dict;
+ const char* dict = (const char*)dctx->dict;
size_t dictSize = dctx->dictSize;
int decodedSize;
if (dict && dictSize > 1 GB) {
/* the dictSize param is an int, avoid truncation / sign issues */
- dict += dictSize - 1 GB;
- dictSize = 1 GB;
+ dict += dictSize - 64 KB;
+ dictSize = 64 KB;
}
/* enough capacity in `dst` to decompress directly there */
decodedSize = LZ4_decompress_safe_usingDict(
@@ -1552,18 +1572,16 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
} else { /* dict not within tmp */
size_t const reservedDictSpace = MIN(dctx->dictSize, 64 KB);
dctx->tmpOut = dctx->tmpOutBuffer + reservedDictSpace;
- }
- }
+ } }
/* Decode block */
- {
- const char *dict = (const char *)dctx->dict;
+ { const char* dict = (const char*)dctx->dict;
size_t dictSize = dctx->dictSize;
int decodedSize;
if (dict && dictSize > 1 GB) {
/* the dictSize param is an int, avoid truncation / sign issues */
- dict += dictSize - 1 GB;
- dictSize = 1 GB;
+ dict += dictSize - 64 KB;
+ dictSize = 64 KB;
}
decodedSize = LZ4_decompress_safe_usingDict(
(const char*)selectedIn, (char*)dctx->tmpOut,
@@ -1586,8 +1604,8 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
memcpy(dstPtr, dctx->tmpOut + dctx->tmpOutStart, sizeToCopy);
/* dictionary management */
- if (dctx->frameInfo.blockMode==LZ4F_blockLinked)
- LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 1);
+ if (dctx->frameInfo.blockMode == LZ4F_blockLinked)
+ LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 1 /*withinTmp*/);
dctx->tmpOutStart += sizeToCopy;
dstPtr += sizeToCopy;
@@ -1596,8 +1614,9 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
dctx->dStage = dstage_getBlockHeader; /* get next block */
break;
}
+ /* could not flush everything : stop there, just request a block header */
+ doAnotherStage = 0;
nextSrcSizeHint = BHSize;
- doAnotherStage = 0; /* still some data to flush */
break;
}
@@ -1634,7 +1653,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
selectedIn = dctx->tmpIn;
} /* if (dctx->dStage == dstage_storeSuffix) */
- /* case dstage_checkSuffix: */ /* no direct call, avoid scan-build warning */
+ /* case dstage_checkSuffix: */ /* no direct entry, avoid initialization risks */
{ U32 const readCRC = LZ4F_readLE32(selectedIn);
U32 const resultCRC = XXH32_digest(&(dctx->xxh));
if (readCRC != resultCRC)
@@ -1658,8 +1677,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
if (dctx->dStage == dstage_storeSFrameSize)
case dstage_storeSFrameSize:
- {
- size_t const sizeToCopy = MIN(dctx->tmpInTarget - dctx->tmpInSize,
+ { size_t const sizeToCopy = MIN(dctx->tmpInTarget - dctx->tmpInSize,
(size_t)(srcEnd - srcPtr) );
memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy);
srcPtr += sizeToCopy;
@@ -1673,7 +1691,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
selectedIn = dctx->header + 4;
} /* if (dctx->dStage == dstage_storeSFrameSize) */
- /* case dstage_decodeSFrameSize: */ /* no direct access */
+ /* case dstage_decodeSFrameSize: */ /* no direct entry */
{ size_t const SFrameSize = LZ4F_readLE32(selectedIn);
dctx->frameInfo.contentSize = SFrameSize;
dctx->tmpInTarget = SFrameSize;
@@ -1692,7 +1710,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
LZ4F_resetDecompressionContext(dctx);
break;
}
- }
+ } /* switch (dctx->dStage) */
} /* while (doAnotherStage) */
/* preserve history within tmp whenever necessary */