summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYann Collet <cyan@fb.com>2018-05-02 23:05:42 (GMT)
committerYann Collet <cyan@fb.com>2018-05-02 23:05:42 (GMT)
commitc25eb1666654c378a6f9e74c9975181cb2767a2a (patch)
tree2acac6d8a5f25b2b06470c0ce6f2b247418411df
parent858d12e3e1ac072fb0cec188a55bdf1f106fb2da (diff)
downloadlz4-c25eb1666654c378a6f9e74c9975181cb2767a2a.zip
lz4-c25eb1666654c378a6f9e74c9975181cb2767a2a.tar.gz
lz4-c25eb1666654c378a6f9e74c9975181cb2767a2a.tar.bz2
random lz4f clarifications
The initial intention was to update the lz4f ring buffer strategy, but lz4f doesn't use a ring buffer. Instead, it uses the destination buffer as much as possible, and merely copies just what's required to preserve history into its own buffer, at the end. Pretty efficient. This patch just clarifies a few comments and adds some assert(). It's built on top of #528. It also updates the doc.
-rw-r--r--doc/lz4_manual.html54
-rw-r--r--lib/lz4frame.c76
2 files changed, 85 insertions, 45 deletions
diff --git a/doc/lz4_manual.html b/doc/lz4_manual.html
index ddd2724..e079db2 100644
--- a/doc/lz4_manual.html
+++ b/doc/lz4_manual.html
@@ -206,38 +206,59 @@ int LZ4_freeStream (LZ4_stream_t* streamPtr);
<pre><b>LZ4_streamDecode_t* LZ4_createStreamDecode(void);
int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
-</b><p> creation / destruction of streaming decompression tracking structure.
- A tracking structure can be re-used multiple times sequentially.
+</b><p> creation / destruction of streaming decompression tracking context.
+ A tracking context can be re-used multiple times.
+
</p></pre><BR>
<pre><b>int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
-</b><p> An LZ4_streamDecode_t structure can be allocated once and re-used multiple times.
+</b><p> An LZ4_streamDecode_t context can be allocated once and re-used multiple times.
Use this function to start decompression of a new stream of blocks.
A dictionary can optionally be set. Use NULL or size 0 for a reset order.
+ Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
@return : 1 if OK, 0 if error
</p></pre><BR>
+<pre><b>int LZ4_decoderRingBufferSize(int maxBlockSize);
+#define LZ4_DECODER_RING_BUFFER_SIZE(mbs) (65536 + 14 + (mbs)) </b>/* for static allocation; mbs presumed valid */<b>
+</b><p> Note : in a ring buffer scenario (optional),
+ blocks are presumed decompressed next to each other
+ up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize),
+ at which stage it resumes from beginning of ring buffer.
+ When setting such a ring buffer for streaming decompression,
+ provides the minimum size of this ring buffer
+ to be compatible with any source respecting maxBlockSize condition.
+ @return : minimum ring buffer size,
+ or 0 if there is an error (invalid maxBlockSize).
+
+</p></pre><BR>
+
<pre><b>int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity);
int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
</b><p> These decoding functions allow decompression of consecutive blocks in "streaming" mode.
A block is an unsplittable entity, it must be presented entirely to a decompression function.
- Decompression functions only accept one block at a time.
+ Decompression functions only accepts one block at a time.
The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded.
- If less than 64KB of data has been decoded all the data must be present.
-
- Special : if application sets a ring buffer for decompression, it must respect one of the following conditions :
- - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions)
- In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB).
- - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
- maxBlockSize is implementation dependent. It's the maximum size of any single block.
+ If less than 64KB of data has been decoded, all the data must be present.
+
+ Special : if decompression side sets a ring buffer, it must respect one of the following conditions :
+ - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize).
+ maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes.
+ In which case, encoding and decoding buffers do not need to be synchronized.
+ Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize.
+ - Synchronized mode :
+ Decompression buffer size is _exactly_ the same as compression buffer size,
+ and follows exactly same update rule (block boundaries at same positions),
+ and decoding function is provided with exact decompressed size of each block (exception for last block of the stream),
+ _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB).
+ - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes.
In which case, encoding and decoding buffers do not need to be synchronized,
and encoding ring buffer can have any size, including small ones ( < 64 KB).
- - _At least_ 64 KB + 8 bytes + maxBlockSize.
- In which case, encoding and decoding buffers do not need to be synchronized,
- and encoding ring buffer can have any size, including larger than decoding buffer.
- Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer,
- and indicate where it is saved using LZ4_setStreamDecode() before decompressing next block.
+
+ Whenever these conditions are not possible,
+ save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression,
+ then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block.
</p></pre><BR>
<pre><b>int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapacity, const char* dictStart, int dictSize);
@@ -245,6 +266,7 @@ int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize,
</b><p> These decoding functions work the same as
a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
They are stand-alone, and don't need an LZ4_streamDecode_t structure.
+ Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
</p></pre><BR>
diff --git a/lib/lz4frame.c b/lib/lz4frame.c
index 6cf2a06..f57db24 100644
--- a/lib/lz4frame.c
+++ b/lib/lz4frame.c
@@ -96,6 +96,19 @@ You can contact the author at :
#define LZ4F_STATIC_ASSERT(c) { enum { LZ4F_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2) && !defined(DEBUGLOG)
+# include <stdio.h>
+static int g_debuglog_enable = 1;
+# define DEBUGLOG(l, ...) { \
+ if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \
+ fprintf(stderr, __FILE__ ": "); \
+ fprintf(stderr, __VA_ARGS__); \
+ fprintf(stderr, " \n"); \
+ } }
+#else
+# define DEBUGLOG(l, ...) {} /* disabled */
+#endif
+
/*-************************************
* Basic Types
@@ -408,6 +421,7 @@ size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity,
LZ4_stream_t lz4ctx;
LZ4F_cctx_t *cctxPtr = &cctx;
+ DEBUGLOG(4, "LZ4F_compressFrame");
MEM_INIT(&cctx, 0, sizeof(cctx));
cctx.version = LZ4F_VERSION;
cctx.maxBufferSize = 5 MB; /* mess with real buffer size to prevent dynamic allocation; works only because autoflush==1 & stableSrc==1 */
@@ -1198,24 +1212,31 @@ LZ4F_errorCode_t LZ4F_getFrameInfo(LZ4F_dctx* dctx, LZ4F_frameInfo_t* frameInfoP
/* LZ4F_updateDict() :
* only used for LZ4F_blockLinked mode */
-static void LZ4F_updateDict(LZ4F_dctx* dctx, const BYTE* dstPtr, size_t dstSize, const BYTE* dstPtr0, unsigned withinTmp)
+static void LZ4F_updateDict(LZ4F_dctx* dctx,
+ const BYTE* dstPtr, size_t dstSize, const BYTE* dstBufferStart,
+ unsigned withinTmp)
{
if (dctx->dictSize==0)
dctx->dict = (const BYTE*)dstPtr; /* priority to dictionary continuity */
- if (dctx->dict + dctx->dictSize == dstPtr) { /* dictionary continuity */
+ if (dctx->dict + dctx->dictSize == dstPtr) { /* dictionary continuity, directly within dstBuffer */
dctx->dictSize += dstSize;
return;
}
- if (dstPtr - dstPtr0 + dstSize >= 64 KB) { /* dstBuffer large enough to become dictionary */
- dctx->dict = (const BYTE*)dstPtr0;
- dctx->dictSize = dstPtr - dstPtr0 + dstSize;
+ if (dstPtr - dstBufferStart + dstSize >= 64 KB) { /* history in dstBuffer becomes large enough to become dictionary */
+ dctx->dict = (const BYTE*)dstBufferStart;
+ dctx->dictSize = dstPtr - dstBufferStart + dstSize;
return;
}
- if ((withinTmp) && (dctx->dict == dctx->tmpOutBuffer)) {
- /* assumption : dctx->dict + dctx->dictSize == dctx->tmpOut + dctx->tmpOutStart */
+ assert(dstSize < 64 KB); /* if dstSize >= 64 KB, dictionary would be set into dstBuffer directly */
+
+ /* dstBuffer does not contain whole useful history (64 KB), so it must be saved within tmpOut */
+
+ if ((withinTmp) && (dctx->dict == dctx->tmpOutBuffer)) { /* continue history within tmpOutBuffer */
+ /* withinTmp expectation : content of [dstPtr,dstSize] is same as [dict+dictSize,dstSize], so we just extend it */
+ assert(dctx->dict + dctx->dictSize == dctx->tmpOut + dctx->tmpOutStart);
dctx->dictSize += dstSize;
return;
}
@@ -1236,7 +1257,7 @@ static void LZ4F_updateDict(LZ4F_dctx* dctx, const BYTE* dstPtr, size_t dstSize,
if (dctx->dict == dctx->tmpOutBuffer) { /* copy dst into tmp to complete dict */
if (dctx->dictSize + dstSize > dctx->maxBufferSize) { /* tmp buffer not large enough */
- size_t const preserveSize = 64 KB - dstSize; /* note : dstSize < 64 KB */
+ size_t const preserveSize = 64 KB - dstSize;
memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - preserveSize, preserveSize);
dctx->dictSize = preserveSize;
}
@@ -1246,7 +1267,7 @@ static void LZ4F_updateDict(LZ4F_dctx* dctx, const BYTE* dstPtr, size_t dstSize,
}
/* join dict & dest into tmp */
- { size_t preserveSize = 64 KB - dstSize; /* note : dstSize < 64 KB */
+ { size_t preserveSize = 64 KB - dstSize;
if (preserveSize > dctx->dictSize) preserveSize = dctx->dictSize;
memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - preserveSize, preserveSize);
memcpy(dctx->tmpOutBuffer + preserveSize, dstPtr, dstSize);
@@ -1313,7 +1334,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
}
dctx->tmpInSize = 0;
if (srcEnd-srcPtr == 0) return minFHSize; /* 0-size input */
- dctx->tmpInTarget = minFHSize; /* minimum to attempt decode */
+ dctx->tmpInTarget = minFHSize; /* minimum size to decode header */
dctx->dStage = dstage_storeFrameHeader;
/* fall-through */
@@ -1470,8 +1491,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
U32 const calcCRC = XXH32_digest(&dctx->blockChecksum);
if (readCRC != calcCRC)
return err0r(LZ4F_ERROR_blockChecksum_invalid);
- }
- }
+ } }
dctx->dStage = dstage_getBlockHeader; /* new block */
break;
@@ -1512,13 +1532,13 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
} }
if ((size_t)(dstEnd-dstPtr) >= dctx->maxBlockSize) {
- const char *dict = (const char *)dctx->dict;
+ const char* dict = (const char*)dctx->dict;
size_t dictSize = dctx->dictSize;
int decodedSize;
if (dict && dictSize > 1 GB) {
/* the dictSize param is an int, avoid truncation / sign issues */
- dict += dictSize - 1 GB;
- dictSize = 1 GB;
+ dict += dictSize - 64 KB;
+ dictSize = 64 KB;
}
/* enough capacity in `dst` to decompress directly there */
decodedSize = LZ4_decompress_safe_usingDict(
@@ -1552,18 +1572,16 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
} else { /* dict not within tmp */
size_t const reservedDictSpace = MIN(dctx->dictSize, 64 KB);
dctx->tmpOut = dctx->tmpOutBuffer + reservedDictSpace;
- }
- }
+ } }
/* Decode block */
- {
- const char *dict = (const char *)dctx->dict;
+ { const char* dict = (const char*)dctx->dict;
size_t dictSize = dctx->dictSize;
int decodedSize;
if (dict && dictSize > 1 GB) {
/* the dictSize param is an int, avoid truncation / sign issues */
- dict += dictSize - 1 GB;
- dictSize = 1 GB;
+ dict += dictSize - 64 KB;
+ dictSize = 64 KB;
}
decodedSize = LZ4_decompress_safe_usingDict(
(const char*)selectedIn, (char*)dctx->tmpOut,
@@ -1586,8 +1604,8 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
memcpy(dstPtr, dctx->tmpOut + dctx->tmpOutStart, sizeToCopy);
/* dictionary management */
- if (dctx->frameInfo.blockMode==LZ4F_blockLinked)
- LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 1);
+ if (dctx->frameInfo.blockMode == LZ4F_blockLinked)
+ LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 1 /*withinTmp*/);
dctx->tmpOutStart += sizeToCopy;
dstPtr += sizeToCopy;
@@ -1596,8 +1614,9 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
dctx->dStage = dstage_getBlockHeader; /* get next block */
break;
}
+ /* could not flush everything : stop there, just request a block header */
+ doAnotherStage = 0;
nextSrcSizeHint = BHSize;
- doAnotherStage = 0; /* still some data to flush */
break;
}
@@ -1634,7 +1653,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
selectedIn = dctx->tmpIn;
} /* if (dctx->dStage == dstage_storeSuffix) */
- /* case dstage_checkSuffix: */ /* no direct call, avoid scan-build warning */
+ /* case dstage_checkSuffix: */ /* no direct entry, avoid initialization risks */
{ U32 const readCRC = LZ4F_readLE32(selectedIn);
U32 const resultCRC = XXH32_digest(&(dctx->xxh));
if (readCRC != resultCRC)
@@ -1658,8 +1677,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
if (dctx->dStage == dstage_storeSFrameSize)
case dstage_storeSFrameSize:
- {
- size_t const sizeToCopy = MIN(dctx->tmpInTarget - dctx->tmpInSize,
+ { size_t const sizeToCopy = MIN(dctx->tmpInTarget - dctx->tmpInSize,
(size_t)(srcEnd - srcPtr) );
memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy);
srcPtr += sizeToCopy;
@@ -1673,7 +1691,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
selectedIn = dctx->header + 4;
} /* if (dctx->dStage == dstage_storeSFrameSize) */
- /* case dstage_decodeSFrameSize: */ /* no direct access */
+ /* case dstage_decodeSFrameSize: */ /* no direct entry */
{ size_t const SFrameSize = LZ4F_readLE32(selectedIn);
dctx->frameInfo.contentSize = SFrameSize;
dctx->tmpInTarget = SFrameSize;
@@ -1692,7 +1710,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
LZ4F_resetDecompressionContext(dctx);
break;
}
- }
+ } /* switch (dctx->dStage) */
} /* while (doAnotherStage) */
/* preserve history within tmp whenever necessary */