Diffstat (limited to 'lib')
-rw-r--r--  lib/lz4.c       101
-rw-r--r--  lib/lz4.h        54
-rw-r--r--  lib/lz4frame.c   76
-rw-r--r--  lib/lz4hc.c     120
4 files changed, 215 insertions(+), 136 deletions(-)
diff --git a/lib/lz4.c b/lib/lz4.c
index 1e0d8e9..3860c51 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -1398,6 +1398,9 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic(
const int safeDecode = (endOnInput==endOnInputSize);
const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
+ /* Set up the "end" pointers for the shortcut. */
+ const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
+ const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;
/* Special cases */
if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => just decode everything */
@@ -1407,39 +1410,56 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic(
/* Main Loop : decode sequences */
while (1) {
- size_t length;
const BYTE* match;
size_t offset;
unsigned const token = *ip++;
+ size_t length = token >> ML_BITS; /* literal length */
assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
- /* shortcut for common case :
- * in most circumstances, we expect to decode small matches (<= 18 bytes) separated by few literals (<= 14 bytes).
- * this shortcut was tested on x86 and x64, where it improves decoding speed.
- * it has not yet been benchmarked on ARM, Power, mips, etc.
- * NOTE: The loop begins with a read, so we must have one byte left at the end. */
- if (endOnInput
- && ((ip + 14 /*maxLL*/ + 2 /*offset*/ < iend)
- & (op + 14 /*maxLL*/ + 18 /*maxML*/ <= oend)
- & (token < (15<<ML_BITS))
- & ((token & ML_MASK) != 15) ) ) {
- size_t const ll = token >> ML_BITS;
- size_t const off = LZ4_readLE16(ip+ll);
- const BYTE* const matchPtr = op + ll - off; /* pointer underflow risk ? */
- if ((off >= 8) /* do not deal with overlapping matches */ & (matchPtr >= lowPrefix)) {
- size_t const ml = (token & ML_MASK) + MINMATCH;
- memcpy(op, ip, 16); op += ll; ip += ll + 2 /*offset*/;
- memcpy(op + 0, matchPtr + 0, 8);
- memcpy(op + 8, matchPtr + 8, 8);
- memcpy(op +16, matchPtr +16, 2);
- op += ml;
+
+ /* A two-stage shortcut for the most common case:
+ * 1) If the literal length is 0..14, and there is enough space,
+ * enter the shortcut and copy 16 bytes on behalf of the literals
+ * (in the fast mode, only 8 bytes can be safely copied this way).
+ * 2) Furthermore, if the match length is 4..18, copy 18 bytes in a similar
+ * manner; but we ensure that there's enough space in the output for
+ * those 18 bytes earlier, upon entering the shortcut (in other words,
+ * there is a combined check for both stages).
+ */
+ if ( (endOnInput ? length != RUN_MASK : length <= 8)
+ /* strictly "less than" on input, to re-enter the loop with at least one byte */
+ && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
+ /* Copy the literals */
+ memcpy(op, ip, endOnInput ? 16 : 8);
+ op += length; ip += length;
+
+ /* The second stage: prepare for match copying, decode full info.
+ * If it doesn't work out, the info won't be wasted. */
+ length = token & ML_MASK; /* match length */
+ offset = LZ4_readLE16(ip); ip += 2;
+ match = op - offset;
+
+ /* Do not deal with overlapping matches. */
+ if ( (length != ML_MASK)
+ && (offset >= 8)
+ && (dict==withPrefix64k || match >= lowPrefix) ) {
+ /* Copy the match. */
+ memcpy(op + 0, match + 0, 8);
+ memcpy(op + 8, match + 8, 8);
+ memcpy(op +16, match +16, 2);
+ op += length + MINMATCH;
+ /* Both stages worked, load the next token. */
continue;
}
+
+ /* The second stage didn't work out, but the info is ready.
+ * Propel it right to the point of match copying. */
+ goto _copy_match;
}
/* decode literal length */
- if ((length=(token>>ML_BITS)) == RUN_MASK) {
+ if (length == RUN_MASK) {
unsigned s;
if (unlikely(endOnInput ? ip >= iend-RUN_MASK : 0)) goto _output_error; /* overflow detection */
do {
@@ -1473,11 +1493,14 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic(
/* get offset */
offset = LZ4_readLE16(ip); ip+=2;
match = op - offset;
- if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */
- LZ4_write32(op, (U32)offset); /* costs ~1%; silence an msan warning when offset==0 */
/* get matchlength */
length = token & ML_MASK;
+
+_copy_match:
+ if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */
+ LZ4_write32(op, (U32)offset); /* costs ~1%; silence an msan warning when offset==0 */
+
if (length == ML_MASK) {
unsigned s;
do {
@@ -1664,12 +1687,11 @@ int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
return 0;
}
-/*!
- * LZ4_setStreamDecode() :
- * Use this function to instruct where to find the dictionary.
- * This function is not necessary if previous data is still available where it was decoded.
- * Loading a size of 0 is allowed (same effect as no dictionary).
- * Return : 1 if OK, 0 if error
+/*! LZ4_setStreamDecode() :
+ * Use this function to instruct where to find the dictionary.
+ * This function is not necessary if previous data is still available where it was decoded.
+ * Loading a size of 0 is allowed (same effect as no dictionary).
+ * @return : 1 if OK, 0 if error
*/
int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize)
{
@@ -1681,6 +1703,25 @@ int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dicti
return 1;
}
+/*! LZ4_decoderRingBufferSize() :
+ * when setting a ring buffer for streaming decompression (optional scenario),
+ * this function provides the minimum size of that ring buffer
+ * to be compatible with any source respecting the maxBlockSize condition.
+ * Note : in a ring buffer scenario,
+ * blocks are presumed decompressed next to each other.
+ * When not enough space remains for the next block (remainingSize < maxBlockSize),
+ * decoding resumes from the beginning of the ring buffer.
+ * @return : minimum ring buffer size,
+ * or 0 if there is an error (invalid maxBlockSize).
+ */
+int LZ4_decoderRingBufferSize(int maxBlockSize)
+{
+ if (maxBlockSize < 0) return 0;
+ if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0;
+ if (maxBlockSize < 16) maxBlockSize = 16;
+ return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize);
+}
+
/*
*_continue() :
These decoding functions allow decompression of multiple blocks in "streaming" mode.
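For illustration, a minimal sketch of the invariants behind the new two-stage shortcut; the constants come from the LZ4 block format, and token_is_short is an illustrative helper, not part of the patch:

#include <stdint.h>

#define ML_BITS  4
#define ML_MASK  ((1U<<ML_BITS)-1)   /* 15 ; RUN_MASK has the same value */
#define MINMATCH 4

/* The shortcut only handles tokens without extended length bytes :
 * literal length 0..14, match length MINMATCH + (0..14) = 4..18. */
static int token_is_short(uint8_t token)
{
    return ((token >> ML_BITS) != ML_MASK)   /* literal length code != 15 */
        && ((token &  ML_MASK) != ML_MASK);  /* match length code  != 15 */
}

/* Worst case consumed by one shortcut iteration :
 *   input  : 14 literals (maxLL) + 2 offset bytes          => shortiend = iend - 14 - 2
 *   output : 14 literals (maxLL) + 18 match bytes (maxML)  => shortoend = oend - 14 - 18
 * Staying within these margins makes the fixed 16-byte literal copy
 * and the 8+8+2 match copy safe without per-byte bounds checks. */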
diff --git a/lib/lz4.h b/lib/lz4.h
index d8238f5..7d13122 100644
--- a/lib/lz4.h
+++ b/lib/lz4.h
@@ -293,41 +293,62 @@ LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxD
* Streaming Decompression Functions
* Bufferless synchronous API
************************************************/
-typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* incomplete type (defined later) */
+typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* tracking context */
/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() :
- * creation / destruction of streaming decompression tracking structure.
- * A tracking structure can be re-used multiple times sequentially. */
+ * creation / destruction of streaming decompression tracking context.
+ * A tracking context can be re-used multiple times.
+ */
LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void);
LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
/*! LZ4_setStreamDecode() :
- * An LZ4_streamDecode_t structure can be allocated once and re-used multiple times.
+ * An LZ4_streamDecode_t context can be allocated once and re-used multiple times.
* Use this function to start decompression of a new stream of blocks.
+ * A dictionary can optionally be set. Use NULL or size 0 for a reset order.
+ * Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
* @return : 1 if OK, 0 if error
*/
LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
+/*! LZ4_decoderRingBufferSize() : v1.8.2
+ * Note : in a ring buffer scenario (optional),
+ * blocks are presumed decompressed next to each other
+ * up to the moment there is not enough remaining space for the next block (remainingSize < maxBlockSize),
+ * at which stage decoding resumes from the beginning of the ring buffer.
+ * When setting such a ring buffer for streaming decompression,
+ * this function provides the minimum size of that ring buffer
+ * to be compatible with any source respecting the maxBlockSize condition.
+ * @return : minimum ring buffer size,
+ * or 0 if there is an error (invalid maxBlockSize).
+ */
+LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize);
+#define LZ4_DECODER_RING_BUFFER_SIZE(mbs) (65536 + 14 + (mbs)) /* for static allocation; mbs presumed valid */
+
/*! LZ4_decompress_*_continue() :
* These decoding functions allow decompression of consecutive blocks in "streaming" mode.
* A block is an unsplittable entity, it must be presented entirely to a decompression function.
- * Decompression functions only accept one block at a time.
+ * Decompression functions only accept one block at a time.
* The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded.
- * If less than 64KB of data has been decoded all the data must be present.
+ * If less than 64KB of data has been decoded, all the data must be present.
*
- * Special : if application sets a ring buffer for decompression, it must respect one of the following conditions :
- * - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions)
- * In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB).
- * - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
- * maxBlockSize is implementation dependent. It's the maximum size of any single block.
+ * Special : if decompression side sets a ring buffer, it must respect one of the following conditions :
+ * - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize).
+ * maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes.
+ * In which case, encoding and decoding buffers do not need to be synchronized.
+ * Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize.
+ * - Synchronized mode :
+ * Decompression buffer size is _exactly_ the same as compression buffer size,
+ * and follows exactly same update rule (block boundaries at same positions),
+ * and decoding function is provided with exact decompressed size of each block (exception for last block of the stream),
+ * _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB).
+ * - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes.
* In which case, encoding and decoding buffers do not need to be synchronized,
* and encoding ring buffer can have any size, including small ones ( < 64 KB).
- * - _At least_ 64 KB + 8 bytes + maxBlockSize.
- * In which case, encoding and decoding buffers do not need to be synchronized,
- * and encoding ring buffer can have any size, including larger than decoding buffer.
- * Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer,
- * and indicate where it is saved using LZ4_setStreamDecode() before decompressing next block.
+ *
+ * Whenever these conditions are not possible,
+ * save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression,
+ * then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block.
*/
LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity);
LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
@@ -337,6 +358,7 @@ LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecod
* These decoding functions work the same as
* a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
* They are stand-alone, and don't need an LZ4_streamDecode_t structure.
+ * Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
*/
LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapacity, const char* dictStart, int dictSize);
LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize);
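The ring buffer contract above can be exercised as follows; a minimal sketch assuming a hypothetical transport (recv_block, consume) and a maxBlockSize agreed with the encoder, with everything else being the public API declared in this header:

#include "lz4.h"
#include <stdlib.h>

#define MAX_BLOCK_SIZE (64*1024)                  /* assumption : agreed with the encoder */

extern int  recv_block(char* dst, int capacity);  /* hypothetical : returns compressed size, 0 at end */
extern void consume(const char* data, int size);  /* hypothetical sink */

int decode_ring(void)
{
    int const ringSize = LZ4_decoderRingBufferSize(MAX_BLOCK_SIZE);
    char* const ring = (char*)malloc((size_t)ringSize);
    LZ4_streamDecode_t* const lz4sd = LZ4_createStreamDecode();
    static char cSrc[LZ4_COMPRESSBOUND(MAX_BLOCK_SIZE)];
    int pos = 0, cSize;

    if (ring == NULL || lz4sd == NULL) { free(ring); LZ4_freeStreamDecode(lz4sd); return -1; }
    LZ4_setStreamDecode(lz4sd, NULL, 0);          /* fresh stream, no dictionary */
    while ((cSize = recv_block(cSrc, (int)sizeof(cSrc))) > 0) {
        int dSize;
        if (ringSize - pos < MAX_BLOCK_SIZE) pos = 0;  /* wrap : no room for a full block */
        dSize = LZ4_decompress_safe_continue(lz4sd, cSrc, ring + pos,
                                             cSize, MAX_BLOCK_SIZE);
        if (dSize < 0) break;                     /* invalid or corrupted input */
        consume(ring + pos, dSize);
        pos += dSize;                             /* next block decodes right after this one */
    }
    LZ4_freeStreamDecode(lz4sd);
    free(ring);
    return 0;
}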
diff --git a/lib/lz4frame.c b/lib/lz4frame.c
index 91e5a43..aa7889b 100644
--- a/lib/lz4frame.c
+++ b/lib/lz4frame.c
@@ -96,6 +96,19 @@ You can contact the author at :
#define LZ4F_STATIC_ASSERT(c) { enum { LZ4F_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2) && !defined(DEBUGLOG)
+# include <stdio.h>
+static int g_debuglog_enable = 1;
+# define DEBUGLOG(l, ...) { \
+ if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \
+ fprintf(stderr, __FILE__ ": "); \
+ fprintf(stderr, __VA_ARGS__); \
+ fprintf(stderr, " \n"); \
+ } }
+#else
+# define DEBUGLOG(l, ...) {} /* disabled */
+#endif
+
/*-************************************
* Basic Types
@@ -408,6 +421,7 @@ size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity,
LZ4_stream_t lz4ctx;
LZ4F_cctx_t *cctxPtr = &cctx;
+ DEBUGLOG(4, "LZ4F_compressFrame");
MEM_INIT(&cctx, 0, sizeof(cctx));
cctx.version = LZ4F_VERSION;
cctx.maxBufferSize = 5 MB; /* mess with real buffer size to prevent dynamic allocation; works only because autoflush==1 & stableSrc==1 */
@@ -1207,24 +1221,31 @@ LZ4F_errorCode_t LZ4F_getFrameInfo(LZ4F_dctx* dctx, LZ4F_frameInfo_t* frameInfoP
/* LZ4F_updateDict() :
* only used for LZ4F_blockLinked mode */
-static void LZ4F_updateDict(LZ4F_dctx* dctx, const BYTE* dstPtr, size_t dstSize, const BYTE* dstPtr0, unsigned withinTmp)
+static void LZ4F_updateDict(LZ4F_dctx* dctx,
+ const BYTE* dstPtr, size_t dstSize, const BYTE* dstBufferStart,
+ unsigned withinTmp)
{
if (dctx->dictSize==0)
dctx->dict = (const BYTE*)dstPtr; /* priority to dictionary continuity */
- if (dctx->dict + dctx->dictSize == dstPtr) { /* dictionary continuity */
+ if (dctx->dict + dctx->dictSize == dstPtr) { /* dictionary continuity, directly within dstBuffer */
dctx->dictSize += dstSize;
return;
}
- if (dstPtr - dstPtr0 + dstSize >= 64 KB) { /* dstBuffer large enough to become dictionary */
- dctx->dict = (const BYTE*)dstPtr0;
- dctx->dictSize = dstPtr - dstPtr0 + dstSize;
+ if (dstPtr - dstBufferStart + dstSize >= 64 KB) { /* history in dstBuffer becomes large enough to become dictionary */
+ dctx->dict = (const BYTE*)dstBufferStart;
+ dctx->dictSize = dstPtr - dstBufferStart + dstSize;
return;
}
- if ((withinTmp) && (dctx->dict == dctx->tmpOutBuffer)) {
- /* assumption : dctx->dict + dctx->dictSize == dctx->tmpOut + dctx->tmpOutStart */
+ assert(dstSize < 64 KB); /* if dstSize >= 64 KB, dictionary would be set into dstBuffer directly */
+
+ /* dstBuffer does not contain whole useful history (64 KB), so it must be saved within tmpOut */
+
+ if ((withinTmp) && (dctx->dict == dctx->tmpOutBuffer)) { /* continue history within tmpOutBuffer */
+ /* withinTmp expectation : content of [dstPtr,dstSize] is same as [dict+dictSize,dstSize], so we just extend it */
+ assert(dctx->dict + dctx->dictSize == dctx->tmpOut + dctx->tmpOutStart);
dctx->dictSize += dstSize;
return;
}
@@ -1245,7 +1266,7 @@ static void LZ4F_updateDict(LZ4F_dctx* dctx, const BYTE* dstPtr, size_t dstSize,
if (dctx->dict == dctx->tmpOutBuffer) { /* copy dst into tmp to complete dict */
if (dctx->dictSize + dstSize > dctx->maxBufferSize) { /* tmp buffer not large enough */
- size_t const preserveSize = 64 KB - dstSize; /* note : dstSize < 64 KB */
+ size_t const preserveSize = 64 KB - dstSize;
memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - preserveSize, preserveSize);
dctx->dictSize = preserveSize;
}
@@ -1255,7 +1276,7 @@ static void LZ4F_updateDict(LZ4F_dctx* dctx, const BYTE* dstPtr, size_t dstSize,
}
/* join dict & dest into tmp */
- { size_t preserveSize = 64 KB - dstSize; /* note : dstSize < 64 KB */
+ { size_t preserveSize = 64 KB - dstSize;
if (preserveSize > dctx->dictSize) preserveSize = dctx->dictSize;
memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - preserveSize, preserveSize);
memcpy(dctx->tmpOutBuffer + preserveSize, dstPtr, dstSize);
@@ -1322,7 +1343,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
}
dctx->tmpInSize = 0;
if (srcEnd-srcPtr == 0) return minFHSize; /* 0-size input */
- dctx->tmpInTarget = minFHSize; /* minimum to attempt decode */
+ dctx->tmpInTarget = minFHSize; /* minimum size to decode header */
dctx->dStage = dstage_storeFrameHeader;
/* fall-through */
@@ -1479,8 +1500,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
U32 const calcCRC = XXH32_digest(&dctx->blockChecksum);
if (readCRC != calcCRC)
return err0r(LZ4F_ERROR_blockChecksum_invalid);
- }
- }
+ } }
dctx->dStage = dstage_getBlockHeader; /* new block */
break;
@@ -1521,13 +1541,13 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
} }
if ((size_t)(dstEnd-dstPtr) >= dctx->maxBlockSize) {
- const char *dict = (const char *)dctx->dict;
+ const char* dict = (const char*)dctx->dict;
size_t dictSize = dctx->dictSize;
int decodedSize;
if (dict && dictSize > 1 GB) {
/* the dictSize param is an int, avoid truncation / sign issues */
- dict += dictSize - 1 GB;
- dictSize = 1 GB;
+ dict += dictSize - 64 KB;
+ dictSize = 64 KB;
}
/* enough capacity in `dst` to decompress directly there */
decodedSize = LZ4_decompress_safe_usingDict(
@@ -1561,18 +1581,16 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
} else { /* dict not within tmp */
size_t const reservedDictSpace = MIN(dctx->dictSize, 64 KB);
dctx->tmpOut = dctx->tmpOutBuffer + reservedDictSpace;
- }
- }
+ } }
/* Decode block */
- {
- const char *dict = (const char *)dctx->dict;
+ { const char* dict = (const char*)dctx->dict;
size_t dictSize = dctx->dictSize;
int decodedSize;
if (dict && dictSize > 1 GB) {
/* the dictSize param is an int, avoid truncation / sign issues */
- dict += dictSize - 1 GB;
- dictSize = 1 GB;
+ dict += dictSize - 64 KB;
+ dictSize = 64 KB;
}
decodedSize = LZ4_decompress_safe_usingDict(
(const char*)selectedIn, (char*)dctx->tmpOut,
@@ -1595,8 +1613,8 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
memcpy(dstPtr, dctx->tmpOut + dctx->tmpOutStart, sizeToCopy);
/* dictionary management */
- if (dctx->frameInfo.blockMode==LZ4F_blockLinked)
- LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 1);
+ if (dctx->frameInfo.blockMode == LZ4F_blockLinked)
+ LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 1 /*withinTmp*/);
dctx->tmpOutStart += sizeToCopy;
dstPtr += sizeToCopy;
@@ -1605,8 +1623,9 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
dctx->dStage = dstage_getBlockHeader; /* get next block */
break;
}
+ /* could not flush everything : stop there, just request a block header */
+ doAnotherStage = 0;
nextSrcSizeHint = BHSize;
- doAnotherStage = 0; /* still some data to flush */
break;
}
@@ -1643,7 +1662,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
selectedIn = dctx->tmpIn;
} /* if (dctx->dStage == dstage_storeSuffix) */
- /* case dstage_checkSuffix: */ /* no direct call, avoid scan-build warning */
+ /* case dstage_checkSuffix: */ /* no direct entry, avoid initialization risks */
{ U32 const readCRC = LZ4F_readLE32(selectedIn);
U32 const resultCRC = XXH32_digest(&(dctx->xxh));
if (readCRC != resultCRC)
@@ -1667,8 +1686,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
if (dctx->dStage == dstage_storeSFrameSize)
case dstage_storeSFrameSize:
- {
- size_t const sizeToCopy = MIN(dctx->tmpInTarget - dctx->tmpInSize,
+ { size_t const sizeToCopy = MIN(dctx->tmpInTarget - dctx->tmpInSize,
(size_t)(srcEnd - srcPtr) );
memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy);
srcPtr += sizeToCopy;
@@ -1682,7 +1700,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
selectedIn = dctx->header + 4;
} /* if (dctx->dStage == dstage_storeSFrameSize) */
- /* case dstage_decodeSFrameSize: */ /* no direct access */
+ /* case dstage_decodeSFrameSize: */ /* no direct entry */
{ size_t const SFrameSize = LZ4F_readLE32(selectedIn);
dctx->frameInfo.contentSize = SFrameSize;
dctx->tmpInTarget = SFrameSize;
@@ -1701,7 +1719,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
LZ4F_resetDecompressionContext(dctx);
break;
}
- }
+ } /* switch (dctx->dStage) */
} /* while (doAnotherStage) */
/* preserve history within tmp whenever necessary */
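The decoding state machine above is driven by repeated LZ4F_decompress() calls; a minimal sketch of that call pattern, assuming file-based I/O and arbitrary buffer sizes (the LZ4F_* calls are the public lz4frame.h API):

#include "lz4frame.h"
#include <stdio.h>

int decompress_frame(FILE* in, FILE* out)
{
    LZ4F_dctx* dctx;
    static char srcBuf[64*1024], dstBuf[256*1024];
    size_t ret = 1;                               /* nonzero : frame not finished */

    if (LZ4F_isError(LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION)))
        return -1;
    while (ret != 0) {
        size_t const srcSize = fread(srcBuf, 1, sizeof(srcBuf), in);
        const char* srcPtr = srcBuf;
        const char* const srcEnd = srcBuf + srcSize;
        if (srcSize == 0) break;                  /* truncated frame */
        /* one read may require several calls when dstBuf fills up */
        while (srcPtr < srcEnd && ret != 0) {
            size_t dstSize = sizeof(dstBuf);
            size_t inSize = (size_t)(srcEnd - srcPtr);
            ret = LZ4F_decompress(dctx, dstBuf, &dstSize, srcPtr, &inSize, NULL);
            if (LZ4F_isError(ret)) break;
            fwrite(dstBuf, 1, dstSize, out);      /* flush whatever was produced */
            srcPtr += inSize;                     /* inSize = bytes actually consumed */
        }
        if (LZ4F_isError(ret)) break;
    }
    LZ4F_freeDecompressionContext(dctx);
    return LZ4F_isError(ret) ? -1 : 0;
}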
diff --git a/lib/lz4hc.c b/lib/lz4hc.c
index 948d66d..0859ea6 100644
--- a/lib/lz4hc.c
+++ b/lib/lz4hc.c
@@ -138,6 +138,7 @@ int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
{
int back = 0;
int const min = (int)MAX(iMin - ip, mMin - match);
+ assert(min <= 0);
assert(ip >= iMin); assert((size_t)(ip-iMin) < (1U<<31));
assert(match >= mMin); assert((size_t)(match - mMin) < (1U<<31));
while ( (back > min)
@@ -222,9 +223,9 @@ LZ4HC_InsertAndGetWiderMatch (
const U32 dictLimit = hc4->dictLimit;
const BYTE* const lowPrefixPtr = base + dictLimit;
const U32 ipIndex = (U32)(ip - base);
- const U32 lowLimit = (hc4->lowLimit + 64 KB > ipIndex) ? hc4->lowLimit : ipIndex - MAX_DISTANCE;
+ const U32 lowestMatchIndex = (hc4->lowLimit + 64 KB > ipIndex) ? hc4->lowLimit : ipIndex - MAX_DISTANCE;
const BYTE* const dictBase = hc4->dictBase;
- int const delta = (int)(ip-iLowLimit);
+ int const lookBackLength = (int)(ip-iLowLimit);
int nbAttempts = maxNbAttempts;
U32 const pattern = LZ4_read32(ip);
U32 matchIndex;
@@ -236,10 +237,10 @@ LZ4HC_InsertAndGetWiderMatch (
/* First Match */
LZ4HC_Insert(hc4, ip);
matchIndex = HashTable[LZ4HC_hashPtr(ip)];
- DEBUGLOG(7, "First match at index %u / %u (lowLimit)",
- matchIndex, lowLimit);
+ DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)",
+ matchIndex, lowestMatchIndex);
- while ((matchIndex>=lowLimit) && (nbAttempts)) {
+ while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) {
DEBUGLOG(7, "remaining attempts : %i", nbAttempts);
nbAttempts--;
assert(matchIndex < ipIndex);
@@ -247,34 +248,35 @@ LZ4HC_InsertAndGetWiderMatch (
/* do nothing */
} else if (matchIndex >= dictLimit) {
const BYTE* const matchPtr = base + matchIndex;
+ assert(matchPtr >= lowPrefixPtr);
+ assert(matchPtr < ip);
assert(longest >= 1);
- if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - delta + longest - 1)) {
+ if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) {
if (LZ4_read32(matchPtr) == pattern) {
int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
- int const back = delta ? LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr) : 0;
+ int const back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr) : 0;
mlt -= back;
-
if (mlt > longest) {
longest = mlt;
*matchpos = matchPtr+back;
*startpos = ip+back;
- } }
- }
+ } } }
} else { /* matchIndex < dictLimit */
const BYTE* const matchPtr = dictBase + matchIndex;
if (LZ4_read32(matchPtr) == pattern) {
+ const BYTE* const dictStart = dictBase + hc4->lowLimit;
int mlt;
int back = 0;
const BYTE* vLimit = ip + (dictLimit - matchIndex);
if (vLimit > iHighLimit) vLimit = iHighLimit;
mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
if ((ip+mlt == vLimit) && (vLimit < iHighLimit))
- mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit);
- back = delta ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictBase+lowLimit) : 0;
+ mlt += LZ4_count(ip+mlt, lowPrefixPtr, iHighLimit);
+ back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictStart) : 0;
mlt -= back;
if (mlt > longest) {
longest = mlt;
- *matchpos = base + matchIndex + back;
+ *matchpos = base + matchIndex + back; /* virtual pos, relative to ip, to retrieve offset */
*startpos = ip + back;
} } }
@@ -306,13 +308,13 @@ LZ4HC_InsertAndGetWiderMatch (
matchIndex -= (U32)backLength; /* let's go to farthest segment position, will find a match of length currentSegmentLength + maybe some back */
}
} } } }
- } /* while ((matchIndex>=lowLimit) && (nbAttempts)) */
+ } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */
- if (dict == usingDictCtx && nbAttempts && ipIndex - lowLimit < MAX_DISTANCE) {
+ if (dict == usingDictCtx && nbAttempts && ipIndex - lowestMatchIndex < MAX_DISTANCE) {
size_t const dictEndOffset = dictCtx->end - dictCtx->base;
assert(dictEndOffset <= 1 GB);
dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)];
- matchIndex = dictMatchIndex + lowLimit - (U32)dictEndOffset;
+ matchIndex = dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset;
while (ipIndex - matchIndex <= MAX_DISTANCE && nbAttempts--) {
const BYTE* const matchPtr = dictCtx->base + dictMatchIndex;
@@ -322,7 +324,7 @@ LZ4HC_InsertAndGetWiderMatch (
const BYTE* vLimit = ip + (dictEndOffset - dictMatchIndex);
if (vLimit > iHighLimit) vLimit = iHighLimit;
mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
- back = delta ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->base + dictCtx->dictLimit) : 0;
+ back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->base + dictCtx->dictLimit) : 0;
mlt -= back;
if (mlt > longest) {
longest = mlt;
@@ -459,14 +461,14 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
BYTE* op = (BYTE*) dest;
BYTE* oend = op + maxOutputSize;
- int ml, ml2, ml3, ml0;
+ int ml0, ml, ml2, ml3;
+ const BYTE* start0;
+ const BYTE* ref0;
const BYTE* ref = NULL;
const BYTE* start2 = NULL;
const BYTE* ref2 = NULL;
const BYTE* start3 = NULL;
const BYTE* ref3 = NULL;
- const BYTE* start0;
- const BYTE* ref0;
/* init */
*srcSizePtr = 0;
@@ -479,31 +481,27 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
if (ml<MINMATCH) { ip++; continue; }
/* saved, in case we would skip too much */
- start0 = ip;
- ref0 = ref;
- ml0 = ml;
+ start0 = ip; ref0 = ref; ml0 = ml;
_Search2:
- if (ip+ml <= mflimit)
+ if (ip+ml <= mflimit) {
ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2,
maxNbAttempts, patternAnalysis, dict, favorCompressionRatio);
- else
+ } else {
ml2 = ml;
+ }
- if (ml2 == ml) { /* No better match */
+ if (ml2 == ml) { /* No better match => encode ML1 */
optr = op;
if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow;
continue;
}
- if (start0 < ip) {
- if (start2 < ip + ml0) { /* empirical */
- ip = start0;
- ref = ref0;
- ml = ml0;
- }
- }
+ if (start0 < ip) { /* first match was skipped at least once */
+ if (start2 < ip + ml0) { /* squeezing ML1 between ML0 (original ML1) and ML2 */
+ ip = start0; ref = ref0; ml = ml0; /* restore initial ML1 */
+ } }
/* Here, start0==ip */
if ((start2 - ip) < 3) { /* First Match too small : removed */
@@ -531,14 +529,15 @@ _Search3:
}
/* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
- if (start2 + ml2 <= mflimit)
+ if (start2 + ml2 <= mflimit) {
ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3,
maxNbAttempts, patternAnalysis, dict, favorCompressionRatio);
- else
+ } else {
ml3 = ml2;
+ }
- if (ml3 == ml2) { /* No better match : 2 sequences to encode */
+ if (ml3 == ml2) { /* No better match => encode ML1 and ML2 */
/* ip & ref are known; Now for ml */
if (start2 < ip+ml) ml = (int)(start2 - ip);
/* Now, encode 2 sequences */
@@ -583,11 +582,12 @@ _Search3:
}
/*
- * OK, now we have 3 ascending matches; let's write at least the first one
- * ip & ref are known; Now for ml
+ * OK, now we have 3 ascending matches;
+ * let's write the first one ML1.
+ * ip & ref are known; Now decide ml.
*/
if (start2 < ip+ml) {
- if ((start2 - ip) < (int)ML_MASK) {
+ if ((start2 - ip) < OPTIMAL_ML) {
int correction;
if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
@@ -604,14 +604,13 @@ _Search3:
optr = op;
if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow;
- ip = start2;
- ref = ref2;
- ml = ml2;
+ /* ML2 becomes ML1 */
+ ip = start2; ref = ref2; ml = ml2;
- start2 = start3;
- ref2 = ref3;
- ml2 = ml3;
+ /* ML3 becomes ML2 */
+ start2 = start3; ref2 = ref3; ml2 = ml3;
+ /* let's find a new ML3 */
goto _Search3;
}
@@ -682,19 +681,19 @@ LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal (
U32 targetLength;
} cParams_t;
static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = {
- { lz4hc, 2, 16 }, /* 0, unused */
- { lz4hc, 2, 16 }, /* 1, unused */
- { lz4hc, 2, 16 }, /* 2, unused */
- { lz4hc, 4, 16 }, /* 3 */
- { lz4hc, 8, 16 }, /* 4 */
- { lz4hc, 16, 16 }, /* 5 */
- { lz4hc, 32, 16 }, /* 6 */
- { lz4hc, 64, 16 }, /* 7 */
- { lz4hc, 128, 16 }, /* 8 */
- { lz4hc, 256, 16 }, /* 9 */
- { lz4opt, 96, 64 }, /*10==LZ4HC_CLEVEL_OPT_MIN*/
- { lz4opt, 512,128 }, /*11 */
- { lz4opt,8192, LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */
+ { lz4hc, 2, 16 }, /* 0, unused */
+ { lz4hc, 2, 16 }, /* 1, unused */
+ { lz4hc, 2, 16 }, /* 2, unused */
+ { lz4hc, 4, 16 }, /* 3 */
+ { lz4hc, 8, 16 }, /* 4 */
+ { lz4hc, 16, 16 }, /* 5 */
+ { lz4hc, 32, 16 }, /* 6 */
+ { lz4hc, 64, 16 }, /* 7 */
+ { lz4hc, 128, 16 }, /* 8 */
+ { lz4hc, 256, 16 }, /* 9 */
+ { lz4opt, 96, 64 }, /*10==LZ4HC_CLEVEL_OPT_MIN*/
+ { lz4opt, 512,128 }, /*11 */
+ { lz4opt,16384,LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */
};
DEBUGLOG(4, "LZ4HC_compress_generic(%p, %p, %d)", ctx, src, *srcSizePtr);
@@ -705,8 +704,6 @@ LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal (
ctx->end += *srcSizePtr;
if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT; /* note : convention is different from lz4frame, maybe something to review */
cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel);
- assert(cLevel >= 0);
- assert(cLevel <= LZ4HC_CLEVEL_MAX);
{ cParams_t const cParam = clTable[cLevel];
HCfavor_e const favor = ctx->favorDecSpeed ? favorDecompressionSpeed : favorCompressionRatio;
if (cParam.strat == lz4hc)
@@ -905,7 +902,8 @@ void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC
static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock)
{
DEBUGLOG(4, "LZ4HC_setExternalDict(%p, %p)", ctxPtr, newBlock);
- if (ctxPtr->end >= ctxPtr->base + 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
+ if (ctxPtr->end >= ctxPtr->base + ctxPtr->dictLimit + 4)
+ LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
/* Only one memory segment for extDict, so any previous extDict is lost at this stage */
ctxPtr->lowLimit = ctxPtr->dictLimit;
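
The clTable above maps each compression level to a search strategy; a minimal sketch of how a caller reaches it through the public entry point (input data and sizing are assumptions, LZ4_compress_HC() is the real lz4hc.h API):

#include "lz4.h"
#include "lz4hc.h"

int compress_at_level(const char* src, int srcSize, char* dst, int dstCapacity, int level)
{
    /* same conventions as LZ4HC_compress_generic_internal :
     * level < 1 falls back to LZ4HC_CLEVEL_DEFAULT (9),
     * level > LZ4HC_CLEVEL_MAX (12) is clamped ;
     * levels 3..9 pick the hash chain rows (attempts doubling from 4 to 256),
     * levels 10..12 pick the lz4opt rows. dst is typically sized with
     * LZ4_compressBound(srcSize). */
    return LZ4_compress_HC(src, dst, srcSize, dstCapacity, level);
}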