summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYann Collet <Cyan4973@users.noreply.github.com>2019-04-11 22:42:54 (GMT)
committerGitHub <noreply@github.com>2019-04-11 22:42:54 (GMT)
commit723ba904e2393c69d01f6730bd8b4171e6182845 (patch)
tree520f596bf16fc20a94201b1371efec1c5a694522
parent013fee5665cbf03113c1c2e78d5b50fa9663b306 (diff)
parent8d76c8a44a15cc7c0c1f345ba750e44edac7abb7 (diff)
downloadlz4-723ba904e2393c69d01f6730bd8b4171e6182845.zip
lz4-723ba904e2393c69d01f6730bd8b4171e6182845.tar.gz
lz4-723ba904e2393c69d01f6730bd8b4171e6182845.tar.bz2
Merge pull request #664 from lz4/maxdist
introduce LZ4_DISTANCE_MAX build macro
-rw-r--r--doc/lz4_manual.html1
-rw-r--r--doc/lz4frame_manual.html61
-rw-r--r--lib/README.md7
-rw-r--r--lib/lz4.c35
-rw-r--r--lib/lz4hc.c16
-rw-r--r--programs/Makefile2
6 files changed, 84 insertions, 38 deletions
diff --git a/doc/lz4_manual.html b/doc/lz4_manual.html
index 1c6dba7..ef1a8b5 100644
--- a/doc/lz4_manual.html
+++ b/doc/lz4_manual.html
@@ -396,6 +396,7 @@ union LZ4_stream_u {
Note : initialization fails if size and alignment conditions are not respected.
In which case, the function will @return NULL.
Note2: An LZ4_stream_t structure guarantees correct alignment and size.
+ Note3: Before v1.9.0, use LZ4_resetStream() instead
</p></pre><BR>
diff --git a/doc/lz4frame_manual.html b/doc/lz4frame_manual.html
index 4277c3c..d5496a1 100644
--- a/doc/lz4frame_manual.html
+++ b/doc/lz4frame_manual.html
@@ -237,25 +237,58 @@ LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_dctx* dctx);
<a name="Chapter10"></a><h2>Streaming decompression functions</h2><pre></pre>
+<pre><b>size_t LZ4F_headerSize(const void* src, size_t srcSize);
+</b><p> Provide the header size of a frame starting at `src`.
+ `srcSize` must be >= LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH,
+ which is enough to decode the header length.
+ @return : size of frame header
+ or an error code, which can be tested using LZ4F_isError()
+ note : Frame header size is variable, but is guaranteed to be
+ >= LZ4F_HEADER_SIZE_MIN bytes, and <= LZ4F_HEADER_SIZE_MAX bytes.
+
+</p></pre><BR>
+
<pre><b>size_t LZ4F_getFrameInfo(LZ4F_dctx* dctx,
LZ4F_frameInfo_t* frameInfoPtr,
const void* srcBuffer, size_t* srcSizePtr);
</b><p> This function extracts frame parameters (max blockSize, dictID, etc.).
- Its usage is optional.
- Extracted information is typically useful for allocation and dictionary.
- This function works in 2 situations :
- - At the beginning of a new frame, in which case
- it will decode information from `srcBuffer`, starting the decoding process.
- Input size must be large enough to successfully decode the entire frame header.
- Frame header size is variable, but is guaranteed to be <= LZ4F_HEADER_SIZE_MAX bytes.
- It's allowed to provide more input data than this minimum.
- - After decoding has been started.
- In which case, no input is read, frame parameters are extracted from dctx.
- - If decoding has barely started, but not yet extracted information from header,
+ Its usage is optional: user can call LZ4F_decompress() directly.
+
+ Extracted information will fill an existing LZ4F_frameInfo_t structure.
+ This can be useful for allocation and dictionary identification purposes.
+
+ LZ4F_getFrameInfo() can work in the following situations :
+
+ 1) At the beginning of a new frame, before any invocation of LZ4F_decompress().
+ It will decode header from `srcBuffer`,
+ consuming the header and starting the decoding process.
+
+ Input size must be large enough to contain the full frame header.
+ Frame header size can be known beforehand by LZ4F_headerSize().
+ Frame header size is variable, but is guaranteed to be >= LZ4F_HEADER_SIZE_MIN bytes,
+ and <= LZ4F_HEADER_SIZE_MAX bytes.
+ Hence, blindly providing LZ4F_HEADER_SIZE_MAX bytes or more will always work.
+ It's allowed to provide more input data than the header size,
+ LZ4F_getFrameInfo() will only consume the header.
+
+ If input size is not large enough,
+ aka if it's smaller than header size,
+ function will fail and return an error code.
+
+ 2) After decoding has been started,
+ it's possible to invoke LZ4F_getFrameInfo() anytime
+ to extract already decoded frame parameters stored within dctx.
+
+ Note that, if decoding has barely started,
+ but has not yet read enough information to decode the header,
LZ4F_getFrameInfo() will fail.
- The number of bytes consumed from srcBuffer will be updated within *srcSizePtr (necessarily <= original value).
- Decompression must resume from (srcBuffer + *srcSizePtr).
- @return : an hint about how many srcSize bytes LZ4F_decompress() expects for next call,
+
+ The number of bytes consumed from srcBuffer will be updated in *srcSizePtr (necessarily <= original value).
+ LZ4F_getFrameInfo() only consumes bytes when decoding has not yet started,
+ and when decoding the header has been successful.
+ Decompression must then resume from (srcBuffer + *srcSizePtr).
+
+ @return : a hint about how many srcSize bytes LZ4F_decompress() expects for next call,
or an error code which can be tested using LZ4F_isError().
note 1 : in case of error, dctx is not modified. Decoding operation can resume from beginning safely.
note 2 : frame parameters are *copied into* an already allocated LZ4F_frameInfo_t structure.
diff --git a/lib/README.md b/lib/README.md
index be8eba0..c6daaea 100644
--- a/lib/README.md
+++ b/lib/README.md
@@ -52,6 +52,13 @@ The following build macro can be determined at compilation time :
For example, with `gcc` : `-DLZ4_FAST_DEC_LOOP=1`,
and with `make` : `CPPFLAGS+=-DLZ4_FAST_DEC_LOOP=1 make lz4`.
+- `LZ4_DISTANCE_MAX` : control the maximum offset that the compressor will allow.
+ Set to 65535 by default, which is the maximum value supported by lz4 format.
+ Reducing maximum distance will reduce opportunities for LZ4 to find matches,
+ hence will produce a worse compression ratio.
+ However, a smaller max distance may allow compatibility with specific decoders using limited memory budget.
+ This build macro only influences the compressed output of the compressor.
+
- `LZ4_DISABLE_DEPRECATE_WARNINGS` : invoking a deprecated function will make the compiler generate a warning.
This is meant to invite users to update their source code.
Should this be a problem, it's generally possible to make the compiler ignore these warnings,
diff --git a/lib/lz4.c b/lib/lz4.c
index 34587ce..ca3684f 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -395,8 +395,13 @@ static const int LZ4_minLength = (MFLIMIT+1);
#define MB *(1 <<20)
#define GB *(1U<<30)
-#define MAXD_LOG 16
-#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
+#ifndef LZ4_DISTANCE_MAX /* can be user-defined at compile time */
+# define LZ4_DISTANCE_MAX 65535
+#endif
+
+#if (LZ4_DISTANCE_MAX > 65535) /* max supported by LZ4 format */
+# error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
+#endif
#define ML_BITS 4
#define ML_MASK ((1U<<ML_BITS)-1)
@@ -734,7 +739,7 @@ LZ4_FORCE_INLINE void LZ4_prepareTable(
}
}
- /* Adding a gap, so all previous entries are > MAX_DISTANCE back, is faster
+ /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back, is faster
* than compressing without a gap. However, compressing with
* currentOffset == 0 is faster still, so we preserve that case.
*/
@@ -850,7 +855,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
forwardH = LZ4_hashPosition(forwardIp, tableType);
LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base);
- } while ( (match+MAX_DISTANCE < ip)
+ } while ( (match+LZ4_DISTANCE_MAX < ip)
|| (LZ4_read32(match) != LZ4_read32(ip)) );
} else { /* byU32, byU16 */
@@ -901,8 +906,8 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) continue; /* match outside of valid area */
assert(matchIndex < current);
- if ((tableType != byU16) && (matchIndex+MAX_DISTANCE < current)) continue; /* too far */
- if (tableType == byU16) assert((current - matchIndex) <= MAX_DISTANCE); /* too_far presumed impossible with byU16 */
+ if ((tableType != byU16) && (matchIndex+LZ4_DISTANCE_MAX < current)) continue; /* too far */
+ if (tableType == byU16) assert((current - matchIndex) <= LZ4_DISTANCE_MAX); /* too_far presumed impossible with byU16 */
if (LZ4_read32(match) == LZ4_read32(ip)) {
if (maybe_extMem) offset = current - matchIndex;
@@ -961,11 +966,11 @@ _next_match:
/* Encode Offset */
if (maybe_extMem) { /* static test */
DEBUGLOG(6, " with offset=%u (ext if > %i)", offset, (int)(ip - (const BYTE*)source));
- assert(offset <= MAX_DISTANCE && offset > 0);
+ assert(offset <= LZ4_DISTANCE_MAX && offset > 0);
LZ4_writeLE16(op, (U16)offset); op+=2;
} else {
DEBUGLOG(6, " with offset=%u (same segment)", (U32)(ip - match));
- assert(ip-match <= MAX_DISTANCE);
+ assert(ip-match <= LZ4_DISTANCE_MAX);
LZ4_writeLE16(op, (U16)(ip - match)); op+=2;
}
@@ -1030,7 +1035,7 @@ _next_match:
match = LZ4_getPosition(ip, cctx->hashTable, tableType, base);
LZ4_putPosition(ip, cctx->hashTable, tableType, base);
- if ( (match+MAX_DISTANCE >= ip)
+ if ( (match+LZ4_DISTANCE_MAX >= ip)
&& (LZ4_read32(match) == LZ4_read32(ip)) )
{ token=op++; *token=0; goto _next_match; }
@@ -1065,7 +1070,7 @@ _next_match:
LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
assert(matchIndex < current);
if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
- && ((tableType==byU16) ? 1 : (matchIndex+MAX_DISTANCE >= current))
+ && ((tableType==byU16) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current))
&& (LZ4_read32(match) == LZ4_read32(ip)) ) {
token=op++;
*token=0;
@@ -1132,14 +1137,14 @@ int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int
if (inputSize < LZ4_64Klimit) {
return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
} else {
- const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > MAX_DISTANCE)) ? byPtr : byU32;
+ const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
}
} else {
if (inputSize < LZ4_64Klimit) {;
return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
} else {
- const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > MAX_DISTANCE)) ? byPtr : byU32;
+ const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration);
}
}
@@ -1169,7 +1174,7 @@ int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst
return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
}
} else {
- const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > MAX_DISTANCE)) ? byPtr : byU32;
+ const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
LZ4_prepareTable(ctx, srcSize, tableType);
return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
}
@@ -1183,7 +1188,7 @@ int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst
return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
}
} else {
- const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > MAX_DISTANCE)) ? byPtr : byU32;
+ const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
LZ4_prepareTable(ctx, srcSize, tableType);
return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
}
@@ -1246,7 +1251,7 @@ static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src,
if (*srcSizePtr < LZ4_64Klimit) {
return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1);
} else {
- tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > MAX_DISTANCE)) ? byPtr : byU32;
+ tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, 1);
} }
}
diff --git a/lib/lz4hc.c b/lib/lz4hc.c
index 411b6cc..a6dc7a2 100644
--- a/lib/lz4hc.c
+++ b/lib/lz4hc.c
@@ -132,7 +132,7 @@ LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
while (idx < target) {
U32 const h = LZ4HC_hashPtr(base+idx);
size_t delta = idx - hashTable[h];
- if (delta>MAX_DISTANCE) delta = MAX_DISTANCE;
+ if (delta>LZ4_DISTANCE_MAX) delta = LZ4_DISTANCE_MAX;
DELTANEXTU16(chainTable, idx) = (U16)delta;
hashTable[h] = idx;
idx++;
@@ -235,7 +235,7 @@ LZ4HC_InsertAndGetWiderMatch (
const U32 dictLimit = hc4->dictLimit;
const BYTE* const lowPrefixPtr = base + dictLimit;
const U32 ipIndex = (U32)(ip - base);
- const U32 lowestMatchIndex = (hc4->lowLimit + 64 KB > ipIndex) ? hc4->lowLimit : ipIndex - MAX_DISTANCE;
+ const U32 lowestMatchIndex = (hc4->lowLimit + 64 KB > ipIndex) ? hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX;
const BYTE* const dictBase = hc4->dictBase;
int const lookBackLength = (int)(ip-iLowLimit);
int nbAttempts = maxNbAttempts;
@@ -325,7 +325,7 @@ LZ4HC_InsertAndGetWiderMatch (
const BYTE* const matchPtr = base + matchCandidateIdx;
if (LZ4_read32(matchPtr) == pattern) { /* good candidate */
size_t const forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern);
- const BYTE* const lowestMatchPtr = (lowPrefixPtr + MAX_DISTANCE >= ip) ? lowPrefixPtr : ip - MAX_DISTANCE;
+ const BYTE* const lowestMatchPtr = (lowPrefixPtr + LZ4_DISTANCE_MAX >= ip) ? lowPrefixPtr : ip - LZ4_DISTANCE_MAX;
size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern);
size_t const currentSegmentLength = backLength + forwardPatternLength;
@@ -338,7 +338,7 @@ LZ4HC_InsertAndGetWiderMatch (
size_t const maxML = MIN(currentSegmentLength, srcPatternLength);
if ((size_t)longest < maxML) {
assert(base + matchIndex < ip);
- if (ip - (base+matchIndex) > MAX_DISTANCE) break;
+ if (ip - (base+matchIndex) > LZ4_DISTANCE_MAX) break;
assert(maxML < 2 GB);
longest = (int)maxML;
*matchpos = base + matchIndex; /* virtual pos, relative to ip, to retrieve offset */
@@ -359,12 +359,12 @@ LZ4HC_InsertAndGetWiderMatch (
if ( dict == usingDictCtxHc
&& nbAttempts
- && ipIndex - lowestMatchIndex < MAX_DISTANCE) {
+ && ipIndex - lowestMatchIndex < LZ4_DISTANCE_MAX) {
size_t const dictEndOffset = (size_t)(dictCtx->end - dictCtx->base);
U32 dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)];
assert(dictEndOffset <= 1 GB);
matchIndex = dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset;
- while (ipIndex - matchIndex <= MAX_DISTANCE && nbAttempts--) {
+ while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) {
const BYTE* const matchPtr = dictCtx->base + dictMatchIndex;
if (LZ4_read32(matchPtr) == pattern) {
@@ -453,7 +453,7 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
*op += length;
/* Encode Offset */
- assert( (*ip - match) <= MAX_DISTANCE ); /* note : consider providing offset as a value, rather than as a pointer difference */
+ assert( (*ip - match) <= LZ4_DISTANCE_MAX ); /* note : consider providing offset as a value, rather than as a pointer difference */
LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2;
/* Encode MatchLength */
@@ -1435,7 +1435,7 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
if (ml == 1) { ip++; rPos++; continue; } /* literal; note: can end up with several literals, in which case, skip them */
rPos += ml;
assert(ml >= MINMATCH);
- assert((offset >= 1) && (offset <= MAX_DISTANCE));
+ assert((offset >= 1) && (offset <= LZ4_DISTANCE_MAX));
opSaved = op;
if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ip - offset, limit, oend) ) /* updates ip, op and anchor */
goto _dest_overflow;
diff --git a/programs/Makefile b/programs/Makefile
index af461fe..92fd683 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -94,7 +94,7 @@ lz4.1: lz4.1.md $(LIBVER_SRC)
man: lz4.1
clean-man:
- rm lz4.1
+ $(RM) lz4.1
preview-man: clean-man man
man ./lz4.1