From f1fa91d6fc74576dcde98b6514cfc8cc57f6f50f Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 8 Oct 2017 23:40:21 -0700 Subject: insertAndFindBestMatch defers to insertAndGetWiderMatch --- lib/lz4hc.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index d7f8d23..f4a0981 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -116,7 +116,7 @@ LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip) hc4->nextToUpdate = target; } - +#if 0 LZ4_FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */ const BYTE* const ip, const BYTE* const iLimit, const BYTE** matchpos, @@ -163,7 +163,7 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_CCtx_internal* const hc return (int)ml; } - +#endif LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( LZ4HC_CCtx_internal* hc4, @@ -234,6 +234,16 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( return longest; } +LZ4_FORCE_INLINE +int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */ + const BYTE* const ip, const BYTE* const iLimit, + const BYTE** matchpos, + const int maxNbAttempts) +{ + const BYTE* uselessPtr = ip; + return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts); +} + typedef enum { noLimit = 0, @@ -353,7 +363,7 @@ static int LZ4HC_compress_hashChain ( /* Main Loop */ while (ip < mflimit) { ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts); - if (!ml) { ip++; continue; } + if (ml Date: Sun, 8 Oct 2017 23:55:42 -0700 Subject: improved search of rep-1 patterns --- lib/lz4hc.c | 158 +++++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 131 insertions(+), 27 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index f4a0981..e28d682 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -165,6 +165,54 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_CCtx_internal* const hc } #endif +/** LZ4HC_countBack() : + * @return : negative value, nb of common bytes before ip/match */ +static int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match, + const BYTE* const iMin, const BYTE* const mMin) +{ + int back=0; + while ( (ip+back > iMin) + && (match+back > mMin) + && (ip[back-1] == match[back-1])) + back--; + return back; +} + +static unsigned LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, reg_t pattern) +{ + const BYTE* const iStart = ip; + + while (likely(ip=iLow+4)) { + if (LZ4_read32(ip-4) != pattern) break; + ip -= 4; + } + while (likely(ip>iLow)) { + if (ip[-1] != (BYTE)pattern) break; + ip--; + } + + return (unsigned)(iStart - ip); +} + +typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e; + LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( LZ4HC_CCtx_internal* hc4, const BYTE* const ip, @@ -180,11 +228,13 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( const BYTE* const base = hc4->base; const U32 dictLimit = hc4->dictLimit; const BYTE* const lowPrefixPtr = base + dictLimit; - const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1); + const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - MAX_DISTANCE; const BYTE* const dictBase = hc4->dictBase; - int const delta = (int)(ip-iLowLimit); int nbAttempts = maxNbAttempts; + reg_t const pattern = LZ4_read_ARCH(ip); U32 matchIndex; + repeat_state_e repeat = rep_untested; + size_t srcPatternLength = 0; /* First Match */ @@ -195,27 +245,29 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( nbAttempts--; if (matchIndex >= dictLimit) { const BYTE* const matchPtr = base + matchIndex; - if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) { - if (LZ4_read32(matchPtr) == LZ4_read32(ip)) { - int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); - int back = 0; - - while ( (ip+back > iLowLimit) - && (matchPtr+back > lowPrefixPtr) - && (ip[back-1] == matchPtr[back-1])) { - back--; - } - - mlt -= back; + if (LZ4_read32(matchPtr) == (U32)pattern) { + int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); +#if 0 + /* more generic but unfortunately slower ... */ + int const back = LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr); +#else + int back = 0; + while ( (ip+back > iLowLimit) + && (matchPtr+back > lowPrefixPtr) + && (ip[back-1] == matchPtr[back-1])) { + back--; + } +#endif + mlt -= back; - if (mlt > longest) { - longest = mlt; - *matchpos = matchPtr+back; - *startpos = ip+back; - } } } - } else { + if (mlt > longest) { + longest = mlt; + *matchpos = matchPtr+back; + *startpos = ip+back; + } } + } else { /* matchIndex < dictLimit */ const BYTE* const matchPtr = dictBase + matchIndex; - if (LZ4_read32(matchPtr) == LZ4_read32(ip)) { + if (LZ4_read32(matchPtr) == (U32)pattern) { int mlt; int back=0; const BYTE* vLimit = ip + (dictLimit - matchIndex); @@ -223,13 +275,65 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; if ((ip+mlt == vLimit) && (vLimit < iHighLimit)) mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit); - while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == matchPtr[back-1])) back--; + while ( (ip+back > iLowLimit) + && (matchIndex+back > lowLimit) + && (ip[back-1] == matchPtr[back-1])) + back--; mlt -= back; - if (mlt > longest) { longest = mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; } - } - } - matchIndex -= DELTANEXTU16(chainTable, matchIndex); - } + if (mlt > longest) { + longest = mlt; + *matchpos = base + matchIndex + back; + *startpos = ip + back; + } } } + + { U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex); + matchIndex -= nextOffset; + if (1 && (nextOffset==1)) { + /* may be a repeated pattern */ + if (repeat == rep_untested) { + if (LZ4_read32(ip+4) == (U32)pattern) { /* should check ip limit */ + repeat = rep_confirmed; + srcPatternLength = LZ4HC_countPattern(ip+8, iHighLimit, pattern) + 8; + } else { + repeat = rep_not; + } } + if ( (repeat == rep_confirmed) /* proven repeated pattern (1-2-4) */ + && (matchIndex >= dictLimit) ) { /* same segment only */ + const BYTE* const matchPtr = base + matchIndex; + if (LZ4_read_ARCH(matchPtr) == pattern) { /* good candidate */ + size_t const forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern); + const BYTE* const maxLowPtr = (lowPrefixPtr + MAX_DISTANCE >= ip) ? lowPrefixPtr : ip - MAX_DISTANCE; + size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, maxLowPtr, (U32)pattern); + size_t const currentSegmentLength = backLength + forwardPatternLength; + + if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */ + && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */ +#if 1 + matchIndex += (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */ +#else + const BYTE* const matchCandidate = matchPtr + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, pattern might be followed by more match */ + int matchLength = (int)(LZ4_count(ip + srcPatternLength, matchCandidate + srcPatternLength, iHighLimit) + srcPatternLength); + int back = 0; + while ( (ip+back > iLowLimit) + && (matchPtr+back > lowPrefixPtr) + && (ip[back-1] == matchPtr[back-1])) { + back--; + } + matchLength -= back; + if (matchLength > longest) { + longest = matchLength; + *matchpos = base + matchIndex + back; + *startpos = ip + back; + } + matchIndex -= (U32)backLength; + matchIndex -= DELTANEXTU16(chainTable, matchIndex); /* skip directly to next potential pattern segment */ +#endif + } else { + matchIndex -= (U32)backLength; /* let's go to farthest segment position, will find a match of length currentSegmentLength + maybe some back */ + //matchIndex -= DELTANEXTU16(chainTable, matchIndex); /* skip directly to following candidate; slightly faster, but miss some rare corner cases (likely when back is useful)*/ + } + } } } } + } /* while ((matchIndex>=lowLimit) && (nbAttempts)) */ return longest; } -- cgit v0.12 From 1ee17e4eb8268aecbec3611f77061a38478a22ad Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 9 Oct 2017 00:31:12 -0700 Subject: optional fuse --- lib/lz4hc.c | 121 ++++++++++++++++++++++++++---------------------------------- 1 file changed, 53 insertions(+), 68 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index e28d682..010ac2d 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -116,55 +116,6 @@ LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip) hc4->nextToUpdate = target; } -#if 0 -LZ4_FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */ - const BYTE* const ip, const BYTE* const iLimit, - const BYTE** matchpos, - const int maxNbAttempts) -{ - U16* const chainTable = hc4->chainTable; - U32* const HashTable = hc4->hashTable; - const BYTE* const base = hc4->base; - const BYTE* const dictBase = hc4->dictBase; - const U32 dictLimit = hc4->dictLimit; - const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1); - U32 matchIndex; - int nbAttempts = maxNbAttempts; - size_t ml = 0; - - /* HC4 match finder */ - LZ4HC_Insert(hc4, ip); - matchIndex = HashTable[LZ4HC_hashPtr(ip)]; - - while ((matchIndex>=lowLimit) && (nbAttempts)) { - nbAttempts--; - if (matchIndex >= dictLimit) { - const BYTE* const match = base + matchIndex; - if ( (*(match+ml) == *(ip+ml)) /* can be longer */ - && (LZ4_read32(match) == LZ4_read32(ip)) ) - { - size_t const mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; - if (mlt > ml) { ml = mlt; *matchpos = match; } - } - } else { - const BYTE* const match = dictBase + matchIndex; - if (LZ4_read32(match) == LZ4_read32(ip)) { - size_t mlt; - const BYTE* vLimit = ip + (dictLimit - matchIndex); - if (vLimit > iLimit) vLimit = iLimit; - mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; - if ((ip+mlt == vLimit) && (vLimit < iLimit)) - mlt += LZ4_count(ip+mlt, base+dictLimit, iLimit); - if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */ - } - } - matchIndex -= DELTANEXTU16(chainTable, matchIndex); - } - - return (int)ml; -} -#endif - /** LZ4HC_countBack() : * @return : negative value, nb of common bytes before ip/match */ static int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match, @@ -308,26 +259,7 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */ && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */ -#if 1 matchIndex += (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */ -#else - const BYTE* const matchCandidate = matchPtr + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, pattern might be followed by more match */ - int matchLength = (int)(LZ4_count(ip + srcPatternLength, matchCandidate + srcPatternLength, iHighLimit) + srcPatternLength); - int back = 0; - while ( (ip+back > iLowLimit) - && (matchPtr+back > lowPrefixPtr) - && (ip[back-1] == matchPtr[back-1])) { - back--; - } - matchLength -= back; - if (matchLength > longest) { - longest = matchLength; - *matchpos = base + matchIndex + back; - *startpos = ip + back; - } - matchIndex -= (U32)backLength; - matchIndex -= DELTANEXTU16(chainTable, matchIndex); /* skip directly to next potential pattern segment */ -#endif } else { matchIndex -= (U32)backLength; /* let's go to farthest segment position, will find a match of length currentSegmentLength + maybe some back */ //matchIndex -= DELTANEXTU16(chainTable, matchIndex); /* skip directly to following candidate; slightly faster, but miss some rare corner cases (likely when back is useful)*/ @@ -338,6 +270,7 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( return longest; } +#if 0 LZ4_FORCE_INLINE int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */ const BYTE* const ip, const BYTE* const iLimit, @@ -348,6 +281,58 @@ int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index tabl return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts); } +#else + +LZ4_FORCE_INLINE +int LZ4HC_InsertAndFindBestMatch (LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */ + const BYTE* const ip, const BYTE* const iLimit, + const BYTE** matchpos, + const int maxNbAttempts) +{ + U16* const chainTable = hc4->chainTable; + U32* const HashTable = hc4->hashTable; + const BYTE* const base = hc4->base; + const BYTE* const dictBase = hc4->dictBase; + const U32 dictLimit = hc4->dictLimit; + const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1); + U32 matchIndex; + int nbAttempts = maxNbAttempts; + size_t ml = 0; + + /* HC4 match finder */ + LZ4HC_Insert(hc4, ip); + matchIndex = HashTable[LZ4HC_hashPtr(ip)]; + + while ((matchIndex>=lowLimit) && (nbAttempts)) { + nbAttempts--; + if (matchIndex >= dictLimit) { + const BYTE* const match = base + matchIndex; + if ( (*(match+ml) == *(ip+ml)) /* can be longer */ + && (LZ4_read32(match) == LZ4_read32(ip)) ) + { + size_t const mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + if (mlt > ml) { ml = mlt; *matchpos = match; } + } + } else { + const BYTE* const match = dictBase + matchIndex; + if (LZ4_read32(match) == LZ4_read32(ip)) { + size_t mlt; + const BYTE* vLimit = ip + (dictLimit - matchIndex); + if (vLimit > iLimit) vLimit = iLimit; + mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; + if ((ip+mlt == vLimit) && (vLimit < iLimit)) + mlt += LZ4_count(ip+mlt, base+dictLimit, iLimit); + if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */ + } + } + matchIndex -= DELTANEXTU16(chainTable, matchIndex); + } + + return (int)ml; +} +#endif + + typedef enum { noLimit = 0, -- cgit v0.12 From bdca63ed69dc1873b385c87b25d64fa5ebd595f7 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 9 Oct 2017 00:36:47 -0700 Subject: early out is not better --- lib/README.md | 2 +- lib/lz4hc.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/README.md b/lib/README.md index 7082fe3..fc5d4e9 100644 --- a/lib/README.md +++ b/lib/README.md @@ -15,7 +15,7 @@ They generate and decode data using [LZ4 block format]. For more compression ratio at the cost of compression speed, the High Compression variant called **lz4hc** is available. -Add files **`lz4hc.c`** and **`lz4hc.h`**. +Add files **`lz4hc.c`**, **`lz4hc.h`** and **`lz4opt.h`**. The variant still depends on regular `lib/lz4.*` source files. diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 010ac2d..19636c5 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -262,7 +262,6 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( matchIndex += (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */ } else { matchIndex -= (U32)backLength; /* let's go to farthest segment position, will find a match of length currentSegmentLength + maybe some back */ - //matchIndex -= DELTANEXTU16(chainTable, matchIndex); /* skip directly to following candidate; slightly faster, but miss some rare corner cases (likely when back is useful)*/ } } } } } } /* while ((matchIndex>=lowLimit) && (nbAttempts)) */ -- cgit v0.12 From 97c18f5f0edf89fb7846bc2923bdc1568a2d95c1 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 9 Oct 2017 01:44:05 -0700 Subject: re-inserted last byte test in widerMatch --- lib/lz4hc.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 19636c5..bd75911 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -181,6 +181,7 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( const BYTE* const lowPrefixPtr = base + dictLimit; const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - MAX_DISTANCE; const BYTE* const dictBase = hc4->dictBase; + int const delta = (int)(ip-iLowLimit); int nbAttempts = maxNbAttempts; reg_t const pattern = LZ4_read_ARCH(ip); U32 matchIndex; @@ -196,26 +197,28 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( nbAttempts--; if (matchIndex >= dictLimit) { const BYTE* const matchPtr = base + matchIndex; - if (LZ4_read32(matchPtr) == (U32)pattern) { - int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); -#if 0 - /* more generic but unfortunately slower ... */ - int const back = LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr); -#else - int back = 0; - while ( (ip+back > iLowLimit) - && (matchPtr+back > lowPrefixPtr) - && (ip[back-1] == matchPtr[back-1])) { - back--; - } -#endif - mlt -= back; + if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) { + if (LZ4_read32(matchPtr) == (U32)pattern) { + int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); + #if 0 + /* more generic but unfortunately slower ... */ + int const back = LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr); + #else + int back = 0; + while ( (ip+back > iLowLimit) + && (matchPtr+back > lowPrefixPtr) + && (ip[back-1] == matchPtr[back-1])) { + back--; + } + #endif + mlt -= back; - if (mlt > longest) { - longest = mlt; - *matchpos = matchPtr+back; - *startpos = ip+back; - } } + if (mlt > longest) { + longest = mlt; + *matchpos = matchPtr+back; + *startpos = ip+back; + } } + } } else { /* matchIndex < dictLimit */ const BYTE* const matchPtr = dictBase + matchIndex; if (LZ4_read32(matchPtr) == (U32)pattern) { -- cgit v0.12 From a4314829db575453911046e2b0d5a19697b14a67 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 9 Oct 2017 01:50:28 -0700 Subject: fused getLongerMatch and getWiderMatch --- lib/lz4hc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index bd75911..47474d3 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -272,7 +272,7 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( return longest; } -#if 0 +#if 1 LZ4_FORCE_INLINE int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */ const BYTE* const ip, const BYTE* const iLimit, -- cgit v0.12 From 708e2cbb60c7c074cbe9277388abc397403ef418 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 19 Oct 2017 16:39:40 -0700 Subject: simplified early exit when single solution --- lib/lz4opt.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/lz4opt.h b/lib/lz4opt.h index 584dc97..1f8f580 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -245,11 +245,11 @@ static int LZ4HC_compress_optimal ( if ((size_t)matches[match_num-1].len > sufficient_len) { /* good enough solution : immediate encoding */ - best_mlen = matches[match_num-1].len; - best_off = matches[match_num-1].off; - cur = 0; - last_pos = 1; - goto encode; + int const firstML = (int)matches[match_num-1].len; + const BYTE* const matchPos = ip - matches[match_num-1].off; + if ( LZ4HC_encodeSequence(&ip, &op, &anchor, (int)firstML, matchPos, limit, oend) ) /* updates ip, op and anchor */ + return 0; /* error */ + continue; } /* set prices using matches at position = 0 */ -- cgit v0.12 From ac2ad52257defcc944471d0fdf3c6d68abebe904 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 19 Oct 2017 16:43:36 -0700 Subject: renamed last_pos into last_match_pos --- lib/lz4opt.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/lib/lz4opt.h b/lib/lz4opt.h index 1f8f580..e6a748c 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -197,7 +197,7 @@ LZ4_FORCE_INLINE int LZ4HC_BinTree_GetAllMatches ( #define SET_PRICE(pos, ml, offset, ll, cost) \ { \ - while (last_pos < pos) { opt[last_pos+1].price = 1<<30; last_pos++; } \ + while (last_match_pos < pos) { opt[last_match_pos+1].price = 1<<30; last_match_pos++; } \ opt[pos].mlen = (int)ml; \ opt[pos].off = (int)offset; \ opt[pos].litlen = (int)ll; \ @@ -236,7 +236,7 @@ static int LZ4HC_compress_optimal ( /* Main Loop */ while (ip < mflimit) { size_t const llen = ip - anchor; - size_t last_pos = 0; + size_t last_match_pos = 0; size_t match_num, cur, best_mlen, best_off; memset(opt, 0, sizeof(LZ4HC_optimal_t)); /* memset only the first one */ @@ -259,14 +259,14 @@ static int LZ4HC_compress_optimal ( best_mlen = matches[matchNb].len; /* necessarily < sufficient_len < LZ4_OPT_NUM */ for ( ; mlen <= best_mlen ; mlen++) { size_t const cost = LZ4HC_sequencePrice(llen, mlen) - LZ4HC_literalsPrice(llen); - SET_PRICE(mlen, mlen, matches[matchNb].off, 0, cost); /* updates last_pos and opt[pos] */ + SET_PRICE(mlen, mlen, matches[matchNb].off, 0, cost); /* updates last_match_pos and opt[pos] */ } } } - if (last_pos < MINMATCH) { ip++; continue; } /* note : on clang at least, this test improves performance */ + if (last_match_pos < MINMATCH) { ip++; continue; } /* note : on clang at least, this test improves performance */ /* check further positions */ opt[0].mlen = opt[1].mlen = 1; - for (cur = 1; cur <= last_pos; cur++) { + for (cur = 1; cur <= last_match_pos; cur++) { const BYTE* const curPtr = ip + cur; /* establish baseline price if cur is literal */ @@ -285,17 +285,17 @@ static int LZ4HC_compress_optimal ( } if (price < (size_t)opt[cur].price) - SET_PRICE(cur, 1 /*mlen*/, 0 /*off*/, litlen, price); /* note : increases last_pos */ + SET_PRICE(cur, 1 /*mlen*/, 0 /*off*/, litlen, price); /* note : increases last_match_pos */ } - if (cur == last_pos || curPtr >= mflimit) break; + if (cur == last_match_pos || curPtr >= mflimit) break; match_num = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); if ((match_num > 0) && (size_t)matches[match_num-1].len > sufficient_len) { /* immediate encoding */ best_mlen = matches[match_num-1].len; best_off = matches[match_num-1].off; - last_pos = cur + 1; + last_match_pos = cur + 1; goto encode; } @@ -319,16 +319,16 @@ static int LZ4HC_compress_optimal ( price = opt[cur].price + LZ4HC_sequencePrice(0, ml); } - if (cur + ml > last_pos || price < (size_t)opt[cur + ml].price) { + if (cur + ml > last_match_pos || price < (size_t)opt[cur + ml].price) { SET_PRICE(cur + ml, ml, matches[matchNb].off, ll, price); } } } } - } /* for (cur = 1; cur <= last_pos; cur++) */ + } /* for (cur = 1; cur <= last_match_pos; cur++) */ - best_mlen = opt[last_pos].mlen; - best_off = opt[last_pos].off; - cur = last_pos - best_mlen; + best_mlen = opt[last_match_pos].mlen; + best_off = opt[last_match_pos].off; + cur = last_match_pos - best_mlen; -encode: /* cur, last_pos, best_mlen, best_off must be set */ +encode: /* cur, last_match_pos, best_mlen, best_off must be set */ opt[0].mlen = 1; while (1) { /* from end to beginning */ size_t const ml = opt[cur].mlen; @@ -343,7 +343,7 @@ encode: /* cur, last_pos, best_mlen, best_off must be set */ /* encode all recorded sequences */ cur = 0; - while (cur < last_pos) { + while (cur < last_match_pos) { int const ml = opt[cur].mlen; int const offset = opt[cur].off; if (ml == 1) { ip++; cur++; continue; } -- cgit v0.12 From 6cec68de39345ac754d75b5ed694b7e3e28cc3ec Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 19 Oct 2017 16:47:25 -0700 Subject: added assert --- lib/lz4.c | 8 ++++++++ lib/lz4opt.h | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/lz4.c b/lib/lz4.c index 19967f2..016e5c7 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -289,6 +289,14 @@ static const int LZ4_minLength = (MFLIMIT+1); /*-************************************ * Error detection **************************************/ +#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1) +# include +#else +# ifndef assert +# define assert(condition) ((void)0) +# endif +#endif + #define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ #if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2) diff --git a/lib/lz4opt.h b/lib/lz4opt.h index e6a748c..69c48dc 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -262,7 +262,7 @@ static int LZ4HC_compress_optimal ( SET_PRICE(mlen, mlen, matches[matchNb].off, 0, cost); /* updates last_match_pos and opt[pos] */ } } } - if (last_match_pos < MINMATCH) { ip++; continue; } /* note : on clang at least, this test improves performance */ + assert(last_match_pos >= MINMATCH); /* check further positions */ opt[0].mlen = opt[1].mlen = 1; -- cgit v0.12 From 7bb0a617eeefacbbda15a0966319bed02ed68ac9 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 20 Oct 2017 11:00:10 -0700 Subject: simplified initial cost conditions llen integrated in opt[] --- lib/lz4opt.h | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/lib/lz4opt.h b/lib/lz4opt.h index 69c48dc..f8eb50e 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -252,20 +252,29 @@ static int LZ4HC_compress_optimal ( continue; } - /* set prices using matches at position = 0 */ + /* set prices for first positions (literals) */ + { size_t rPos; + for (rPos = 0 ; rPos < MINMATCH ; rPos++) { + int const cost = (int)LZ4HC_literalsPrice(llen + rPos); + opt[rPos].mlen = 1; + opt[rPos].off = 0; + opt[rPos].litlen = (int)(llen + rPos); + opt[rPos].price = cost; + DEBUGLOG(7, "rPos:%3u => cost:%3i (litlen=%i)", + (U32)rPos, cost, opt[rPos].litlen); + } } + /* set prices using matches found for rPos = 0 */ { size_t matchNb; for (matchNb = 0; matchNb < match_num; matchNb++) { size_t mlen = (matchNb>0) ? (size_t)matches[matchNb-1].len+1 : MINMATCH; best_mlen = matches[matchNb].len; /* necessarily < sufficient_len < LZ4_OPT_NUM */ for ( ; mlen <= best_mlen ; mlen++) { - size_t const cost = LZ4HC_sequencePrice(llen, mlen) - LZ4HC_literalsPrice(llen); + size_t const cost = LZ4HC_sequencePrice(llen, mlen); SET_PRICE(mlen, mlen, matches[matchNb].off, 0, cost); /* updates last_match_pos and opt[pos] */ } } } - assert(last_match_pos >= MINMATCH); /* check further positions */ - opt[0].mlen = opt[1].mlen = 1; for (cur = 1; cur <= last_match_pos; cur++) { const BYTE* const curPtr = ip + cur; @@ -274,11 +283,7 @@ static int LZ4HC_compress_optimal ( if (opt[cur-1].mlen == 1) { /* no match at previous position */ litlen = opt[cur-1].litlen + 1; - if (cur > litlen) { - price = opt[cur - litlen].price + LZ4HC_literalsPrice(litlen); - } else { - price = LZ4HC_literalsPrice(llen + litlen) - LZ4HC_literalsPrice(llen); - } + price = opt[cur-1].price - LZ4HC_literalsPrice(litlen-1) + LZ4HC_literalsPrice(litlen); } else { litlen = 1; price = opt[cur - 1].price + LZ4HC_literalsPrice(1); @@ -313,7 +318,7 @@ static int LZ4HC_compress_optimal ( if (cur > ll) price = opt[cur - ll].price + LZ4HC_sequencePrice(ll, ml); else - price = LZ4HC_sequencePrice(llen + ll, ml) - LZ4HC_literalsPrice(llen); + price = LZ4HC_sequencePrice(ll, ml); } else { ll = 0; price = opt[cur].price + LZ4HC_sequencePrice(0, ml); -- cgit v0.12 From c058753393063b4ec72de996b103327710f2c36b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 20 Oct 2017 11:24:56 -0700 Subject: refactor variable matchnum separate initial and iterative search renamed nb_matches --- lib/lz4opt.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/lib/lz4opt.h b/lib/lz4opt.h index f8eb50e..c7c95a8 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -197,7 +197,7 @@ LZ4_FORCE_INLINE int LZ4HC_BinTree_GetAllMatches ( #define SET_PRICE(pos, ml, offset, ll, cost) \ { \ - while (last_match_pos < pos) { opt[last_match_pos+1].price = 1<<30; last_match_pos++; } \ + while (last_match_pos < pos) { opt[last_match_pos+1].price = 1<<30; last_match_pos++; } \ opt[pos].mlen = (int)ml; \ opt[pos].off = (int)offset; \ opt[pos].litlen = (int)ll; \ @@ -237,16 +237,15 @@ static int LZ4HC_compress_optimal ( while (ip < mflimit) { size_t const llen = ip - anchor; size_t last_match_pos = 0; - size_t match_num, cur, best_mlen, best_off; - memset(opt, 0, sizeof(LZ4HC_optimal_t)); /* memset only the first one */ + size_t cur, best_mlen, best_off; - match_num = LZ4HC_BinTree_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches, fullUpdate); - if (!match_num) { ip++; continue; } + size_t const nb_matches_initial = LZ4HC_BinTree_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches, fullUpdate); + if (!nb_matches_initial) { ip++; continue; } - if ((size_t)matches[match_num-1].len > sufficient_len) { + if ((size_t)matches[nb_matches_initial-1].len > sufficient_len) { /* good enough solution : immediate encoding */ - int const firstML = (int)matches[match_num-1].len; - const BYTE* const matchPos = ip - matches[match_num-1].off; + int const firstML = (int)matches[nb_matches_initial-1].len; + const BYTE* const matchPos = ip - matches[nb_matches_initial-1].off; if ( LZ4HC_encodeSequence(&ip, &op, &anchor, (int)firstML, matchPos, limit, oend) ) /* updates ip, op and anchor */ return 0; /* error */ continue; @@ -265,7 +264,7 @@ static int LZ4HC_compress_optimal ( } } /* set prices using matches found for rPos = 0 */ { size_t matchNb; - for (matchNb = 0; matchNb < match_num; matchNb++) { + for (matchNb = 0; matchNb < nb_matches_initial; matchNb++) { size_t mlen = (matchNb>0) ? (size_t)matches[matchNb-1].len+1 : MINMATCH; best_mlen = matches[matchNb].len; /* necessarily < sufficient_len < LZ4_OPT_NUM */ for ( ; mlen <= best_mlen ; mlen++) { @@ -277,6 +276,7 @@ static int LZ4HC_compress_optimal ( /* check further positions */ for (cur = 1; cur <= last_match_pos; cur++) { const BYTE* const curPtr = ip + cur; + size_t nb_matches; /* establish baseline price if cur is literal */ { size_t price, litlen; @@ -295,18 +295,18 @@ static int LZ4HC_compress_optimal ( if (cur == last_match_pos || curPtr >= mflimit) break; - match_num = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); - if ((match_num > 0) && (size_t)matches[match_num-1].len > sufficient_len) { + nb_matches = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); + if ((nb_matches > 0) && (size_t)matches[nb_matches-1].len > sufficient_len) { /* immediate encoding */ - best_mlen = matches[match_num-1].len; - best_off = matches[match_num-1].off; + best_mlen = matches[nb_matches-1].len; + best_off = matches[nb_matches-1].off; last_match_pos = cur + 1; goto encode; } /* set prices using matches at position = cur */ { size_t matchNb; - for (matchNb = 0; matchNb < match_num; matchNb++) { + for (matchNb = 0; matchNb < nb_matches; matchNb++) { size_t ml = (matchNb>0) ? (size_t)matches[matchNb-1].len+1 : MINMATCH; best_mlen = (cur + matches[matchNb].len < LZ4_OPT_NUM) ? (size_t)matches[matchNb].len : LZ4_OPT_NUM - cur; -- cgit v0.12 From fc879fe170d1644f8fd93017d53e44bc2b3cde04 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 20 Oct 2017 11:32:15 -0700 Subject: lz4opt: refactor sequence reverse traversal --- lib/lz4opt.h | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/lib/lz4opt.h b/lib/lz4opt.h index c7c95a8..7d487a7 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -334,17 +334,25 @@ static int LZ4HC_compress_optimal ( cur = last_match_pos - best_mlen; encode: /* cur, last_match_pos, best_mlen, best_off must be set */ + assert(cur < LZ4_OPT_NUM); + assert(last_match_pos >= 1); /* == 1 when only one candidate */ opt[0].mlen = 1; - while (1) { /* from end to beginning */ - size_t const ml = opt[cur].mlen; - int const offset = opt[cur].off; - opt[cur].mlen = (int)best_mlen; - opt[cur].off = (int)best_off; - best_mlen = ml; - best_off = offset; - if (ml > cur) break; /* can this happen ? */ - cur -= ml; - } + DEBUGLOG(6, "sequence reverse traversal"); + { int candidate_pos = (int)cur; + int selected_matchLength = (int)best_mlen; + int selected_offset = (int)best_off; + while (1) { /* from end to beginning */ + int const next_matchLength = opt[candidate_pos].mlen; + int const next_offset = opt[candidate_pos].off; + assert(next_matchLength > 0); /* note : can be 1, means literal */ + opt[candidate_pos].mlen = selected_matchLength; + opt[candidate_pos].off = selected_offset; + DEBUGLOG(6, "rPos:%3i, matchLength:%3i", candidate_pos, selected_matchLength); + selected_matchLength = next_matchLength; + selected_offset = next_offset; + if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */ + candidate_pos -= next_matchLength; + } } /* encode all recorded sequences */ cur = 0; @@ -352,7 +360,9 @@ encode: /* cur, last_match_pos, best_mlen, best_off must be set */ int const ml = opt[cur].mlen; int const offset = opt[cur].off; if (ml == 1) { ip++; cur++; continue; } + assert(ml >= MINMATCH); cur += ml; + assert((offset >= 1) && (offset <=65535)); if ( LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ip - offset, limit, oend) ) return 0; } } /* while (ip < mflimit) */ -- cgit v0.12 From ee62faee085fe04ed657626a2320a5140989746c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 20 Oct 2017 12:05:00 -0700 Subject: minor refactor reduce variable scope remove one macro usage --- lib/lz4opt.h | 63 +++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/lib/lz4opt.h b/lib/lz4opt.h index 7d487a7..48862f2 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -265,13 +265,18 @@ static int LZ4HC_compress_optimal ( /* set prices using matches found for rPos = 0 */ { size_t matchNb; for (matchNb = 0; matchNb < nb_matches_initial; matchNb++) { - size_t mlen = (matchNb>0) ? (size_t)matches[matchNb-1].len+1 : MINMATCH; - best_mlen = matches[matchNb].len; /* necessarily < sufficient_len < LZ4_OPT_NUM */ - for ( ; mlen <= best_mlen ; mlen++) { + int mlen = (matchNb>0) ? matches[matchNb-1].len+1 : MINMATCH; + int const matchML = matches[matchNb].len; /* necessarily < sufficient_len < LZ4_OPT_NUM */ + int const offset = matches[matchNb].off; + assert(matchML < LZ4_OPT_NUM); + for ( ; mlen <= matchML ; mlen++) { size_t const cost = LZ4HC_sequencePrice(llen, mlen); - SET_PRICE(mlen, mlen, matches[matchNb].off, 0, cost); /* updates last_match_pos and opt[pos] */ + opt[mlen].mlen = mlen; + opt[mlen].off = offset; + opt[mlen].litlen = (int)llen; + opt[mlen].price = (int)cost; } } } - assert(last_match_pos >= MINMATCH); + last_match_pos = matches[nb_matches_initial-1].len; /* check further positions */ for (cur = 1; cur <= last_match_pos; cur++) { @@ -308,17 +313,14 @@ static int LZ4HC_compress_optimal ( { size_t matchNb; for (matchNb = 0; matchNb < nb_matches; matchNb++) { size_t ml = (matchNb>0) ? (size_t)matches[matchNb-1].len+1 : MINMATCH; - best_mlen = (cur + matches[matchNb].len < LZ4_OPT_NUM) ? + size_t const matchML = (cur + matches[matchNb].len < LZ4_OPT_NUM) ? (size_t)matches[matchNb].len : LZ4_OPT_NUM - cur; - for ( ; ml <= best_mlen ; ml++) { + for ( ; ml <= matchML ; ml++) { size_t ll, price; if (opt[cur].mlen == 1) { ll = opt[cur].litlen; - if (cur > ll) - price = opt[cur - ll].price + LZ4HC_sequencePrice(ll, ml); - else - price = LZ4HC_sequencePrice(ll, ml); + price = ((cur > ll) ? opt[cur - ll].price : 0) + LZ4HC_sequencePrice(ll, ml); } else { ll = 0; price = opt[cur].price + LZ4HC_sequencePrice(0, ml); @@ -327,7 +329,7 @@ static int LZ4HC_compress_optimal ( if (cur + ml > last_match_pos || price < (size_t)opt[cur + ml].price) { SET_PRICE(cur + ml, ml, matches[matchNb].off, ll, price); } } } } - } /* for (cur = 1; cur <= last_match_pos; cur++) */ + } /* for (cur = 1; cur <= last_match_pos; cur++) */ best_mlen = opt[last_match_pos].mlen; best_off = opt[last_match_pos].off; @@ -337,7 +339,6 @@ encode: /* cur, last_match_pos, best_mlen, best_off must be set */ assert(cur < LZ4_OPT_NUM); assert(last_match_pos >= 1); /* == 1 when only one candidate */ opt[0].mlen = 1; - DEBUGLOG(6, "sequence reverse traversal"); { int candidate_pos = (int)cur; int selected_matchLength = (int)best_mlen; int selected_offset = (int)best_off; @@ -347,31 +348,37 @@ encode: /* cur, last_match_pos, best_mlen, best_off must be set */ assert(next_matchLength > 0); /* note : can be 1, means literal */ opt[candidate_pos].mlen = selected_matchLength; opt[candidate_pos].off = selected_offset; - DEBUGLOG(6, "rPos:%3i, matchLength:%3i", candidate_pos, selected_matchLength); selected_matchLength = next_matchLength; selected_offset = next_offset; if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */ candidate_pos -= next_matchLength; } } - /* encode all recorded sequences */ - cur = 0; - while (cur < last_match_pos) { - int const ml = opt[cur].mlen; - int const offset = opt[cur].off; - if (ml == 1) { ip++; cur++; continue; } - assert(ml >= MINMATCH); - cur += ml; - assert((offset >= 1) && (offset <=65535)); - if ( LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ip - offset, limit, oend) ) return 0; - } + /* encode all recorded sequences in order */ + { size_t rPos = 0; /* relative position (to ip) */ + while (rPos < last_match_pos) { + int const ml = opt[rPos].mlen; + int const offset = opt[rPos].off; + if (ml == 1) { ip++; rPos++; continue; } /* literal */ + rPos += ml; + assert(ml >= MINMATCH); + assert((offset >= 1) && (offset <=65535)); + if ( LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ip - offset, limit, oend) ) /* updates ip, op and anchor */ + return 0; /* error */ + } } } /* while (ip < mflimit) */ /* Encode Last Literals */ { int lastRun = (int)(iend - anchor); - if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */ - if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } - else *op++ = (BYTE)(lastRun< (U32)maxOutputSize)) + return 0; /* Check output limit */ + if (lastRun>=(int)RUN_MASK) { + *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; + *op++ = (BYTE) lastRun; + } else *op++ = (BYTE)(lastRun< Date: Fri, 20 Oct 2017 13:32:45 -0700 Subject: removed one macro usage --- lib/lz4opt.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/lib/lz4opt.h b/lib/lz4opt.h index 48862f2..488af90 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -284,7 +284,8 @@ static int LZ4HC_compress_optimal ( size_t nb_matches; /* establish baseline price if cur is literal */ - { size_t price, litlen; + { size_t price; + int litlen; if (opt[cur-1].mlen == 1) { /* no match at previous position */ litlen = opt[cur-1].litlen + 1; @@ -293,13 +294,19 @@ static int LZ4HC_compress_optimal ( litlen = 1; price = opt[cur - 1].price + LZ4HC_literalsPrice(1); } - - if (price < (size_t)opt[cur].price) - SET_PRICE(cur, 1 /*mlen*/, 0 /*off*/, litlen, price); /* note : increases last_match_pos */ + if (price < (size_t)opt[cur].price) { + opt[cur].mlen = 1; + opt[cur].off = 0; + opt[cur].litlen = litlen; + opt[cur].price = (int)price; + } } if (cur == last_match_pos || curPtr >= mflimit) break; + //assert(cur+2 <= last_match_pos); + //assert(cur+3 <= last_match_pos); + nb_matches = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); if ((nb_matches > 0) && (size_t)matches[nb_matches-1].len > sufficient_len) { /* immediate encoding */ -- cgit v0.12 From d813134619d2d2cae09d0f8bb1629ef1206f269d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 20 Oct 2017 13:44:49 -0700 Subject: removed SET_PRICE macro --- lib/lz4opt.h | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/lib/lz4opt.h b/lib/lz4opt.h index 488af90..dd4e15d 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -195,16 +195,6 @@ LZ4_FORCE_INLINE int LZ4HC_BinTree_GetAllMatches ( } -#define SET_PRICE(pos, ml, offset, ll, cost) \ -{ \ - while (last_match_pos < pos) { opt[last_match_pos+1].price = 1<<30; last_match_pos++; } \ - opt[pos].mlen = (int)ml; \ - opt[pos].off = (int)offset; \ - opt[pos].litlen = (int)ll; \ - opt[pos].price = (int)cost; \ -} - - static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx, const char* const source, @@ -319,22 +309,29 @@ static int LZ4HC_compress_optimal ( /* set prices using matches at position = cur */ { size_t matchNb; for (matchNb = 0; matchNb < nb_matches; matchNb++) { - size_t ml = (matchNb>0) ? (size_t)matches[matchNb-1].len+1 : MINMATCH; - size_t const matchML = (cur + matches[matchNb].len < LZ4_OPT_NUM) ? - (size_t)matches[matchNb].len : LZ4_OPT_NUM - cur; + int ml = (matchNb>0) ? matches[matchNb-1].len+1 : MINMATCH; + int const matchML = (cur + matches[matchNb].len < LZ4_OPT_NUM) ? + matches[matchNb].len : (int)(LZ4_OPT_NUM - cur); for ( ; ml <= matchML ; ml++) { - size_t ll, price; + int const pos = cur + ml; + int const offset = matches[matchNb].off; + size_t price; + int ll; if (opt[cur].mlen == 1) { ll = opt[cur].litlen; - price = ((cur > ll) ? opt[cur - ll].price : 0) + LZ4HC_sequencePrice(ll, ml); + price = ((cur > (size_t)ll) ? opt[cur - ll].price : 0) + LZ4HC_sequencePrice(ll, ml); } else { ll = 0; price = opt[cur].price + LZ4HC_sequencePrice(0, ml); } - if (cur + ml > last_match_pos || price < (size_t)opt[cur + ml].price) { - SET_PRICE(cur + ml, ml, matches[matchNb].off, ll, price); + if ((size_t)pos > last_match_pos || price < (size_t)opt[pos].price) { + while (last_match_pos < (size_t)pos) opt[++last_match_pos].price = 1<<30; + opt[pos].mlen = ml; + opt[pos].off = offset; + opt[pos].litlen = ll; + opt[pos].price = (int)price; } } } } } /* for (cur = 1; cur <= last_match_pos; cur++) */ -- cgit v0.12 From fd6bd5107b81243952a4b2e0b3de901f45608b78 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 20 Oct 2017 15:30:50 -0700 Subject: switched many types to int --- lib/lz4opt.h | 75 ++++++++++++++++++++++++++++++------------------------------ 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/lib/lz4opt.h b/lib/lz4opt.h index dd4e15d..1b215b4 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -50,24 +50,24 @@ typedef struct { /* price in bytes */ -LZ4_FORCE_INLINE size_t LZ4HC_literalsPrice(size_t litlen) +LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen) { - size_t price = litlen; - if (litlen >= (size_t)RUN_MASK) + int price = litlen; + if (litlen >= (int)RUN_MASK) price += 1 + (litlen-RUN_MASK)/255; return price; } /* requires mlen >= MINMATCH */ -LZ4_FORCE_INLINE size_t LZ4HC_sequencePrice(size_t litlen, size_t mlen) +LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen) { - size_t price = 2 + 1; /* 16-bit offset + token */ + int price = 2 + 1; /* 16-bit offset + token */ price += LZ4HC_literalsPrice(litlen); - if (mlen >= (size_t)(ML_MASK+MINMATCH)) - price+= 1 + (mlen-(ML_MASK+MINMATCH))/255; + if (mlen >= (int)(ML_MASK+MINMATCH)) + price += 1 + (mlen-(ML_MASK+MINMATCH))/255; return price; } @@ -224,57 +224,58 @@ static int LZ4HC_compress_optimal ( ip++; /* Main Loop */ + assert(ip - anchor < LZ4_MAX_INPUT_SIZE); while (ip < mflimit) { - size_t const llen = ip - anchor; - size_t last_match_pos = 0; - size_t cur, best_mlen, best_off; + int const llen = (int)(ip - anchor); + int best_mlen, best_off; + int cur, last_match_pos = 0; - size_t const nb_matches_initial = LZ4HC_BinTree_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches, fullUpdate); + int const nb_matches_initial = LZ4HC_BinTree_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches, fullUpdate); if (!nb_matches_initial) { ip++; continue; } if ((size_t)matches[nb_matches_initial-1].len > sufficient_len) { /* good enough solution : immediate encoding */ - int const firstML = (int)matches[nb_matches_initial-1].len; + int const firstML = matches[nb_matches_initial-1].len; const BYTE* const matchPos = ip - matches[nb_matches_initial-1].off; - if ( LZ4HC_encodeSequence(&ip, &op, &anchor, (int)firstML, matchPos, limit, oend) ) /* updates ip, op and anchor */ + if ( LZ4HC_encodeSequence(&ip, &op, &anchor, firstML, matchPos, limit, oend) ) /* updates ip, op and anchor */ return 0; /* error */ continue; } /* set prices for first positions (literals) */ - { size_t rPos; + { int rPos; for (rPos = 0 ; rPos < MINMATCH ; rPos++) { - int const cost = (int)LZ4HC_literalsPrice(llen + rPos); + int const cost = LZ4HC_literalsPrice(llen + rPos); opt[rPos].mlen = 1; opt[rPos].off = 0; - opt[rPos].litlen = (int)(llen + rPos); + opt[rPos].litlen = llen + rPos; opt[rPos].price = cost; DEBUGLOG(7, "rPos:%3u => cost:%3i (litlen=%i)", (U32)rPos, cost, opt[rPos].litlen); } } /* set prices using matches found for rPos = 0 */ - { size_t matchNb; + { int matchNb; for (matchNb = 0; matchNb < nb_matches_initial; matchNb++) { int mlen = (matchNb>0) ? matches[matchNb-1].len+1 : MINMATCH; int const matchML = matches[matchNb].len; /* necessarily < sufficient_len < LZ4_OPT_NUM */ int const offset = matches[matchNb].off; assert(matchML < LZ4_OPT_NUM); for ( ; mlen <= matchML ; mlen++) { - size_t const cost = LZ4HC_sequencePrice(llen, mlen); + int const cost = LZ4HC_sequencePrice(llen, mlen); opt[mlen].mlen = mlen; opt[mlen].off = offset; - opt[mlen].litlen = (int)llen; - opt[mlen].price = (int)cost; + opt[mlen].litlen = llen; + opt[mlen].price = cost; } } } last_match_pos = matches[nb_matches_initial-1].len; /* check further positions */ for (cur = 1; cur <= last_match_pos; cur++) { const BYTE* const curPtr = ip + cur; - size_t nb_matches; + int nb_matches; /* establish baseline price if cur is literal */ - { size_t price; + { int price; int litlen; if (opt[cur-1].mlen == 1) { /* no match at previous position */ @@ -284,19 +285,16 @@ static int LZ4HC_compress_optimal ( litlen = 1; price = opt[cur - 1].price + LZ4HC_literalsPrice(1); } - if (price < (size_t)opt[cur].price) { + if (price < opt[cur].price) { opt[cur].mlen = 1; opt[cur].off = 0; opt[cur].litlen = litlen; - opt[cur].price = (int)price; + opt[cur].price = price; } } if (cur == last_match_pos || curPtr >= mflimit) break; - //assert(cur+2 <= last_match_pos); - //assert(cur+3 <= last_match_pos); - nb_matches = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); if ((nb_matches > 0) && (size_t)matches[nb_matches-1].len > sufficient_len) { /* immediate encoding */ @@ -307,7 +305,7 @@ static int LZ4HC_compress_optimal ( } /* set prices using matches at position = cur */ - { size_t matchNb; + { int matchNb; for (matchNb = 0; matchNb < nb_matches; matchNb++) { int ml = (matchNb>0) ? matches[matchNb-1].len+1 : MINMATCH; int const matchML = (cur + matches[matchNb].len < LZ4_OPT_NUM) ? @@ -316,23 +314,24 @@ static int LZ4HC_compress_optimal ( for ( ; ml <= matchML ; ml++) { int const pos = cur + ml; int const offset = matches[matchNb].off; - size_t price; + int price; int ll; if (opt[cur].mlen == 1) { ll = opt[cur].litlen; - price = ((cur > (size_t)ll) ? opt[cur - ll].price : 0) + LZ4HC_sequencePrice(ll, ml); + price = ((cur > ll) ? opt[cur - ll].price : 0) + LZ4HC_sequencePrice(ll, ml); } else { ll = 0; price = opt[cur].price + LZ4HC_sequencePrice(0, ml); } - if ((size_t)pos > last_match_pos || price < (size_t)opt[pos].price) { - while (last_match_pos < (size_t)pos) opt[++last_match_pos].price = 1<<30; + if (pos > last_match_pos || price < opt[pos].price) { + while (last_match_pos < pos) opt[++last_match_pos].price = 1<<30; opt[pos].mlen = ml; opt[pos].off = offset; opt[pos].litlen = ll; - opt[pos].price = (int)price; + opt[pos].price = price; } } } } + } /* for (cur = 1; cur <= last_match_pos; cur++) */ best_mlen = opt[last_match_pos].mlen; @@ -343,9 +342,9 @@ encode: /* cur, last_match_pos, best_mlen, best_off must be set */ assert(cur < LZ4_OPT_NUM); assert(last_match_pos >= 1); /* == 1 when only one candidate */ opt[0].mlen = 1; - { int candidate_pos = (int)cur; - int selected_matchLength = (int)best_mlen; - int selected_offset = (int)best_off; + { int candidate_pos = cur; + int selected_matchLength = best_mlen; + int selected_offset = best_off; while (1) { /* from end to beginning */ int const next_matchLength = opt[candidate_pos].mlen; int const next_offset = opt[candidate_pos].off; @@ -359,7 +358,7 @@ encode: /* cur, last_match_pos, best_mlen, best_off must be set */ } } /* encode all recorded sequences in order */ - { size_t rPos = 0; /* relative position (to ip) */ + { int rPos = 0; /* relative position (to ip) */ while (rPos < last_match_pos) { int const ml = opt[rPos].mlen; int const offset = opt[rPos].off; @@ -377,7 +376,7 @@ encode: /* cur, last_match_pos, best_mlen, best_off must be set */ if ( (limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */ - if (lastRun>=(int)RUN_MASK) { + if (lastRun >= (int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; -- cgit v0.12 From a12cdf00c3d0a0e3e7b2638d4a256f14b15c0072 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 20 Oct 2017 17:04:29 -0700 Subject: lz4opt: added hash chain search --- lib/lz4hc.c | 27 ++++++++++++++++++--------- lib/lz4hc.h | 2 +- lib/lz4opt.h | 58 ++++++++++++++++++++++++++++++++++++++++++++-------------- 3 files changed, 63 insertions(+), 24 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 47474d3..adabd9c 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -183,7 +183,7 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( const BYTE* const dictBase = hc4->dictBase; int const delta = (int)(ip-iLowLimit); int nbAttempts = maxNbAttempts; - reg_t const pattern = LZ4_read_ARCH(ip); + U32 const pattern = LZ4_read32(ip); U32 matchIndex; repeat_state_e repeat = rep_untested; size_t srcPatternLength = 0; @@ -195,10 +195,16 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( while ((matchIndex>=lowLimit) && (nbAttempts)) { nbAttempts--; + int trace = 0; + if (nbAttempts==0) { + trace = 1; + DEBUGLOG(2, "reached max nb of attempts ! : %08X => %08X ", + pattern, HASH_FUNCTION(pattern)); + } if (matchIndex >= dictLimit) { const BYTE* const matchPtr = base + matchIndex; if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) { - if (LZ4_read32(matchPtr) == (U32)pattern) { + if (LZ4_read32(matchPtr) == pattern) { int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); #if 0 /* more generic but unfortunately slower ... */ @@ -221,9 +227,9 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( } } else { /* matchIndex < dictLimit */ const BYTE* const matchPtr = dictBase + matchIndex; - if (LZ4_read32(matchPtr) == (U32)pattern) { + if (LZ4_read32(matchPtr) == pattern) { int mlt; - int back=0; + int back = 0; const BYTE* vLimit = ip + (dictLimit - matchIndex); if (vLimit > iHighLimit) vLimit = iHighLimit; mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; @@ -243,22 +249,25 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( { U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex); matchIndex -= nextOffset; if (1 && (nextOffset==1)) { + if (trace) DEBUGLOG(2, "check repeat mode : %u", repeat); /* may be a repeated pattern */ if (repeat == rep_untested) { - if (LZ4_read32(ip+4) == (U32)pattern) { /* should check ip limit */ + if ((pattern & 0xFFFF) == (pattern >> 16)) { /* is it enough ? */ repeat = rep_confirmed; - srcPatternLength = LZ4HC_countPattern(ip+8, iHighLimit, pattern) + 8; + srcPatternLength = LZ4HC_countPattern(ip+4, iHighLimit, pattern) + 4; } else { repeat = rep_not; } } if ( (repeat == rep_confirmed) /* proven repeated pattern (1-2-4) */ && (matchIndex >= dictLimit) ) { /* same segment only */ const BYTE* const matchPtr = base + matchIndex; - if (LZ4_read_ARCH(matchPtr) == pattern) { /* good candidate */ + if (trace) DEBUGLOG(2, "search direct pattern position"); + if (LZ4_read32(matchPtr) == pattern) { /* good candidate */ size_t const forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern); const BYTE* const maxLowPtr = (lowPrefixPtr + MAX_DISTANCE >= ip) ? lowPrefixPtr : ip - MAX_DISTANCE; - size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, maxLowPtr, (U32)pattern); + size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, maxLowPtr, pattern); size_t const currentSegmentLength = backLength + forwardPatternLength; + if (trace) DEBUGLOG(2, "good start position (match == pattern)"); if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */ && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */ @@ -633,7 +642,7 @@ static int LZ4HC_getSearchNum(int compressionLevel) switch (compressionLevel) { default: return 0; /* unused */ case 11: return 128; - case 12: return 1<<10; + case 12: return 1<<13; } } diff --git a/lib/lz4hc.h b/lib/lz4hc.h index 66d5636..a9cefbb 100644 --- a/lib/lz4hc.h +++ b/lib/lz4hc.h @@ -129,7 +129,7 @@ LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, in * They are exposed to allow static allocation of `LZ4_streamHC_t`. * Using these definitions makes the code vulnerable to potential API break when upgrading LZ4 **************************************/ -#define LZ4HC_DICTIONARY_LOGSIZE 17 /* because of btopt, hc would only need 16 */ +#define LZ4HC_DICTIONARY_LOGSIZE 17 /* due to btree, hc would only need 16 */ #define LZ4HC_MAXD (1<searchNum); + if (matchLength < MINMATCH) return 0; + assert(matches != NULL); + matches[0].len = matchLength; + matches[0].off = (int)(ip-matchPtr); + (void)fullUpdate; + return 1; +} + + static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx, const char* const source, - char* dest, + char* dst, int inputSize, - int maxOutputSize, + int dstCapacity, limitedOutput_directive limit, size_t sufficient_len, const int fullUpdate @@ -214,8 +241,8 @@ static int LZ4HC_compress_optimal ( const BYTE* const iend = ip + inputSize; const BYTE* const mflimit = iend - MFLIMIT; const BYTE* const matchlimit = (iend - LASTLITERALS); - BYTE* op = (BYTE*) dest; - BYTE* const oend = op + maxOutputSize; + BYTE* op = (BYTE*) dst; + BYTE* const oend = op + dstCapacity; /* init */ DEBUGLOG(5, "LZ4HC_compress_optimal"); @@ -230,7 +257,8 @@ static int LZ4HC_compress_optimal ( int best_mlen, best_off; int cur, last_match_pos = 0; - int const nb_matches_initial = LZ4HC_BinTree_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches, fullUpdate); + //int const nb_matches_initial = LZ4HC_BinTree_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches, fullUpdate); + int const nb_matches_initial = LZ4HC_HashChain_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches, fullUpdate); if (!nb_matches_initial) { ip++; continue; } if ((size_t)matches[nb_matches_initial-1].len > sufficient_len) { @@ -274,7 +302,9 @@ static int LZ4HC_compress_optimal ( const BYTE* const curPtr = ip + cur; int nb_matches; - /* establish baseline price if cur is literal */ + /* establish baseline price for cur as a literal. + * fixes unused positions (between 2 matches) + * and inefficient match stack */ { int price; int litlen; if (opt[cur-1].mlen == 1) { @@ -295,7 +325,8 @@ static int LZ4HC_compress_optimal ( if (cur == last_match_pos || curPtr >= mflimit) break; - nb_matches = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); + //nb_matches = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); + nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); if ((nb_matches > 0) && (size_t)matches[nb_matches-1].len > sufficient_len) { /* immediate encoding */ best_mlen = matches[nb_matches-1].len; @@ -307,9 +338,9 @@ static int LZ4HC_compress_optimal ( /* set prices using matches at position = cur */ { int matchNb; for (matchNb = 0; matchNb < nb_matches; matchNb++) { - int ml = (matchNb>0) ? matches[matchNb-1].len+1 : MINMATCH; int const matchML = (cur + matches[matchNb].len < LZ4_OPT_NUM) ? matches[matchNb].len : (int)(LZ4_OPT_NUM - cur); + int ml = (matchNb>0) ? matches[matchNb-1].len+1 : MINMATCH; for ( ; ml <= matchML ; ml++) { int const pos = cur + ml; @@ -341,19 +372,18 @@ static int LZ4HC_compress_optimal ( encode: /* cur, last_match_pos, best_mlen, best_off must be set */ assert(cur < LZ4_OPT_NUM); assert(last_match_pos >= 1); /* == 1 when only one candidate */ - opt[0].mlen = 1; { int candidate_pos = cur; int selected_matchLength = best_mlen; int selected_offset = best_off; while (1) { /* from end to beginning */ - int const next_matchLength = opt[candidate_pos].mlen; + int const next_matchLength = opt[candidate_pos].mlen; /* can be 1, means literal */ int const next_offset = opt[candidate_pos].off; - assert(next_matchLength > 0); /* note : can be 1, means literal */ opt[candidate_pos].mlen = selected_matchLength; opt[candidate_pos].off = selected_offset; selected_matchLength = next_matchLength; selected_offset = next_offset; if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */ + assert(next_matchLength > 0); /* can be 1, means literal */ candidate_pos -= next_matchLength; } } @@ -362,7 +392,7 @@ encode: /* cur, last_match_pos, best_mlen, best_off must be set */ while (rPos < last_match_pos) { int const ml = opt[rPos].mlen; int const offset = opt[rPos].off; - if (ml == 1) { ip++; rPos++; continue; } /* literal */ + if (ml == 1) { ip++; rPos++; continue; } /* literal; note: can end up with several literals, in which case, skip them */ rPos += ml; assert(ml >= MINMATCH); assert((offset >= 1) && (offset <=65535)); @@ -374,7 +404,7 @@ encode: /* cur, last_match_pos, best_mlen, best_off must be set */ /* Encode Last Literals */ { int lastRun = (int)(iend - anchor); if ( (limit) - && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) + && (((char*)op - dst) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)dstCapacity)) return 0; /* Check output limit */ if (lastRun >= (int)RUN_MASK) { *op++=(RUN_MASK< Date: Wed, 25 Oct 2017 07:07:08 +0200 Subject: added hash chain with conditional length not a success yet --- lib/lz4hc.c | 9 --------- lib/lz4opt.h | 3 ++- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index adabd9c..4532ba3 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -195,12 +195,6 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( while ((matchIndex>=lowLimit) && (nbAttempts)) { nbAttempts--; - int trace = 0; - if (nbAttempts==0) { - trace = 1; - DEBUGLOG(2, "reached max nb of attempts ! : %08X => %08X ", - pattern, HASH_FUNCTION(pattern)); - } if (matchIndex >= dictLimit) { const BYTE* const matchPtr = base + matchIndex; if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) { @@ -249,7 +243,6 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( { U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex); matchIndex -= nextOffset; if (1 && (nextOffset==1)) { - if (trace) DEBUGLOG(2, "check repeat mode : %u", repeat); /* may be a repeated pattern */ if (repeat == rep_untested) { if ((pattern & 0xFFFF) == (pattern >> 16)) { /* is it enough ? */ @@ -261,13 +254,11 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( if ( (repeat == rep_confirmed) /* proven repeated pattern (1-2-4) */ && (matchIndex >= dictLimit) ) { /* same segment only */ const BYTE* const matchPtr = base + matchIndex; - if (trace) DEBUGLOG(2, "search direct pattern position"); if (LZ4_read32(matchPtr) == pattern) { /* good candidate */ size_t const forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern); const BYTE* const maxLowPtr = (lowPrefixPtr + MAX_DISTANCE >= ip) ? lowPrefixPtr : ip - MAX_DISTANCE; size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, maxLowPtr, pattern); size_t const currentSegmentLength = backLength + forwardPatternLength; - if (trace) DEBUGLOG(2, "good start position (match == pattern)"); if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */ && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */ diff --git a/lib/lz4opt.h b/lib/lz4opt.h index ef7b725..bd956ee 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -213,7 +213,7 @@ LZ4_FORCE_INLINE int LZ4HC_HashChain_GetAllMatches ( { const BYTE* matchPtr; int matchLength = LZ4HC_FindLongerMatch(ctx, ip, iHighLimit, (int)best_mlen, &matchPtr, ctx->searchNum); - if (matchLength < MINMATCH) return 0; + if ((size_t)matchLength <= best_mlen) return 0; assert(matches != NULL); matches[0].len = matchLength; matches[0].off = (int)(ip-matchPtr); @@ -327,6 +327,7 @@ static int LZ4HC_compress_optimal ( //nb_matches = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); + //nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, last_match_pos - cur + 1, matches, fullUpdate); /* only works if last_match_pos is really the last match pos */ if ((nb_matches > 0) && (size_t)matches[nb_matches-1].len > sufficient_len) { /* immediate encoding */ best_mlen = matches[nb_matches-1].len; -- cgit v0.12 From ab4bd93f59dce5d116f1c95373a64cc39ccd6990 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 30 Oct 2017 16:10:25 -0700 Subject: fixed minor initialization warning --- lib/lz4opt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lz4opt.h b/lib/lz4opt.h index bd956ee..2daf17e 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -211,7 +211,7 @@ LZ4_FORCE_INLINE int LZ4HC_HashChain_GetAllMatches ( const BYTE* const ip, const BYTE* const iHighLimit, size_t best_mlen, LZ4HC_match_t* matches, const int fullUpdate) { - const BYTE* matchPtr; + const BYTE* matchPtr = NULL; int matchLength = LZ4HC_FindLongerMatch(ctx, ip, iHighLimit, (int)best_mlen, &matchPtr, ctx->searchNum); if ((size_t)matchLength <= best_mlen) return 0; assert(matches != NULL); -- cgit v0.12 From 931c5c20d0d87e706e3dd9bf57e089500455c987 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 30 Oct 2017 17:47:54 -0700 Subject: fixed minor overflow mistake in optimal parser saving 20 bytes on calgary.tar --- lib/lz4opt.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/lz4opt.h b/lib/lz4opt.h index 2daf17e..40592df 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -328,7 +328,10 @@ static int LZ4HC_compress_optimal ( //nb_matches = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); //nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, last_match_pos - cur + 1, matches, fullUpdate); /* only works if last_match_pos is really the last match pos */ - if ((nb_matches > 0) && (size_t)matches[nb_matches-1].len > sufficient_len) { + if (!nb_matches) continue; + + if ( ((size_t)matches[nb_matches-1].len > sufficient_len) + || (matches[nb_matches-1].len + cur >= LZ4_OPT_NUM) ) { /* immediate encoding */ best_mlen = matches[nb_matches-1].len; best_off = matches[nb_matches-1].off; @@ -357,6 +360,7 @@ static int LZ4HC_compress_optimal ( } if (pos > last_match_pos || price < opt[pos].price) { + assert(pos < LZ4_OPT_NUM); while (last_match_pos < pos) opt[++last_match_pos].price = 1<<30; opt[pos].mlen = ml; opt[pos].off = offset; -- cgit v0.12 From 0ff4df1b941145d5b728b4b4a936391537b8ea11 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 2 Nov 2017 13:44:57 -0700 Subject: changed strategy : opt[] path is complete after each match previous strategy would leave a few "bad choices" on the ground they would be fixed later, but that requires passing through each position to make the fix and cannot give the end position of the last useful match. --- lib/lz4hc.c | 21 +++++++++----- lib/lz4opt.h | 90 ++++++++++++++++++++++++++++++++++++++---------------------- 2 files changed, 71 insertions(+), 40 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 4532ba3..eb31a9c 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -342,10 +342,6 @@ typedef enum { limitedDestSize = 2, } limitedOutput_directive; -#ifndef LZ4HC_DEBUG -# define LZ4HC_DEBUG 0 -#endif - /* LZ4HC_encodeSequence() : * @return : 0 if ok, * 1 if buffer issue detected */ @@ -361,9 +357,19 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence ( size_t length; BYTE* const token = (*op)++; -#if LZ4HC_DEBUG - printf("literal : %u -- match : %u -- offset : %u\n", - (U32)(*ip - *anchor), (U32)matchLength, (U32)(*ip-match)); +#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 2) + static const BYTE* start = NULL; + static U32 totalCost = 0; + U32 const ll = (U32)(*ip - *anchor); + U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0; + U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0; + U32 const cost = 1 + llAdd + ll + 2 + mlAdd; + if (start==NULL) start = *anchor; /* only works for single segment */ + totalCost += cost; + DEBUGLOG(2, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u/%7u", + (U32)(*anchor - start), + (U32)(*ip - *anchor), matchLength, (U32)(*ip-match), + cost, totalCost); #endif /* Encode Literal length */ @@ -386,6 +392,7 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence ( LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2; /* Encode MatchLength */ + assert(matchLength >= MINMATCH); length = (size_t)(matchLength - MINMATCH); if ((limit) && (*op + (length >> 8) + (1 + LASTLITERALS) > oend)) return 1; /* Check output limit */ if (length >= ML_MASK) { diff --git a/lib/lz4opt.h b/lib/lz4opt.h index 40592df..8a223ec 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -233,14 +233,14 @@ static int LZ4HC_compress_optimal ( const int fullUpdate ) { - LZ4HC_optimal_t opt[LZ4_OPT_NUM + 1]; /* this uses a bit too much stack memory to my taste ... */ + LZ4HC_optimal_t opt[LZ4_OPT_NUM + 3]; /* this uses a bit too much stack memory to my taste ... */ LZ4HC_match_t matches[LZ4_OPT_NUM + 1]; const BYTE* ip = (const BYTE*) source; const BYTE* anchor = ip; const BYTE* const iend = ip + inputSize; const BYTE* const mflimit = iend - MFLIMIT; - const BYTE* const matchlimit = (iend - LASTLITERALS); + const BYTE* const matchlimit = iend - LASTLITERALS; BYTE* op = (BYTE*) dst; BYTE* const oend = op + dstCapacity; @@ -278,8 +278,8 @@ static int LZ4HC_compress_optimal ( opt[rPos].off = 0; opt[rPos].litlen = llen + rPos; opt[rPos].price = cost; - DEBUGLOG(7, "rPos:%3u => cost:%3i (litlen=%i)", - (U32)rPos, cost, opt[rPos].litlen); + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", + rPos, cost, opt[rPos].litlen); } } /* set prices using matches found for rPos = 0 */ { int matchNb; @@ -294,36 +294,26 @@ static int LZ4HC_compress_optimal ( opt[mlen].off = offset; opt[mlen].litlen = llen; opt[mlen].price = cost; + DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup", + mlen, cost, mlen); } } } last_match_pos = matches[nb_matches_initial-1].len; + { int addLit; + for (addLit = 1; addLit <= 2; addLit ++) { + opt[last_match_pos+addLit].mlen = 1; /* literal */ + opt[last_match_pos+addLit].off = 0; + opt[last_match_pos+addLit].litlen = addLit; + opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", + last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); + } } /* check further positions */ - for (cur = 1; cur <= last_match_pos; cur++) { + for (cur = 1; cur < last_match_pos; cur++) { const BYTE* const curPtr = ip + cur; int nb_matches; - /* establish baseline price for cur as a literal. - * fixes unused positions (between 2 matches) - * and inefficient match stack */ - { int price; - int litlen; - if (opt[cur-1].mlen == 1) { - /* no match at previous position */ - litlen = opt[cur-1].litlen + 1; - price = opt[cur-1].price - LZ4HC_literalsPrice(litlen-1) + LZ4HC_literalsPrice(litlen); - } else { - litlen = 1; - price = opt[cur - 1].price + LZ4HC_literalsPrice(1); - } - if (price < opt[cur].price) { - opt[cur].mlen = 1; - opt[cur].off = 0; - opt[cur].litlen = litlen; - opt[cur].price = price; - } - } - - if (cur == last_match_pos || curPtr >= mflimit) break; + if (curPtr >= mflimit) break; //nb_matches = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); @@ -339,11 +329,26 @@ static int LZ4HC_compress_optimal ( goto encode; } + /* before first match : set price with literals at beginning */ + { int const baseLitlen = opt[cur].litlen; + int litlen; + for (litlen = 1; litlen < MINMATCH; litlen++) { + int const price = opt[cur].price - LZ4HC_literalsPrice(baseLitlen) + LZ4HC_literalsPrice(baseLitlen+litlen); + int const pos = cur + litlen; + if (price < opt[pos].price) { + opt[pos].mlen = 1; /* literal */ + opt[pos].off = 0; + opt[pos].litlen = baseLitlen+litlen; + opt[pos].price = price; + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", + pos, price, opt[pos].litlen); + } } } + /* set prices using matches at position = cur */ { int matchNb; + assert(cur + matches[nb_matches-1].len < LZ4_OPT_NUM); for (matchNb = 0; matchNb < nb_matches; matchNb++) { - int const matchML = (cur + matches[matchNb].len < LZ4_OPT_NUM) ? - matches[matchNb].len : (int)(LZ4_OPT_NUM - cur); + int const matchML = matches[matchNb].len; int ml = (matchNb>0) ? matches[matchNb-1].len+1 : MINMATCH; for ( ; ml <= matchML ; ml++) { @@ -351,23 +356,39 @@ static int LZ4HC_compress_optimal ( int const offset = matches[matchNb].off; int price; int ll; + DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)", + pos, last_match_pos); if (opt[cur].mlen == 1) { ll = opt[cur].litlen; - price = ((cur > ll) ? opt[cur - ll].price : 0) + LZ4HC_sequencePrice(ll, ml); + price = ((cur > ll) ? opt[cur - ll].price : 0) + + LZ4HC_sequencePrice(ll, ml); } else { ll = 0; price = opt[cur].price + LZ4HC_sequencePrice(0, ml); } - if (pos > last_match_pos || price < opt[pos].price) { + if (pos > last_match_pos+2 || price <= opt[pos].price) { + DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)", + pos, price, ml); assert(pos < LZ4_OPT_NUM); - while (last_match_pos < pos) opt[++last_match_pos].price = 1<<30; + if ( (matchNb == nb_matches-1) /* last match */ + && (ml == matchML) /* last post of last match */ + && (last_match_pos < pos) ) + last_match_pos = pos; opt[pos].mlen = ml; opt[pos].off = offset; opt[pos].litlen = ll; opt[pos].price = price; } } } } - + /* complete following positions with literals */ + { int addLit; + for (addLit = 1; addLit <= 2; addLit ++) { + opt[last_match_pos+addLit].mlen = 1; /* literal */ + opt[last_match_pos+addLit].off = 0; + opt[last_match_pos+addLit].litlen = addLit; + opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); + } } } /* for (cur = 1; cur <= last_match_pos; cur++) */ best_mlen = opt[last_match_pos].mlen; @@ -377,12 +398,15 @@ static int LZ4HC_compress_optimal ( encode: /* cur, last_match_pos, best_mlen, best_off must be set */ assert(cur < LZ4_OPT_NUM); assert(last_match_pos >= 1); /* == 1 when only one candidate */ + DEBUGLOG(6, "reverse traversal, looking for shortest path") + DEBUGLOG(6, "last_match_pos = %i", last_match_pos); { int candidate_pos = cur; int selected_matchLength = best_mlen; int selected_offset = best_off; while (1) { /* from end to beginning */ int const next_matchLength = opt[candidate_pos].mlen; /* can be 1, means literal */ int const next_offset = opt[candidate_pos].off; + DEBUGLOG(6, "pos %i: sequence length %i", candidate_pos, selected_matchLength); opt[candidate_pos].mlen = selected_matchLength; opt[candidate_pos].off = selected_offset; selected_matchLength = next_matchLength; -- cgit v0.12 From 8e16eb0cd164071d3fc4c18e0f10527487649e93 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 2 Nov 2017 14:53:06 -0700 Subject: fixed last lost bytes in maximal mode even gained 2 bytes on calgary.tar... added conditional traces `g_debuglog_enable` --- lib/lz4.c | 12 +++++++----- lib/lz4hc.c | 13 +++++++++---- lib/lz4opt.h | 7 ++++--- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/lib/lz4.c b/lib/lz4.c index e3e9172..e21822d 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -324,11 +324,12 @@ static const int LZ4_minLength = (MFLIMIT+1); #if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2) # include -# define DEBUGLOG(l, ...) { \ - if (l<=LZ4_DEBUG) { \ - fprintf(stderr, __FILE__ ": "); \ - fprintf(stderr, __VA_ARGS__); \ - fprintf(stderr, " \n"); \ +static int g_debuglog_enable = 1; +# define DEBUGLOG(l, ...) { \ + if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \ + fprintf(stderr, __FILE__ ": "); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, " \n"); \ } } #else # define DEBUGLOG(l, ...) {} /* disabled */ @@ -978,6 +979,7 @@ LZ4_stream_t* LZ4_createStream(void) void LZ4_resetStream (LZ4_stream_t* LZ4_stream) { + DEBUGLOG(4, "LZ4_resetStream"); MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t)); } diff --git a/lib/lz4hc.c b/lib/lz4hc.c index eb31a9c..884f5d7 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -188,12 +188,15 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( repeat_state_e repeat = rep_untested; size_t srcPatternLength = 0; - + DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch"); /* First Match */ LZ4HC_Insert(hc4, ip); matchIndex = HashTable[LZ4HC_hashPtr(ip)]; + DEBUGLOG(7, "First match at index %u / %u (lowLimit)", + matchIndex, lowLimit); while ((matchIndex>=lowLimit) && (nbAttempts)) { + DEBUGLOG(7, "remaining attempts : %i", nbAttempts); nbAttempts--; if (matchIndex >= dictLimit) { const BYTE* const matchPtr = base + matchIndex; @@ -360,16 +363,18 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence ( #if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 2) static const BYTE* start = NULL; static U32 totalCost = 0; + U32 const pos = (U32)(*anchor - start); U32 const ll = (U32)(*ip - *anchor); U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0; U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0; U32 const cost = 1 + llAdd + ll + 2 + mlAdd; if (start==NULL) start = *anchor; /* only works for single segment */ - totalCost += cost; - DEBUGLOG(2, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u/%7u", - (U32)(*anchor - start), + //g_debuglog_enable = (pos >= 112705) & (pos <= 112760); + DEBUGLOG(2, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u / %u", + pos, (U32)(*ip - *anchor), matchLength, (U32)(*ip-match), cost, totalCost); + totalCost += cost; #endif /* Encode Literal length */ diff --git a/lib/lz4opt.h b/lib/lz4opt.h index 8a223ec..25ceaad 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -299,7 +299,7 @@ static int LZ4HC_compress_optimal ( } } } last_match_pos = matches[nb_matches_initial-1].len; { int addLit; - for (addLit = 1; addLit <= 2; addLit ++) { + for (addLit = 1; addLit <= 3; addLit ++) { opt[last_match_pos+addLit].mlen = 1; /* literal */ opt[last_match_pos+addLit].off = 0; opt[last_match_pos+addLit].litlen = addLit; @@ -315,6 +315,7 @@ static int LZ4HC_compress_optimal ( if (curPtr >= mflimit) break; + DEBUGLOG(7, "search at rPos:%u", cur); //nb_matches = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); //nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, last_match_pos - cur + 1, matches, fullUpdate); /* only works if last_match_pos is really the last match pos */ @@ -367,7 +368,7 @@ static int LZ4HC_compress_optimal ( price = opt[cur].price + LZ4HC_sequencePrice(0, ml); } - if (pos > last_match_pos+2 || price <= opt[pos].price) { + if (pos > last_match_pos+3 || price <= opt[pos].price) { DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)", pos, price, ml); assert(pos < LZ4_OPT_NUM); @@ -382,7 +383,7 @@ static int LZ4HC_compress_optimal ( } } } } /* complete following positions with literals */ { int addLit; - for (addLit = 1; addLit <= 2; addLit ++) { + for (addLit = 1; addLit <= 3; addLit ++) { opt[last_match_pos+addLit].mlen = 1; /* literal */ opt[last_match_pos+addLit].off = 0; opt[last_match_pos+addLit].litlen = addLit; -- cgit v0.12 From bd992f12e4be48b4eed48b3898153f2c80bc014b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 2 Nov 2017 15:05:45 -0700 Subject: searching match leading strictly farther does not work sometimes, it's better to re-use same match but start it later, in order to get shorter matchlength code --- lib/lz4opt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lz4opt.h b/lib/lz4opt.h index 25ceaad..1e696f9 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -318,7 +318,7 @@ static int LZ4HC_compress_optimal ( DEBUGLOG(7, "search at rPos:%u", cur); //nb_matches = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); - //nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, last_match_pos - cur + 1, matches, fullUpdate); /* only works if last_match_pos is really the last match pos */ + //nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, last_match_pos - cur - 1, matches, fullUpdate); /* only test matches of a minimum length */ if (!nb_matches) continue; if ( ((size_t)matches[nb_matches-1].len > sufficient_len) -- cgit v0.12 From 4b8188580084112f01f0d0913ac022162fd71f25 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 2 Nov 2017 15:37:18 -0700 Subject: partial search, while preserving compression ratio tag interesting places --- lib/lz4hc.c | 4 ++-- lib/lz4opt.h | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 884f5d7..44e0b0a 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -363,14 +363,14 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence ( #if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 2) static const BYTE* start = NULL; static U32 totalCost = 0; - U32 const pos = (U32)(*anchor - start); + U32 const pos = (start==NULL) ? 0 : (U32)(*anchor - start); U32 const ll = (U32)(*ip - *anchor); U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0; U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0; U32 const cost = 1 + llAdd + ll + 2 + mlAdd; if (start==NULL) start = *anchor; /* only works for single segment */ //g_debuglog_enable = (pos >= 112705) & (pos <= 112760); - DEBUGLOG(2, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u / %u", + DEBUGLOG(2, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u + %u", pos, (U32)(*ip - *anchor), matchLength, (U32)(*ip-match), cost, totalCost); diff --git a/lib/lz4opt.h b/lib/lz4opt.h index 1e696f9..37cc73a 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -46,6 +46,7 @@ typedef struct { int off; int mlen; int litlen; + int toSearch; } LZ4HC_optimal_t; @@ -278,6 +279,7 @@ static int LZ4HC_compress_optimal ( opt[rPos].off = 0; opt[rPos].litlen = llen + rPos; opt[rPos].price = cost; + opt[rPos].toSearch = 1; DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", rPos, cost, opt[rPos].litlen); } } @@ -294,16 +296,21 @@ static int LZ4HC_compress_optimal ( opt[mlen].off = offset; opt[mlen].litlen = llen; opt[mlen].price = cost; + opt[mlen].toSearch = (((mlen - 18) % 255) == 0); DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup", mlen, cost, mlen); } } } last_match_pos = matches[nb_matches_initial-1].len; + opt[last_match_pos-2].toSearch = 1; + opt[last_match_pos-1].toSearch = 1; + opt[last_match_pos].toSearch = 1; { int addLit; for (addLit = 1; addLit <= 3; addLit ++) { opt[last_match_pos+addLit].mlen = 1; /* literal */ opt[last_match_pos+addLit].off = 0; opt[last_match_pos+addLit].litlen = addLit; opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); + opt[last_match_pos+addLit].toSearch = 1; DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); } } @@ -314,6 +321,7 @@ static int LZ4HC_compress_optimal ( int nb_matches; if (curPtr >= mflimit) break; + if (opt[cur].toSearch == 0) continue; DEBUGLOG(7, "search at rPos:%u", cur); //nb_matches = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); @@ -341,6 +349,7 @@ static int LZ4HC_compress_optimal ( opt[pos].off = 0; opt[pos].litlen = baseLitlen+litlen; opt[pos].price = price; + opt[pos].toSearch = 1; DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", pos, price, opt[pos].litlen); } } } @@ -380,14 +389,19 @@ static int LZ4HC_compress_optimal ( opt[pos].off = offset; opt[pos].litlen = ll; opt[pos].price = price; + opt[pos].toSearch = (((ml-18) % 255) == 0); } } } } /* complete following positions with literals */ + opt[last_match_pos-2].toSearch = 1; + opt[last_match_pos-1].toSearch = 1; + opt[last_match_pos].toSearch = 1; { int addLit; for (addLit = 1; addLit <= 3; addLit ++) { opt[last_match_pos+addLit].mlen = 1; /* literal */ opt[last_match_pos+addLit].off = 0; opt[last_match_pos+addLit].litlen = addLit; opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); + opt[last_match_pos+addLit].toSearch = 1; DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); } } } /* for (cur = 1; cur <= last_match_pos; cur++) */ -- cgit v0.12 From e06cb03c11ee8821a10fd8496f029c64192c9bc8 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 2 Nov 2017 16:25:10 -0700 Subject: small adaptations for intermediate level 11 --- lib/lz4hc.c | 2 +- lib/lz4opt.h | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 44e0b0a..941cda0 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -644,7 +644,7 @@ static int LZ4HC_getSearchNum(int compressionLevel) { switch (compressionLevel) { default: return 0; /* unused */ - case 11: return 128; + case 11: return 256; case 12: return 1<<13; } } diff --git a/lib/lz4opt.h b/lib/lz4opt.h index 37cc73a..c9fab04 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -301,8 +301,8 @@ static int LZ4HC_compress_optimal ( mlen, cost, mlen); } } } last_match_pos = matches[nb_matches_initial-1].len; - opt[last_match_pos-2].toSearch = 1; - opt[last_match_pos-1].toSearch = 1; + if (fullUpdate) opt[last_match_pos-2].toSearch = 1; /* 1 byte on calgary */ + if (fullUpdate) opt[last_match_pos-1].toSearch = 1; /* 1 byte on calgary */ opt[last_match_pos].toSearch = 1; { int addLit; for (addLit = 1; addLit <= 3; addLit ++) { @@ -349,7 +349,6 @@ static int LZ4HC_compress_optimal ( opt[pos].off = 0; opt[pos].litlen = baseLitlen+litlen; opt[pos].price = price; - opt[pos].toSearch = 1; DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", pos, price, opt[pos].litlen); } } } @@ -392,8 +391,8 @@ static int LZ4HC_compress_optimal ( opt[pos].toSearch = (((ml-18) % 255) == 0); } } } } /* complete following positions with literals */ - opt[last_match_pos-2].toSearch = 1; - opt[last_match_pos-1].toSearch = 1; + if (fullUpdate) opt[last_match_pos-2].toSearch = 1; /* 2 bytes on enwik7 */ + if (fullUpdate) opt[last_match_pos-1].toSearch = 1; /* 53 bytes on enwik7, 13 bytes on calgary */ opt[last_match_pos].toSearch = 1; { int addLit; for (addLit = 1; addLit <= 3; addLit ++) { @@ -439,7 +438,7 @@ encode: /* cur, last_match_pos, best_mlen, best_off must be set */ if (ml == 1) { ip++; rPos++; continue; } /* literal; note: can end up with several literals, in which case, skip them */ rPos += ml; assert(ml >= MINMATCH); - assert((offset >= 1) && (offset <=65535)); + assert((offset >= 1) && (offset <= MAX_DISTANCE)); if ( LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ip - offset, limit, oend) ) /* updates ip, op and anchor */ return 0; /* error */ } } -- cgit v0.12 From a1c5343d89cb854462d8458c609a4f61a6aeb808 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 2 Nov 2017 18:54:18 -0700 Subject: more generic skip formula improving speed --- lib/lz4hc.c | 2 +- lib/lz4opt.h | 17 ++++------------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 941cda0..2d8671d 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -369,7 +369,7 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence ( U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0; U32 const cost = 1 + llAdd + ll + 2 + mlAdd; if (start==NULL) start = *anchor; /* only works for single segment */ - //g_debuglog_enable = (pos >= 112705) & (pos <= 112760); + //g_debuglog_enable = (pos >= 2228) & (pos <= 2262); DEBUGLOG(2, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u + %u", pos, (U32)(*ip - *anchor), matchLength, (U32)(*ip-match), diff --git a/lib/lz4opt.h b/lib/lz4opt.h index c9fab04..0463cb1 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -46,7 +46,6 @@ typedef struct { int off; int mlen; int litlen; - int toSearch; } LZ4HC_optimal_t; @@ -244,6 +243,7 @@ static int LZ4HC_compress_optimal ( const BYTE* const matchlimit = iend - LASTLITERALS; BYTE* op = (BYTE*) dst; BYTE* const oend = op + dstCapacity; + int const front = fullUpdate ? 2 : 1; /* init */ DEBUGLOG(5, "LZ4HC_compress_optimal"); @@ -279,7 +279,6 @@ static int LZ4HC_compress_optimal ( opt[rPos].off = 0; opt[rPos].litlen = llen + rPos; opt[rPos].price = cost; - opt[rPos].toSearch = 1; DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", rPos, cost, opt[rPos].litlen); } } @@ -296,21 +295,16 @@ static int LZ4HC_compress_optimal ( opt[mlen].off = offset; opt[mlen].litlen = llen; opt[mlen].price = cost; - opt[mlen].toSearch = (((mlen - 18) % 255) == 0); DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup", mlen, cost, mlen); } } } last_match_pos = matches[nb_matches_initial-1].len; - if (fullUpdate) opt[last_match_pos-2].toSearch = 1; /* 1 byte on calgary */ - if (fullUpdate) opt[last_match_pos-1].toSearch = 1; /* 1 byte on calgary */ - opt[last_match_pos].toSearch = 1; { int addLit; for (addLit = 1; addLit <= 3; addLit ++) { opt[last_match_pos+addLit].mlen = 1; /* literal */ opt[last_match_pos+addLit].off = 0; opt[last_match_pos+addLit].litlen = addLit; opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); - opt[last_match_pos+addLit].toSearch = 1; DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); } } @@ -321,7 +315,9 @@ static int LZ4HC_compress_optimal ( int nb_matches; if (curPtr >= mflimit) break; - if (opt[cur].toSearch == 0) continue; + DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u", + cur, opt[cur].price, opt[cur+1].price, cur+1); + if (opt[cur+front].price <= opt[cur].price) continue; DEBUGLOG(7, "search at rPos:%u", cur); //nb_matches = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); @@ -388,19 +384,14 @@ static int LZ4HC_compress_optimal ( opt[pos].off = offset; opt[pos].litlen = ll; opt[pos].price = price; - opt[pos].toSearch = (((ml-18) % 255) == 0); } } } } /* complete following positions with literals */ - if (fullUpdate) opt[last_match_pos-2].toSearch = 1; /* 2 bytes on enwik7 */ - if (fullUpdate) opt[last_match_pos-1].toSearch = 1; /* 53 bytes on enwik7, 13 bytes on calgary */ - opt[last_match_pos].toSearch = 1; { int addLit; for (addLit = 1; addLit <= 3; addLit ++) { opt[last_match_pos+addLit].mlen = 1; /* literal */ opt[last_match_pos+addLit].off = 0; opt[last_match_pos+addLit].litlen = addLit; opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); - opt[last_match_pos+addLit].toSearch = 1; DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); } } } /* for (cur = 1; cur <= last_match_pos; cur++) */ -- cgit v0.12 From 05d78eb817fc3eea13bd89377e771bbe19bb02c3 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 2 Nov 2017 19:50:08 -0700 Subject: new level 11 uses 512 attempts --- lib/lz4hc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 2d8671d..a7238c7 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -644,7 +644,7 @@ static int LZ4HC_getSearchNum(int compressionLevel) { switch (compressionLevel) { default: return 0; /* unused */ - case 11: return 256; + case 11: return 512; case 12: return 1<<13; } } -- cgit v0.12 From 890c0553d08ad6a6a4f2801523667413b5477512 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 3 Nov 2017 00:15:52 -0700 Subject: optimized skip strategy for level 12 --- lib/lz4opt.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/lz4opt.h b/lib/lz4opt.h index 0463cb1..edcfc10 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -243,7 +243,6 @@ static int LZ4HC_compress_optimal ( const BYTE* const matchlimit = iend - LASTLITERALS; BYTE* op = (BYTE*) dst; BYTE* const oend = op + dstCapacity; - int const front = fullUpdate ? 2 : 1; /* init */ DEBUGLOG(5, "LZ4HC_compress_optimal"); @@ -317,12 +316,16 @@ static int LZ4HC_compress_optimal ( if (curPtr >= mflimit) break; DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u", cur, opt[cur].price, opt[cur+1].price, cur+1); - if (opt[cur+front].price <= opt[cur].price) continue; + if (fullUpdate) { + if ((opt[cur+1].price <= opt[cur].price) && (opt[cur+4].price < opt[cur].price+3)) continue; + } else { + if (opt[cur+1].price <= opt[cur].price) continue; + } DEBUGLOG(7, "search at rPos:%u", cur); //nb_matches = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); - //nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, last_match_pos - cur - 1, matches, fullUpdate); /* only test matches of a minimum length */ + //nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, last_match_pos - cur, matches, fullUpdate); /* only test matches of a minimum length */ if (!nb_matches) continue; if ( ((size_t)matches[nb_matches-1].len > sufficient_len) -- cgit v0.12 From 82c1aed41927d0d3400ab91f975c2e163ecdcc1d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 3 Nov 2017 00:59:05 -0700 Subject: improved level 11 speed --- lib/lz4opt.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/lz4opt.h b/lib/lz4opt.h index edcfc10..c754865 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -324,8 +324,10 @@ static int LZ4HC_compress_optimal ( DEBUGLOG(7, "search at rPos:%u", cur); //nb_matches = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); - nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); - //nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, last_match_pos - cur, matches, fullUpdate); /* only test matches of a minimum length */ + if (fullUpdate) + nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); + else + nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, last_match_pos - cur, matches, fullUpdate); /* only test matches of a minimum length; slightly faster, but misses a few bytes */ if (!nb_matches) continue; if ( ((size_t)matches[nb_matches-1].len > sufficient_len) -- cgit v0.12 From 81667a1e966469de0f307bfda141ccb6e89a3115 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 3 Nov 2017 01:18:12 -0700 Subject: removed code and reference to binary tree match finder reduced size of LZ4HC state --- lib/lz4hc.c | 10 +---- lib/lz4hc.h | 4 +- lib/lz4opt.h | 124 +---------------------------------------------------------- 3 files changed, 6 insertions(+), 132 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index a7238c7..c1f8da6 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -761,10 +761,7 @@ int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int } LZ4HC_init (ctxPtr, (const BYTE*)dictionary); ctxPtr->end = (const BYTE*)dictionary + dictSize; - if (ctxPtr->compressionLevel >= LZ4HC_CLEVEL_OPT_MIN) - LZ4HC_updateBinTree(ctxPtr, ctxPtr->end - MFLIMIT, ctxPtr->end - LASTLITERALS); - else - if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); + if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); return dictSize; } @@ -773,10 +770,7 @@ int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock) { - if (ctxPtr->compressionLevel >= LZ4HC_CLEVEL_OPT_MIN) - LZ4HC_updateBinTree(ctxPtr, ctxPtr->end - MFLIMIT, ctxPtr->end - LASTLITERALS); - else - if (ctxPtr->end >= ctxPtr->base + 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */ + if (ctxPtr->end >= ctxPtr->base + 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */ /* Only one memory segment for extDict, so any previous extDict is lost at this stage */ ctxPtr->lowLimit = ctxPtr->dictLimit; diff --git a/lib/lz4hc.h b/lib/lz4hc.h index a9cefbb..191ab0c 100644 --- a/lib/lz4hc.h +++ b/lib/lz4hc.h @@ -129,7 +129,7 @@ LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, in * They are exposed to allow static allocation of `LZ4_streamHC_t`. * Using these definitions makes the code vulnerable to potential API break when upgrading LZ4 **************************************/ -#define LZ4HC_DICTIONARY_LOGSIZE 17 /* due to btree, hc would only need 16 */ +#define LZ4HC_DICTIONARY_LOGSIZE 16 #define LZ4HC_MAXD (1<= MINMATCH */ LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen) { - int price = 2 + 1; /* 16-bit offset + token */ + int price = 1 + 2 ; /* token + 16-bit offset */ price += LZ4HC_literalsPrice(litlen); @@ -74,126 +74,8 @@ LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen) /*-************************************* -* Binary Tree search +* Match finder ***************************************/ -LZ4_FORCE_INLINE int LZ4HC_BinTree_InsertAndGetAllMatches ( - LZ4HC_CCtx_internal* ctx, - const BYTE* const ip, - const BYTE* const iHighLimit, - size_t best_mlen, - LZ4HC_match_t* matches, - int* matchNum) -{ - U16* const chainTable = ctx->chainTable; - U32* const HashTable = ctx->hashTable; - const BYTE* const base = ctx->base; - const U32 dictLimit = ctx->dictLimit; - const U32 current = (U32)(ip - base); - const U32 lowLimit = (ctx->lowLimit + MAX_DISTANCE > current) ? ctx->lowLimit : current - (MAX_DISTANCE - 1); - const BYTE* const dictBase = ctx->dictBase; - const BYTE* match; - int nbAttempts = ctx->searchNum; - int mnum = 0; - U16 *ptr0, *ptr1, delta0, delta1; - U32 matchIndex; - size_t matchLength = 0; - U32* HashPos; - - if (ip + MINMATCH > iHighLimit) return 1; - - /* HC4 match finder */ - HashPos = &HashTable[LZ4HC_hashPtr(ip)]; - matchIndex = *HashPos; - *HashPos = current; - - ptr0 = &DELTANEXTMAXD(current*2+1); - ptr1 = &DELTANEXTMAXD(current*2); - delta0 = delta1 = (U16)(current - matchIndex); - - while ((matchIndex < current) && (matchIndex>=lowLimit) && (nbAttempts)) { - nbAttempts--; - if (matchIndex >= dictLimit) { - match = base + matchIndex; - matchLength = LZ4_count(ip, match, iHighLimit); - } else { - const BYTE* vLimit = ip + (dictLimit - matchIndex); - match = dictBase + matchIndex; - if (vLimit > iHighLimit) vLimit = iHighLimit; - matchLength = LZ4_count(ip, match, vLimit); - if ((ip+matchLength == vLimit) && (vLimit < iHighLimit)) - matchLength += LZ4_count(ip+matchLength, base+dictLimit, iHighLimit); - if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ - } - - if (matchLength > best_mlen) { - best_mlen = matchLength; - if (matches) { - if (matchIndex >= dictLimit) - matches[mnum].off = (int)(ip - match); - else - matches[mnum].off = (int)(ip - (base + matchIndex)); /* virtual matchpos */ - matches[mnum].len = (int)matchLength; - mnum++; - } - if (best_mlen > LZ4_OPT_NUM) break; - } - - if (ip+matchLength >= iHighLimit) /* equal : no way to know if inf or sup */ - break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */ - - DEBUGLOG(6, "ip :%016llX", (U64)ip); - DEBUGLOG(6, "match:%016llX", (U64)match); - if (*(ip+matchLength) < *(match+matchLength)) { - *ptr0 = delta0; - ptr0 = &DELTANEXTMAXD(matchIndex*2); - if (*ptr0 == (U16)-1) break; - delta0 = *ptr0; - delta1 += delta0; - matchIndex -= delta0; - } else { - *ptr1 = delta1; - ptr1 = &DELTANEXTMAXD(matchIndex*2+1); - if (*ptr1 == (U16)-1) break; - delta1 = *ptr1; - delta0 += delta1; - matchIndex -= delta1; - } - } - - *ptr0 = (U16)-1; - *ptr1 = (U16)-1; - if (matchNum) *matchNum = mnum; - /* if (best_mlen > 8) return best_mlen-8; */ - if (!matchNum) return 1; - return 1; -} - - -LZ4_FORCE_INLINE void LZ4HC_updateBinTree(LZ4HC_CCtx_internal* ctx, const BYTE* const ip, const BYTE* const iHighLimit) -{ - const BYTE* const base = ctx->base; - const U32 target = (U32)(ip - base); - U32 idx = ctx->nextToUpdate; - while(idx < target) - idx += LZ4HC_BinTree_InsertAndGetAllMatches(ctx, base+idx, iHighLimit, 8, NULL, NULL); -} - - -/** Tree updater, providing best match */ -LZ4_FORCE_INLINE int LZ4HC_BinTree_GetAllMatches ( - LZ4HC_CCtx_internal* ctx, - const BYTE* const ip, const BYTE* const iHighLimit, - size_t best_mlen, LZ4HC_match_t* matches, const int fullUpdate) -{ - int mnum = 0; - if (ip < ctx->base + ctx->nextToUpdate) return 0; /* skipped area */ - if (fullUpdate) LZ4HC_updateBinTree(ctx, ip, iHighLimit); - best_mlen = LZ4HC_BinTree_InsertAndGetAllMatches(ctx, ip, iHighLimit, best_mlen, matches, &mnum); - ctx->nextToUpdate = (U32)(ip - ctx->base + best_mlen); - return mnum; -} - LZ4_FORCE_INLINE int LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, /* Index table will be updated */ @@ -257,7 +139,6 @@ static int LZ4HC_compress_optimal ( int best_mlen, best_off; int cur, last_match_pos = 0; - //int const nb_matches_initial = LZ4HC_BinTree_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches, fullUpdate); int const nb_matches_initial = LZ4HC_HashChain_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches, fullUpdate); if (!nb_matches_initial) { ip++; continue; } @@ -323,7 +204,6 @@ static int LZ4HC_compress_optimal ( } DEBUGLOG(7, "search at rPos:%u", cur); - //nb_matches = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); if (fullUpdate) nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); else -- cgit v0.12 From 320e1d51ac6fd456107c18ed9d6b2c71d7ffa49f Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 3 Nov 2017 01:20:30 -0700 Subject: removed useless parameter from hash chain matchfinder used to be present for compatibility with binary tree matchfinder --- lib/lz4opt.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lib/lz4opt.h b/lib/lz4opt.h index 841e00c..6560ec5 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -91,7 +91,7 @@ int LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, /* Index table will LZ4_FORCE_INLINE int LZ4HC_HashChain_GetAllMatches ( LZ4HC_CCtx_internal* ctx, const BYTE* const ip, const BYTE* const iHighLimit, - size_t best_mlen, LZ4HC_match_t* matches, const int fullUpdate) + size_t best_mlen, LZ4HC_match_t* matches) { const BYTE* matchPtr = NULL; int matchLength = LZ4HC_FindLongerMatch(ctx, ip, iHighLimit, (int)best_mlen, &matchPtr, ctx->searchNum); @@ -99,7 +99,6 @@ LZ4_FORCE_INLINE int LZ4HC_HashChain_GetAllMatches ( assert(matches != NULL); matches[0].len = matchLength; matches[0].off = (int)(ip-matchPtr); - (void)fullUpdate; return 1; } @@ -139,7 +138,7 @@ static int LZ4HC_compress_optimal ( int best_mlen, best_off; int cur, last_match_pos = 0; - int const nb_matches_initial = LZ4HC_HashChain_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches, fullUpdate); + int const nb_matches_initial = LZ4HC_HashChain_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches); if (!nb_matches_initial) { ip++; continue; } if ((size_t)matches[nb_matches_initial-1].len > sufficient_len) { @@ -205,9 +204,9 @@ static int LZ4HC_compress_optimal ( DEBUGLOG(7, "search at rPos:%u", cur); if (fullUpdate) - nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); + nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches); else - nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, last_match_pos - cur, matches, fullUpdate); /* only test matches of a minimum length; slightly faster, but misses a few bytes */ + nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, last_match_pos - cur, matches); /* only test matches of a minimum length; slightly faster, but misses a few bytes */ if (!nb_matches) continue; if ( ((size_t)matches[nb_matches-1].len > sufficient_len) -- cgit v0.12 From 3b222d2d963fe6144498a3cb306943566ddb6922 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 3 Nov 2017 01:37:43 -0700 Subject: removes matches[] table saves stack space clearer match finder interface (no more table to fill) --- lib/lz4opt.h | 140 ++++++++++++++++++++++++++++------------------------------- 1 file changed, 67 insertions(+), 73 deletions(-) diff --git a/lib/lz4opt.h b/lib/lz4opt.h index 6560ec5..a1627f1 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -88,18 +88,18 @@ int LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, /* Index table will return LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, longest, matchpos, &uselessPtr, maxNbAttempts); } -LZ4_FORCE_INLINE int LZ4HC_HashChain_GetAllMatches ( - LZ4HC_CCtx_internal* ctx, +LZ4_FORCE_INLINE +LZ4HC_match_t LZ4HC_HashChain_GetAllMatches (LZ4HC_CCtx_internal* const ctx, const BYTE* const ip, const BYTE* const iHighLimit, - size_t best_mlen, LZ4HC_match_t* matches) + size_t best_mlen) { + LZ4HC_match_t match = {0 , 0}; const BYTE* matchPtr = NULL; int matchLength = LZ4HC_FindLongerMatch(ctx, ip, iHighLimit, (int)best_mlen, &matchPtr, ctx->searchNum); - if ((size_t)matchLength <= best_mlen) return 0; - assert(matches != NULL); - matches[0].len = matchLength; - matches[0].off = (int)(ip-matchPtr); - return 1; + if ((size_t)matchLength <= best_mlen) return match; + match.len = matchLength; + match.off = (int)(ip-matchPtr); + return match; } @@ -115,7 +115,6 @@ static int LZ4HC_compress_optimal ( ) { LZ4HC_optimal_t opt[LZ4_OPT_NUM + 3]; /* this uses a bit too much stack memory to my taste ... */ - LZ4HC_match_t matches[LZ4_OPT_NUM + 1]; const BYTE* ip = (const BYTE*) source; const BYTE* anchor = ip; @@ -138,13 +137,13 @@ static int LZ4HC_compress_optimal ( int best_mlen, best_off; int cur, last_match_pos = 0; - int const nb_matches_initial = LZ4HC_HashChain_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches); - if (!nb_matches_initial) { ip++; continue; } + LZ4HC_match_t const firstMatch = LZ4HC_HashChain_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1); + if (firstMatch.len==0) { ip++; continue; } - if ((size_t)matches[nb_matches_initial-1].len > sufficient_len) { + if ((size_t)firstMatch.len > sufficient_len) { /* good enough solution : immediate encoding */ - int const firstML = matches[nb_matches_initial-1].len; - const BYTE* const matchPos = ip - matches[nb_matches_initial-1].off; + int const firstML = firstMatch.len; + const BYTE* const matchPos = ip - firstMatch.off; if ( LZ4HC_encodeSequence(&ip, &op, &anchor, firstML, matchPos, limit, oend) ) /* updates ip, op and anchor */ return 0; /* error */ continue; @@ -162,22 +161,20 @@ static int LZ4HC_compress_optimal ( rPos, cost, opt[rPos].litlen); } } /* set prices using matches found for rPos = 0 */ - { int matchNb; - for (matchNb = 0; matchNb < nb_matches_initial; matchNb++) { - int mlen = (matchNb>0) ? matches[matchNb-1].len+1 : MINMATCH; - int const matchML = matches[matchNb].len; /* necessarily < sufficient_len < LZ4_OPT_NUM */ - int const offset = matches[matchNb].off; - assert(matchML < LZ4_OPT_NUM); - for ( ; mlen <= matchML ; mlen++) { - int const cost = LZ4HC_sequencePrice(llen, mlen); - opt[mlen].mlen = mlen; - opt[mlen].off = offset; - opt[mlen].litlen = llen; - opt[mlen].price = cost; - DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup", - mlen, cost, mlen); - } } } - last_match_pos = matches[nb_matches_initial-1].len; + { int mlen = MINMATCH; + int const matchML = firstMatch.len; /* necessarily < sufficient_len < LZ4_OPT_NUM */ + int const offset = firstMatch.off; + assert(matchML < LZ4_OPT_NUM); + for ( ; mlen <= matchML ; mlen++) { + int const cost = LZ4HC_sequencePrice(llen, mlen); + opt[mlen].mlen = mlen; + opt[mlen].off = offset; + opt[mlen].litlen = llen; + opt[mlen].price = cost; + DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup", + mlen, cost, mlen); + } } + last_match_pos = firstMatch.len; { int addLit; for (addLit = 1; addLit <= 3; addLit ++) { opt[last_match_pos+addLit].mlen = 1; /* literal */ @@ -191,7 +188,7 @@ static int LZ4HC_compress_optimal ( /* check further positions */ for (cur = 1; cur < last_match_pos; cur++) { const BYTE* const curPtr = ip + cur; - int nb_matches; + LZ4HC_match_t newMatch; if (curPtr >= mflimit) break; DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u", @@ -204,16 +201,16 @@ static int LZ4HC_compress_optimal ( DEBUGLOG(7, "search at rPos:%u", cur); if (fullUpdate) - nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches); + newMatch = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1); else - nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, last_match_pos - cur, matches); /* only test matches of a minimum length; slightly faster, but misses a few bytes */ - if (!nb_matches) continue; + newMatch = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, last_match_pos - cur); /* only test matches of a minimum length; slightly faster, but misses a few bytes */ + if (!newMatch.len) continue; - if ( ((size_t)matches[nb_matches-1].len > sufficient_len) - || (matches[nb_matches-1].len + cur >= LZ4_OPT_NUM) ) { + if ( ((size_t)newMatch.len > sufficient_len) + || (newMatch.len + cur >= LZ4_OPT_NUM) ) { /* immediate encoding */ - best_mlen = matches[nb_matches-1].len; - best_off = matches[nb_matches-1].off; + best_mlen = newMatch.len; + best_off = newMatch.off; last_match_pos = cur + 1; goto encode; } @@ -234,41 +231,38 @@ static int LZ4HC_compress_optimal ( } } } /* set prices using matches at position = cur */ - { int matchNb; - assert(cur + matches[nb_matches-1].len < LZ4_OPT_NUM); - for (matchNb = 0; matchNb < nb_matches; matchNb++) { - int const matchML = matches[matchNb].len; - int ml = (matchNb>0) ? matches[matchNb-1].len+1 : MINMATCH; - - for ( ; ml <= matchML ; ml++) { - int const pos = cur + ml; - int const offset = matches[matchNb].off; - int price; - int ll; - DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)", - pos, last_match_pos); - if (opt[cur].mlen == 1) { - ll = opt[cur].litlen; - price = ((cur > ll) ? opt[cur - ll].price : 0) - + LZ4HC_sequencePrice(ll, ml); - } else { - ll = 0; - price = opt[cur].price + LZ4HC_sequencePrice(0, ml); - } - - if (pos > last_match_pos+3 || price <= opt[pos].price) { - DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)", - pos, price, ml); - assert(pos < LZ4_OPT_NUM); - if ( (matchNb == nb_matches-1) /* last match */ - && (ml == matchML) /* last post of last match */ - && (last_match_pos < pos) ) - last_match_pos = pos; - opt[pos].mlen = ml; - opt[pos].off = offset; - opt[pos].litlen = ll; - opt[pos].price = price; - } } } } + { int const matchML = newMatch.len; + int ml = MINMATCH; + + assert(cur + newMatch.len < LZ4_OPT_NUM); + for ( ; ml <= matchML ; ml++) { + int const pos = cur + ml; + int const offset = newMatch.off; + int price; + int ll; + DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)", + pos, last_match_pos); + if (opt[cur].mlen == 1) { + ll = opt[cur].litlen; + price = ((cur > ll) ? opt[cur - ll].price : 0) + + LZ4HC_sequencePrice(ll, ml); + } else { + ll = 0; + price = opt[cur].price + LZ4HC_sequencePrice(0, ml); + } + + if (pos > last_match_pos+3 || price <= opt[pos].price) { + DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)", + pos, price, ml); + assert(pos < LZ4_OPT_NUM); + if ( (ml == matchML) /* last post of last match */ + && (last_match_pos < pos) ) + last_match_pos = pos; + opt[pos].mlen = ml; + opt[pos].off = offset; + opt[pos].litlen = ll; + opt[pos].price = price; + } } } /* complete following positions with literals */ { int addLit; for (addLit = 1; addLit <= 3; addLit ++) { -- cgit v0.12 From e2eca62046e500a95ab34e913e939aa68acb2cd4 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 3 Nov 2017 02:01:20 -0700 Subject: LZ4_compress_HC_continue_destSize() now compatible with optimal parser levels 11+ --- lib/lz4hc.c | 11 ++++++----- lib/lz4hc.h | 4 ++-- lib/lz4opt.h | 10 +++++----- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index c1f8da6..643fbb2 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -642,8 +642,11 @@ _dest_overflow: static int LZ4HC_getSearchNum(int compressionLevel) { + assert(compressionLevel >= 1); + assert(compressionLevel <= LZ4HC_CLEVEL_MAX); switch (compressionLevel) { - default: return 0; /* unused */ + default: return 1 << (compressionLevel-1); + case 10: return 1 << 12; case 11: return 512; case 12: return 1<<13; } @@ -709,8 +712,7 @@ int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, in } /* LZ4_compress_HC_destSize() : - * currently, only compatible with Hash Chain implementation, - * hence limit compression level to LZ4HC_CLEVEL_OPT_MIN-1*/ + * only compatible with Hash Chain match finder */ int LZ4_compress_HC_destSize(void* LZ4HC_Data, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel) { LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse; @@ -744,6 +746,7 @@ void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) { + /* note : 1-10 / 11-12 separation might no longer be necessary since optimal parser uses hash chain too */ int const currentCLevel = LZ4_streamHCPtr->internal_donotuse.compressionLevel; int const minCLevel = currentCLevel < LZ4HC_CLEVEL_OPT_MIN ? 1 : LZ4HC_CLEVEL_OPT_MIN; int const maxCLevel = currentCLevel < LZ4HC_CLEVEL_OPT_MIN ? LZ4HC_CLEVEL_OPT_MIN-1 : LZ4HC_CLEVEL_MAX; @@ -824,8 +827,6 @@ int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int targetDestSize) { - LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; - if (ctxPtr->compressionLevel >= LZ4HC_CLEVEL_OPT_MIN) LZ4HC_init(ctxPtr, (const BYTE*)src); /* not compatible with btopt implementation */ return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, srcSizePtr, targetDestSize, limitedDestSize); } diff --git a/lib/lz4hc.h b/lib/lz4hc.h index 191ab0c..4fbe91e 100644 --- a/lib/lz4hc.h +++ b/lib/lz4hc.h @@ -266,8 +266,8 @@ int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr, int* srcSizePtr, int targetDstSize); /*! LZ4_setCompressionLevel() : v1.8.0 (experimental) - * It's possible to change compression level after LZ4_resetStreamHC(), between 2 invocations of LZ4_compress_HC_continue*(), - * but that requires to stay in the same mode (aka 1-10 or 11-12). + * It's possible to change compression level between 2 invocations of LZ4_compress_HC_continue*(), + * but it requires to stay in the same mode (aka 1-10 or 11-12). * This function ensures this condition. */ void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel); diff --git a/lib/lz4opt.h b/lib/lz4opt.h index a1627f1..18bcd31 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -160,7 +160,7 @@ static int LZ4HC_compress_optimal ( DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", rPos, cost, opt[rPos].litlen); } } - /* set prices using matches found for rPos = 0 */ + /* set prices using initial match */ { int mlen = MINMATCH; int const matchML = firstMatch.len; /* necessarily < sufficient_len < LZ4_OPT_NUM */ int const offset = firstMatch.off; @@ -215,7 +215,7 @@ static int LZ4HC_compress_optimal ( goto encode; } - /* before first match : set price with literals at beginning */ + /* before match : set price with literals at beginning */ { int const baseLitlen = opt[cur].litlen; int litlen; for (litlen = 1; litlen < MINMATCH; litlen++) { @@ -230,10 +230,10 @@ static int LZ4HC_compress_optimal ( pos, price, opt[pos].litlen); } } } - /* set prices using matches at position = cur */ + /* set prices using match at position = cur */ { int const matchML = newMatch.len; int ml = MINMATCH; - + assert(cur + newMatch.len < LZ4_OPT_NUM); for ( ; ml <= matchML ; ml++) { int const pos = cur + ml; @@ -255,7 +255,7 @@ static int LZ4HC_compress_optimal ( DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)", pos, price, ml); assert(pos < LZ4_OPT_NUM); - if ( (ml == matchML) /* last post of last match */ + if ( (ml == matchML) /* last pos of last match */ && (last_match_pos < pos) ) last_match_pos = pos; opt[pos].mlen = ml; -- cgit v0.12 From c9bbad53ffda4a4f4595e3a2b6e3994710ee2324 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 3 Nov 2017 10:30:52 -0700 Subject: removed ctx->searchNum nbSearches now transmitted directly as function parameter easier to track and debug --- lib/lz4hc.c | 20 +++----------------- lib/lz4hc.h | 4 +--- lib/lz4opt.h | 14 ++++++++------ 3 files changed, 12 insertions(+), 26 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 643fbb2..a7577c5 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -640,17 +640,6 @@ _dest_overflow: return 0; } -static int LZ4HC_getSearchNum(int compressionLevel) -{ - assert(compressionLevel >= 1); - assert(compressionLevel <= LZ4HC_CLEVEL_MAX); - switch (compressionLevel) { - default: return 1 << (compressionLevel-1); - case 10: return 1 << 12; - case 11: return 512; - case 12: return 1<<13; - } -} static int LZ4HC_compress_generic ( LZ4HC_CCtx_internal* const ctx, @@ -667,16 +656,14 @@ static int LZ4HC_compress_generic ( if (limit == limitedDestSize) cLevel = 10; switch (cLevel) { case 10: - return LZ4HC_compress_hashChain(ctx, src, dst, srcSizePtr, dstCapacity, 1 << 12, limit); + return LZ4HC_compress_hashChain(ctx, src, dst, srcSizePtr, dstCapacity, 1<<12, limit); case 11: - ctx->searchNum = LZ4HC_getSearchNum(cLevel); - return LZ4HC_compress_optimal(ctx, src, dst, *srcSizePtr, dstCapacity, limit, 128, 0); + return LZ4HC_compress_optimal(ctx, src, dst, *srcSizePtr, dstCapacity, limit, 512, 128, 0); default: cLevel = 12; /* fall-through */ case 12: - ctx->searchNum = LZ4HC_getSearchNum(cLevel); - return LZ4HC_compress_optimal(ctx, src, dst, *srcSizePtr, dstCapacity, limit, LZ4_OPT_NUM, 1); + return LZ4HC_compress_optimal(ctx, src, dst, *srcSizePtr, dstCapacity, limit, 1<<13, LZ4_OPT_NUM, 1); } } return LZ4HC_compress_hashChain(ctx, src, dst, srcSizePtr, dstCapacity, 1 << (cLevel-1), limit); /* levels 1-9 */ @@ -741,7 +728,6 @@ void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) LZ4_streamHCPtr->internal_donotuse.base = NULL; if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX; /* cap compression level */ LZ4_streamHCPtr->internal_donotuse.compressionLevel = compressionLevel; - LZ4_streamHCPtr->internal_donotuse.searchNum = LZ4HC_getSearchNum(compressionLevel); } void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) diff --git a/lib/lz4hc.h b/lib/lz4hc.h index 4fbe91e..13a0179 100644 --- a/lib/lz4hc.h +++ b/lib/lz4hc.h @@ -152,8 +152,7 @@ typedef struct uint32_t dictLimit; /* below that point, need extDict */ uint32_t lowLimit; /* below that point, no more dict */ uint32_t nextToUpdate; /* index from which to continue dictionary update */ - uint32_t searchNum; /* only for optimal parser */ - uint32_t compressionLevel; + int compressionLevel; } LZ4HC_CCtx_internal; #else @@ -169,7 +168,6 @@ typedef struct unsigned int dictLimit; /* below that point, need extDict */ unsigned int lowLimit; /* below that point, no more dict */ unsigned int nextToUpdate; /* index from which to continue dictionary update */ - unsigned int searchNum; /* only for optimal parser */ int compressionLevel; } LZ4HC_CCtx_internal; diff --git a/lib/lz4opt.h b/lib/lz4opt.h index 18bcd31..7aec7dd 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -91,11 +91,11 @@ int LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, /* Index table will LZ4_FORCE_INLINE LZ4HC_match_t LZ4HC_HashChain_GetAllMatches (LZ4HC_CCtx_internal* const ctx, const BYTE* const ip, const BYTE* const iHighLimit, - size_t best_mlen) + size_t best_mlen, int nbSearches) { LZ4HC_match_t match = {0 , 0}; const BYTE* matchPtr = NULL; - int matchLength = LZ4HC_FindLongerMatch(ctx, ip, iHighLimit, (int)best_mlen, &matchPtr, ctx->searchNum); + int matchLength = LZ4HC_FindLongerMatch(ctx, ip, iHighLimit, (int)best_mlen, &matchPtr, nbSearches); if ((size_t)matchLength <= best_mlen) return match; match.len = matchLength; match.off = (int)(ip-matchPtr); @@ -110,8 +110,9 @@ static int LZ4HC_compress_optimal ( int inputSize, int dstCapacity, limitedOutput_directive limit, + int const nbSearches, size_t sufficient_len, - const int fullUpdate + int const fullUpdate ) { LZ4HC_optimal_t opt[LZ4_OPT_NUM + 3]; /* this uses a bit too much stack memory to my taste ... */ @@ -137,7 +138,7 @@ static int LZ4HC_compress_optimal ( int best_mlen, best_off; int cur, last_match_pos = 0; - LZ4HC_match_t const firstMatch = LZ4HC_HashChain_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1); + LZ4HC_match_t const firstMatch = LZ4HC_HashChain_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, nbSearches); if (firstMatch.len==0) { ip++; continue; } if ((size_t)firstMatch.len > sufficient_len) { @@ -201,9 +202,10 @@ static int LZ4HC_compress_optimal ( DEBUGLOG(7, "search at rPos:%u", cur); if (fullUpdate) - newMatch = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1); + newMatch = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches); else - newMatch = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, last_match_pos - cur); /* only test matches of a minimum length; slightly faster, but misses a few bytes */ + /* only test matches of minimum length; slightly faster, but misses a few bytes */ + newMatch = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches); if (!newMatch.len) continue; if ( ((size_t)newMatch.len > sufficient_len) -- cgit v0.12 From a1f4a0d98361c6ff95833dedba07a9d5a15cfa5e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 3 Nov 2017 10:48:55 -0700 Subject: moved ctx->end handling from parsers responsibility better handled one layer above (LZ4HC_compress_generic()) --- lib/lz4hc.c | 4 ++-- lib/lz4opt.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index a7577c5..cea83f2 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -457,7 +457,6 @@ static int LZ4HC_compress_hashChain ( if (limit == limitedDestSize && maxOutputSize < 1) return 0; /* Impossible to store anything */ if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */ - ctx->end += inputSize; if (limit == limitedDestSize) oend -= LASTLITERALS; /* Hack for support limitations LZ4 decompressor */ if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */ @@ -651,7 +650,8 @@ static int LZ4HC_compress_generic ( limitedOutput_directive limit ) { - if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT; /* note : convention is different from lz4frame, maybe to reconsider */ + ctx->end += *srcSizePtr; + if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT; /* note : convention is different from lz4frame, maybe something to review */ if (cLevel > 9) { if (limit == limitedDestSize) cLevel = 10; switch (cLevel) { diff --git a/lib/lz4opt.h b/lib/lz4opt.h index 7aec7dd..df8ecd6 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -128,7 +128,6 @@ static int LZ4HC_compress_optimal ( /* init */ DEBUGLOG(5, "LZ4HC_compress_optimal"); if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1; - ctx->end += inputSize; ip++; /* Main Loop */ -- cgit v0.12 From 1025546347d75ec94f584294a36132527a45d46c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 3 Nov 2017 11:28:28 -0700 Subject: unified HC levels LZ4_setCompressionLevel() can be users accross the whole range of HC levels No more transition issue between Optimal and HC modes --- lib/lz4hc.c | 12 ++++-------- lib/lz4hc.h | 3 +-- tests/fuzzer.c | 12 ++++++------ 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index cea83f2..042e034 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -49,6 +49,7 @@ /*=== Dependency ===*/ +#define LZ4_HC_STATIC_LINKING_ONLY #include "lz4hc.h" @@ -726,18 +727,13 @@ void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) { LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= sizeof(size_t) * LZ4_STREAMHCSIZE_SIZET); /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */ LZ4_streamHCPtr->internal_donotuse.base = NULL; - if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX; /* cap compression level */ - LZ4_streamHCPtr->internal_donotuse.compressionLevel = compressionLevel; + LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel); } void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) { - /* note : 1-10 / 11-12 separation might no longer be necessary since optimal parser uses hash chain too */ - int const currentCLevel = LZ4_streamHCPtr->internal_donotuse.compressionLevel; - int const minCLevel = currentCLevel < LZ4HC_CLEVEL_OPT_MIN ? 1 : LZ4HC_CLEVEL_OPT_MIN; - int const maxCLevel = currentCLevel < LZ4HC_CLEVEL_OPT_MIN ? LZ4HC_CLEVEL_OPT_MIN-1 : LZ4HC_CLEVEL_MAX; - compressionLevel = MIN(compressionLevel, minCLevel); - compressionLevel = MAX(compressionLevel, maxCLevel); + if (compressionLevel < 1) compressionLevel = 1; + if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX; LZ4_streamHCPtr->internal_donotuse.compressionLevel = compressionLevel; } diff --git a/lib/lz4hc.h b/lib/lz4hc.h index 13a0179..04153e6 100644 --- a/lib/lz4hc.h +++ b/lib/lz4hc.h @@ -265,8 +265,7 @@ int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr, /*! LZ4_setCompressionLevel() : v1.8.0 (experimental) * It's possible to change compression level between 2 invocations of LZ4_compress_HC_continue*(), - * but it requires to stay in the same mode (aka 1-10 or 11-12). - * This function ensures this condition. + * though it requires to stay in the same mode (aka fast or HC). */ void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel); diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 2e22912..ddd293c 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -376,9 +376,9 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe() failed on data compressed by LZ4_compressHC_destSize"); FUZ_CHECKTEST(ret!=srcSize, "LZ4_decompress_safe() failed : did not fully decompressed data"); FUZ_CHECKTEST(decodedBuffer[srcSize] != canary, "LZ4_decompress_safe() overwrite dst buffer !"); - { U32 const crcDec = XXH32(decodedBuffer, srcSize, 0); - FUZ_CHECKTEST(crcDec!=crcBase, "LZ4_decompress_safe() corrupted decoded data"); } - + { U32 const crcDec = XXH32(decodedBuffer, srcSize, 0); + FUZ_CHECKTEST(crcDec!=crcBase, "LZ4_decompress_safe() corrupted decoded data"); + } DISPLAYLEVEL(5, " OK \n"); } else @@ -460,8 +460,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c // Test decoding with output size being 10 bytes too short => must fail FUZ_DISPLAYTEST; - if (blockSize>10) - { + if (blockSize>10) { decodedBuffer[blockSize-10] = 0; ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, blockSize-10); FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to Output Size being 10 bytes too short"); @@ -632,6 +631,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c if (dict < (char*)CNBuffer) dict = (char*)CNBuffer; LZ4_resetStreamHC (&LZ4dictHC, compressionLevel); LZ4_loadDictHC(&LZ4dictHC, dict, dictSize); + LZ4_setCompressionLevel(&LZ4dictHC, compressionLevel-1); blockContinueCompressedSize = LZ4_compress_HC_continue(&LZ4dictHC, block, compressedBuffer, blockSize, (int)compressedBufferSize); FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_HC_continue failed"); @@ -657,7 +657,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data"); /* Compress HC continue destSize */ - FUZ_DISPLAYTEST; + FUZ_DISPLAYTEST; { int const availableSpace = (FUZ_rand(&randState) % blockSize) + 5; int consumedSize = blockSize; FUZ_DISPLAYTEST; -- cgit v0.12 From 89821ac7a1e9bdeef33c747321b6deec66cbde1e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 3 Nov 2017 11:49:56 -0700 Subject: minor comment edit --- lib/lz4frame.c | 8 ++++---- lib/lz4hc.h | 41 +++++++++++++++++++---------------------- lib/lz4opt.h | 13 ++++++------- 3 files changed, 29 insertions(+), 33 deletions(-) diff --git a/lib/lz4frame.c b/lib/lz4frame.c index 3adbdd9..ebd1089 100644 --- a/lib/lz4frame.c +++ b/lib/lz4frame.c @@ -322,7 +322,7 @@ size_t LZ4F_compressFrame_usingCDict(void* dstBuffer, size_t dstCapacity, const LZ4F_preferences_t* preferencesPtr) { LZ4F_cctx_t cctxI; - LZ4_stream_t lz4ctx; + LZ4_stream_t lz4ctx; /* pretty large on stack */ LZ4F_preferences_t prefs; LZ4F_compressOptions_t options; BYTE* const dstStart = (BYTE*) dstBuffer; @@ -504,15 +504,15 @@ size_t LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctxPtr, cctxPtr->prefs = *preferencesPtr; /* Ctx Management */ - { U32 const tableID = (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) ? 1 : 2; /* 0:nothing ; 1:LZ4 table ; 2:HC tables */ - if (cctxPtr->lz4CtxLevel < tableID) { + { U32 const ctxTypeID = (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) ? 1 : 2; /* 0:nothing ; 1:LZ4 table ; 2:HC tables */ + if (cctxPtr->lz4CtxLevel < ctxTypeID) { FREEMEM(cctxPtr->lz4CtxPtr); if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) cctxPtr->lz4CtxPtr = (void*)LZ4_createStream(); else cctxPtr->lz4CtxPtr = (void*)LZ4_createStreamHC(); if (cctxPtr->lz4CtxPtr == NULL) return err0r(LZ4F_ERROR_allocation_failed); - cctxPtr->lz4CtxLevel = tableID; + cctxPtr->lz4CtxLevel = ctxTypeID; } } /* Buffer Management */ diff --git a/lib/lz4hc.h b/lib/lz4hc.h index 04153e6..d791062 100644 --- a/lib/lz4hc.h +++ b/lib/lz4hc.h @@ -39,7 +39,7 @@ extern "C" { #endif /* --- Dependency --- */ -/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */ +/* note : lz4hc requires lz4.h/lz4.c for compilation */ #include "lz4.h" /* stddef, LZ4LIB_API, LZ4_DEPRECATED */ @@ -54,12 +54,12 @@ extern "C" { * Block Compression **************************************/ /*! LZ4_compress_HC() : - * Compress data from `src` into `dst`, using the more powerful but slower "HC" algorithm. + * Compress data from `src` into `dst`, using the more powerful but slower "HC" algorithm. * `dst` must be already allocated. - * Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h") - * Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h") - * `compressionLevel` : Recommended values are between 4 and 9, although any value between 1 and LZ4HC_CLEVEL_MAX will work. - * Values >LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX. + * Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h") + * Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h") + * `compressionLevel` : any value between 1 and LZ4HC_CLEVEL_MAX will work. + * Values > LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX. * @return : the number of bytes written into 'dst' * or 0 if compression fails. */ @@ -72,12 +72,12 @@ LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dst /*! LZ4_compress_HC_extStateHC() : - * Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`. + * Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`. * `state` size is provided by LZ4_sizeofStateHC(). - * Memory segment must be aligned on 8-bytes boundaries (which a normal malloc() will do properly). + * Memory segment must be aligned on 8-bytes boundaries (which a normal malloc() should do properly). */ -LZ4LIB_API int LZ4_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel); LZ4LIB_API int LZ4_sizeofStateHC(void); +LZ4LIB_API int LZ4_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel); /*-************************************ @@ -87,10 +87,10 @@ LZ4LIB_API int LZ4_sizeofStateHC(void); typedef union LZ4_streamHC_u LZ4_streamHC_t; /* incomplete type (defined later) */ /*! LZ4_createStreamHC() and LZ4_freeStreamHC() : - * These functions create and release memory for LZ4 HC streaming state. - * Newly created states are automatically initialized. - * Existing states can be re-used several times, using LZ4_resetStreamHC(). - * These methods are API and ABI stable, they can be used in combination with a DLL. + * These functions create and release memory for LZ4 HC streaming state. + * Newly created states are automatically initialized. + * Existing states can be re-used several times, using LZ4_resetStreamHC(). + * These methods are API and ABI stable, they can be used in combination with a DLL. */ LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void); LZ4LIB_API int LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr); @@ -123,12 +123,12 @@ LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, in */ - /*-************************************* +/*-************************************************************** * PRIVATE DEFINITIONS : * Do not use these definitions. * They are exposed to allow static allocation of `LZ4_streamHC_t`. * Using these definitions makes the code vulnerable to potential API break when upgrading LZ4 - **************************************/ + ****************************************************************/ #define LZ4HC_DICTIONARY_LOGSIZE 16 #define LZ4HC_MAXD (1< Date: Fri, 3 Nov 2017 12:33:55 -0700 Subject: fixed minor static analyzer warning dead assignment --- lib/lz4hc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 042e034..0cda77c 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -661,7 +661,6 @@ static int LZ4HC_compress_generic ( case 11: return LZ4HC_compress_optimal(ctx, src, dst, *srcSizePtr, dstCapacity, limit, 512, 128, 0); default: - cLevel = 12; /* fall-through */ case 12: return LZ4HC_compress_optimal(ctx, src, dst, *srcSizePtr, dstCapacity, limit, 1<<13, LZ4_OPT_NUM, 1); -- cgit v0.12 From a004c1fbee4e5a8793f9dfc1deeafea1d59be486 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 7 Nov 2017 10:53:29 -0800 Subject: fixed LZ4HC_countPattern() - works with byte values other than `0` - works for any repetitive pattern of length 1, 2 or 4 (but not 3!) - works for little and big endian systems - preserve speed of previous implementation --- lib/lz4hc.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 0cda77c..fbcec98 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -130,20 +130,34 @@ static int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match, return back; } -static unsigned LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, reg_t pattern) +/* LZ4HC_countPattern() : + * pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) */ +static unsigned LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32) { const BYTE* const iStart = ip; + reg_t const pattern = (sizeof(pattern)==8) ? (reg_t)pattern32 + (((reg_t)pattern32) << 32) : pattern32; - while (likely(ip>= 8; + } + } else { /* big endian */ + U32 bitOffset = (sizeof(pattern)*8) - 8; + while (ip < iEnd) { + BYTE const byte = (BYTE)(pattern >> bitOffset); + if (*ip != byte) break; + ip ++; bitOffset -= 8; + } + } + return (unsigned)(ip - iStart); } -- cgit v0.12 From 7130bfe5733347d8c84b8f6b8fb34c1397d4a173 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 7 Nov 2017 11:05:48 -0800 Subject: improved LZ4HC_reverseCountPattern() : works for any repetitive pattern of length 1, 2 or 4 (but not 3!) works for any endianess --- lib/lz4hc.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index fbcec98..1880d53 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -161,17 +161,21 @@ static unsigned LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 c return (unsigned)(ip - iStart); } +/* LZ4HC_reverseCountPattern() : + * pattern must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) + * read using natural platform endianess */ static unsigned LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern) { const BYTE* const iStart = ip; - while (likely(ip>=iLow+4)) { + while (likely(ip >= iLow+4)) { if (LZ4_read32(ip-4) != pattern) break; ip -= 4; } while (likely(ip>iLow)) { - if (ip[-1] != (BYTE)pattern) break; - ip--; + const BYTE* bytePtr = (const BYTE*)(&pattern) + 3; /* works for any endianess */ + if (ip[-1] != *bytePtr) break; + ip--; bytePtr--; } return (unsigned)(iStart - ip); -- cgit v0.12 From 5512a5f1a932d4d906f3bbecf1932863bcac7b2b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 7 Nov 2017 11:22:57 -0800 Subject: removed useless `(1 && ...)` condition as reported by @terrelln --- lib/lz4hc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 1880d53..5882109 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -119,8 +119,9 @@ LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip) /** LZ4HC_countBack() : * @return : negative value, nb of common bytes before ip/match */ -static int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match, - const BYTE* const iMin, const BYTE* const mMin) +LZ4_FORCE_INLINE +int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match, + const BYTE* const iMin, const BYTE* const mMin) { int back=0; while ( (ip+back > iMin) @@ -264,7 +265,7 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( { U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex); matchIndex -= nextOffset; - if (1 && (nextOffset==1)) { + if (nextOffset==1) { /* may be a repeated pattern */ if (repeat == rep_untested) { if ((pattern & 0xFFFF) == (pattern >> 16)) { /* is it enough ? */ -- cgit v0.12 From c49f66f2adf27af499cf7efacbad656bf30d671a Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 7 Nov 2017 11:29:28 -0800 Subject: ensure `pattern` is a 1-byte repetition --- lib/lz4hc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 5882109..93017f1 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -268,7 +268,8 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( if (nextOffset==1) { /* may be a repeated pattern */ if (repeat == rep_untested) { - if ((pattern & 0xFFFF) == (pattern >> 16)) { /* is it enough ? */ + if ( ((pattern & 0xFFFF) == (pattern >> 16)) + & ((pattern & 0xFF) == (pattern >> 24)) ) { repeat = rep_confirmed; srcPatternLength = LZ4HC_countPattern(ip+4, iHighLimit, pattern) + 4; } else { -- cgit v0.12 From 71fd08c17daa3deee0c2c3c64c8f009729055bad Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 7 Nov 2017 11:33:40 -0800 Subject: removed legacy version of LZ4HC_InsertAndFindBestMatch() --- lib/lz4hc.c | 54 +----------------------------------------------------- 1 file changed, 1 insertion(+), 53 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 93017f1..3498391 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -275,7 +275,7 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( } else { repeat = rep_not; } } - if ( (repeat == rep_confirmed) /* proven repeated pattern (1-2-4) */ + if ( (repeat == rep_confirmed) && (matchIndex >= dictLimit) ) { /* same segment only */ const BYTE* const matchPtr = base + matchIndex; if (LZ4_read32(matchPtr) == pattern) { /* good candidate */ @@ -296,7 +296,6 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( return longest; } -#if 1 LZ4_FORCE_INLINE int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */ const BYTE* const ip, const BYTE* const iLimit, @@ -307,57 +306,6 @@ int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index tabl return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts); } -#else - -LZ4_FORCE_INLINE -int LZ4HC_InsertAndFindBestMatch (LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */ - const BYTE* const ip, const BYTE* const iLimit, - const BYTE** matchpos, - const int maxNbAttempts) -{ - U16* const chainTable = hc4->chainTable; - U32* const HashTable = hc4->hashTable; - const BYTE* const base = hc4->base; - const BYTE* const dictBase = hc4->dictBase; - const U32 dictLimit = hc4->dictLimit; - const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1); - U32 matchIndex; - int nbAttempts = maxNbAttempts; - size_t ml = 0; - - /* HC4 match finder */ - LZ4HC_Insert(hc4, ip); - matchIndex = HashTable[LZ4HC_hashPtr(ip)]; - - while ((matchIndex>=lowLimit) && (nbAttempts)) { - nbAttempts--; - if (matchIndex >= dictLimit) { - const BYTE* const match = base + matchIndex; - if ( (*(match+ml) == *(ip+ml)) /* can be longer */ - && (LZ4_read32(match) == LZ4_read32(ip)) ) - { - size_t const mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; - if (mlt > ml) { ml = mlt; *matchpos = match; } - } - } else { - const BYTE* const match = dictBase + matchIndex; - if (LZ4_read32(match) == LZ4_read32(ip)) { - size_t mlt; - const BYTE* vLimit = ip + (dictLimit - matchIndex); - if (vLimit > iLimit) vLimit = iLimit; - mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; - if ((ip+mlt == vLimit) && (vLimit < iLimit)) - mlt += LZ4_count(ip+mlt, base+dictLimit, iLimit); - if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */ - } - } - matchIndex -= DELTANEXTU16(chainTable, matchIndex); - } - - return (int)ml; -} -#endif - typedef enum { -- cgit v0.12 From 897f5e9834ed4e8b4e34380b12a6b9e7b911af90 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 7 Nov 2017 17:37:31 -0800 Subject: removed the ip++ at the beginning of block The first byte used to be skipped to avoid a infinite self-comparison. This is no longer necessary, since init() ensures that index starts at 64K. The first byte is also useless to search when each block is independent, but it's no longer the case when blocks are linked. Removing the first-byte-skip saves about 10 bytes / MB on files compressed with -BD4 (linked blocks 64Kb), which feels correct as each MB has 16 blocks of 64KB. --- lib/lz4hc.c | 2 -- lib/lz4opt.h | 1 - 2 files changed, 3 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 3498391..5e2dd2a 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -429,8 +429,6 @@ static int LZ4HC_compress_hashChain ( if (limit == limitedDestSize) oend -= LASTLITERALS; /* Hack for support limitations LZ4 decompressor */ if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */ - ip++; - /* Main Loop */ while (ip < mflimit) { ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts); diff --git a/lib/lz4opt.h b/lib/lz4opt.h index dd22b7a..03ab825 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -127,7 +127,6 @@ static int LZ4HC_compress_optimal ( /* init */ DEBUGLOG(5, "LZ4HC_compress_optimal"); if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1; - ip++; /* Main Loop */ assert(ip - anchor < LZ4_MAX_INPUT_SIZE); -- cgit v0.12 From b07d36245a52048df233a90c382e523d4560fc64 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 7 Nov 2017 17:58:59 -0800 Subject: fixed LZ4HC_reverseCountPattern() for multi-bytes patterns (which is not useful for the time being) --- lib/lz4hc.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 5e2dd2a..02eafaf 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -173,12 +173,11 @@ static unsigned LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow if (LZ4_read32(ip-4) != pattern) break; ip -= 4; } - while (likely(ip>iLow)) { - const BYTE* bytePtr = (const BYTE*)(&pattern) + 3; /* works for any endianess */ - if (ip[-1] != *bytePtr) break; - ip--; bytePtr--; - } - + { const BYTE* bytePtr = (const BYTE*)(&pattern) + 3; /* works for any endianess */ + while (likely(ip>iLow)) { + if (ip[-1] != *bytePtr) break; + ip--; bytePtr--; + } } return (unsigned)(iStart - ip); } -- cgit v0.12 From fa03a9d3d983c0bc4a6ea843493d85d434188090 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 8 Nov 2017 08:42:59 -0800 Subject: added code comments --- lib/lz4hc.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 02eafaf..b8d8a78 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -223,7 +223,7 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( if (LZ4_read32(matchPtr) == pattern) { int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); #if 0 - /* more generic but unfortunately slower ... */ + /* more generic but unfortunately slower on clang */ int const back = LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr); #else int back = 0; @@ -297,11 +297,14 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( LZ4_FORCE_INLINE int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */ - const BYTE* const ip, const BYTE* const iLimit, - const BYTE** matchpos, - const int maxNbAttempts) + const BYTE* const ip, const BYTE* const iLimit, + const BYTE** matchpos, + const int maxNbAttempts) { const BYTE* uselessPtr = ip; + /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), + * but this won't be the case here, as we define iLowLimit==ip, + * so LZ4HC_InsertAndGetWiderMatch() will not be allowed to search past ip */ return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts); } @@ -430,7 +433,7 @@ static int LZ4HC_compress_hashChain ( /* Main Loop */ while (ip < mflimit) { - ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts); + ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, &ref, maxNbAttempts); if (ml Date: Wed, 8 Nov 2017 17:11:51 -0800 Subject: lz4opt: simplified match finder invocation to LZ4HC_FindLongerMatch() --- lib/lz4hc.c | 2 +- lib/lz4opt.h | 31 +++++++++++-------------------- 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index b8d8a78..60690a0 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -304,7 +304,7 @@ int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index tabl const BYTE* uselessPtr = ip; /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), * but this won't be the case here, as we define iLowLimit==ip, - * so LZ4HC_InsertAndGetWiderMatch() will not be allowed to search past ip */ + * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts); } diff --git a/lib/lz4opt.h b/lib/lz4opt.h index 03ab825..6db8586 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -70,32 +70,23 @@ LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen) /*-************************************* * Match finder ***************************************/ - -LZ4_FORCE_INLINE -int LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, /* Index table will be updated */ - const BYTE* const ip, const BYTE* const iHighLimit, - int longest, - const BYTE** matchpos, - const int maxNbAttempts) -{ - const BYTE* uselessPtr = ip; - return LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, longest, matchpos, &uselessPtr, maxNbAttempts); -} - typedef struct { int off; int len; } LZ4HC_match_t; LZ4_FORCE_INLINE -LZ4HC_match_t LZ4HC_HashChain_GetAllMatches (LZ4HC_CCtx_internal* const ctx, - const BYTE* const ip, const BYTE* const iHighLimit, - size_t best_mlen, int nbSearches) +LZ4HC_match_t LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, + const BYTE* ip, const BYTE* const iHighLimit, + int minLen, int nbSearches) { LZ4HC_match_t match = { 0 , 0 }; const BYTE* matchPtr = NULL; - int matchLength = LZ4HC_FindLongerMatch(ctx, ip, iHighLimit, (int)best_mlen, &matchPtr, nbSearches); - if ((size_t)matchLength <= best_mlen) return match; + /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), + * but this won't be the case here, as we define iLowLimit==ip, + * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ + int const matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches); + if (matchLength <= minLen) return match; match.len = matchLength; match.off = (int)(ip-matchPtr); return match; @@ -135,7 +126,7 @@ static int LZ4HC_compress_optimal ( int best_mlen, best_off; int cur, last_match_pos = 0; - LZ4HC_match_t const firstMatch = LZ4HC_HashChain_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, nbSearches); + LZ4HC_match_t const firstMatch = LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH-1, nbSearches); if (firstMatch.len==0) { ip++; continue; } if ((size_t)firstMatch.len > sufficient_len) { @@ -199,10 +190,10 @@ static int LZ4HC_compress_optimal ( DEBUGLOG(7, "search at rPos:%u", cur); if (fullUpdate) - newMatch = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches); + newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches); else /* only test matches of minimum length; slightly faster, but misses a few bytes */ - newMatch = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches); + newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches); if (!newMatch.len) continue; if ( ((size_t)newMatch.len > sufficient_len) -- cgit v0.12 From 63f6039fb37bd561c776a1fc4eb04bd9ea46dc83 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 8 Nov 2017 17:47:24 -0800 Subject: added constant TRAILING_LITERALS which is more explicit than its value `3`. reported by @terrelln --- lib/lz4opt.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/lz4opt.h b/lib/lz4opt.h index 6db8586..7f74df9 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -105,7 +105,8 @@ static int LZ4HC_compress_optimal ( int const fullUpdate ) { - LZ4HC_optimal_t opt[LZ4_OPT_NUM + 3]; /* this uses a bit too much stack memory to my taste ... */ +#define TRAILING_LITERALS 3 + LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* this uses a bit too much stack memory to my taste ... */ const BYTE* ip = (const BYTE*) source; const BYTE* anchor = ip; @@ -165,7 +166,7 @@ static int LZ4HC_compress_optimal ( } } last_match_pos = firstMatch.len; { int addLit; - for (addLit = 1; addLit <= 3; addLit ++) { + for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) { opt[last_match_pos+addLit].mlen = 1; /* literal */ opt[last_match_pos+addLit].off = 0; opt[last_match_pos+addLit].litlen = addLit; @@ -183,7 +184,7 @@ static int LZ4HC_compress_optimal ( DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u", cur, opt[cur].price, opt[cur+1].price, cur+1); if (fullUpdate) { - if ((opt[cur+1].price <= opt[cur].price) && (opt[cur+4].price < opt[cur].price+3)) continue; + if ((opt[cur+1].price <= opt[cur].price) && (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/)) continue; } else { if (opt[cur+1].price <= opt[cur].price) continue; } @@ -241,7 +242,7 @@ static int LZ4HC_compress_optimal ( price = opt[cur].price + LZ4HC_sequencePrice(0, ml); } - if (pos > last_match_pos+3 || price <= opt[pos].price) { + if (pos > last_match_pos+TRAILING_LITERALS || price <= opt[pos].price) { DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)", pos, price, ml); assert(pos < LZ4_OPT_NUM); @@ -255,7 +256,7 @@ static int LZ4HC_compress_optimal ( } } } /* complete following positions with literals */ { int addLit; - for (addLit = 1; addLit <= 3; addLit ++) { + for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) { opt[last_match_pos+addLit].mlen = 1; /* literal */ opt[last_match_pos+addLit].off = 0; opt[last_match_pos+addLit].litlen = addLit; -- cgit v0.12 From dc3ed5b6a7d9cc7d251a1942558e37793d5575b8 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 8 Nov 2017 17:56:20 -0800 Subject: added code comments --- lib/lz4opt.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/lz4opt.h b/lib/lz4opt.h index 7f74df9..9917851 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -184,8 +184,13 @@ static int LZ4HC_compress_optimal ( DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u", cur, opt[cur].price, opt[cur+1].price, cur+1); if (fullUpdate) { - if ((opt[cur+1].price <= opt[cur].price) && (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/)) continue; + /* not useful to search here if next position has same (or lower) cost */ + if ( (opt[cur+1].price <= opt[cur].price) + /* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */ + && (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/) ) + continue; } else { + /* not useful to search here if next position has same (or lower) cost */ if (opt[cur+1].price <= opt[cur].price) continue; } -- cgit v0.12