From dc4270710739296602235caa5cfd065feafd87b7 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 3 May 2018 15:38:32 -0700 Subject: created LZ4HC_FindLongestMatch() simplified match finder only searching forward and within current buffer, for easier testing of optimizations. --- lib/lz4hc.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 88 insertions(+), 16 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 0859ea6..132673e 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -1083,6 +1083,80 @@ LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen) } +int +LZ4HC_FindLongestMatch (LZ4HC_CCtx_internal* hc4, + const BYTE* const ip, + const BYTE* const iHighLimit, + int longest, + const BYTE** matchpos, + const int maxNbAttempts) +{ + U16* const chainTable = hc4->chainTable; + U32* const HashTable = hc4->hashTable; + const BYTE* const base = hc4->base; + const U32 ipIndex = (U32)(ip - base); + const U32 lowestMatchIndex = (hc4->lowLimit + 64 KB > ipIndex) ? hc4->lowLimit : ipIndex - MAX_DISTANCE; + int nbAttempts = maxNbAttempts; + U32 const pattern = LZ4_read32(ip); + U32 matchIndex; + + DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch"); + /* First Match */ + LZ4HC_Insert(hc4, ip); + matchIndex = HashTable[LZ4HC_hashPtr(ip)]; + DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)", + matchIndex, lowestMatchIndex); + + /* find first match */ + while ( (longest <= MINMATCH) + && (matchIndex>=lowestMatchIndex) + && (nbAttempts) ) { + const BYTE* const matchPtr = base + matchIndex; + assert(matchIndex < ipIndex); + assert(matchIndex >= dictLimit); + assert(matchPtr >= lowPrefixPtr); + assert(matchPtr < ip); + assert(longest >= 1); + nbAttempts--; + if (LZ4_read32(matchPtr) == pattern) { + int const mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); + if (mlt > longest) { + longest = mlt; + *matchpos = matchPtr; + break; + } } + + { U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex); + matchIndex -= nextOffset; + } + } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */ + + assert(longest > MINMATCH); + while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) { + const BYTE* const matchPtr = base + matchIndex; + assert(matchIndex < ipIndex); + assert(matchIndex >= dictLimit); + assert(matchPtr >= lowPrefixPtr); + assert(matchPtr < ip); + assert(longest >= 1); + nbAttempts--; + if (LZ4_read16(ip + longest - 1) == LZ4_read16(matchPtr + longest - 1)) { + if (LZ4_read32(matchPtr) == pattern) { + int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); + if (mlt > longest) { + longest = mlt; + *matchpos = matchPtr; + } } } + + { U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex); + matchIndex -= nextOffset; + } + } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */ + + return longest; +} + + typedef struct { int off; int len; @@ -1100,9 +1174,8 @@ LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), * but this won't be the case here, as we define iLowLimit==ip, * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ - int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, - ip, ip, iHighLimit, minLen, &matchPtr, &ip, - nbSearches, 1 /* patternAnalysis */, dict, favorDecSpeed); + //int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /* patternAnalysis */, dict, favorDecSpeed); + int matchLength = LZ4HC_FindLongestMatch(ctx, ip, iHighLimit, minLen, &matchPtr, nbSearches); (void)dict; if (matchLength <= minLen) return match; if (favorDecSpeed) { if ((matchLength>18) & (matchLength<=36)) matchLength=18; /* favor shortcut */ @@ -1112,19 +1185,18 @@ LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, return match; } -static int LZ4HC_compress_optimal ( - LZ4HC_CCtx_internal* ctx, - const char* const source, - char* dst, - int* srcSizePtr, - int dstCapacity, - int const nbSearches, - size_t sufficient_len, - const limitedOutput_directive limit, - int const fullUpdate, - const dictCtx_directive dict, - const HCfavor_e favorDecSpeed - ) + +static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx, + const char* const source, + char* dst, + int* srcSizePtr, + int dstCapacity, + int const nbSearches, + size_t sufficient_len, + const limitedOutput_directive limit, + int const fullUpdate, + const dictCtx_directive dict, + const HCfavor_e favorDecSpeed) { #define TRAILING_LITERALS 3 LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* ~64 KB, which is a bit large for stack... */ -- cgit v0.12 From 434ace7244b063c418b8d9f5c29330411a1df237 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 3 May 2018 16:31:41 -0700 Subject: implemented search accelerator greatly improves speed compared to non-accelerated, especially for slower files. On my laptop, -b12 : ``` calgary.tar : 4.3 MB/s => 9.0 MB/s enwik7 : 10.2 MB/s => 13.3 MB/s silesia.tar : 4.0 MB/s => 8.7 MB/s ``` Note : this is the simplified version, without handling dictionaries, external buffer, nor pattern analyzer. Current `dev` branch on these samples gives : ``` calgary.tar : 4.2 MB/s enwik7 : 9.7 MB/s silesia.tar : 3.5 MB/s ``` interestingly, it's slower, presumably due to handling of dictionaries. --- lib/lz4hc.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 132673e..6c50db5 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -1099,6 +1099,7 @@ LZ4HC_FindLongestMatch (LZ4HC_CCtx_internal* hc4, int nbAttempts = maxNbAttempts; U32 const pattern = LZ4_read32(ip); U32 matchIndex; + int fromMStart = 0; DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch"); /* First Match */ @@ -1132,8 +1133,8 @@ LZ4HC_FindLongestMatch (LZ4HC_CCtx_internal* hc4, } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */ assert(longest > MINMATCH); - while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) { - const BYTE* const matchPtr = base + matchIndex; + while ((matchIndex-fromMStart>=lowestMatchIndex) && (nbAttempts)) { + const BYTE* const matchPtr = base + matchIndex - fromMStart; assert(matchIndex < ipIndex); assert(matchIndex >= dictLimit); assert(matchPtr >= lowPrefixPtr); @@ -1146,6 +1147,21 @@ LZ4HC_FindLongestMatch (LZ4HC_CCtx_internal* hc4, if (mlt > longest) { longest = mlt; *matchpos = matchPtr; + matchIndex -= fromMStart; /* beginning of match */ + if (1 && matchIndex + longest <= ipIndex) { + U32 distanceToNextMatch = 1; + int pos; + for (pos = 0; pos <= longest - MINMATCH; pos++) { + U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + pos); + if (candidateDist > distanceToNextMatch) { + distanceToNextMatch = candidateDist; + fromMStart = pos; + } + } + if (distanceToNextMatch > matchIndex) break; /* avoid overflow */ + matchIndex -= distanceToNextMatch - fromMStart; + continue; + } } } } { U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex); -- cgit v0.12 From 9699ba5ddfa8485bb4ad36cc9ec78cbed542f28b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 4 May 2018 19:13:33 -0700 Subject: integrated chain swapper into HC match finder slower than expected Pattern analyzer and Chain Swapper work slower when both activated. Reasons unclear. --- lib/lz4hc.c | 121 ++++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 76 insertions(+), 45 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 6c50db5..c58cd9d 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -227,6 +227,7 @@ LZ4HC_InsertAndGetWiderMatch ( const BYTE* const dictBase = hc4->dictBase; int const lookBackLength = (int)(ip-iLowLimit); int nbAttempts = maxNbAttempts; + int matchChainPos = 0; U32 const pattern = LZ4_read32(ip); U32 matchIndex; U32 dictMatchIndex; @@ -241,31 +242,30 @@ LZ4HC_InsertAndGetWiderMatch ( matchIndex, lowestMatchIndex); while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) { - DEBUGLOG(7, "remaining attempts : %i", nbAttempts); + int mlt=0; nbAttempts--; assert(matchIndex < ipIndex); if (favorDecSpeed && (ipIndex - matchIndex < 8)) { /* do nothing */ - } else if (matchIndex >= dictLimit) { + } else if (matchIndex >= dictLimit) { /* within current Prefix */ const BYTE* const matchPtr = base + matchIndex; assert(matchPtr >= lowPrefixPtr); assert(matchPtr < ip); assert(longest >= 1); if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) { if (LZ4_read32(matchPtr) == pattern) { - int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); int const back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr) : 0; + mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); mlt -= back; if (mlt > longest) { longest = mlt; - *matchpos = matchPtr+back; - *startpos = ip+back; + *matchpos = matchPtr + back; + *startpos = ip + back; } } } - } else { /* matchIndex < dictLimit */ + } else { /* lowestMatchIndex <= matchIndex < dictLimit */ const BYTE* const matchPtr = dictBase + matchIndex; if (LZ4_read32(matchPtr) == pattern) { const BYTE* const dictStart = dictBase + hc4->lowLimit; - int mlt; int back = 0; const BYTE* vLimit = ip + (dictLimit - matchIndex); if (vLimit > iHighLimit) vLimit = iHighLimit; @@ -280,9 +280,10 @@ LZ4HC_InsertAndGetWiderMatch ( *startpos = ip + back; } } } - { U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex); - matchIndex -= nextOffset; - if (patternAnalysis && nextOffset==1) { + { U32 const distNextMatch = DELTANEXTU16(chainTable, matchIndex); + if (1 && patternAnalysis && distNextMatch==1 && matchChainPos==0) { + U32 const matchCandidateIdx = matchIndex-1; + //static U64 g_nbPatternAnalysis = 0; g_nbPatternAnalysis++; if (g_nbPatternAnalysis % (((g_nbPatternAnalysis/1000000)+1)*100)==0) DEBUGLOG(2, "g_nbPatternAnalysis=%llu ", g_nbPatternAnalysis); /* may be a repeated pattern */ if (repeat == rep_untested) { if ( ((pattern & 0xFFFF) == (pattern >> 16)) @@ -293,21 +294,48 @@ LZ4HC_InsertAndGetWiderMatch ( repeat = rep_not; } } if ( (repeat == rep_confirmed) - && (matchIndex >= dictLimit) ) { /* same segment only */ - const BYTE* const matchPtr = base + matchIndex; + && (matchCandidateIdx >= dictLimit) ) { /* same segment only */ + const BYTE* const matchPtr = base + matchCandidateIdx; if (LZ4_read32(matchPtr) == pattern) { /* good candidate */ size_t const forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern); const BYTE* const maxLowPtr = (lowPrefixPtr + MAX_DISTANCE >= ip) ? lowPrefixPtr : ip - MAX_DISTANCE; size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, maxLowPtr, pattern); size_t const currentSegmentLength = backLength + forwardPatternLength; + //static U64 g_nbPatternAnalysis_confirmed = 0; g_nbPatternAnalysis_confirmed++; if (g_nbPatternAnalysis_confirmed%(((g_nbPatternAnalysis_confirmed/100000)+1)*100)==0) DEBUGLOG(2, "g_nbPatternAnalysis_confirmed=%llu ", g_nbPatternAnalysis_confirmed); if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */ && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */ - matchIndex += (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */ + matchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */ } else { - matchIndex -= (U32)backLength; /* let's go to farthest segment position, will find a match of length currentSegmentLength + maybe some back */ + matchIndex = matchCandidateIdx - (U32)backLength; /* let's go to farthest segment position, will find a match of length currentSegmentLength + maybe some back */ } - } } } } + matchChainPos = 0; + continue; + } } + } } + + if (mlt == longest) { /* better match => select a better chain */ + assert(longest >= MINMATCH); + if (1 && matchIndex + longest <= ipIndex) { + U32 distanceToNextMatch = 1; + int pos; + //static U64 g_nbChainSwapper = 0; g_nbChainSwapper++; if (g_nbChainSwapper%(((g_nbChainSwapper/100000)+1)*100)==0) DEBUGLOG(2, "g_nbChainSwapper=%llu ", g_nbChainSwapper); + for (pos = matchChainPos; pos <= longest - MINMATCH; pos++) { + U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + pos); + if (candidateDist > distanceToNextMatch) { + distanceToNextMatch = candidateDist; + matchChainPos = pos; + } + } + if (distanceToNextMatch > matchIndex) break; /* avoid overflow */ + matchIndex -= distanceToNextMatch; + continue; + } + } + + /* follow current chain */ + matchIndex -= DELTANEXTU16(chainTable, matchIndex+matchChainPos); + } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */ if (dict == usingDictCtx && nbAttempts && ipIndex - lowestMatchIndex < MAX_DISTANCE) { @@ -339,6 +367,7 @@ LZ4HC_InsertAndGetWiderMatch ( } } } + return longest; } @@ -1099,7 +1128,7 @@ LZ4HC_FindLongestMatch (LZ4HC_CCtx_internal* hc4, int nbAttempts = maxNbAttempts; U32 const pattern = LZ4_read32(ip); U32 matchIndex; - int fromMStart = 0; + int matchChainPos = 0; DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch"); /* First Match */ @@ -1109,13 +1138,11 @@ LZ4HC_FindLongestMatch (LZ4HC_CCtx_internal* hc4, matchIndex, lowestMatchIndex); /* find first match */ - while ( (longest <= MINMATCH) + while ( 1 && (longest < MINMATCH) && (matchIndex>=lowestMatchIndex) && (nbAttempts) ) { const BYTE* const matchPtr = base + matchIndex; assert(matchIndex < ipIndex); - assert(matchIndex >= dictLimit); - assert(matchPtr >= lowPrefixPtr); assert(matchPtr < ip); assert(longest >= 1); nbAttempts--; @@ -1124,7 +1151,6 @@ LZ4HC_FindLongestMatch (LZ4HC_CCtx_internal* hc4, if (mlt > longest) { longest = mlt; *matchpos = matchPtr; - break; } } { U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex); @@ -1132,39 +1158,44 @@ LZ4HC_FindLongestMatch (LZ4HC_CCtx_internal* hc4, } } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */ - assert(longest > MINMATCH); - while ((matchIndex-fromMStart>=lowestMatchIndex) && (nbAttempts)) { - const BYTE* const matchPtr = base + matchIndex - fromMStart; + /* search longer matches */ + while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) { + const BYTE* const matchPtr = base + matchIndex; + int mlt = 0; assert(matchIndex < ipIndex); - assert(matchIndex >= dictLimit); - assert(matchPtr >= lowPrefixPtr); assert(matchPtr < ip); - assert(longest >= 1); + assert(longest >= MINMATCH); nbAttempts--; if (LZ4_read16(ip + longest - 1) == LZ4_read16(matchPtr + longest - 1)) { if (LZ4_read32(matchPtr) == pattern) { - int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); + mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); if (mlt > longest) { longest = mlt; *matchpos = matchPtr; - matchIndex -= fromMStart; /* beginning of match */ - if (1 && matchIndex + longest <= ipIndex) { - U32 distanceToNextMatch = 1; - int pos; - for (pos = 0; pos <= longest - MINMATCH; pos++) { - U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + pos); - if (candidateDist > distanceToNextMatch) { - distanceToNextMatch = candidateDist; - fromMStart = pos; - } - } - if (distanceToNextMatch > matchIndex) break; /* avoid overflow */ - matchIndex -= distanceToNextMatch - fromMStart; - continue; + } + } + } + + /* search more promising chain */ + if (mlt == longest) { + if (1 && matchIndex + longest <= ipIndex) { + U32 distanceToNextMatch = 1; + int pos; + for (pos = matchChainPos; pos <= longest - MINMATCH; pos++) { + U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + pos); + if (candidateDist > distanceToNextMatch) { + distanceToNextMatch = candidateDist; + matchChainPos = pos; } - } } } + } + if (distanceToNextMatch > matchIndex) break; /* avoid overflow */ + matchIndex -= distanceToNextMatch; + continue; + } + } - { U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex); + /* continue current chain */ + { U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex+matchChainPos); matchIndex -= nextOffset; } } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */ @@ -1190,8 +1221,8 @@ LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), * but this won't be the case here, as we define iLowLimit==ip, * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ - //int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /* patternAnalysis */, dict, favorDecSpeed); - int matchLength = LZ4HC_FindLongestMatch(ctx, ip, iHighLimit, minLen, &matchPtr, nbSearches); (void)dict; + int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /* patternAnalysis */, dict, favorDecSpeed); + //int matchLength = LZ4HC_FindLongestMatch(ctx, ip, iHighLimit, minLen, &matchPtr, nbSearches); (void)dict; if (matchLength <= minLen) return match; if (favorDecSpeed) { if ((matchLength>18) & (matchLength<=36)) matchLength=18; /* favor shortcut */ -- cgit v0.12 From fa89a9e18b93741c65fa84c293d15298169d1ecb Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 5 May 2018 13:31:03 -0700 Subject: lz4hc: fixed performance issue when combining both PA and CS optimizations --- lib/lz4hc.c | 134 +++++++++--------------------------------------------------- 1 file changed, 20 insertions(+), 114 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index c58cd9d..7e2fcb3 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -280,10 +280,29 @@ LZ4HC_InsertAndGetWiderMatch ( *startpos = ip + back; } } } + if (mlt == longest) { /* better match => select a better chain */ + assert(longest >= MINMATCH); + if (1 && matchIndex + longest <= ipIndex) { + U32 distanceToNextMatch = 1; + int pos; + for (pos = 0; pos <= longest - MINMATCH; pos++) { + U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + pos); + if (candidateDist > distanceToNextMatch) { + distanceToNextMatch = candidateDist; + matchChainPos = pos; + } + } + if (distanceToNextMatch > 1) { + if (distanceToNextMatch > matchIndex) break; /* avoid overflow */ + matchIndex -= distanceToNextMatch; + continue; + } + } + } + { U32 const distNextMatch = DELTANEXTU16(chainTable, matchIndex); if (1 && patternAnalysis && distNextMatch==1 && matchChainPos==0) { U32 const matchCandidateIdx = matchIndex-1; - //static U64 g_nbPatternAnalysis = 0; g_nbPatternAnalysis++; if (g_nbPatternAnalysis % (((g_nbPatternAnalysis/1000000)+1)*100)==0) DEBUGLOG(2, "g_nbPatternAnalysis=%llu ", g_nbPatternAnalysis); /* may be a repeated pattern */ if (repeat == rep_untested) { if ( ((pattern & 0xFFFF) == (pattern >> 16)) @@ -301,7 +320,6 @@ LZ4HC_InsertAndGetWiderMatch ( const BYTE* const maxLowPtr = (lowPrefixPtr + MAX_DISTANCE >= ip) ? lowPrefixPtr : ip - MAX_DISTANCE; size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, maxLowPtr, pattern); size_t const currentSegmentLength = backLength + forwardPatternLength; - //static U64 g_nbPatternAnalysis_confirmed = 0; g_nbPatternAnalysis_confirmed++; if (g_nbPatternAnalysis_confirmed%(((g_nbPatternAnalysis_confirmed/100000)+1)*100)==0) DEBUGLOG(2, "g_nbPatternAnalysis_confirmed=%llu ", g_nbPatternAnalysis_confirmed); if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */ && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */ @@ -314,25 +332,6 @@ LZ4HC_InsertAndGetWiderMatch ( } } } } - if (mlt == longest) { /* better match => select a better chain */ - assert(longest >= MINMATCH); - if (1 && matchIndex + longest <= ipIndex) { - U32 distanceToNextMatch = 1; - int pos; - //static U64 g_nbChainSwapper = 0; g_nbChainSwapper++; if (g_nbChainSwapper%(((g_nbChainSwapper/100000)+1)*100)==0) DEBUGLOG(2, "g_nbChainSwapper=%llu ", g_nbChainSwapper); - for (pos = matchChainPos; pos <= longest - MINMATCH; pos++) { - U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + pos); - if (candidateDist > distanceToNextMatch) { - distanceToNextMatch = candidateDist; - matchChainPos = pos; - } - } - if (distanceToNextMatch > matchIndex) break; /* avoid overflow */ - matchIndex -= distanceToNextMatch; - continue; - } - } - /* follow current chain */ matchIndex -= DELTANEXTU16(chainTable, matchIndex+matchChainPos); @@ -1112,98 +1111,6 @@ LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen) } -int -LZ4HC_FindLongestMatch (LZ4HC_CCtx_internal* hc4, - const BYTE* const ip, - const BYTE* const iHighLimit, - int longest, - const BYTE** matchpos, - const int maxNbAttempts) -{ - U16* const chainTable = hc4->chainTable; - U32* const HashTable = hc4->hashTable; - const BYTE* const base = hc4->base; - const U32 ipIndex = (U32)(ip - base); - const U32 lowestMatchIndex = (hc4->lowLimit + 64 KB > ipIndex) ? hc4->lowLimit : ipIndex - MAX_DISTANCE; - int nbAttempts = maxNbAttempts; - U32 const pattern = LZ4_read32(ip); - U32 matchIndex; - int matchChainPos = 0; - - DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch"); - /* First Match */ - LZ4HC_Insert(hc4, ip); - matchIndex = HashTable[LZ4HC_hashPtr(ip)]; - DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)", - matchIndex, lowestMatchIndex); - - /* find first match */ - while ( 1 && (longest < MINMATCH) - && (matchIndex>=lowestMatchIndex) - && (nbAttempts) ) { - const BYTE* const matchPtr = base + matchIndex; - assert(matchIndex < ipIndex); - assert(matchPtr < ip); - assert(longest >= 1); - nbAttempts--; - if (LZ4_read32(matchPtr) == pattern) { - int const mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); - if (mlt > longest) { - longest = mlt; - *matchpos = matchPtr; - } } - - { U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex); - matchIndex -= nextOffset; - } - } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */ - - /* search longer matches */ - while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) { - const BYTE* const matchPtr = base + matchIndex; - int mlt = 0; - assert(matchIndex < ipIndex); - assert(matchPtr < ip); - assert(longest >= MINMATCH); - nbAttempts--; - if (LZ4_read16(ip + longest - 1) == LZ4_read16(matchPtr + longest - 1)) { - if (LZ4_read32(matchPtr) == pattern) { - mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); - if (mlt > longest) { - longest = mlt; - *matchpos = matchPtr; - } - } - } - - /* search more promising chain */ - if (mlt == longest) { - if (1 && matchIndex + longest <= ipIndex) { - U32 distanceToNextMatch = 1; - int pos; - for (pos = matchChainPos; pos <= longest - MINMATCH; pos++) { - U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + pos); - if (candidateDist > distanceToNextMatch) { - distanceToNextMatch = candidateDist; - matchChainPos = pos; - } - } - if (distanceToNextMatch > matchIndex) break; /* avoid overflow */ - matchIndex -= distanceToNextMatch; - continue; - } - } - - /* continue current chain */ - { U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex+matchChainPos); - matchIndex -= nextOffset; - } - } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */ - - return longest; -} - - typedef struct { int off; int len; @@ -1222,7 +1129,6 @@ LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, * but this won't be the case here, as we define iLowLimit==ip, * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /* patternAnalysis */, dict, favorDecSpeed); - //int matchLength = LZ4HC_FindLongestMatch(ctx, ip, iHighLimit, minLen, &matchPtr, nbSearches); (void)dict; if (matchLength <= minLen) return match; if (favorDecSpeed) { if ((matchLength>18) & (matchLength<=36)) matchLength=18; /* favor shortcut */ -- cgit v0.12 From d097bf93f853ddefdd05321d65cf7b90e3171c14 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 5 May 2018 13:46:45 -0700 Subject: fixed SC.opt integration with regular HC parser Only enabled when searching forward. note : it slighly improves compression ratio, but measurably decreases speed. Trade-off to analyse. --- lib/lz4hc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 7e2fcb3..0f37f42 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -280,9 +280,9 @@ LZ4HC_InsertAndGetWiderMatch ( *startpos = ip + back; } } } - if (mlt == longest) { /* better match => select a better chain */ - assert(longest >= MINMATCH); - if (1 && matchIndex + longest <= ipIndex) { + if ( lookBackLength==0 /* search forward only */ + && mlt==longest) { /* better match => select a better chain */ + if (matchIndex + longest <= ipIndex) { U32 distanceToNextMatch = 1; int pos; for (pos = 0; pos <= longest - MINMATCH; pos++) { @@ -301,7 +301,7 @@ LZ4HC_InsertAndGetWiderMatch ( } { U32 const distNextMatch = DELTANEXTU16(chainTable, matchIndex); - if (1 && patternAnalysis && distNextMatch==1 && matchChainPos==0) { + if (patternAnalysis && distNextMatch==1 && matchChainPos==0) { U32 const matchCandidateIdx = matchIndex-1; /* may be a repeated pattern */ if (repeat == rep_untested) { -- cgit v0.12 From a4e918d7a6e85f825f4cb1ac1c08a2cf2fc22194 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 5 May 2018 14:10:30 -0700 Subject: lz4hc: SC only enabled for opt parser the trade off is not good for regular HC parser : compression is a little bit better, but speed cost is too large in comparison. --- lib/lz4hc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 0f37f42..d29a989 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -213,6 +213,7 @@ LZ4HC_InsertAndGetWiderMatch ( const BYTE** startpos, const int maxNbAttempts, const int patternAnalysis, + const int chainSwap, const dictCtx_directive dict, const HCfavor_e favorDecSpeed) { @@ -280,8 +281,8 @@ LZ4HC_InsertAndGetWiderMatch ( *startpos = ip + back; } } } - if ( lookBackLength==0 /* search forward only */ - && mlt==longest) { /* better match => select a better chain */ + if (chainSwap && mlt==longest) { /* better match => select a better chain */ + assert(lookBackLength==0); /* search forward only */ if (matchIndex + longest <= ipIndex) { U32 distanceToNextMatch = 1; int pos; @@ -327,7 +328,6 @@ LZ4HC_InsertAndGetWiderMatch ( } else { matchIndex = matchCandidateIdx - (U32)backLength; /* let's go to farthest segment position, will find a match of length currentSegmentLength + maybe some back */ } - matchChainPos = 0; continue; } } } } @@ -382,7 +382,7 @@ int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index tabl /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), * but this won't be the case here, as we define iLowLimit==ip, * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ - return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, patternAnalysis, dict, favorCompressionRatio); + return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, 0 /* chainSwap */, patternAnalysis, dict, favorCompressionRatio); } @@ -515,7 +515,7 @@ _Search2: if (ip+ml <= mflimit) { ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2, - maxNbAttempts, patternAnalysis, dict, favorCompressionRatio); + maxNbAttempts, 0, patternAnalysis, dict, favorCompressionRatio); } else { ml2 = ml; } @@ -560,7 +560,7 @@ _Search3: if (start2 + ml2 <= mflimit) { ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, - maxNbAttempts, patternAnalysis, dict, favorCompressionRatio); + maxNbAttempts, 0, patternAnalysis, dict, favorCompressionRatio); } else { ml3 = ml2; } @@ -1128,7 +1128,7 @@ LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), * but this won't be the case here, as we define iLowLimit==ip, * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ - int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /* patternAnalysis */, dict, favorDecSpeed); + int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /*chainSwap*/, 1 /*patternAnalysis*/, dict, favorDecSpeed); if (matchLength <= minLen) return match; if (favorDecSpeed) { if ((matchLength>18) & (matchLength<=36)) matchLength=18; /* favor shortcut */ -- cgit v0.12 From cdb0275b7f7513e13a05c27adea658614c0174d9 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 5 May 2018 14:32:57 -0700 Subject: lz4hc: fixed PA / SC parameter order also : reserved PA for levels 9+ (instead of 8+). In most cases, speed is lower, and compression benefit is not worth. --- lib/lz4hc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index d29a989..1b3596f 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -382,7 +382,7 @@ int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index tabl /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), * but this won't be the case here, as we define iLowLimit==ip, * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ - return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, 0 /* chainSwap */, patternAnalysis, dict, favorCompressionRatio); + return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, patternAnalysis, 0 /*chainSwap*/, dict, favorCompressionRatio); } @@ -477,7 +477,7 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain ( ) { const int inputSize = *srcSizePtr; - const int patternAnalysis = (maxNbAttempts > 64); /* levels 8+ */ + const int patternAnalysis = (maxNbAttempts > 128); /* levels 9+ */ const BYTE* ip = (const BYTE*) source; const BYTE* anchor = ip; @@ -515,7 +515,7 @@ _Search2: if (ip+ml <= mflimit) { ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2, - maxNbAttempts, 0, patternAnalysis, dict, favorCompressionRatio); + maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio); } else { ml2 = ml; } @@ -560,7 +560,7 @@ _Search3: if (start2 + ml2 <= mflimit) { ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, - maxNbAttempts, 0, patternAnalysis, dict, favorCompressionRatio); + maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio); } else { ml3 = ml2; } @@ -1128,7 +1128,7 @@ LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), * but this won't be the case here, as we define iLowLimit==ip, * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ - int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /*chainSwap*/, 1 /*patternAnalysis*/, dict, favorDecSpeed); + int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /*patternAnalysis*/, 1 /*chainSwap*/, dict, favorDecSpeed); if (matchLength <= minLen) return match; if (favorDecSpeed) { if ((matchLength>18) & (matchLength<=36)) matchLength=18; /* favor shortcut */ -- cgit v0.12 From 24b9c485db34743f83496038d61598d41c3c497e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 6 May 2018 16:47:31 -0700 Subject: small PA optimization which measurably improves speed on levels 9+ --- lib/lz4hc.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 1b3596f..059c4b5 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -291,15 +291,12 @@ LZ4HC_InsertAndGetWiderMatch ( if (candidateDist > distanceToNextMatch) { distanceToNextMatch = candidateDist; matchChainPos = pos; - } - } + } } if (distanceToNextMatch > 1) { if (distanceToNextMatch > matchIndex) break; /* avoid overflow */ matchIndex -= distanceToNextMatch; continue; - } - } - } + } } } { U32 const distNextMatch = DELTANEXTU16(chainTable, matchIndex); if (patternAnalysis && distNextMatch==1 && matchChainPos==0) { @@ -309,7 +306,7 @@ LZ4HC_InsertAndGetWiderMatch ( if ( ((pattern & 0xFFFF) == (pattern >> 16)) & ((pattern & 0xFF) == (pattern >> 24)) ) { repeat = rep_confirmed; - srcPatternLength = LZ4HC_countPattern(ip+4, iHighLimit, pattern) + 4; + srcPatternLength = LZ4HC_countPattern(ip+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern); } else { repeat = rep_not; } } @@ -318,19 +315,29 @@ LZ4HC_InsertAndGetWiderMatch ( const BYTE* const matchPtr = base + matchCandidateIdx; if (LZ4_read32(matchPtr) == pattern) { /* good candidate */ size_t const forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern); - const BYTE* const maxLowPtr = (lowPrefixPtr + MAX_DISTANCE >= ip) ? lowPrefixPtr : ip - MAX_DISTANCE; - size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, maxLowPtr, pattern); + const BYTE* const lowestMatchPtr = (lowPrefixPtr + MAX_DISTANCE >= ip) ? lowPrefixPtr : ip - MAX_DISTANCE; + size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern); size_t const currentSegmentLength = backLength + forwardPatternLength; if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */ && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */ matchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */ } else { - matchIndex = matchCandidateIdx - (U32)backLength; /* let's go to farthest segment position, will find a match of length currentSegmentLength + maybe some back */ - } + matchIndex = matchCandidateIdx - (U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */ + if (lookBackLength==0) { /* no back possible */ + size_t const maxML = MIN(currentSegmentLength, srcPatternLength); + if ((size_t)longest < maxML) { + longest = maxML; + *matchpos = base + matchIndex; /* virtual pos, relative to ip, to retrieve offset */ + *startpos = ip; + } + { U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex); + if (distToNextPattern > matchIndex) break; /* avoid overflow */ + matchIndex -= distToNextPattern; + } } } continue; } } - } } + } } /* PA optimization */ /* follow current chain */ matchIndex -= DELTANEXTU16(chainTable, matchIndex+matchChainPos); -- cgit v0.12 From 200b2960d5a5d0fe76d34bb826c236c7c2941563 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 6 May 2018 18:26:14 -0700 Subject: fixed minor conversion warning --- lib/lz4hc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 059c4b5..b89983f 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -327,7 +327,8 @@ LZ4HC_InsertAndGetWiderMatch ( if (lookBackLength==0) { /* no back possible */ size_t const maxML = MIN(currentSegmentLength, srcPatternLength); if ((size_t)longest < maxML) { - longest = maxML; + assert(maxML < 2 GB); + longest = (int)maxML; *matchpos = base + matchIndex; /* virtual pos, relative to ip, to retrieve offset */ *startpos = ip; } -- cgit v0.12 From ba1c7148a56029aa01795bfcf8cc365a2551f968 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 7 May 2018 12:14:26 -0700 Subject: renamed variable for clarity --- lib/lz4hc.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index b89983f..8108ea0 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -243,7 +243,7 @@ LZ4HC_InsertAndGetWiderMatch ( matchIndex, lowestMatchIndex); while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) { - int mlt=0; + int matchLength=0; nbAttempts--; assert(matchIndex < ipIndex); if (favorDecSpeed && (ipIndex - matchIndex < 8)) { @@ -256,10 +256,10 @@ LZ4HC_InsertAndGetWiderMatch ( if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) { if (LZ4_read32(matchPtr) == pattern) { int const back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr) : 0; - mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); - mlt -= back; - if (mlt > longest) { - longest = mlt; + matchLength = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); + matchLength -= back; + if (matchLength > longest) { + longest = matchLength; *matchpos = matchPtr + back; *startpos = ip + back; } } } @@ -270,18 +270,18 @@ LZ4HC_InsertAndGetWiderMatch ( int back = 0; const BYTE* vLimit = ip + (dictLimit - matchIndex); if (vLimit > iHighLimit) vLimit = iHighLimit; - mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; - if ((ip+mlt == vLimit) && (vLimit < iHighLimit)) - mlt += LZ4_count(ip+mlt, lowPrefixPtr, iHighLimit); + matchLength = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; + if ((ip+matchLength == vLimit) && (vLimit < iHighLimit)) + matchLength += LZ4_count(ip+matchLength, lowPrefixPtr, iHighLimit); back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictStart) : 0; - mlt -= back; - if (mlt > longest) { - longest = mlt; + matchLength -= back; + if (matchLength > longest) { + longest = matchLength; *matchpos = base + matchIndex + back; /* virtual pos, relative to ip, to retrieve offset */ *startpos = ip + back; } } } - if (chainSwap && mlt==longest) { /* better match => select a better chain */ + if (chainSwap && matchLength==longest) { /* better match => select a better chain */ assert(lookBackLength==0); /* search forward only */ if (matchIndex + longest <= ipIndex) { U32 distanceToNextMatch = 1; -- cgit v0.12