From 9699ba5ddfa8485bb4ad36cc9ec78cbed542f28b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 4 May 2018 19:13:33 -0700 Subject: integrated chain swapper into HC match finder slower than expected Pattern analyzer and Chain Swapper work slower when both activated. Reasons unclear. --- lib/lz4hc.c | 121 ++++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 76 insertions(+), 45 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 6c50db5..c58cd9d 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -227,6 +227,7 @@ LZ4HC_InsertAndGetWiderMatch ( const BYTE* const dictBase = hc4->dictBase; int const lookBackLength = (int)(ip-iLowLimit); int nbAttempts = maxNbAttempts; + int matchChainPos = 0; U32 const pattern = LZ4_read32(ip); U32 matchIndex; U32 dictMatchIndex; @@ -241,31 +242,30 @@ LZ4HC_InsertAndGetWiderMatch ( matchIndex, lowestMatchIndex); while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) { - DEBUGLOG(7, "remaining attempts : %i", nbAttempts); + int mlt=0; nbAttempts--; assert(matchIndex < ipIndex); if (favorDecSpeed && (ipIndex - matchIndex < 8)) { /* do nothing */ - } else if (matchIndex >= dictLimit) { + } else if (matchIndex >= dictLimit) { /* within current Prefix */ const BYTE* const matchPtr = base + matchIndex; assert(matchPtr >= lowPrefixPtr); assert(matchPtr < ip); assert(longest >= 1); if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) { if (LZ4_read32(matchPtr) == pattern) { - int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); int const back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr) : 0; + mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); mlt -= back; if (mlt > longest) { longest = mlt; - *matchpos = matchPtr+back; - *startpos = ip+back; + *matchpos = matchPtr + back; + *startpos = ip + back; } } } - } else { /* matchIndex < dictLimit */ + } else { /* lowestMatchIndex <= matchIndex < dictLimit */ const BYTE* const matchPtr = dictBase + matchIndex; if (LZ4_read32(matchPtr) == pattern) { const BYTE* const dictStart = dictBase + hc4->lowLimit; - int mlt; int back = 0; const BYTE* vLimit = ip + (dictLimit - matchIndex); if (vLimit > iHighLimit) vLimit = iHighLimit; @@ -280,9 +280,10 @@ LZ4HC_InsertAndGetWiderMatch ( *startpos = ip + back; } } } - { U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex); - matchIndex -= nextOffset; - if (patternAnalysis && nextOffset==1) { + { U32 const distNextMatch = DELTANEXTU16(chainTable, matchIndex); + if (1 && patternAnalysis && distNextMatch==1 && matchChainPos==0) { + U32 const matchCandidateIdx = matchIndex-1; + //static U64 g_nbPatternAnalysis = 0; g_nbPatternAnalysis++; if (g_nbPatternAnalysis % (((g_nbPatternAnalysis/1000000)+1)*100)==0) DEBUGLOG(2, "g_nbPatternAnalysis=%llu ", g_nbPatternAnalysis); /* may be a repeated pattern */ if (repeat == rep_untested) { if ( ((pattern & 0xFFFF) == (pattern >> 16)) @@ -293,21 +294,48 @@ LZ4HC_InsertAndGetWiderMatch ( repeat = rep_not; } } if ( (repeat == rep_confirmed) - && (matchIndex >= dictLimit) ) { /* same segment only */ - const BYTE* const matchPtr = base + matchIndex; + && (matchCandidateIdx >= dictLimit) ) { /* same segment only */ + const BYTE* const matchPtr = base + matchCandidateIdx; if (LZ4_read32(matchPtr) == pattern) { /* good candidate */ size_t const forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern); const BYTE* const maxLowPtr = (lowPrefixPtr + MAX_DISTANCE >= ip) ? lowPrefixPtr : ip - MAX_DISTANCE; size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, maxLowPtr, pattern); size_t const currentSegmentLength = backLength + forwardPatternLength; + //static U64 g_nbPatternAnalysis_confirmed = 0; g_nbPatternAnalysis_confirmed++; if (g_nbPatternAnalysis_confirmed%(((g_nbPatternAnalysis_confirmed/100000)+1)*100)==0) DEBUGLOG(2, "g_nbPatternAnalysis_confirmed=%llu ", g_nbPatternAnalysis_confirmed); if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */ && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */ - matchIndex += (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */ + matchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */ } else { - matchIndex -= (U32)backLength; /* let's go to farthest segment position, will find a match of length currentSegmentLength + maybe some back */ + matchIndex = matchCandidateIdx - (U32)backLength; /* let's go to farthest segment position, will find a match of length currentSegmentLength + maybe some back */ } - } } } } + matchChainPos = 0; + continue; + } } + } } + + if (mlt == longest) { /* better match => select a better chain */ + assert(longest >= MINMATCH); + if (1 && matchIndex + longest <= ipIndex) { + U32 distanceToNextMatch = 1; + int pos; + //static U64 g_nbChainSwapper = 0; g_nbChainSwapper++; if (g_nbChainSwapper%(((g_nbChainSwapper/100000)+1)*100)==0) DEBUGLOG(2, "g_nbChainSwapper=%llu ", g_nbChainSwapper); + for (pos = matchChainPos; pos <= longest - MINMATCH; pos++) { + U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + pos); + if (candidateDist > distanceToNextMatch) { + distanceToNextMatch = candidateDist; + matchChainPos = pos; + } + } + if (distanceToNextMatch > matchIndex) break; /* avoid overflow */ + matchIndex -= distanceToNextMatch; + continue; + } + } + + /* follow current chain */ + matchIndex -= DELTANEXTU16(chainTable, matchIndex+matchChainPos); + } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */ if (dict == usingDictCtx && nbAttempts && ipIndex - lowestMatchIndex < MAX_DISTANCE) { @@ -339,6 +367,7 @@ LZ4HC_InsertAndGetWiderMatch ( } } } + return longest; } @@ -1099,7 +1128,7 @@ LZ4HC_FindLongestMatch (LZ4HC_CCtx_internal* hc4, int nbAttempts = maxNbAttempts; U32 const pattern = LZ4_read32(ip); U32 matchIndex; - int fromMStart = 0; + int matchChainPos = 0; DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch"); /* First Match */ @@ -1109,13 +1138,11 @@ LZ4HC_FindLongestMatch (LZ4HC_CCtx_internal* hc4, matchIndex, lowestMatchIndex); /* find first match */ - while ( (longest <= MINMATCH) + while ( 1 && (longest < MINMATCH) && (matchIndex>=lowestMatchIndex) && (nbAttempts) ) { const BYTE* const matchPtr = base + matchIndex; assert(matchIndex < ipIndex); - assert(matchIndex >= dictLimit); - assert(matchPtr >= lowPrefixPtr); assert(matchPtr < ip); assert(longest >= 1); nbAttempts--; @@ -1124,7 +1151,6 @@ LZ4HC_FindLongestMatch (LZ4HC_CCtx_internal* hc4, if (mlt > longest) { longest = mlt; *matchpos = matchPtr; - break; } } { U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex); @@ -1132,39 +1158,44 @@ LZ4HC_FindLongestMatch (LZ4HC_CCtx_internal* hc4, } } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */ - assert(longest > MINMATCH); - while ((matchIndex-fromMStart>=lowestMatchIndex) && (nbAttempts)) { - const BYTE* const matchPtr = base + matchIndex - fromMStart; + /* search longer matches */ + while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) { + const BYTE* const matchPtr = base + matchIndex; + int mlt = 0; assert(matchIndex < ipIndex); - assert(matchIndex >= dictLimit); - assert(matchPtr >= lowPrefixPtr); assert(matchPtr < ip); - assert(longest >= 1); + assert(longest >= MINMATCH); nbAttempts--; if (LZ4_read16(ip + longest - 1) == LZ4_read16(matchPtr + longest - 1)) { if (LZ4_read32(matchPtr) == pattern) { - int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); + mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); if (mlt > longest) { longest = mlt; *matchpos = matchPtr; - matchIndex -= fromMStart; /* beginning of match */ - if (1 && matchIndex + longest <= ipIndex) { - U32 distanceToNextMatch = 1; - int pos; - for (pos = 0; pos <= longest - MINMATCH; pos++) { - U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + pos); - if (candidateDist > distanceToNextMatch) { - distanceToNextMatch = candidateDist; - fromMStart = pos; - } - } - if (distanceToNextMatch > matchIndex) break; /* avoid overflow */ - matchIndex -= distanceToNextMatch - fromMStart; - continue; + } + } + } + + /* search more promising chain */ + if (mlt == longest) { + if (1 && matchIndex + longest <= ipIndex) { + U32 distanceToNextMatch = 1; + int pos; + for (pos = matchChainPos; pos <= longest - MINMATCH; pos++) { + U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + pos); + if (candidateDist > distanceToNextMatch) { + distanceToNextMatch = candidateDist; + matchChainPos = pos; } - } } } + } + if (distanceToNextMatch > matchIndex) break; /* avoid overflow */ + matchIndex -= distanceToNextMatch; + continue; + } + } - { U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex); + /* continue current chain */ + { U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex+matchChainPos); matchIndex -= nextOffset; } } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */ @@ -1190,8 +1221,8 @@ LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), * but this won't be the case here, as we define iLowLimit==ip, * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ - //int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /* patternAnalysis */, dict, favorDecSpeed); - int matchLength = LZ4HC_FindLongestMatch(ctx, ip, iHighLimit, minLen, &matchPtr, nbSearches); (void)dict; + int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /* patternAnalysis */, dict, favorDecSpeed); + //int matchLength = LZ4HC_FindLongestMatch(ctx, ip, iHighLimit, minLen, &matchPtr, nbSearches); (void)dict; if (matchLength <= minLen) return match; if (favorDecSpeed) { if ((matchLength>18) & (matchLength<=36)) matchLength=18; /* favor shortcut */ -- cgit v0.12