From b9459faeb24afe8669640aef8861ce32f73d5aae Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 8 Oct 2017 23:55:42 -0700 Subject: improved search of rep-1 patterns --- lib/lz4hc.c | 158 +++++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 131 insertions(+), 27 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index f4a0981..e28d682 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -165,6 +165,54 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_CCtx_internal* const hc } #endif +/** LZ4HC_countBack() : + * @return : negative value, nb of common bytes before ip/match */ +static int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match, + const BYTE* const iMin, const BYTE* const mMin) +{ + int back=0; + while ( (ip+back > iMin) + && (match+back > mMin) + && (ip[back-1] == match[back-1])) + back--; + return back; +} + +static unsigned LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, reg_t pattern) +{ + const BYTE* const iStart = ip; + + while (likely(ip=iLow+4)) { + if (LZ4_read32(ip-4) != pattern) break; + ip -= 4; + } + while (likely(ip>iLow)) { + if (ip[-1] != (BYTE)pattern) break; + ip--; + } + + return (unsigned)(iStart - ip); +} + +typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e; + LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( LZ4HC_CCtx_internal* hc4, const BYTE* const ip, @@ -180,11 +228,13 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( const BYTE* const base = hc4->base; const U32 dictLimit = hc4->dictLimit; const BYTE* const lowPrefixPtr = base + dictLimit; - const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1); + const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - MAX_DISTANCE; const BYTE* const dictBase = hc4->dictBase; - int const delta = (int)(ip-iLowLimit); int nbAttempts = maxNbAttempts; + reg_t const pattern = LZ4_read_ARCH(ip); U32 matchIndex; + repeat_state_e repeat = rep_untested; + size_t srcPatternLength = 0; /* First Match */ @@ -195,27 +245,29 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( nbAttempts--; if (matchIndex >= dictLimit) { const BYTE* const matchPtr = base + matchIndex; - if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) { - if (LZ4_read32(matchPtr) == LZ4_read32(ip)) { - int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); - int back = 0; - - while ( (ip+back > iLowLimit) - && (matchPtr+back > lowPrefixPtr) - && (ip[back-1] == matchPtr[back-1])) { - back--; - } - - mlt -= back; + if (LZ4_read32(matchPtr) == (U32)pattern) { + int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); +#if 0 + /* more generic but unfortunately slower ... */ + int const back = LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr); +#else + int back = 0; + while ( (ip+back > iLowLimit) + && (matchPtr+back > lowPrefixPtr) + && (ip[back-1] == matchPtr[back-1])) { + back--; + } +#endif + mlt -= back; - if (mlt > longest) { - longest = mlt; - *matchpos = matchPtr+back; - *startpos = ip+back; - } } } - } else { + if (mlt > longest) { + longest = mlt; + *matchpos = matchPtr+back; + *startpos = ip+back; + } } + } else { /* matchIndex < dictLimit */ const BYTE* const matchPtr = dictBase + matchIndex; - if (LZ4_read32(matchPtr) == LZ4_read32(ip)) { + if (LZ4_read32(matchPtr) == (U32)pattern) { int mlt; int back=0; const BYTE* vLimit = ip + (dictLimit - matchIndex); @@ -223,13 +275,65 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; if ((ip+mlt == vLimit) && (vLimit < iHighLimit)) mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit); - while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == matchPtr[back-1])) back--; + while ( (ip+back > iLowLimit) + && (matchIndex+back > lowLimit) + && (ip[back-1] == matchPtr[back-1])) + back--; mlt -= back; - if (mlt > longest) { longest = mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; } - } - } - matchIndex -= DELTANEXTU16(chainTable, matchIndex); - } + if (mlt > longest) { + longest = mlt; + *matchpos = base + matchIndex + back; + *startpos = ip + back; + } } } + + { U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex); + matchIndex -= nextOffset; + if (1 && (nextOffset==1)) { + /* may be a repeated pattern */ + if (repeat == rep_untested) { + if (LZ4_read32(ip+4) == (U32)pattern) { /* should check ip limit */ + repeat = rep_confirmed; + srcPatternLength = LZ4HC_countPattern(ip+8, iHighLimit, pattern) + 8; + } else { + repeat = rep_not; + } } + if ( (repeat == rep_confirmed) /* proven repeated pattern (1-2-4) */ + && (matchIndex >= dictLimit) ) { /* same segment only */ + const BYTE* const matchPtr = base + matchIndex; + if (LZ4_read_ARCH(matchPtr) == pattern) { /* good candidate */ + size_t const forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern); + const BYTE* const maxLowPtr = (lowPrefixPtr + MAX_DISTANCE >= ip) ? lowPrefixPtr : ip - MAX_DISTANCE; + size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, maxLowPtr, (U32)pattern); + size_t const currentSegmentLength = backLength + forwardPatternLength; + + if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */ + && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */ +#if 1 + matchIndex += (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */ +#else + const BYTE* const matchCandidate = matchPtr + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, pattern might be followed by more match */ + int matchLength = (int)(LZ4_count(ip + srcPatternLength, matchCandidate + srcPatternLength, iHighLimit) + srcPatternLength); + int back = 0; + while ( (ip+back > iLowLimit) + && (matchPtr+back > lowPrefixPtr) + && (ip[back-1] == matchPtr[back-1])) { + back--; + } + matchLength -= back; + if (matchLength > longest) { + longest = matchLength; + *matchpos = base + matchIndex + back; + *startpos = ip + back; + } + matchIndex -= (U32)backLength; + matchIndex -= DELTANEXTU16(chainTable, matchIndex); /* skip directly to next potential pattern segment */ +#endif + } else { + matchIndex -= (U32)backLength; /* let's go to farthest segment position, will find a match of length currentSegmentLength + maybe some back */ + //matchIndex -= DELTANEXTU16(chainTable, matchIndex); /* skip directly to following candidate; slightly faster, but miss some rare corner cases (likely when back is useful)*/ + } + } } } } + } /* while ((matchIndex>=lowLimit) && (nbAttempts)) */ return longest; } -- cgit v0.12