From 7e97bf377d3c3a6d0d71a3ce9116e1f0143a8740 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 30 Jul 2019 15:16:35 -0700 Subject: [lz4hc] Improve pattern detection in ext dict It is important to continue to look backwards if the current pattern reaches `lowPrefixPtr`. If the pattern detection doesn't go all the way to the beginning of the pattern, or the end of the pattern it slows down the search instead of speeding it up. The slow unit in `round_trip_stream_fuzzer` used to take 12 seconds to run with -O3, now it takes 0.2 seconds. Credit to OSS-Fuzz --- lib/lz4hc.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index b8ff72a..4122af4 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -158,9 +158,11 @@ int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match, #endif -static U32 LZ4HC_rotatePattern(size_t const length, U32 const pattern) +static U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern) { - size_t const bitsToRotate = (length & (sizeof(pattern) - 1)) << 3; + size_t const bitsToRotate = (rotate & (sizeof(pattern) - 1)) << 3; + if (bitsToRotate == 0) + return pattern; return LZ4HC_rotl32(pattern, (int)bitsToRotate); } @@ -338,9 +340,15 @@ LZ4HC_InsertAndGetWiderMatch ( forwardPatternLength += LZ4HC_countPattern(lowPrefixPtr, iHighLimit, rotatedPattern); } { const BYTE* const lowestMatchPtr = extDict ? dictStart : lowPrefixPtr; - size_t const backLengthRaw = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern); - size_t const backLength = matchCandidateIdx - MAX(matchCandidateIdx - (U32)backLengthRaw, lowestMatchIndex); - size_t const currentSegmentLength = backLength + forwardPatternLength; + size_t backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern); + size_t currentSegmentLength; + if (!extDict && matchPtr - backLength == lowPrefixPtr && hc4->lowLimit < dictLimit) { + U32 const rotatedPattern = LZ4HC_rotatePattern((U32)(-(int)backLength), pattern); + backLength += LZ4HC_reverseCountPattern(dictBase + dictLimit, dictStart, rotatedPattern); + } + /* Limit backLength not go further than lowestMatchIndex */ + backLength = matchCandidateIdx - MAX(matchCandidateIdx - (U32)backLength, lowestMatchIndex); + currentSegmentLength = backLength + forwardPatternLength; if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */ && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */ -- cgit v0.12 From 58ea585878008db36fa20025703757f0b70836eb Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 30 Jul 2019 15:21:52 -0700 Subject: [lz4hc] Fix minor pessimization in extDict pattern matching We should be comparing `matchPtr` not `ip`. This bug just means that this branch was not taken, so we might miss some of the forward length. --- lib/lz4hc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 4122af4..a0b8c48 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -335,7 +335,7 @@ LZ4HC_InsertAndGetWiderMatch ( const BYTE* const dictStart = dictBase + hc4->lowLimit; const BYTE* const iLimit = extDict ? dictBase + dictLimit : iHighLimit; size_t forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iLimit, pattern) + sizeof(pattern); - if (extDict && ip + forwardPatternLength == iLimit) { + if (extDict && matchPtr + forwardPatternLength == iLimit) { U32 const rotatedPattern = LZ4HC_rotatePattern(forwardPatternLength, pattern); forwardPatternLength += LZ4HC_countPattern(lowPrefixPtr, iHighLimit, rotatedPattern); } -- cgit v0.12 From be1738aa46326e86e9c3bb1029abaadce45b8e72 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 30 Jul 2019 23:39:39 -0700 Subject: [lz4hc] Fix pattern detection end of dictionary The pattern detection in extDict mode could put `matchIndex` within the last 3 bytes of the dictionary. This would cause a read out of bounds. --- lib/lz4hc.c | 65 ++++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 20 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index a0b8c48..596888a 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -218,6 +218,16 @@ LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern) return (unsigned)(iStart - ip); } +/* LZ4HC_protectDictEnd() : + * Checks if the match is in the last 3 bytes of the dictionary, so reading the + * 4 byte MINMATCH would overflow. + * @returns true if the match index is okay. + */ +static int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex) +{ + return ((U32)((dictLimit - 1) - matchIndex) >= 3); +} + typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e; typedef enum { favorCompressionRatio=0, favorDecompressionSpeed } HCfavor_e; @@ -304,7 +314,7 @@ LZ4HC_InsertAndGetWiderMatch ( if (matchIndex + (U32)longest <= ipIndex) { U32 distanceToNextMatch = 1; int pos; - for (pos = 0; pos <= longest - MINMATCH; pos++) { + for (pos = 0; pos <= longest - MINMATCH; ++pos) { U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + (U32)pos); if (candidateDist > distanceToNextMatch) { distanceToNextMatch = candidateDist; @@ -314,7 +324,8 @@ LZ4HC_InsertAndGetWiderMatch ( if (distanceToNextMatch > matchIndex) break; /* avoid overflow */ matchIndex -= distanceToNextMatch; continue; - } } } + } + } } { U32 const distNextMatch = DELTANEXTU16(chainTable, matchIndex); if (patternAnalysis && distNextMatch==1 && matchChainPos==0) { @@ -328,7 +339,8 @@ LZ4HC_InsertAndGetWiderMatch ( } else { repeat = rep_not; } } - if ( (repeat == rep_confirmed) && (matchCandidateIdx >= lowestMatchIndex) ) { + if ( (repeat == rep_confirmed) && (matchCandidateIdx >= lowestMatchIndex) + && LZ4HC_protectDictEnd(dictLimit, matchCandidateIdx) ) { const int extDict = matchCandidateIdx < dictLimit; const BYTE* const matchPtr = (extDict ? dictBase : base) + matchCandidateIdx; if (LZ4_read32(matchPtr) == pattern) { /* good candidate */ @@ -348,27 +360,40 @@ LZ4HC_InsertAndGetWiderMatch ( } /* Limit backLength not go further than lowestMatchIndex */ backLength = matchCandidateIdx - MAX(matchCandidateIdx - (U32)backLength, lowestMatchIndex); + assert(matchCandidateIdx - backLength >= lowestMatchIndex); currentSegmentLength = backLength + forwardPatternLength; - + /* Adjust to end of pattern if the source pattern fits, otherwise the beginning of the pattern */ if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */ && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */ - matchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */ + U32 const newMatchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */ + if (LZ4HC_protectDictEnd(dictLimit, newMatchIndex)) + matchIndex = newMatchIndex; + else { + /* Can only happen if started in the prefix */ + assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict); + matchIndex = dictLimit; + } } else { - matchIndex = matchCandidateIdx - (U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */ - if (lookBackLength==0) { /* no back possible */ - size_t const maxML = MIN(currentSegmentLength, srcPatternLength); - if ((size_t)longest < maxML) { - assert(base + matchIndex < ip); - if (ip - (base+matchIndex) > LZ4_DISTANCE_MAX) break; - assert(maxML < 2 GB); - longest = (int)maxML; - *matchpos = base + matchIndex; /* virtual pos, relative to ip, to retrieve offset */ - *startpos = ip; - } - { U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex); - if (distToNextPattern > matchIndex) break; /* avoid overflow */ - matchIndex -= distToNextPattern; - } } } } + U32 const newMatchIndex = matchCandidateIdx - (U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */ + if (!LZ4HC_protectDictEnd(dictLimit, newMatchIndex)) { + assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict); + matchIndex = dictLimit; + } else { + matchIndex = newMatchIndex; + if (lookBackLength==0) { /* no back possible */ + size_t const maxML = MIN(currentSegmentLength, srcPatternLength); + if ((size_t)longest < maxML) { + assert(base + matchIndex < ip); + if (ip - (base+matchIndex) > LZ4_DISTANCE_MAX) break; + assert(maxML < 2 GB); + longest = (int)maxML; + *matchpos = base + matchIndex; /* virtual pos, relative to ip, to retrieve offset */ + *startpos = ip; + } + { U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex); + if (distToNextPattern > matchIndex) break; /* avoid overflow */ + matchIndex -= distToNextPattern; + } } } } } continue; } } } } /* PA optimization */ -- cgit v0.12 From 38c3945de300851757d0dd76182ee28aaf8253a4 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 30 Jul 2019 23:40:58 -0700 Subject: [lz4hc] Only allow chain swapping forwards When the match is very long and found quickly, we can do matchLength * nbCompares iterations through the chain swapping, which can really slow down compression. --- lib/lz4hc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 596888a..0608ec6 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -314,7 +314,7 @@ LZ4HC_InsertAndGetWiderMatch ( if (matchIndex + (U32)longest <= ipIndex) { U32 distanceToNextMatch = 1; int pos; - for (pos = 0; pos <= longest - MINMATCH; ++pos) { + for (pos = matchChainPos; pos <= longest - MINMATCH; ++pos) { U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + (U32)pos); if (candidateDist > distanceToNextMatch) { distanceToNextMatch = candidateDist; -- cgit v0.12 From 064adb2e8d95698168d436afc223e8ba44e56831 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Wed, 31 Jul 2019 00:57:16 -0700 Subject: [lz4hc] Chain swap with acceleration --- lib/lz4hc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 0608ec6..5922ed7 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -312,20 +312,26 @@ LZ4HC_InsertAndGetWiderMatch ( if (chainSwap && matchLength==longest) { /* better match => select a better chain */ assert(lookBackLength==0); /* search forward only */ if (matchIndex + (U32)longest <= ipIndex) { + int const kTrigger = 4; U32 distanceToNextMatch = 1; + int const end = longest - MINMATCH + 1; + int step = 1; + int accel = 1 << kTrigger; int pos; - for (pos = matchChainPos; pos <= longest - MINMATCH; ++pos) { + for (pos = 0; pos < end; pos += step) { U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + (U32)pos); + step = (accel++ >> kTrigger); if (candidateDist > distanceToNextMatch) { distanceToNextMatch = candidateDist; matchChainPos = (U32)pos; - } } + accel = 1 << kTrigger; + } + } if (distanceToNextMatch > 1) { if (distanceToNextMatch > matchIndex) break; /* avoid overflow */ matchIndex -= distanceToNextMatch; continue; - } - } } + } } } { U32 const distNextMatch = DELTANEXTU16(chainTable, matchIndex); if (patternAnalysis && distNextMatch==1 && matchChainPos==0) { -- cgit v0.12