summaryrefslogtreecommitdiffstats
path: root/lib/lz4hc.c
diff options
context:
space:
mode:
authorNick Terrell <terrelln@fb.com>2019-07-30 22:16:35 (GMT)
committerNick Terrell <terrelln@fb.com>2019-07-31 17:16:21 (GMT)
commit7e97bf377d3c3a6d0d71a3ce9116e1f0143a8740 (patch)
tree6456eeed7e4073c4c260429c3ca1744d17b39cf2 /lib/lz4hc.c
parentce9176a68d13345d38e8362608850c64ebb6d896 (diff)
downloadlz4-7e97bf377d3c3a6d0d71a3ce9116e1f0143a8740.zip
lz4-7e97bf377d3c3a6d0d71a3ce9116e1f0143a8740.tar.gz
lz4-7e97bf377d3c3a6d0d71a3ce9116e1f0143a8740.tar.bz2
[lz4hc] Improve pattern detection in ext dict
It is important to continue to look backwards if the current pattern reaches `lowPrefixPtr`. If the pattern detection doesn't go all the way to the beginning of the pattern, or the end of the pattern it slows down the search instead of speeding it up. The slow unit in `round_trip_stream_fuzzer` used to take 12 seconds to run with -O3, now it takes 0.2 seconds. Credit to OSS-Fuzz
Diffstat (limited to 'lib/lz4hc.c')
-rw-r--r--lib/lz4hc.c18
1 files changed, 13 insertions, 5 deletions
diff --git a/lib/lz4hc.c b/lib/lz4hc.c
index b8ff72a..4122af4 100644
--- a/lib/lz4hc.c
+++ b/lib/lz4hc.c
@@ -158,9 +158,11 @@ int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
#endif
-static U32 LZ4HC_rotatePattern(size_t const length, U32 const pattern)
+static U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern)
{
- size_t const bitsToRotate = (length & (sizeof(pattern) - 1)) << 3;
+ size_t const bitsToRotate = (rotate & (sizeof(pattern) - 1)) << 3;
+ if (bitsToRotate == 0)
+ return pattern;
return LZ4HC_rotl32(pattern, (int)bitsToRotate);
}
@@ -338,9 +340,15 @@ LZ4HC_InsertAndGetWiderMatch (
forwardPatternLength += LZ4HC_countPattern(lowPrefixPtr, iHighLimit, rotatedPattern);
}
{ const BYTE* const lowestMatchPtr = extDict ? dictStart : lowPrefixPtr;
- size_t const backLengthRaw = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern);
- size_t const backLength = matchCandidateIdx - MAX(matchCandidateIdx - (U32)backLengthRaw, lowestMatchIndex);
- size_t const currentSegmentLength = backLength + forwardPatternLength;
+ size_t backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern);
+ size_t currentSegmentLength;
+ if (!extDict && matchPtr - backLength == lowPrefixPtr && hc4->lowLimit < dictLimit) {
+ U32 const rotatedPattern = LZ4HC_rotatePattern((U32)(-(int)backLength), pattern);
+ backLength += LZ4HC_reverseCountPattern(dictBase + dictLimit, dictStart, rotatedPattern);
+ }
+ /* Limit backLength not go further than lowestMatchIndex */
+ backLength = matchCandidateIdx - MAX(matchCandidateIdx - (U32)backLength, lowestMatchIndex);
+ currentSegmentLength = backLength + forwardPatternLength;
if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */
&& (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */