summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYann Collet <cyan@fb.com>2017-12-22 07:07:25 (GMT)
committerYann Collet <cyan@fb.com>2017-12-22 07:07:25 (GMT)
commit9753ac4c91b926114ae636d1d7103dd24e5c0f57 (patch)
tree84b09d9af6a2d1edc9493ed7fe9e89ac324e3f93
parent8a9c8e73241672c1db29be454a9b8388bfde5034 (diff)
downloadlz4-9753ac4c91b926114ae636d1d7103dd24e5c0f57.zip
lz4-9753ac4c91b926114ae636d1d7103dd24e5c0f57.tar.gz
lz4-9753ac4c91b926114ae636d1d7103dd24e5c0f57.tar.bz2
conditional pattern analysis
Pattern analysis (currently limited to long ranges of identical bytes) is actually detrimental to performance when `nbSearches` is low. The reason is that `nbSearches` already provides built-in protection for these cases. The problem with patterns is that they dramatically increase the number of candidates to visit, but with a low `nbSearches` the match finder simply aborts early. In such cases, pattern analysis adds complexity without reducing the total number of candidates. It does improve the compression ratio slightly, by keeping only "good" candidates, but at a measurable speed cost, so it is not a good trade-off. This patch makes pattern analysis optional: it is enabled for levels 8+ only.
-rw-r--r--lib/lz4hc.c21
-rw-r--r--lib/lz4opt.h4
2 files changed, 17 insertions, 8 deletions
diff --git a/lib/lz4hc.c b/lib/lz4hc.c
index 388eb40..2fff29b 100644
--- a/lib/lz4hc.c
+++ b/lib/lz4hc.c
@@ -191,7 +191,8 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
int longest,
const BYTE** matchpos,
const BYTE** startpos,
- const int maxNbAttempts)
+ const int maxNbAttempts,
+ const int patternAnalysis)
{
U16* const chainTable = hc4->chainTable;
U32* const HashTable = hc4->hashTable;
@@ -264,7 +265,7 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
{ U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex);
matchIndex -= nextOffset;
- if (nextOffset==1) {
+ if (patternAnalysis && nextOffset==1) {
/* may be a repeated pattern */
if (repeat == rep_untested) {
if ( ((pattern & 0xFFFF) == (pattern >> 16))
@@ -299,13 +300,14 @@ LZ4_FORCE_INLINE
int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */
const BYTE* const ip, const BYTE* const iLimit,
const BYTE** matchpos,
- const int maxNbAttempts)
+ const int maxNbAttempts,
+ const int patternAnalysis)
{
const BYTE* uselessPtr = ip;
/* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
* but this won't be the case here, as we define iLowLimit==ip,
* so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
- return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts);
+ return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, patternAnalysis);
}
@@ -403,6 +405,7 @@ static int LZ4HC_compress_hashChain (
)
{
const int inputSize = *srcSizePtr;
+ const int patternAnalysis = (maxNbAttempts > 64); /* levels 8+ */
const BYTE* ip = (const BYTE*) source;
const BYTE* anchor = ip;
@@ -433,7 +436,7 @@ static int LZ4HC_compress_hashChain (
/* Main Loop */
while (ip < mflimit) {
- ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, &ref, maxNbAttempts);
+ ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis);
if (ml<MINMATCH) { ip++; continue; }
/* saved, in case we would skip too much */
@@ -443,7 +446,9 @@ static int LZ4HC_compress_hashChain (
_Search2:
if (ip+ml < mflimit)
- ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2, maxNbAttempts);
+ ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
+ ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2,
+ maxNbAttempts, patternAnalysis);
else
ml2 = ml;
@@ -488,7 +493,9 @@ _Search3:
/* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
if (start2 + ml2 < mflimit)
- ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts);
+ ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
+ start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3,
+ maxNbAttempts, patternAnalysis);
else
ml3 = ml2;
diff --git a/lib/lz4opt.h b/lib/lz4opt.h
index 9917851..6c15598 100644
--- a/lib/lz4opt.h
+++ b/lib/lz4opt.h
@@ -85,7 +85,9 @@ LZ4HC_match_t LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx,
/* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
* but this won't be the case here, as we define iLowLimit==ip,
* so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
- int const matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches);
+ int const matchLength = LZ4HC_InsertAndGetWiderMatch(ctx,
+ ip, ip, iHighLimit, minLen, &matchPtr, &ip,
+ nbSearches, 1 /* patternAnalysis */);
if (matchLength <= minLen) return match;
match.len = matchLength;
match.off = (int)(ip-matchPtr);