From a12cdf00c3d0a0e3e7b2638d4a256f14b15c0072 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 20 Oct 2017 17:04:29 -0700 Subject: lz4opt: added hash chain search --- lib/lz4hc.c | 27 ++++++++++++++++++--------- lib/lz4hc.h | 2 +- lib/lz4opt.h | 58 ++++++++++++++++++++++++++++++++++++++++++++-------------- 3 files changed, 63 insertions(+), 24 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 47474d3..adabd9c 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -183,7 +183,7 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( const BYTE* const dictBase = hc4->dictBase; int const delta = (int)(ip-iLowLimit); int nbAttempts = maxNbAttempts; - reg_t const pattern = LZ4_read_ARCH(ip); + U32 const pattern = LZ4_read32(ip); U32 matchIndex; repeat_state_e repeat = rep_untested; size_t srcPatternLength = 0; @@ -195,10 +195,16 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( while ((matchIndex>=lowLimit) && (nbAttempts)) { nbAttempts--; + int trace = 0; + if (nbAttempts==0) { + trace = 1; + DEBUGLOG(2, "reached max nb of attempts ! : %08X => %08X ", + pattern, HASH_FUNCTION(pattern)); + } if (matchIndex >= dictLimit) { const BYTE* const matchPtr = base + matchIndex; if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) { - if (LZ4_read32(matchPtr) == (U32)pattern) { + if (LZ4_read32(matchPtr) == pattern) { int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); #if 0 /* more generic but unfortunately slower ... */ @@ -221,9 +227,9 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( } } else { /* matchIndex < dictLimit */ const BYTE* const matchPtr = dictBase + matchIndex; - if (LZ4_read32(matchPtr) == (U32)pattern) { + if (LZ4_read32(matchPtr) == pattern) { int mlt; - int back=0; + int back = 0; const BYTE* vLimit = ip + (dictLimit - matchIndex); if (vLimit > iHighLimit) vLimit = iHighLimit; mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; @@ -243,22 +249,25 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( { U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex); matchIndex -= nextOffset; if (1 && (nextOffset==1)) { + if (trace) DEBUGLOG(2, "check repeat mode : %u", repeat); /* may be a repeated pattern */ if (repeat == rep_untested) { - if (LZ4_read32(ip+4) == (U32)pattern) { /* should check ip limit */ + if ((pattern & 0xFFFF) == (pattern >> 16)) { /* is it enough ? */ repeat = rep_confirmed; - srcPatternLength = LZ4HC_countPattern(ip+8, iHighLimit, pattern) + 8; + srcPatternLength = LZ4HC_countPattern(ip+4, iHighLimit, pattern) + 4; } else { repeat = rep_not; } } if ( (repeat == rep_confirmed) /* proven repeated pattern (1-2-4) */ && (matchIndex >= dictLimit) ) { /* same segment only */ const BYTE* const matchPtr = base + matchIndex; - if (LZ4_read_ARCH(matchPtr) == pattern) { /* good candidate */ + if (trace) DEBUGLOG(2, "search direct pattern position"); + if (LZ4_read32(matchPtr) == pattern) { /* good candidate */ size_t const forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern); const BYTE* const maxLowPtr = (lowPrefixPtr + MAX_DISTANCE >= ip) ? lowPrefixPtr : ip - MAX_DISTANCE; - size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, maxLowPtr, (U32)pattern); + size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, maxLowPtr, pattern); size_t const currentSegmentLength = backLength + forwardPatternLength; + if (trace) DEBUGLOG(2, "good start position (match == pattern)"); if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */ && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */ @@ -633,7 +642,7 @@ static int LZ4HC_getSearchNum(int compressionLevel) switch (compressionLevel) { default: return 0; /* unused */ case 11: return 128; - case 12: return 1<<10; + case 12: return 1<<13; } } diff --git a/lib/lz4hc.h b/lib/lz4hc.h index 66d5636..a9cefbb 100644 --- a/lib/lz4hc.h +++ b/lib/lz4hc.h @@ -129,7 +129,7 @@ LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, in * They are exposed to allow static allocation of `LZ4_streamHC_t`. * Using these definitions makes the code vulnerable to potential API break when upgrading LZ4 **************************************/ -#define LZ4HC_DICTIONARY_LOGSIZE 17 /* because of btopt, hc would only need 16 */ +#define LZ4HC_DICTIONARY_LOGSIZE 17 /* due to btree, hc would only need 16 */ #define LZ4HC_MAXD (1<searchNum); + if (matchLength < MINMATCH) return 0; + assert(matches != NULL); + matches[0].len = matchLength; + matches[0].off = (int)(ip-matchPtr); + (void)fullUpdate; + return 1; +} + + static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx, const char* const source, - char* dest, + char* dst, int inputSize, - int maxOutputSize, + int dstCapacity, limitedOutput_directive limit, size_t sufficient_len, const int fullUpdate @@ -214,8 +241,8 @@ static int LZ4HC_compress_optimal ( const BYTE* const iend = ip + inputSize; const BYTE* const mflimit = iend - MFLIMIT; const BYTE* const matchlimit = (iend - LASTLITERALS); - BYTE* op = (BYTE*) dest; - BYTE* const oend = op + maxOutputSize; + BYTE* op = (BYTE*) dst; + BYTE* const oend = op + dstCapacity; /* init */ DEBUGLOG(5, "LZ4HC_compress_optimal"); @@ -230,7 +257,8 @@ static int LZ4HC_compress_optimal ( int best_mlen, best_off; int cur, last_match_pos = 0; - int const nb_matches_initial = LZ4HC_BinTree_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches, fullUpdate); + //int const nb_matches_initial = LZ4HC_BinTree_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches, fullUpdate); + int const nb_matches_initial = LZ4HC_HashChain_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches, fullUpdate); if (!nb_matches_initial) { ip++; continue; } if ((size_t)matches[nb_matches_initial-1].len > sufficient_len) { @@ -274,7 +302,9 @@ static int LZ4HC_compress_optimal ( const BYTE* const curPtr = ip + cur; int nb_matches; - /* establish baseline price if cur is literal */ + /* establish baseline price for cur as a literal. + * fixes unused positions (between 2 matches) + * and inefficient match stack */ { int price; int litlen; if (opt[cur-1].mlen == 1) { @@ -295,7 +325,8 @@ static int LZ4HC_compress_optimal ( if (cur == last_match_pos || curPtr >= mflimit) break; - nb_matches = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); + //nb_matches = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); + nb_matches = LZ4HC_HashChain_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); if ((nb_matches > 0) && (size_t)matches[nb_matches-1].len > sufficient_len) { /* immediate encoding */ best_mlen = matches[nb_matches-1].len; @@ -307,9 +338,9 @@ static int LZ4HC_compress_optimal ( /* set prices using matches at position = cur */ { int matchNb; for (matchNb = 0; matchNb < nb_matches; matchNb++) { - int ml = (matchNb>0) ? matches[matchNb-1].len+1 : MINMATCH; int const matchML = (cur + matches[matchNb].len < LZ4_OPT_NUM) ? matches[matchNb].len : (int)(LZ4_OPT_NUM - cur); + int ml = (matchNb>0) ? matches[matchNb-1].len+1 : MINMATCH; for ( ; ml <= matchML ; ml++) { int const pos = cur + ml; @@ -341,19 +372,18 @@ static int LZ4HC_compress_optimal ( encode: /* cur, last_match_pos, best_mlen, best_off must be set */ assert(cur < LZ4_OPT_NUM); assert(last_match_pos >= 1); /* == 1 when only one candidate */ - opt[0].mlen = 1; { int candidate_pos = cur; int selected_matchLength = best_mlen; int selected_offset = best_off; while (1) { /* from end to beginning */ - int const next_matchLength = opt[candidate_pos].mlen; + int const next_matchLength = opt[candidate_pos].mlen; /* can be 1, means literal */ int const next_offset = opt[candidate_pos].off; - assert(next_matchLength > 0); /* note : can be 1, means literal */ opt[candidate_pos].mlen = selected_matchLength; opt[candidate_pos].off = selected_offset; selected_matchLength = next_matchLength; selected_offset = next_offset; if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */ + assert(next_matchLength > 0); /* can be 1, means literal */ candidate_pos -= next_matchLength; } } @@ -362,7 +392,7 @@ encode: /* cur, last_match_pos, best_mlen, best_off must be set */ while (rPos < last_match_pos) { int const ml = opt[rPos].mlen; int const offset = opt[rPos].off; - if (ml == 1) { ip++; rPos++; continue; } /* literal */ + if (ml == 1) { ip++; rPos++; continue; } /* literal; note: can end up with several literals, in which case, skip them */ rPos += ml; assert(ml >= MINMATCH); assert((offset >= 1) && (offset <=65535)); @@ -374,7 +404,7 @@ encode: /* cur, last_match_pos, best_mlen, best_off must be set */ /* Encode Last Literals */ { int lastRun = (int)(iend - anchor); if ( (limit) - && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) + && (((char*)op - dst) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)dstCapacity)) return 0; /* Check output limit */ if (lastRun >= (int)RUN_MASK) { *op++=(RUN_MASK<