From 952942d03297ab42512bc084b008a1df5b7a1306 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 29 Sep 2022 18:29:03 -0700 Subject: LZ4 HC matchfinder returns an offset value instead of a virtual past pointer. --- lib/lz4hc.c | 101 ++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 58 insertions(+), 43 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 88fbdc7..34ec97c 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -239,13 +239,17 @@ static int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex) typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e; typedef enum { favorCompressionRatio=0, favorDecompressionSpeed } HCfavor_e; -LZ4_FORCE_INLINE int +typedef struct { + int off; + int len; +} LZ4HC_match_t; + +LZ4_FORCE_INLINE LZ4HC_match_t LZ4HC_InsertAndGetWiderMatch ( LZ4HC_CCtx_internal* const hc4, const BYTE* const ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit, int longest, - const BYTE** matchpos, const BYTE** startpos, const int maxNbAttempts, const int patternAnalysis, const int chainSwap, @@ -270,20 +274,24 @@ LZ4HC_InsertAndGetWiderMatch ( U32 matchIndex; repeat_state_e repeat = rep_untested; size_t srcPatternLength = 0; + int offset = 0; DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch"); + assert(startpos != NULL); + *startpos = ip; /* in case there is no solution */ /* First Match */ LZ4HC_Insert(hc4, ip); matchIndex = HashTable[LZ4HC_hashPtr(ip)]; - DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)", - matchIndex, lowestMatchIndex); + DEBUGLOG(7, "First candidate match for pos %u found at index %u / %u (lowestMatchIndex)", + ipIndex, matchIndex, lowestMatchIndex); while ((matchIndex>=lowestMatchIndex) && (nbAttempts>0)) { int matchLength=0; nbAttempts--; assert(matchIndex < ipIndex); if (favorDecSpeed && (ipIndex - matchIndex < 8)) { - /* do nothing */ + /* do nothing: + * favorDecSpeed intentionally skips matches with offset < 8 */ } else if (matchIndex >= prefixIdx) { /* within current Prefix */ const BYTE* const matchPtr = prefixPtr + matchIndex - prefixIdx; assert(matchPtr < ip); @@ -295,10 +303,11 @@ LZ4HC_InsertAndGetWiderMatch ( matchLength -= back; if (matchLength > longest) { longest = matchLength; - *matchpos = matchPtr + back; + offset = (int)(ipIndex - matchIndex); *startpos = ip + back; + DEBUGLOG(7, "Found match of len=%i within prefix, offset=%i, back=%i", longest, offset, -back); } } } - } else { /* lowestMatchIndex <= matchIndex < dictLimit */ + } else { /* lowestMatchIndex <= matchIndex < dictLimit : within Ext Dict */ const BYTE* const matchPtr = dictStart + (matchIndex - dictIdx); assert(matchIndex >= dictIdx); if ( likely(matchIndex <= prefixIdx - 4) @@ -313,8 +322,9 @@ LZ4HC_InsertAndGetWiderMatch ( matchLength -= back; if (matchLength > longest) { longest = matchLength; - *matchpos = prefixPtr + matchIndex - prefixIdx + back; /* virtual pos, relative to ip, to retrieve offset */ + offset = (int)(ipIndex - matchIndex); *startpos = ip + back; + DEBUGLOG(7, "Found match of len=%i within dict, offset=%i, back=%i", longest, offset, -back); } } } if (chainSwap && matchLength==longest) { /* better match => select a better chain */ @@ -347,6 +357,7 @@ LZ4HC_InsertAndGetWiderMatch ( if (repeat == rep_untested) { if ( ((pattern & 0xFFFF) == (pattern >> 16)) & ((pattern & 0xFF) == (pattern >> 24)) ) { + DEBUGLOG(7, "Repeat pattern detected, char %02X", pattern >> 24); repeat = rep_confirmed; srcPatternLength = LZ4HC_countPattern(ip+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern); } else { @@ -401,8 +412,9 @@ LZ4HC_InsertAndGetWiderMatch ( if ((size_t)(ip - prefixPtr) + prefixIdx - matchIndex > LZ4_DISTANCE_MAX) break; assert(maxML < 2 GB); longest = (int)maxML; - *matchpos = prefixPtr - prefixIdx + matchIndex; /* virtual pos, relative to ip, to retrieve offset */ + offset = (int)(ipIndex - matchIndex); *startpos = ip; + DEBUGLOG(7, "Found repeat pattern match of len=%i, offset=%i", longest, offset); } { U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex); if (distToNextPattern > matchIndex) break; /* avoid overflow */ @@ -438,9 +450,9 @@ LZ4HC_InsertAndGetWiderMatch ( mlt -= back; if (mlt > longest) { longest = mlt; - *matchpos = prefixPtr - (prefixIdx - matchIndex) + back; + offset = (int)(ipIndex - matchIndex); *startpos = ip + back; - DEBUGLOG(8, "found match of length %i at vPos=%i", longest, (int)matchIndex - (int)prefixIdx + back); + DEBUGLOG(7, "found match of length %i within extDictCtx", longest); } } { U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, dictMatchIndex); @@ -448,13 +460,17 @@ LZ4HC_InsertAndGetWiderMatch ( matchIndex -= nextOffset; } } } - return longest; + { LZ4HC_match_t md; + assert(longest >= 0); + md.len = longest; + md.off = offset; + return md; + } } -LZ4_FORCE_INLINE int +LZ4_FORCE_INLINE LZ4HC_match_t LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */ const BYTE* const ip, const BYTE* const iLimit, - const BYTE** matchpos, const int maxNbAttempts, const int patternAnalysis, const dictCtx_directive dict) @@ -464,7 +480,7 @@ LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table wi /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), * but this won't be the case here, as we define iLowLimit==ip, * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ - return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, patternAnalysis, 0 /*chainSwap*/, dict, favorCompressionRatio); + return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, &uselessPtr, maxNbAttempts, patternAnalysis, 0 /*chainSwap*/, dict, favorCompressionRatio); } /* LZ4HC_encodeSequence() : @@ -597,22 +613,28 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain ( /* Main Loop */ while (ip <= mflimit) { - ml = LZ4HC_InsertAndFindBestMatch(ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis, dict); + { LZ4HC_match_t const md = LZ4HC_InsertAndFindBestMatch(ctx, ip, matchlimit, maxNbAttempts, patternAnalysis, dict); + ml = md.len; + ref = ip - md.off; + } if (ml encode ML1 */ + if (ml2 <= ml) { /* No better match => encode ML1 immediately */ optr = op; if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; continue; @@ -627,19 +649,17 @@ _Search2: if ((start2 - ip) < 3) { /* First Match too small : removed */ ml = ml2; ip = start2; - ref =ref2; + ref = ref2; goto _Search2; } _Search3: - /* At this stage, we have : - * ml2 > ml1, and - * ip1+3 <= ip2 (usually < ip1+ml1) */ if ((start2 - ip) < OPTIMAL_ML) { int correction; int new_ml = ml; if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; - if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH; + if (ip+new_ml > start2 + ml2 - MINMATCH) + new_ml = (int)(start2 - ip) + ml2 - MINMATCH; correction = new_ml - (int)(start2 - ip); if (correction > 0) { start2 += correction; @@ -647,22 +667,24 @@ _Search3: ml2 -= correction; } } - /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */ if (start2 + ml2 <= mflimit) { - ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, - start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, + LZ4HC_match_t const md = LZ4HC_InsertAndGetWiderMatch(ctx, + start2 + ml2 - 3, start2, matchlimit, ml2, &start3, maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio); + ml3 = md.len; + ref3 = start3 - md.off; } else { ml3 = ml2; } - if (ml3 == ml2) { /* No better match => encode ML1 and ML2 */ + if (ml3 <= ml2) { /* No better match => encode ML1 and ML2 */ /* ip & ref are known; Now for ml */ if (start2 < ip+ml) ml = (int)(start2 - ip); /* Now, encode 2 sequences */ optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) + goto _dest_overflow; ip = start2; optr = op; if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, ref2, limit, oend)) { @@ -1308,10 +1330,6 @@ LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen) } -typedef struct { - int off; - int len; -} LZ4HC_match_t; LZ4_FORCE_INLINE LZ4HC_match_t LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, @@ -1320,19 +1338,16 @@ LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, const dictCtx_directive dict, const HCfavor_e favorDecSpeed) { - LZ4HC_match_t match = { 0 , 0 }; - const BYTE* matchPtr = NULL; + LZ4HC_match_t const match0 = { 0 , 0 }; /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), * but this won't be the case here, as we define iLowLimit==ip, - * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ - int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /*patternAnalysis*/, 1 /*chainSwap*/, dict, favorDecSpeed); - if (matchLength <= minLen) return match; + ** so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ + LZ4HC_match_t md = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &ip, nbSearches, 1 /*patternAnalysis*/, 1 /*chainSwap*/, dict, favorDecSpeed); + if (md.len <= minLen) return match0; if (favorDecSpeed) { - if ((matchLength>18) & (matchLength<=36)) matchLength=18; /* favor shortcut */ + if ((md.len>18) & (md.len<=36)) md.len=18; /* favor shortcut */ } - match.len = matchLength; - match.off = (int)(ip-matchPtr); - return match; + return md; } -- cgit v0.12 From a0adc616cddfc2b9ecf0fb0c1af7b43edcb4bd42 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 29 Sep 2022 18:36:22 -0700 Subject: sequence encoder accepts offset as a value instead of a virtual pointer --- lib/lz4hc.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 34ec97c..589e611 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -491,7 +491,7 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence ( BYTE** _op, const BYTE** _anchor, int matchLength, - const BYTE* const match, + int offset, limitedOutput_directive limit, BYTE* oend) { @@ -512,9 +512,9 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence ( U32 const cost = 1 + llAdd + ll + 2 + mlAdd; if (start==NULL) start = anchor; /* only works for single segment */ /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */ - DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5u, cost:%4u + %5u", + DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5i, cost:%4u + %5u", pos, - (U32)(ip - anchor), matchLength, (U32)(ip-match), + (U32)(ip - anchor), matchLength, offset, cost, totalCost); totalCost += cost; #endif @@ -542,8 +542,9 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence ( op += length; /* Encode Offset */ - assert( (ip - match) <= LZ4_DISTANCE_MAX ); /* note : consider providing offset as a value, rather than as a pointer difference */ - LZ4_writeLE16(op, (U16)(ip - match)); op += 2; + assert(offset <= LZ4_DISTANCE_MAX ); + assert(offset > 0); + LZ4_writeLE16(op, (U16)(offset)); op += 2; /* Encode MatchLength */ assert(matchLength >= MINMATCH); @@ -636,7 +637,7 @@ _Search2: if (ml2 <= ml) { /* No better match => encode ML1 immediately */ optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, (int)(ip - ref), limit, oend)) goto _dest_overflow; continue; } @@ -683,11 +684,11 @@ _Search3: if (start2 < ip+ml) ml = (int)(start2 - ip); /* Now, encode 2 sequences */ optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, (int)(ip - ref), limit, oend)) goto _dest_overflow; ip = start2; optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, ref2, limit, oend)) { + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, (int)(ip - ref2), limit, oend)) { ml = ml2; ref = ref2; goto _dest_overflow; @@ -710,7 +711,7 @@ _Search3: } optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, (int)(ip - ref), limit, oend)) goto _dest_overflow; ip = start3; ref = ref3; ml = ml3; @@ -748,7 +749,7 @@ _Search3: } } optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, (int)(ip - ref), limit, oend)) goto _dest_overflow; /* ML2 becomes ML1 */ ip = start2; ref = ref2; ml = ml2; @@ -808,7 +809,7 @@ _dest_overflow: assert(maxMlSize < INT_MAX); assert(ml >= 0); if ((size_t)ml > maxMlSize) ml = (int)maxMlSize; if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ml >= MFLIMIT) { - LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, notLimited, oend); + LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, (int)(ip - ref), notLimited, oend); } } goto _last_literals; } @@ -1405,7 +1406,7 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx, int const firstML = firstMatch.len; const BYTE* const matchPos = ip - firstMatch.off; opSaved = op; - if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, matchPos, limit, oend) ) { /* updates ip, op and anchor */ + if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, firstMatch.off, limit, oend) ) { /* updates ip, op and anchor */ ovml = firstML; ovref = matchPos; goto _dest_overflow; @@ -1581,7 +1582,7 @@ encode: /* cur, last_match_pos, best_mlen, best_off must be set */ assert(ml >= MINMATCH); assert((offset >= 1) && (offset <= LZ4_DISTANCE_MAX)); opSaved = op; - if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ip - offset, limit, oend) ) { /* updates ip, op and anchor */ + if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, offset, limit, oend) ) { /* updates ip, op and anchor */ ovml = ml; ovref = ip - offset; goto _dest_overflow; @@ -1642,7 +1643,7 @@ if (limit == fillOutput) { if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ovml >= MFLIMIT) { DEBUGLOG(6, "Space to end : %i + ml (%i)", (int)((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1), ovml); DEBUGLOG(6, "Before : ip = %p, anchor = %p", ip, anchor); - LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ovml, ovref, notLimited, oend); + LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ovml, (int)(ip - ovref), notLimited, oend); DEBUGLOG(6, "After : ip = %p, anchor = %p", ip, anchor); } } goto _last_literals; -- cgit v0.12 From 0a2e406d82b5fbef988a2880a7f7e18e33622394 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 29 Sep 2022 18:42:46 -0700 Subject: removed virtual match pointer from HC parser replaced by direct offset values. --- lib/lz4hc.c | 49 +++++++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 589e611..bdd4d1e 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -599,12 +599,12 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain ( int ml0, ml, ml2, ml3; const BYTE* start0; - const BYTE* ref0; - const BYTE* ref = NULL; + int offset0; + int offset1; const BYTE* start2 = NULL; - const BYTE* ref2 = NULL; + int offset2; const BYTE* start3 = NULL; - const BYTE* ref3 = NULL; + int offset3; /* init */ DEBUGLOG(5, "LZ4HC_compress_hashChain (dict?=>%i)", dict); @@ -616,12 +616,12 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain ( while (ip <= mflimit) { { LZ4HC_match_t const md = LZ4HC_InsertAndFindBestMatch(ctx, ip, matchlimit, maxNbAttempts, patternAnalysis, dict); ml = md.len; - ref = ip - md.off; + offset1 = md.off; } if (ml encode ML1 immediately */ optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, (int)(ip - ref), limit, oend)) goto _dest_overflow; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, offset1, limit, oend)) goto _dest_overflow; continue; } if (start0 < ip) { /* first match was skipped at least once */ if (start2 < ip + ml0) { /* squeezing ML1 between ML0(original ML1) and ML2 */ - ip = start0; ref = ref0; ml = ml0; /* restore initial ML1 */ + ip = start0; offset1 = offset0; ml = ml0; /* restore initial ML1 */ } } /* Here, start0==ip */ if ((start2 - ip) < 3) { /* First Match too small : removed */ ml = ml2; ip = start2; - ref = ref2; + offset1 = offset2; goto _Search2; } @@ -664,7 +664,6 @@ _Search3: correction = new_ml - (int)(start2 - ip); if (correction > 0) { start2 += correction; - ref2 += correction; ml2 -= correction; } } @@ -674,7 +673,7 @@ _Search3: start2 + ml2 - 3, start2, matchlimit, ml2, &start3, maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio); ml3 = md.len; - ref3 = start3 - md.off; + offset3 = md.off; } else { ml3 = ml2; } @@ -684,13 +683,13 @@ _Search3: if (start2 < ip+ml) ml = (int)(start2 - ip); /* Now, encode 2 sequences */ optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, (int)(ip - ref), limit, oend)) + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, offset1, limit, oend)) goto _dest_overflow; ip = start2; optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, (int)(ip - ref2), limit, oend)) { + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, offset2, limit, oend)) { ml = ml2; - ref = ref2; + offset1 = offset2; goto _dest_overflow; } continue; @@ -701,29 +700,28 @@ _Search3: if (start2 < ip+ml) { int correction = (int)(ip+ml - start2); start2 += correction; - ref2 += correction; ml2 -= correction; if (ml2 < MINMATCH) { start2 = start3; - ref2 = ref3; + offset2 = offset3; ml2 = ml3; } } optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, (int)(ip - ref), limit, oend)) goto _dest_overflow; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, offset1, limit, oend)) goto _dest_overflow; ip = start3; - ref = ref3; + offset1 = offset3; ml = ml3; start0 = start2; - ref0 = ref2; + offset0 = offset2; ml0 = ml2; goto _Search2; } start2 = start3; - ref2 = ref3; + offset2 = offset3; ml2 = ml3; goto _Search3; } @@ -741,7 +739,6 @@ _Search3: correction = ml - (int)(start2 - ip); if (correction > 0) { start2 += correction; - ref2 += correction; ml2 -= correction; } } else { @@ -749,13 +746,13 @@ _Search3: } } optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, (int)(ip - ref), limit, oend)) goto _dest_overflow; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, offset1, limit, oend)) goto _dest_overflow; /* ML2 becomes ML1 */ - ip = start2; ref = ref2; ml = ml2; + ip = start2; offset1 = offset2; ml = ml2; /* ML3 becomes ML2 */ - start2 = start3; ref2 = ref3; ml2 = ml3; + start2 = start3; offset2 = offset3; ml2 = ml3; /* let's find a new ML3 */ goto _Search3; @@ -809,7 +806,7 @@ _dest_overflow: assert(maxMlSize < INT_MAX); assert(ml >= 0); if ((size_t)ml > maxMlSize) ml = (int)maxMlSize; if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ml >= MFLIMIT) { - LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, (int)(ip - ref), notLimited, oend); + LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, offset1, notLimited, oend); } } goto _last_literals; } -- cgit v0.12 From 2fefb1da619898d996c5a75f98fc1e8cdf28f31c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 29 Sep 2022 18:48:03 -0700 Subject: removed virtual pointer from optimal parser replaced by direct offset value. this virtual pointer was only used in rare _dstSize scenario. --- lib/lz4hc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index bdd4d1e..134d850 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -1378,7 +1378,7 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx, BYTE* opSaved = (BYTE*) dst; BYTE* oend = op + dstCapacity; int ovml = MINMATCH; /* overflow - last sequence */ - const BYTE* ovref = NULL; + int ovoff = 0; /* init */ #if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 @@ -1401,11 +1401,10 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx, if ((size_t)firstMatch.len > sufficient_len) { /* good enough solution : immediate encoding */ int const firstML = firstMatch.len; - const BYTE* const matchPos = ip - firstMatch.off; opSaved = op; if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, firstMatch.off, limit, oend) ) { /* updates ip, op and anchor */ ovml = firstML; - ovref = matchPos; + ovoff = firstMatch.off; goto _dest_overflow; } continue; @@ -1581,7 +1580,7 @@ encode: /* cur, last_match_pos, best_mlen, best_off must be set */ opSaved = op; if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, offset, limit, oend) ) { /* updates ip, op and anchor */ ovml = ml; - ovref = ip - offset; + ovoff = offset; goto _dest_overflow; } } } } /* while (ip <= mflimit) */ @@ -1640,7 +1639,7 @@ if (limit == fillOutput) { if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ovml >= MFLIMIT) { DEBUGLOG(6, "Space to end : %i + ml (%i)", (int)((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1), ovml); DEBUGLOG(6, "Before : ip = %p, anchor = %p", ip, anchor); - LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ovml, (int)(ip - ovref), notLimited, oend); + LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ovml, ovoff, notLimited, oend); DEBUGLOG(6, "After : ip = %p, anchor = %p", ip, anchor); } } goto _last_literals; -- cgit v0.12 From a3d1762092c624732eb93a1e28c1abdb7566fb87 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 30 Sep 2022 18:00:28 -0700 Subject: use LZ4HC_match_t structure directly to store match candidates this formalized better the coupling between match length and match offset which were 2 separated variables before. --- lib/lz4hc.c | 121 ++++++++++++++++++++++++++---------------------------------- 1 file changed, 53 insertions(+), 68 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 134d850..cf77a0a 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -597,14 +597,11 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain ( BYTE* op = (BYTE*) dest; BYTE* oend = op + maxOutputSize; - int ml0, ml, ml2, ml3; const BYTE* start0; - int offset0; - int offset1; const BYTE* start2 = NULL; - int offset2; const BYTE* start3 = NULL; - int offset3; + LZ4HC_match_t m0, m1, m2, m3; + const LZ4HC_match_t nomatch = {0, 0}; /* init */ DEBUGLOG(5, "LZ4HC_compress_hashChain (dict?=>%i)", dict); @@ -614,115 +611,102 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain ( /* Main Loop */ while (ip <= mflimit) { - { LZ4HC_match_t const md = LZ4HC_InsertAndFindBestMatch(ctx, ip, matchlimit, maxNbAttempts, patternAnalysis, dict); - ml = md.len; - offset1 = md.off; - } - if (ml encode ML1 immediately */ + if (m2.len <= m1.len) { /* No better match => encode ML1 immediately */ optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, offset1, limit, oend)) goto _dest_overflow; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), m1.len, m1.off, limit, oend)) goto _dest_overflow; continue; } if (start0 < ip) { /* first match was skipped at least once */ - if (start2 < ip + ml0) { /* squeezing ML1 between ML0(original ML1) and ML2 */ - ip = start0; offset1 = offset0; ml = ml0; /* restore initial ML1 */ + if (start2 < ip + m0.len) { /* squeezing ML1 between ML0(original ML1) and ML2 */ + ip = start0; m1 = m0; /* restore initial Match1 */ } } /* Here, start0==ip */ if ((start2 - ip) < 3) { /* First Match too small : removed */ - ml = ml2; ip = start2; - offset1 = offset2; + m1 = m2; goto _Search2; } _Search3: if ((start2 - ip) < OPTIMAL_ML) { int correction; - int new_ml = ml; + int new_ml = m1.len; if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; - if (ip+new_ml > start2 + ml2 - MINMATCH) - new_ml = (int)(start2 - ip) + ml2 - MINMATCH; + if (ip+new_ml > start2 + m2.len - MINMATCH) + new_ml = (int)(start2 - ip) + m2.len - MINMATCH; correction = new_ml - (int)(start2 - ip); if (correction > 0) { start2 += correction; - ml2 -= correction; + m2.len -= correction; } } - if (start2 + ml2 <= mflimit) { - LZ4HC_match_t const md = LZ4HC_InsertAndGetWiderMatch(ctx, - start2 + ml2 - 3, start2, matchlimit, ml2, &start3, + if (start2 + m2.len <= mflimit) { + m3 = LZ4HC_InsertAndGetWiderMatch(ctx, + start2 + m2.len - 3, start2, matchlimit, m2.len, &start3, maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio); - ml3 = md.len; - offset3 = md.off; } else { - ml3 = ml2; + m3 = nomatch; /* do not search further */ } - if (ml3 <= ml2) { /* No better match => encode ML1 and ML2 */ + if (m3.len <= m2.len) { /* No better match => encode ML1 and ML2 */ /* ip & ref are known; Now for ml */ - if (start2 < ip+ml) ml = (int)(start2 - ip); + if (start2 < ip+m1.len) m1.len = (int)(start2 - ip); /* Now, encode 2 sequences */ optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, offset1, limit, oend)) + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), m1.len, m1.off, limit, oend)) goto _dest_overflow; ip = start2; optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, offset2, limit, oend)) { - ml = ml2; - offset1 = offset2; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), m2.len, m2.off, limit, oend)) { + m1 = m2; goto _dest_overflow; } continue; } - if (start3 < ip+ml+3) { /* Not enough space for match 2 : remove it */ - if (start3 >= (ip+ml)) { /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */ - if (start2 < ip+ml) { - int correction = (int)(ip+ml - start2); + if (start3 < ip+m1.len+3) { /* Not enough space for match 2 : remove it */ + if (start3 >= (ip+m1.len)) { /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */ + if (start2 < ip+m1.len) { + int correction = (int)(ip+m1.len - start2); start2 += correction; - ml2 -= correction; - if (ml2 < MINMATCH) { + m2.len -= correction; + if (m2.len < MINMATCH) { start2 = start3; - offset2 = offset3; - ml2 = ml3; + m2 = m3; } } optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, offset1, limit, oend)) goto _dest_overflow; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), m1.len, m1.off, limit, oend)) goto _dest_overflow; ip = start3; - offset1 = offset3; - ml = ml3; + m1 = m3; start0 = start2; - offset0 = offset2; - ml0 = ml2; + m0 = m2; goto _Search2; } start2 = start3; - offset2 = offset3; - ml2 = ml3; + m2 = m3; goto _Search3; } @@ -731,28 +715,29 @@ _Search3: * let's write the first one ML1. * ip & ref are known; Now decide ml. */ - if (start2 < ip+ml) { + if (start2 < ip+m1.len) { if ((start2 - ip) < OPTIMAL_ML) { int correction; - if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; - if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH; - correction = ml - (int)(start2 - ip); + if (m1.len > OPTIMAL_ML) m1.len = OPTIMAL_ML; + if (ip + m1.len > start2 + m2.len - MINMATCH) + m1.len = (int)(start2 - ip) + m2.len - MINMATCH; + correction = m1.len - (int)(start2 - ip); if (correction > 0) { start2 += correction; - ml2 -= correction; + m2.len -= correction; } } else { - ml = (int)(start2 - ip); + m1.len = (int)(start2 - ip); } } optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, offset1, limit, oend)) goto _dest_overflow; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), m1.len, m1.off, limit, oend)) goto _dest_overflow; /* ML2 becomes ML1 */ - ip = start2; offset1 = offset2; ml = ml2; + ip = start2; m1 = m2; /* ML3 becomes ML2 */ - start2 = start3; offset2 = offset3; ml2 = ml3; + start2 = start3; m2 = m3; /* let's find a new ML3 */ goto _Search3; @@ -803,10 +788,10 @@ _dest_overflow: /* ll validated; now adjust match length */ size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost)); size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255); - assert(maxMlSize < INT_MAX); assert(ml >= 0); - if ((size_t)ml > maxMlSize) ml = (int)maxMlSize; - if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ml >= MFLIMIT) { - LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, offset1, notLimited, oend); + assert(maxMlSize < INT_MAX); assert(m1.len >= 0); + if ((size_t)m1.len > maxMlSize) m1.len = (int)maxMlSize; + if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + m1.len >= MFLIMIT) { + LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), m1.len, m1.off, notLimited, oend); } } goto _last_literals; } -- cgit v0.12 From 68848ec601b96c0d9f86e91585309b2738d2c48c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 30 Sep 2022 18:06:53 -0700 Subject: fix another ubsan warning in lz4hc because ubsan complains even about intermediate pointer arithmetic results (in a simple linear formula with 3 factors for example). use parenthesis to make sure calculations go directly from one valid memory address to another valid memory address value. --- lib/lz4hc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index cf77a0a..33b5de4 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -366,7 +366,7 @@ LZ4HC_InsertAndGetWiderMatch ( if ( (repeat == rep_confirmed) && (matchCandidateIdx >= lowestMatchIndex) && LZ4HC_protectDictEnd(prefixIdx, matchCandidateIdx) ) { const int extDict = matchCandidateIdx < prefixIdx; - const BYTE* const matchPtr = (extDict ? dictStart - dictIdx : prefixPtr - prefixIdx) + matchCandidateIdx; + const BYTE* const matchPtr = extDict ? dictStart + (matchCandidateIdx - dictIdx) : prefixPtr + (matchCandidateIdx - prefixIdx); if (LZ4_read32(matchPtr) == pattern) { /* good candidate */ const BYTE* const iLimit = extDict ? dictEnd : iHighLimit; size_t forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iLimit, pattern) + sizeof(pattern); -- cgit v0.12