From 2620c0924bd69f7fc30cd76239d3808b3b5c5e6d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 13 Sep 2022 16:49:48 -0700 Subject: remove another usage of base within `LZ4_loadDict()` function : it's possible to only employ Indexes directly. --- lib/lz4.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/lz4.c b/lib/lz4.c index 3d5fba6..ee6d209 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -1222,6 +1222,7 @@ _next_match: if (dictDirective == usingDictCtx) { if (matchIndex < startIndex) { /* there was no match, try the dictionary */ + assert(tableType == byU32); matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32); match = dictBase + matchIndex; lowLimit = dictionary; /* required for match length counter */ @@ -1542,7 +1543,7 @@ int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) const tableType_t tableType = byU32; const BYTE* p = (const BYTE*)dictionary; const BYTE* const dictEnd = p + dictSize; - const BYTE* base; + U32 idx32; DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict); @@ -1565,14 +1566,15 @@ int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) } if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB; - base = dictEnd - dict->currentOffset; dict->dictionary = p; dict->dictSize = (U32)(dictEnd - p); dict->tableType = (U32)tableType; + idx32 = dict->currentOffset - dict->dictSize; while (p <= dictEnd-HASH_UNIT) { - LZ4_putPosition(p, dict->hashTable, tableType, base); - p+=3; + U32 const h = LZ4_hashPosition(p, tableType); + LZ4_putIndexOnHash(idx32, h, dict->hashTable, tableType); + p+=3; idx32+=3; } return (int)dict->dictSize; -- cgit v0.12 From 2c8fd1142a9bf025d14be732be00f34ad33af131 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 13 Sep 2022 17:34:08 -0700 Subject: removed a few more usages of base ptr by making LZ4_putPosition() specific to byPtr strategy. byU32 and byU16 use LZ4_putIndexOnHash() instead. In both cases, it makes it irrelevant to pass` base as reference ptr. --- lib/lz4.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/lib/lz4.c b/lib/lz4.c index ee6d209..955e864 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -804,22 +804,16 @@ LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableT } LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h, - void* tableBase, tableType_t const tableType, - const BYTE* srcBase) + void* tableBase, tableType_t const tableType) { - switch (tableType) - { - case clearedTable: { /* illegal! */ assert(0); return; } - case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; } - case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; } - case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; } - } + assert(tableType == byPtr); (void)tableType; + { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; } } -LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) +LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType) { U32 const h = LZ4_hashPosition(p, tableType); - LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); + LZ4_putPositionOnHash(p, h, tableBase, tableType); } /* LZ4_getIndexOnHash() : @@ -979,7 +973,12 @@ LZ4_FORCE_INLINE int LZ4_compress_generic_validated( if (inputSizehashTable, tableType, base); + { U32 const h = LZ4_hashPosition(ip, tableType); + if (tableType == byPtr) { + LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType); + } else { + LZ4_putIndexOnHash(startIndex, h, cctx->hashTable, tableType); + } } ip++; forwardH = LZ4_hashPosition(ip, tableType); /* Main Loop */ @@ -1004,7 +1003,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic_validated( match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType); forwardH = LZ4_hashPosition(forwardIp, tableType); - LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base); + LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType); } while ( (match+LZ4_DISTANCE_MAX < ip) || (LZ4_read32(match) != LZ4_read32(ip)) ); @@ -1202,13 +1201,19 @@ _next_match: if (ip >= mflimitPlusOne) break; /* Fill table */ - LZ4_putPosition(ip-2, cctx->hashTable, tableType, base); + { U32 const h = LZ4_hashPosition(ip-2, tableType); + if (tableType == byPtr) { + LZ4_putPositionOnHash(ip-2, h, cctx->hashTable, tableType); + } else { + U32 const idx = (U32)((ip-2) - base); + LZ4_putIndexOnHash(idx, h, cctx->hashTable, tableType); + } } /* Test next position */ if (tableType == byPtr) { match = LZ4_getPosition(ip, cctx->hashTable, tableType); - LZ4_putPosition(ip, cctx->hashTable, tableType, base); + LZ4_putPosition(ip, cctx->hashTable, tableType); if ( (match+LZ4_DISTANCE_MAX >= ip) && (LZ4_read32(match) == LZ4_read32(ip)) ) { token=op++; *token=0; goto _next_match; } -- cgit v0.12 From 7f54a56413c3e09e19925c93cf248b5a9b4acf48 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 13 Sep 2022 18:17:53 -0700 Subject: fixed minor UB warning now, even intermediate ptr arithmetic results can be UB ?? --- lib/lz4hc.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index fa3e014..446f55d 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -312,7 +312,7 @@ LZ4HC_InsertAndGetWiderMatch ( matchLength -= back; if (matchLength > longest) { longest = matchLength; - *matchpos = prefixPtr - prefixIdx + matchIndex + back; /* virtual pos, relative to ip, to retrieve offset */ + *matchpos = prefixPtr + matchIndex - prefixIdx + back; /* virtual pos, relative to ip, to retrieve offset */ *startpos = ip + back; } } } @@ -437,7 +437,7 @@ LZ4HC_InsertAndGetWiderMatch ( mlt -= back; if (mlt > longest) { longest = mlt; - *matchpos = prefixPtr - prefixIdx + matchIndex + back; + *matchpos = prefixPtr - (prefixIdx - matchIndex) + back; *startpos = ip + back; DEBUGLOG(8, "found match of length %i at vPos=%i", longest, (int)matchIndex - (int)prefixIdx + back); } } @@ -803,16 +803,17 @@ static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx, const HCfavor_e favorDecSpeed); -LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal ( - LZ4HC_CCtx_internal* const ctx, - const char* const src, - char* const dst, - int* const srcSizePtr, - int const dstCapacity, - int cLevel, - const limitedOutput_directive limit, - const dictCtx_directive dict - ) +LZ4_FORCE_INLINE int +LZ4HC_compress_generic_internal ( + LZ4HC_CCtx_internal* const ctx, + const char* const src, + char* const dst, + int* const srcSizePtr, + int const dstCapacity, + int cLevel, + const limitedOutput_directive limit, + const dictCtx_directive dict + ) { typedef enum { lz4hc, lz4opt } lz4hc_strat_e; typedef struct { -- cgit v0.12