From 3ad3b0f850ed3bcf5f3faafa414f21f84706461a Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 11 Feb 2018 01:42:12 -0800 Subject: slightly improved decompression speed (~+1-2%) by making shortcut slightly more common --- lib/lz4.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/lz4.c b/lib/lz4.c index 7bf8677..6e5e9f5 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -1220,10 +1220,13 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( size_t const ll = token >> ML_BITS; size_t const off = LZ4_readLE16(ip+ll); const BYTE* const matchPtr = op + ll - off; /* pointer underflow risk ? */ - if ((off >= 18) /* do not deal with overlapping matches */ & (matchPtr >= lowPrefix)) { + if ((off >= 8) /* do not deal with overlapping matches */ & (matchPtr >= lowPrefix)) { size_t const ml = (token & ML_MASK) + MINMATCH; memcpy(op, ip, 16); op += ll; ip += ll + 2 /*offset*/; - memcpy(op, matchPtr, 18); op += ml; + memcpy(op+ 0, matchPtr+ 0, 8); + memcpy(op+ 8, matchPtr+ 8, 8); + memcpy(op+16, matchPtr+16, 2); + op += ml; continue; } } -- cgit v0.12 From 219abab74bf8914d3f8761db5b398103cbbd77d7 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 11 Feb 2018 22:20:09 -0800 Subject: removed LZ4_copy8 better use memcpy() directly --- lib/lz4.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/lib/lz4.c b/lib/lz4.c index 6e5e9f5..5d4bb21 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -270,11 +270,6 @@ static void LZ4_writeLE16(void* memPtr, U16 value) } } -static void LZ4_copy8(void* dst, const void* src) -{ - memcpy(dst,src,8); -} - /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */ LZ4_FORCE_O2_INLINE_GCC_PPC64LE void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd) @@ -283,7 +278,7 @@ void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd) const BYTE* s = (const BYTE*)srcPtr; BYTE* const e = (BYTE*)dstEnd; - do { LZ4_copy8(d,s); d+=8; s+=8; } while (d<e); + do { memcpy(d,s,8); d+=8; s+=8; } while (d<e); } @@ -1223,9 +1218,9 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( if ((off >= 8) /* do not deal
with overlapping matches */ & (matchPtr >= lowPrefix)) { size_t const ml = (token & ML_MASK) + MINMATCH; memcpy(op, ip, 16); op += ll; ip += ll + 2 /*offset*/; - memcpy(op+ 0, matchPtr+ 0, 8); - memcpy(op+ 8, matchPtr+ 8, 8); - memcpy(op+16, matchPtr+16, 2); + memcpy(op + 0, matchPtr + 0, 8); + memcpy(op + 8, matchPtr + 8, 8); + memcpy(op +16, matchPtr +16, 2); op += ml; continue; } @@ -1316,7 +1311,7 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( match += inc32table[offset]; memcpy(op+4, match, 4); match -= dec64table[offset]; - } else { LZ4_copy8(op, match); match+=8; } + } else { memcpy(op, match, 8); match+=8; } op += 8; if (unlikely(cpy>oend-12)) { @@ -1329,7 +1324,7 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( } while (op<cpy) *op++ = *match++; } else { - LZ4_copy8(op, match); + memcpy(op, match, 8); if (length>16) LZ4_wildCopy(op+8, match+8, cpy); } op = cpy; /* correction */ -- cgit v0.12