author     Dave Watson <davejwatson@fb.com>    2019-01-26 00:19:05 (GMT)
committer  Dave Watson <davejwatson@fb.com>    2019-02-08 21:57:23 (GMT)
commit     faac110e20bef7c5715c068294d1a6950450fad6 (patch)
tree       21eef7a5e6005c02c9298817c5efcb324e8401ad
parent     1fbaf843066caec21489d178fc348dbe62939a90 (diff)
decompress_generic: Unroll loops a bit more
Generally we want our wildcopy loops to look like the memcpy loops from our libc, but without the final byte copy checks. We can unroll a bit to make long copies even faster. The only catch is that this affects the value of FASTLOOP_SAFE_DISTANCE.
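To make the change concrete, here is a minimal standalone sketch of the unrolled copy loop this patch introduces. It is not the exact code from lib/lz4.c (the function name and byte types are simplified here); it only mirrors what the diff below does: copy 32 bytes per iteration with two 16-byte memcpy calls, accepting that the loop may overwrite up to 32 bytes beyond dstEnd.

/* Minimal sketch of the 32-byte unrolled wildcopy (simplified names,
 * not the exact lib/lz4.c code). Like the original wildcopy helpers,
 * it trades precise end handling for speed: the caller must guarantee
 * enough slack past dstEnd, which is why FASTLOOP_SAFE_DISTANCE grows. */
#include <string.h>

static void wildCopy32_sketch(void* dstPtr, const void* srcPtr, void* dstEnd)
{
    unsigned char*       d = (unsigned char*)dstPtr;
    const unsigned char* s = (const unsigned char*)srcPtr;
    unsigned char* const e = (unsigned char*)dstEnd;

    do {
        memcpy(d,      s,      16);  /* two 16-byte copies per iteration; */
        memcpy(d + 16, s + 16, 16);  /* compilers typically emit wide vector moves */
        d += 32;
        s += 32;
    } while (d < e);
}

Because each iteration can now spill a full 32-byte stride past the requested end, the fast-loop distance check is doubled (FASTLOOP_SAFE_DISTANCE goes from 32 to 64 in the diff below) so the decoder never writes past the output buffer.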
-rw-r--r--   lib/lz4.c   |  14
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/lib/lz4.c b/lib/lz4.c
index 614df2b..993d746 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -297,15 +297,15 @@ void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
do { memcpy(d,s,8); d+=8; s+=8; } while (d<e);
}
-/* customized variant of memcpy, which can overwrite up to 16 bytes beyond dstEnd */
+/* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd */
LZ4_FORCE_O2_INLINE_GCC_PPC64LE
-void LZ4_wildCopy16(void* dstPtr, const void* srcPtr, void* dstEnd)
+void LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
{
BYTE* d = (BYTE*)dstPtr;
const BYTE* s = (const BYTE*)srcPtr;
BYTE* const e = (BYTE*)dstEnd;
- do { memcpy(d,s,16); d+=16; s+=16; } while (d<e);
+ do { memcpy(d,s,16); memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
}
/*-************************************
@@ -317,7 +317,7 @@ void LZ4_wildCopy16(void* dstPtr, const void* srcPtr, void* dstEnd)
#define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */
#define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */
#define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
-#define FASTLOOP_SAFE_DISTANCE 32
+#define FASTLOOP_SAFE_DISTANCE 64
static const int LZ4_minLength = (MFLIMIT+1);
#define KB *(1 <<10)
@@ -1559,7 +1559,7 @@ LZ4_decompress_generic(
{
goto safe_literal_copy;
}
- LZ4_wildCopy16(op, ip, cpy);
+ LZ4_wildCopy32(op, ip, cpy);
ip += length; op = cpy;
} else {
cpy = op+length;
@@ -1597,7 +1597,7 @@ LZ4_decompress_generic(
goto safe_match_copy;
}
- /* Fastpath check: Avoids a branch in LZ4_wildCopy16 if true */
+ /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */
if (!(dict == usingExtDict) || (match >= lowPrefix)) {
if (offset >= 8) {
memcpy(op, match, 8);
@@ -1661,7 +1661,7 @@ LZ4_decompress_generic(
memcpy(op, match, 8);
if (length > 16) LZ4_wildCopy(op+8, match+8, cpy);
} else {
- LZ4_wildCopy16(op, match, cpy);
+ LZ4_wildCopy32(op, match, cpy);
}
op = cpy; /* wildcopy correction */