summaryrefslogtreecommitdiffstats
path: root/lib/lz4.c
diff options
context:
space:
mode:
authorDave Watson <davejwatson@fb.com>2019-01-24 22:17:24 (GMT)
committerDave Watson <davejwatson@fb.com>2019-02-08 21:57:23 (GMT)
commit5dfa7d422ba6c184a7c7694f56bcd36e38e5ed1a (patch)
treee4f93476a19502e37d5f32e07cd5f812869715a6 /lib/lz4.c
parent28356e02ad6f6dac529302cedf707712c5b628fe (diff)
downloadlz4-5dfa7d422ba6c184a7c7694f56bcd36e38e5ed1a.zip
lz4-5dfa7d422ba6c184a7c7694f56bcd36e38e5ed1a.tar.gz
lz4-5dfa7d422ba6c184a7c7694f56bcd36e38e5ed1a.tar.bz2
decompress_generic: optimize match copy
Add LZ4_wildCopy16, a wildcopy variant that copies in 16-byte chunks and may overwrite up to 16 bytes past the end of the destination, and use it for match copy. On x64, this avoids many loads being blocked on store forwarding, similar to issue #411.
Diffstat (limited to 'lib/lz4.c')
-rw-r--r--lib/lz4.c51
1 files changed, 28 insertions, 23 deletions
diff --git a/lib/lz4.c b/lib/lz4.c
index a2b49e9..1e938d0 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -297,6 +297,16 @@ void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
do { memcpy(d,s,8); d+=8; s+=8; } while (d<e);
}
+/* customized variant of memcpy : copies from srcPtr to dstPtr in 16-byte chunks until dstEnd is reached, so it can overwrite up to 16 bytes beyond dstEnd */
+LZ4_FORCE_O2_INLINE_GCC_PPC64LE
+void LZ4_wildCopy16(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+ BYTE* d = (BYTE*)dstPtr;
+ const BYTE* s = (const BYTE*)srcPtr;
+ BYTE* const e = (BYTE*)dstEnd;
+
+ do { memcpy(d,s,16); d+=16; s+=16; } while (d<e);   /* do-while : one 16-byte chunk is always copied, even when dstPtr >= dstEnd */
+}
/*-************************************
* Common Constants
@@ -1627,33 +1637,28 @@ LZ4_decompress_generic(
continue;
}
- if (unlikely(offset<8)) {
- op[0] = match[0];
- op[1] = match[1];
- op[2] = match[2];
- op[3] = match[3];
- match += inc32table[offset];
- memcpy(op+4, match, 4);
- match -= dec64table[offset];
- } else {
- memcpy(op, match, 8);
- match += 8;
- }
- op += 8;
-
- if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
- BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
- if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
- if (op < oCopyLimit) {
- LZ4_wildCopy(op, match, oCopyLimit);
- match += oCopyLimit - op;
- op = oCopyLimit;
+ if (unlikely(offset<16)) {
+ if (offset < 8) {
+ op[0] = match[0];
+ op[1] = match[1];
+ op[2] = match[2];
+ op[3] = match[3];
+ match += inc32table[offset];
+ memcpy(op+4, match, 4);
+ match -= dec64table[offset];
+ op += 8;
+ } else {
+ memcpy(op, match, 8);
+ op += 8;
+ match += 8;
}
- while (op < cpy) *op++ = *match++;
- } else {
+
memcpy(op, match, 8);
if (length > 16) LZ4_wildCopy(op+8, match+8, cpy);
+ } else {
+ LZ4_wildCopy16(op, match, cpy);
}
+
op = cpy; /* wildcopy correction */
}