diff options
author | Dave Watson <davejwatson@fb.com> | 2019-01-24 22:17:24 (GMT) |
---|---|---|
committer | Dave Watson <davejwatson@fb.com> | 2019-02-08 21:57:23 (GMT) |
commit | 5dfa7d422ba6c184a7c7694f56bcd36e38e5ed1a (patch) | |
tree | e4f93476a19502e37d5f32e07cd5f812869715a6 /lib | |
parent | 28356e02ad6f6dac529302cedf707712c5b628fe (diff) | |
download | lz4-5dfa7d422ba6c184a7c7694f56bcd36e38e5ed1a.zip lz4-5dfa7d422ba6c184a7c7694f56bcd36e38e5ed1a.tar.gz lz4-5dfa7d422ba6c184a7c7694f56bcd36e38e5ed1a.tar.bz2 |
decompress_generic: optimize match copy
Add LZ4_wildCopy16, a wildcopy variant that copies 16 bytes at a time and
may overwrite up to 16 bytes beyond the end of the destination, and use it
for match copy. On x64, this avoids many loads being blocked due to store
forwarding, similar to issue #411.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/lz4.c | 51 |
1 files changed, 28 insertions, 23 deletions
@@ -297,6 +297,16 @@ void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
     do { memcpy(d,s,8); d+=8; s+=8; } while (d<e);
 }
+/* customized variant of memcpy, which can overwrite up to 16 bytes beyond dstEnd */
+LZ4_FORCE_O2_INLINE_GCC_PPC64LE
+void LZ4_wildCopy16(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+    BYTE* d = (BYTE*)dstPtr;
+    const BYTE* s = (const BYTE*)srcPtr;
+    BYTE* const e = (BYTE*)dstEnd;
+
+    do { memcpy(d,s,16); d+=16; s+=16; } while (d<e);
+}
 /*-************************************
 * Common Constants
@@ -1627,33 +1637,28 @@ LZ4_decompress_generic(
             continue;
         }
-        if (unlikely(offset<8)) {
-            op[0] = match[0];
-            op[1] = match[1];
-            op[2] = match[2];
-            op[3] = match[3];
-            match += inc32table[offset];
-            memcpy(op+4, match, 4);
-            match -= dec64table[offset];
-        } else {
-            memcpy(op, match, 8);
-            match += 8;
-        }
-        op += 8;
-
-        if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
-            BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
-            if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
-            if (op < oCopyLimit) {
-                LZ4_wildCopy(op, match, oCopyLimit);
-                match += oCopyLimit - op;
-                op = oCopyLimit;
+        if (unlikely(offset<16)) {
+            if (offset < 8) {
+                op[0] = match[0];
+                op[1] = match[1];
+                op[2] = match[2];
+                op[3] = match[3];
+                match += inc32table[offset];
+                memcpy(op+4, match, 4);
+                match -= dec64table[offset];
+                op += 8;
+            } else {
+                memcpy(op, match, 8);
+                op += 8;
+                match += 8;
             }
-            while (op < cpy) *op++ = *match++;
-        } else {
+            memcpy(op, match, 8);
             if (length > 16) LZ4_wildCopy(op+8, match+8, cpy);
+        } else {
+            LZ4_wildCopy16(op, match, cpy);
         }
+
         op = cpy; /* wildcopy correction */
     }