From d51f0466289d9a021291e736b463cf8de7bd60bd Mon Sep 17 00:00:00 2001
From: Yann Collet
Date: Mon, 6 Nov 2017 15:42:50 -0800
Subject: 2-stages LZ4_count

separate first branch from the rest of the compare loop
to get dedicated prediction.

measured a 3-4% compression speed improvement.
---
 lib/lz4.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/lib/lz4.c b/lib/lz4.c
index 64a2e82..ff6496c 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -407,7 +407,15 @@ static unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLi
 {
     const BYTE* const pStart = pIn;
 
-    while (likely(pIn<pInLimit-(STEPSIZE-1))) {
+    if (likely(pIn < pInLimit-(STEPSIZE-1))) {
+        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+        if (!diff) {
+            pIn+=STEPSIZE; pMatch+=STEPSIZE;
+        } else {
+            return LZ4_NbCommonBytes(diff);
+    }   }
+
+    while (likely(pIn < pInLimit-(STEPSIZE-1))) {
         reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
         if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
         pIn += LZ4_NbCommonBytes(diff);
--
cgit v0.12

From: Yann Collet
Date: Mon, 6 Nov 2017 17:29:27 -0800
Subject: added LZ4_FORCEINLINE to counter gcc regression

as recommended by @terrelln
---
 lib/lz4.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/lz4.c b/lib/lz4.c
index ff6496c..6157285 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -403,7 +403,8 @@ static unsigned LZ4_NbCommonBytes (register reg_t val)
 }
 
 #define STEPSIZE sizeof(reg_t)
-static unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+LZ4_FORCE_INLINE
+unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
 {
     const BYTE* const pStart = pIn;
 
--
cgit v0.12
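
Note on the first patch: the old code entered one generic compare loop, so a single
branch had to predict both "the first word of the match usually agrees" and "the loop
eventually exits". Peeling the first comparison into its own if gives each case its
own branch-predictor entry, which is where the measured 3-4% comes from. Below is a
minimal standalone sketch of the same 2-stage pattern, not the lz4 code itself:
count_common(), read8() and STEP are hypothetical stand-ins for LZ4_count(),
LZ4_read_ARCH() and STEPSIZE, and it assumes GCC/Clang builtins on a little-endian
64-bit target, where __builtin_ctzll(diff)/8 gives the number of bytes that still match.

/* 2-stage match-length counter, sketch only (see assumptions above). */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define STEP sizeof(uint64_t)

static uint64_t read8(const uint8_t* p)
{
    uint64_t v;
    memcpy(&v, p, sizeof v);          /* unaligned-safe 8-byte read */
    return v;
}

static unsigned count_common(const uint8_t* in, const uint8_t* match,
                             const uint8_t* limit)
{
    const uint8_t* const start = in;

    /* Stage 1: the first comparison gets a dedicated branch, trained
     * separately from the loop-exit branch of the main loop below. */
    if (in < limit - (STEP - 1)) {
        uint64_t const diff = read8(match) ^ read8(in);
        if (!diff) { in += STEP; match += STEP; }
        else return (unsigned)(__builtin_ctzll(diff) >> 3);
    }

    /* Stage 2: the regular word-at-a-time compare loop. */
    while (in < limit - (STEP - 1)) {
        uint64_t const diff = read8(match) ^ read8(in);
        if (!diff) { in += STEP; match += STEP; continue; }
        in += __builtin_ctzll(diff) >> 3;
        return (unsigned)(in - start);
    }

    /* Tail: finish byte by byte near the limit. */
    while (in < limit && *match == *in) { in++; match++; }
    return (unsigned)(in - start);
}

int main(void)
{
    const uint8_t a[] = "abcdefghijklmnop";
    const uint8_t b[] = "abcdefghXjklmnop";
    printf("%u\n", count_common(a, b, a + sizeof a - 1));  /* prints 8 */
    return 0;
}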
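
Note on the second patch: its commit message cites a gcc regression, presumably some
GCC versions declining to inline the now-larger LZ4_count() and giving back part of
the stage-1 win; LZ4_FORCE_INLINE overrides that heuristic. The macro's real
definition lives elsewhere in the lz4 sources and is not shown in this patch. As a
rough sketch of how such a macro is commonly written (MY_FORCE_INLINE and add_one
are hypothetical names, not the verbatim lz4 definition):

#include <stdio.h>

#if defined(_MSC_VER)
#  define MY_FORCE_INLINE static __forceinline
#elif defined(__GNUC__) || defined(__clang__)
#  define MY_FORCE_INLINE static inline __attribute__((always_inline))
#else
#  define MY_FORCE_INLINE static inline   /* best effort elsewhere */
#endif

/* always_inline bypasses the compiler's size heuristics, the usual
 * tool when a hot helper grows past the inlining threshold. */
MY_FORCE_INLINE unsigned add_one(unsigned x) { return x + 1; }

int main(void) { printf("%u\n", add_one(41)); return 0; }

The trade-off is the standard one: forced inlining buys back call overhead and keeps
the hot loop's branches local, at the cost of some code-size growth at every call site.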