diff options
-rw-r--r-- | Makefile | 6 | ||||
-rw-r--r-- | lib/lz4.c | 62 |
2 files changed, 53 insertions, 15 deletions
@@ -57,12 +57,12 @@ all: allmost manuals .PHONY: allmost allmost: lib lz4 examples -.PHONY: lib lib-release -lib lib-release: +.PHONY: lib lib-release liblz4.a +lib lib-release liblz4.a: @$(MAKE) -C $(LZ4DIR) $@ .PHONY: lz4 lz4-release -lz4 : lib +lz4 : liblz4.a lz4-release : lib-release lz4 lz4-release : @$(MAKE) -C $(PRGDIR) $@ @@ -85,6 +85,17 @@ #endif +/* + * register is ignored when the code is built with a C++-17 compiler + * Remove the keyword when built with C++-17 to silence the warning + */ +#if defined(__cplusplus) && __cplusplus > 201402L +# define REGISTER +#else +# define REGISTER register +#endif + + /*-************************************ * Dependency **************************************/ @@ -339,7 +350,7 @@ static int g_debuglog_enable = 1; /*-************************************ * Common functions **************************************/ -static unsigned LZ4_NbCommonBytes (register reg_t val) +static unsigned LZ4_NbCommonBytes (REGISTER reg_t val) { if (LZ4_isLittleEndian()) { if (sizeof(val)==8) { @@ -401,11 +412,20 @@ static unsigned LZ4_NbCommonBytes (register reg_t val) } #define STEPSIZE sizeof(reg_t) -static unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) +LZ4_FORCE_INLINE +unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) { const BYTE* const pStart = pIn; - while (likely(pIn<pInLimit-(STEPSIZE-1))) { + if (likely(pIn < pInLimit-(STEPSIZE-1))) { + reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); + if (!diff) { + pIn+=STEPSIZE; pMatch+=STEPSIZE; + } else { + return LZ4_NbCommonBytes(diff); + } } + + while (likely(pIn < pInLimit-(STEPSIZE-1))) { reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; } pIn += LZ4_NbCommonBytes(diff); @@ -1156,7 +1176,7 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( int partialDecoding, /* full, partial */ int targetOutputSize, /* only used if partialDecoding==partial */ int dict, 
/* noDict, withPrefix64k, usingExtDict */ - const BYTE* const lowPrefix, /* == dst when no prefix */ + const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */ const BYTE* const dictStart, /* only if dict==usingExtDict */ const size_t dictSize /* note : = 0 if noDict */ ) @@ -1170,15 +1190,15 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( BYTE* oexit = op + targetOutputSize; const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize; - const unsigned dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; - const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; + const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4}; + const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3}; const int safeDecode = (endOnInput==endOnInputSize); const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB))); /* Special cases */ - if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => decode everything */ + if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => just decode everything */ if ((endOnInput) && (unlikely(outputSize==0))) return ((srcSize==1) && (*ip==0)) ? 0 : -1; /* Empty output buffer */ if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1); @@ -1188,8 +1208,27 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( const BYTE* match; size_t offset; - /* get literal length */ unsigned const token = *ip++; + + /* shortcut for common case : + * in most circumstances, we expect to decode small matches (<= 18 bytes) separated by few literals (<= 14 bytes). + * this shortcut was tested on x86 and x64, where it improves decoding speed. + * it has not yet been benchmarked on ARM, Power, mips, etc. 
*/ + if (((ip + 14 /*maxLL*/ + 2 /*offset*/ <= iend) + & (op + 14 /*maxLL*/ + 18 /*maxML*/ <= oend)) + & ((token < (15<<ML_BITS)) & ((token & ML_MASK) != 15)) ) { + size_t const ll = token >> ML_BITS; + size_t const off = LZ4_readLE16(ip+ll); + const BYTE* const matchPtr = op + ll - off; /* pointer underflow risk ? */ + if ((off >= 18) /* do not deal with overlapping matches */ & (matchPtr >= lowPrefix)) { + size_t const ml = (token & ML_MASK) + MINMATCH; + memcpy(op, ip, 16); op += ll; ip += ll + 2 /*offset*/; + memcpy(op, matchPtr, 18); op += ml; + continue; + } + } + + /* decode literal length */ if ((length=(token>>ML_BITS)) == RUN_MASK) { unsigned s; do { @@ -1267,14 +1306,13 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( /* copy match within block */ cpy = op + length; if (unlikely(offset<8)) { - const int dec64 = dec64table[offset]; op[0] = match[0]; op[1] = match[1]; op[2] = match[2]; op[3] = match[3]; - match += dec32table[offset]; + match += inc32table[offset]; memcpy(op+4, match, 4); - match -= dec64; + match -= dec64table[offset]; } else { LZ4_copy8(op, match); match+=8; } op += 8; @@ -1291,7 +1329,7 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( LZ4_copy8(op, match); if (length>16) LZ4_wildCopy(op+8, match+8, cpy); } - op=cpy; /* correction */ + op = cpy; /* correction */ } /* end of decoding */ |