diff options
author | Yann Collet <Cyan4973@users.noreply.github.com> | 2018-05-03 18:35:50 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-05-03 18:35:50 (GMT) |
commit | 95607a749b8bbe6f9323408ddd740ef4ff248794 (patch) | |
tree | 18b0d16568a2f6b2e046db16362c215d2346ab9a /lib | |
parent | 85be6b8f6d5a91a8834e913b5f547ded49f7f714 (diff) | |
parent | 2e2c9f6ff353e9f1a4d23274eb4e5b7a5f7d654d (diff) | |
download | lz4-95607a749b8bbe6f9323408ddd740ef4ff248794.zip lz4-95607a749b8bbe6f9323408ddd740ef4ff248794.tar.gz lz4-95607a749b8bbe6f9323408ddd740ef4ff248794.tar.bz2 |
Merge pull request #528 from lz4/complexShortcut
Faster decoding speed
Diffstat (limited to 'lib')
-rw-r--r-- | lib/lz4.c | 101 | ||||
-rw-r--r-- | lib/lz4.h | 56 |
2 files changed, 108 insertions, 49 deletions
@@ -1398,6 +1398,9 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( const int safeDecode = (endOnInput==endOnInputSize); const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB))); + /* Set up the "end" pointers for the shortcut. */ + const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/; + const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/; /* Special cases */ if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => just decode everything */ @@ -1407,39 +1410,56 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( /* Main Loop : decode sequences */ while (1) { - size_t length; const BYTE* match; size_t offset; unsigned const token = *ip++; + size_t length = token >> ML_BITS; /* literal length */ assert(!endOnInput || ip <= iend); /* ip < iend before the increment */ - /* shortcut for common case : - * in most circumstances, we expect to decode small matches (<= 18 bytes) separated by few literals (<= 14 bytes). - * this shortcut was tested on x86 and x64, where it improves decoding speed. - * it has not yet been benchmarked on ARM, Power, mips, etc. - * NOTE: The loop begins with a read, so we must have one byte left at the end. */ - if (endOnInput - && ((ip + 14 /*maxLL*/ + 2 /*offset*/ < iend) - & (op + 14 /*maxLL*/ + 18 /*maxML*/ <= oend) - & (token < (15<<ML_BITS)) - & ((token & ML_MASK) != 15) ) ) { - size_t const ll = token >> ML_BITS; - size_t const off = LZ4_readLE16(ip+ll); - const BYTE* const matchPtr = op + ll - off; /* pointer underflow risk ? */ - if ((off >= 8) /* do not deal with overlapping matches */ & (matchPtr >= lowPrefix)) { - size_t const ml = (token & ML_MASK) + MINMATCH; - memcpy(op, ip, 16); op += ll; ip += ll + 2 /*offset*/; - memcpy(op + 0, matchPtr + 0, 8); - memcpy(op + 8, matchPtr + 8, 8); - memcpy(op +16, matchPtr +16, 2); - op += ml; + + /* A two-stage shortcut for the most common case: + * 1) If the literal length is 0..14, and there is enough space, + * enter the shortcut and copy 16 bytes on behalf of the literals + * (in the fast mode, only 8 bytes can be safely copied this way). + * 2) Further if the match length is 4..18, copy 18 bytes in a similar + * manner; but we ensure that there's enough space in the output for + * those 18 bytes earlier, upon entering the shortcut (in other words, + * there is a combined check for both stages). + */ + if ( (endOnInput ? length != RUN_MASK : length <= 8) + /* strictly "less than" on input, to re-enter the loop with at least one byte */ + && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) { + /* Copy the literals */ + memcpy(op, ip, endOnInput ? 16 : 8); + op += length; ip += length; + + /* The second stage: prepare for match copying, decode full info. + * If it doesn't work out, the info won't be wasted. */ + length = token & ML_MASK; /* match length */ + offset = LZ4_readLE16(ip); ip += 2; + match = op - offset; + + /* Do not deal with overlapping matches. */ + if ( (length != ML_MASK) + && (offset >= 8) + && (dict==withPrefix64k || match >= lowPrefix) ) { + /* Copy the match. */ + memcpy(op + 0, match + 0, 8); + memcpy(op + 8, match + 8, 8); + memcpy(op +16, match +16, 2); + op += length + MINMATCH; + /* Both stages worked, load the next token. */ continue; } + + /* The second stage didn't work out, but the info is ready. + * Propel it right to the point of match copying. */ + goto _copy_match; } /* decode literal length */ - if ((length=(token>>ML_BITS)) == RUN_MASK) { + if (length == RUN_MASK) { unsigned s; if (unlikely(endOnInput ? ip >= iend-RUN_MASK : 0)) goto _output_error; /* overflow detection */ do { @@ -1473,11 +1493,14 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( /* get offset */ offset = LZ4_readLE16(ip); ip+=2; match = op - offset; - if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */ - LZ4_write32(op, (U32)offset); /* costs ~1%; silence an msan warning when offset==0 */ /* get matchlength */ length = token & ML_MASK; + +_copy_match: + if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */ + LZ4_write32(op, (U32)offset); /* costs ~1%; silence an msan warning when offset==0 */ + if (length == ML_MASK) { unsigned s; do { @@ -1664,12 +1687,11 @@ int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream) return 0; } -/*! - * LZ4_setStreamDecode() : - * Use this function to instruct where to find the dictionary. - * This function is not necessary if previous data is still available where it was decoded. - * Loading a size of 0 is allowed (same effect as no dictionary). - * Return : 1 if OK, 0 if error +/*! LZ4_setStreamDecode() : + * Use this function to instruct where to find the dictionary. + * This function is not necessary if previous data is still available where it was decoded. + * Loading a size of 0 is allowed (same effect as no dictionary). + * @return : 1 if OK, 0 if error */ int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize) { @@ -1681,6 +1703,25 @@ int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dicti return 1; } +/*! LZ4_decoderRingBufferSize() : + * when setting a ring buffer for streaming decompression (optional scenario), + * provides the minimum size of this ring buffer + * to be compatible with any source respecting maxBlockSize condition. + * Note : in a ring buffer scenario, + * blocks are presumed decompressed next to each other. + * When not enough space remains for next block (remainingSize < maxBlockSize), + * decoding resumes from beginning of ring buffer. + * @return : minimum ring buffer size, + * or 0 if there is an error (invalid maxBlockSize). + */ +int LZ4_decoderRingBufferSize(int maxBlockSize) +{ + if (maxBlockSize < 0) return 0; + if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0; + if (maxBlockSize < 16) maxBlockSize = 16; + return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize); +} + /* *_continue() : These decoding functions allow decompression of multiple blocks in "streaming" mode. @@ -293,45 +293,62 @@ LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxD * Streaming Decompression Functions * Bufferless synchronous API ************************************************/ -typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* incomplete type (defined later) */ +typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* tracking context */ /*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() : - * creation / destruction of streaming decompression tracking structure. - * A tracking structure can be re-used multiple times sequentially. */ + * creation / destruction of streaming decompression tracking context. + * A tracking context can be re-used multiple times. + */ LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void); LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream); /*! LZ4_setStreamDecode() : - * An LZ4_streamDecode_t structure can be allocated once and re-used multiple times. + * An LZ4_streamDecode_t context can be allocated once and re-used multiple times. * Use this function to start decompression of a new stream of blocks. * A dictionary can optionnally be set. Use NULL or size 0 for a reset order. + * Dictionary is presumed stable : it must remain accessible and unmodified during next decompression. * @return : 1 if OK, 0 if error */ LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize); +/*! LZ4_decoderRingBufferSize() : v1.8.2 + * Note : in a ring buffer scenario (optional), + * blocks are presumed decompressed next to each other + * up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize), + * at which stage it resumes from beginning of ring buffer. + * When setting such a ring buffer for streaming decompression, + * provides the minimum size of this ring buffer + * to be compatible with any source respecting maxBlockSize condition. + * @return : minimum ring buffer size, + * or 0 if there is an error (invalid maxBlockSize). + */ +LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize); +#define LZ4_DECODER_RING_BUFFER_SIZE(mbs) (65536 + 14 + (mbs)) /* for static allocation; mbs presumed valid */ + /*! LZ4_decompress_*_continue() : * These decoding functions allow decompression of consecutive blocks in "streaming" mode. * A block is an unsplittable entity, it must be presented entirely to a decompression function. - * Decompression functions only accept one block at a time. + * Decompression functions only accepts one block at a time. * The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded. - * If less than 64KB of data has been decoded all the data must be present. + * If less than 64KB of data has been decoded, all the data must be present. * - * Special : if application sets a ring buffer for decompression, it must respect one of the following conditions : + * Special : if decompression side sets a ring buffer, it must respect one of the following conditions : + * - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize). + * maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes. + * In which case, encoding and decoding buffers do not need to be synchronized. + * Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize. + * - Synchronized mode : + * Decompression buffer size is _exactly_ the same as compression buffer size, + * and follows exactly same update rule (block boundaries at same positions), + * and decoding function is provided with exact decompressed size of each block (exception for last block of the stream), + * _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB). * - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes. - * maxBlockSize is the maximum size of any single block. It is implementation dependent, and can have any value (presumed > 16 bytes). * In which case, encoding and decoding buffers do not need to be synchronized, * and encoding ring buffer can have any size, including small ones ( < 64 KB). - * - Decompression buffer size is _at least_ 64 KB + 8 bytes + maxBlockSize. - * In which case, encoding and decoding buffers do not need to be synchronized, - * and encoding ring buffer can have any size, including larger than decoding buffer. - * - Decompression buffer size is exactly the same as compression buffer size, - * and follows exactly same update rule (block boundaries at same positions). - * If the decoding function is provided with the exact decompressed size of each block, - * then decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB). - * If the decoding function only knows the compressed size, - * then buffer size must be a minimum of 64 KB + 8 bytes + maxBlockSize. - * Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer, - * and indicate where it is saved using LZ4_setStreamDecode() before decompressing next block. + * + * Whenever these conditions are not possible, + * save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression, + * then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block. */ LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity); LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize); @@ -341,6 +358,7 @@ LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecod * These decoding functions work the same as * a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue() * They are stand-alone, and don't need an LZ4_streamDecode_t structure. + * Dictionary is presumed stable : it must remain accessible and unmodified during next decompression. */ LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapcity, const char* dictStart, int dictSize); LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize); |