From c5d6f8a8be3927c0bec91bcc58667a6cfad244ad Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 27 Aug 2020 00:17:57 -0700 Subject: fix #783 LZ4_decompress_safe_partial() now also supports a scenario where nb_bytes_to_generate is <= block_decompressed_size And nb_bytes_to_read is >= block_compressed_size. Previously, the only supported scenario was nb_bytes_to_read == block_compress_size. Pay attention that, if nb_bytes_to_read is > block_compressed_size, then, necessarily, it requires that nb_bytes_to_generate is <= block_decompress_size. If both are larger, it will generate corrupted data. --- lib/lz4.c | 39 +++++++++++++++++++++++---------------- lib/lz4.h | 34 ++++++++++++++++++++++------------ tests/fuzzer.c | 8 ++++---- 3 files changed, 49 insertions(+), 32 deletions(-) diff --git a/lib/lz4.c b/lib/lz4.c index 06d24da..0628eac 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -1813,7 +1813,8 @@ LZ4_decompress_generic( if ((dict==usingExtDict) && (match < lowPrefix)) { if (unlikely(op+length > oend-LASTLITERALS)) { if (partialDecoding) { - length = MIN(length, (size_t)(oend-op)); /* reach end of buffer */ + DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd"); + length = MIN(length, (size_t)(oend-op)); } else { goto _output_error; /* end-of-block condition violated */ } } @@ -1921,29 +1922,34 @@ LZ4_decompress_generic( || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) ) { /* We've either hit the input parsing restriction or the output parsing restriction. - * If we've hit the input parsing condition then this must be the last sequence. - * If we've hit the output parsing condition then we are either using partialDecoding - * or we've hit the output parsing condition. + * In the normal scenario, decoding a full block, it must be the last sequence, + * otherwise it's an error (invalid input or dimensions). + * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow. */ if (partialDecoding) { /* Since we are partial decoding we may be in this block because of the output parsing * restriction, which is not valid since the output buffer is allowed to be undersized. */ assert(endOnInput); - /* If we're in this block because of the input parsing condition, then we must be on the - * last sequence (or invalid), so we must check that we exactly consume the input. + DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end") + DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length); + DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op)); + DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip)); + /* Finishing in the middle of a literals segment, + * due to lack of input. */ - if ((ip+length>iend-(2+1+LASTLITERALS)) && (ip+length != iend) && (cpy != oend)) { goto _output_error; } - assert(ip+length <= iend); - /* We are finishing in the middle of a literals segment. - * Break after the copy. + if (ip+length > iend) { + length = (size_t)(iend-ip); + cpy = op + length; + } + /* Finishing in the middle of a literals segment, + * due to lack of output space. */ if (cpy > oend) { cpy = oend; assert(op<=oend); length = (size_t)(oend-op); } - assert(ip+length <= iend); } else { /* We must be on the last sequence because of the parsing limitations so check * that we exactly regenerate the original size (must be exact when !endOnInput). @@ -1954,14 +1960,15 @@ LZ4_decompress_generic( */ if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { goto _output_error; } } - memmove(op, ip, length); /* supports overlapping memory regions, which only matters for in-place decompression scenarios */ + memmove(op, ip, length); /* supports overlapping memory regions; only matters for in-place decompression scenarios */ ip += length; op += length; - /* Necessarily EOF when !partialDecoding. When partialDecoding - * it is EOF if we've either filled the output buffer or hit - * the input parsing restriction. + /* Necessarily EOF when !partialDecoding. + * When partialDecoding, it is EOF if we've either + * filled the output buffer or + * can't proceed with reading an offset for following match. */ - if (!partialDecoding || (cpy == oend) || (ip == iend)) { + if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) { break; } } else { diff --git a/lib/lz4.h b/lib/lz4.h index 5209c10..5d2475c 100644 --- a/lib/lz4.h +++ b/lib/lz4.h @@ -221,25 +221,35 @@ LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePt * Decompress an LZ4 compressed block, of size 'srcSize' at position 'src', * into destination buffer 'dst' of size 'dstCapacity'. * Up to 'targetOutputSize' bytes will be decoded. - * The function stops decoding on reaching this objective, - * which can boost performance when only the beginning of a block is required. + * The function stops decoding on reaching this objective. + * This can be useful to boost performance + * whenever only the beginning of a block is required. * - * @return : the number of bytes decoded in `dst` (necessarily <= dstCapacity) + * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize) * If source stream is detected malformed, function returns a negative result. * - * Note : @return can be < targetOutputSize, if compressed block contains less data. + * Note 1 : @return can be < targetOutputSize, if compressed block contains less data. * - * Note 2 : this function features 2 parameters, targetOutputSize and dstCapacity, - * and expects targetOutputSize <= dstCapacity. - * It effectively stops decoding on reaching targetOutputSize, + * Note 2 : targetOutputSize must be <= dstCapacity + * + * Note 3 : this function effectively stops decoding on reaching targetOutputSize, * so dstCapacity is kind of redundant. - * This is because in a previous version of this function, - * decoding operation would not "break" a sequence in the middle. - * As a consequence, there was no guarantee that decoding would stop at exactly targetOutputSize, + * This is because in older versions of this function, + * decoding operation would still write complete sequences. + * Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize, * it could write more bytes, though only up to dstCapacity. * Some "margin" used to be required for this operation to work properly. - * This is no longer necessary. - * The function nonetheless keeps its signature, in an effort to not break API. + * Thankfully, this is no longer necessary. + * The function nonetheless keeps the same signature, in an effort to preserve API compatibility. + * + * Note 4 : If srcSize is the exact size of the block, + * then targetOutputSize can be any value, + * including larger than the block's decompressed size. + * The function will, at most, generate block's decompressed size. + * + * Note 5 : If srcSize is _larger_ than block's compressed size, + * then targetOutputSize **MUST** be <= block's decompressed size. + * Otherwise, *silent corruption will occur*. */ LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity); diff --git a/tests/fuzzer.c b/tests/fuzzer.c index cbb53ca..beeb9d6 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -618,13 +618,13 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c /* Test partial decoding => must work */ FUZ_DISPLAYTEST("test LZ4_decompress_safe_partial"); - { size_t const missingBytes = FUZ_rand(&randState) % (unsigned)blockSize; - int const targetSize = (int)((size_t)blockSize - missingBytes); + { size_t const missingOutBytes = FUZ_rand(&randState) % (unsigned)blockSize; + int const targetSize = (int)((size_t)blockSize - missingOutBytes); size_t const extraneousInBytes = FUZ_rand(&randState) % 2; int const inCSize = (int)((size_t)compressedSize + extraneousInBytes); char const sentinel = decodedBuffer[targetSize] = block[targetSize] ^ 0x5A; - //DISPLAY("compressedSize=%i, inCSize=%i \n", compressedSize, inCSize); - //DISPLAY("decompressedSize=%i, targetDstSize=%i \n", blockSize, targetSize); + DISPLAYLEVEL(6,"compressedSize=%i, inCSize=%i \n", compressedSize, inCSize); + DISPLAYLEVEL(6,"decompressedSize=%i, targetDstSize=%i \n", blockSize, targetSize); int const decResult = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, inCSize, targetSize, blockSize); FUZ_CHECKTEST(decResult<0, "LZ4_decompress_safe_partial failed despite valid input data (error:%i)", decResult); FUZ_CHECKTEST(decResult != targetSize, "LZ4_decompress_safe_partial did not regenerated required amount of data (%i < %i <= %i)", decResult, targetSize, blockSize); -- cgit v0.12