fix #783

LZ4_decompress_safe_partial() now also supports the scenario where nb_bytes_to_generate is <= block_decompressed_size and nb_bytes_to_read is >= block_compressed_size. Previously, the only supported scenario was nb_bytes_to_read == block_compressed_size. Note that if nb_bytes_to_read is > block_compressed_size, then nb_bytes_to_generate must be <= block_decompressed_size. If both values are larger than the block's actual sizes, the function will silently generate corrupted data.
author: Yann Collet <cyan@fb.com> 2020-08-27 07:17:57 (GMT)
committer: Yann Collet <cyan@fb.com> 2020-08-27 07:17:57 (GMT)
commit: c5d6f8a8be3927c0bec91bcc58667a6cfad244ad (patch)
tree: 7ace9c0c38600b241a8775381941b9b634bd3f9d /lib
parent: 3e3a006c6f3337be4298e2c01ba3cb2c0ba04d45 (diff)
download: lz4-c5d6f8a8be3927c0bec91bcc58667a6cfad244ad.zip
lz4-c5d6f8a8be3927c0bec91bcc58667a6cfad244ad.tar.gz
lz4-c5d6f8a8be3927c0bec91bcc58667a6cfad244ad.tar.bz2
2 files changed, 45 insertions, 28 deletions
diff --git a/lib/lz4.c b/lib/lz4.c
index 06d24da..0628eac 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -1813,7 +1813,8 @@ LZ4_decompress_generic(
             if ((dict==usingExtDict) && (match < lowPrefix)) {
                 if (unlikely(op+length > oend-LASTLITERALS)) {
                     if (partialDecoding) {
-                        length = MIN(length, (size_t)(oend-op));  /* reach end of buffer */
+                        DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
+                        length = MIN(length, (size_t)(oend-op));
                     } else {
                         goto _output_error;  /* end-of-block condition violated */
                 }   }
@@ -1921,29 +1922,34 @@ LZ4_decompress_generic(
               || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
             {
                 /* We've either hit the input parsing restriction or the output parsing restriction.
-                 * If we've hit the input parsing condition then this must be the last sequence.
-                 * If we've hit the output parsing condition then we are either using partialDecoding
-                 * or we've hit the output parsing condition.
+                 * In the normal scenario, decoding a full block, it must be the last sequence,
+                 * otherwise it's an error (invalid input or dimensions).
+                 * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
                  */
                 if (partialDecoding) {
                     /* Since we are partial decoding we may be in this block because of the output parsing
                      * restriction, which is not valid since the output buffer is allowed to be undersized.
                      */
                     assert(endOnInput);
-                    /* If we're in this block because of the input parsing condition, then we must be on the
-                     * last sequence (or invalid), so we must check that we exactly consume the input.
+                    DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
+                    DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
+                    DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
+                    DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
+                    /* Finishing in the middle of a literals segment,
+                     * due to lack of input.
                      */
-                    if ((ip+length>iend-(2+1+LASTLITERALS)) && (ip+length != iend) && (cpy != oend)) { goto _output_error; }
-                    assert(ip+length <= iend);
-                    /* We are finishing in the middle of a literals segment.
-                     * Break after the copy.
+                    if (ip+length > iend) {
+                        length = (size_t)(iend-ip);
+                        cpy = op + length;
+                    }
+                    /* Finishing in the middle of a literals segment,
+                     * due to lack of output space.
                      */
                     if (cpy > oend) {
                         cpy = oend;
                         assert(op<=oend);
                         length = (size_t)(oend-op);
                     }
-                    assert(ip+length <= iend);
                 } else {
                     /* We must be on the last sequence because of the parsing limitations so check
                      * that we exactly regenerate the original size (must be exact when !endOnInput).
@@ -1954,14 +1960,15 @@ LZ4_decompress_generic(
                       */
                     if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { goto _output_error; }
                 }
-                memmove(op, ip, length);  /* supports overlapping memory regions, which only matters for in-place decompression scenarios */
+                memmove(op, ip, length);  /* supports overlapping memory regions; only matters for in-place decompression scenarios */
                 ip += length;
                 op += length;
-                /* Necessarily EOF when !partialDecoding. When partialDecoding
-                 * it is EOF if we've either filled the output buffer or hit
-                 * the input parsing restriction.
+                /* Necessarily EOF when !partialDecoding.
+                 * When partialDecoding, it is EOF if we've either
+                 * filled the output buffer or
+                 * can't proceed with reading an offset for following match.
                  */
-                if (!partialDecoding || (cpy == oend) || (ip == iend)) {
+                if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
                     break;
                 }
             } else {
diff --git a/lib/lz4.h b/lib/lz4.h
index 5209c10..5d2475c 100644
--- a/lib/lz4.h
+++ b/lib/lz4.h
@@ -221,25 +221,35 @@ LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePt
  *  Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
  *  into destination buffer 'dst' of size 'dstCapacity'.
  *  Up to 'targetOutputSize' bytes will be decoded.
- *  The function stops decoding on reaching this objective,
- *  which can boost performance when only the beginning of a block is required.
+ *  The function stops decoding on reaching this objective.
+ *  This can be useful to boost performance
+ *  whenever only the beginning of a block is required.
  *
- * @return : the number of bytes decoded in `dst` (necessarily <= dstCapacity)
+ * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
  *           If source stream is detected malformed, function returns a negative result.
  *
- *  Note : @return can be < targetOutputSize, if compressed block contains less data.
+ *  Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
  *
- *  Note 2 : this function features 2 parameters, targetOutputSize and dstCapacity,
- *           and expects targetOutputSize <= dstCapacity.
- *           It effectively stops decoding on reaching targetOutputSize,
+ *  Note 2 : targetOutputSize must be <= dstCapacity
+ *
+ *  Note 3 : this function effectively stops decoding on reaching targetOutputSize,
  *           so dstCapacity is kind of redundant.
- *           This is because in a previous version of this function,
- *           decoding operation would not "break" a sequence in the middle.
- *           As a consequence, there was no guarantee that decoding would stop at exactly targetOutputSize,
+ *           This is because in older versions of this function,
+ *           decoding operation would still write complete sequences.
+ *           Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize,
  *           it could write more bytes, though only up to dstCapacity.
  *           Some "margin" used to be required for this operation to work properly.
- *           This is no longer necessary.
- *           The function nonetheless keeps its signature, in an effort to not break API.
+ *           Thankfully, this is no longer necessary.
+ *           The function nonetheless keeps the same signature, in an effort to preserve API compatibility.
+ *
+ *  Note 4 : If srcSize is the exact size of the block,
+ *           then targetOutputSize can be any value,
+ *           including larger than the block's decompressed size.
+ *           The function will, at most, generate block's decompressed size.
+ *
+ *  Note 5 : If srcSize is _larger_ than block's compressed size,
+ *           then targetOutputSize **MUST** be <= block's decompressed size.
+ *           Otherwise, *silent corruption will occur*.
  */
 LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
author	Yann Collet <cyan@fb.com>	2020-08-27 07:17:57 (GMT)
committer	Yann Collet <cyan@fb.com>	2020-08-27 07:17:57 (GMT)
commit	c5d6f8a8be3927c0bec91bcc58667a6cfad244ad (patch)
tree	7ace9c0c38600b241a8775381941b9b634bd3f9d /lib
parent	3e3a006c6f3337be4298e2c01ba3cb2c0ba04d45 (diff)
download	lz4-c5d6f8a8be3927c0bec91bcc58667a6cfad244ad.zip lz4-c5d6f8a8be3927c0bec91bcc58667a6cfad244ad.tar.gz lz4-c5d6f8a8be3927c0bec91bcc58667a6cfad244ad.tar.bz2