summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
author: Yann Collet <cyan@fb.com> 2020-08-27 07:17:57 (GMT)
committer: Yann Collet <cyan@fb.com> 2020-08-27 07:17:57 (GMT)
commit: c5d6f8a8be3927c0bec91bcc58667a6cfad244ad (patch)
tree: 7ace9c0c38600b241a8775381941b9b634bd3f9d /lib
parent: 3e3a006c6f3337be4298e2c01ba3cb2c0ba04d45 (diff)
downloadlz4-c5d6f8a8be3927c0bec91bcc58667a6cfad244ad.zip
lz4-c5d6f8a8be3927c0bec91bcc58667a6cfad244ad.tar.gz
lz4-c5d6f8a8be3927c0bec91bcc58667a6cfad244ad.tar.bz2
fix #783
LZ4_decompress_safe_partial() now also supports a scenario where nb_bytes_to_generate is <= block_decompressed_size and nb_bytes_to_read is >= block_compressed_size. Previously, the only supported scenario was nb_bytes_to_read == block_compressed_size. Note that if nb_bytes_to_read is > block_compressed_size, then nb_bytes_to_generate must necessarily be <= block_decompressed_size. If both are larger than the block's actual sizes, the function will generate corrupted data.
Diffstat (limited to 'lib')
-rw-r--r--lib/lz4.c39
-rw-r--r--lib/lz4.h34
2 files changed, 45 insertions, 28 deletions
diff --git a/lib/lz4.c b/lib/lz4.c
index 06d24da..0628eac 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -1813,7 +1813,8 @@ LZ4_decompress_generic(
if ((dict==usingExtDict) && (match < lowPrefix)) {
if (unlikely(op+length > oend-LASTLITERALS)) {
if (partialDecoding) {
- length = MIN(length, (size_t)(oend-op)); /* reach end of buffer */
+ DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
+ length = MIN(length, (size_t)(oend-op));
} else {
goto _output_error; /* end-of-block condition violated */
} }
@@ -1921,29 +1922,34 @@ LZ4_decompress_generic(
|| ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
{
/* We've either hit the input parsing restriction or the output parsing restriction.
- * If we've hit the input parsing condition then this must be the last sequence.
- * If we've hit the output parsing condition then we are either using partialDecoding
- * or we've hit the output parsing condition.
+ * In the normal scenario, decoding a full block, it must be the last sequence,
+ * otherwise it's an error (invalid input or dimensions).
+ * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
*/
if (partialDecoding) {
/* Since we are partial decoding we may be in this block because of the output parsing
* restriction, which is not valid since the output buffer is allowed to be undersized.
*/
assert(endOnInput);
- /* If we're in this block because of the input parsing condition, then we must be on the
- * last sequence (or invalid), so we must check that we exactly consume the input.
+ DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
+ DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
+ DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
+ DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
+ /* Finishing in the middle of a literals segment,
+ * due to lack of input.
*/
- if ((ip+length>iend-(2+1+LASTLITERALS)) && (ip+length != iend) && (cpy != oend)) { goto _output_error; }
- assert(ip+length <= iend);
- /* We are finishing in the middle of a literals segment.
- * Break after the copy.
+ if (ip+length > iend) {
+ length = (size_t)(iend-ip);
+ cpy = op + length;
+ }
+ /* Finishing in the middle of a literals segment,
+ * due to lack of output space.
*/
if (cpy > oend) {
cpy = oend;
assert(op<=oend);
length = (size_t)(oend-op);
}
- assert(ip+length <= iend);
} else {
/* We must be on the last sequence because of the parsing limitations so check
* that we exactly regenerate the original size (must be exact when !endOnInput).
@@ -1954,14 +1960,15 @@ LZ4_decompress_generic(
*/
if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { goto _output_error; }
}
- memmove(op, ip, length); /* supports overlapping memory regions, which only matters for in-place decompression scenarios */
+ memmove(op, ip, length); /* supports overlapping memory regions; only matters for in-place decompression scenarios */
ip += length;
op += length;
- /* Necessarily EOF when !partialDecoding. When partialDecoding
- * it is EOF if we've either filled the output buffer or hit
- * the input parsing restriction.
+ /* Necessarily EOF when !partialDecoding.
+ * When partialDecoding, it is EOF if we've either
+ * filled the output buffer or
+ * can't proceed with reading an offset for following match.
*/
- if (!partialDecoding || (cpy == oend) || (ip == iend)) {
+ if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
break;
}
} else {
diff --git a/lib/lz4.h b/lib/lz4.h
index 5209c10..5d2475c 100644
--- a/lib/lz4.h
+++ b/lib/lz4.h
@@ -221,25 +221,35 @@ LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePt
* Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
* into destination buffer 'dst' of size 'dstCapacity'.
* Up to 'targetOutputSize' bytes will be decoded.
- * The function stops decoding on reaching this objective,
- * which can boost performance when only the beginning of a block is required.
+ * The function stops decoding on reaching this objective.
+ * This can be useful to boost performance
+ * whenever only the beginning of a block is required.
*
- * @return : the number of bytes decoded in `dst` (necessarily <= dstCapacity)
+ * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
* If source stream is detected malformed, function returns a negative result.
*
- * Note : @return can be < targetOutputSize, if compressed block contains less data.
+ * Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
*
- * Note 2 : this function features 2 parameters, targetOutputSize and dstCapacity,
- * and expects targetOutputSize <= dstCapacity.
- * It effectively stops decoding on reaching targetOutputSize,
+ * Note 2 : targetOutputSize must be <= dstCapacity
+ *
+ * Note 3 : this function effectively stops decoding on reaching targetOutputSize,
* so dstCapacity is kind of redundant.
- * This is because in a previous version of this function,
- * decoding operation would not "break" a sequence in the middle.
- * As a consequence, there was no guarantee that decoding would stop at exactly targetOutputSize,
+ * This is because in older versions of this function,
+ * decoding operation would still write complete sequences.
+ * Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize,
* it could write more bytes, though only up to dstCapacity.
* Some "margin" used to be required for this operation to work properly.
- * This is no longer necessary.
- * The function nonetheless keeps its signature, in an effort to not break API.
+ * Thankfully, this is no longer necessary.
+ * The function nonetheless keeps the same signature, in an effort to preserve API compatibility.
+ *
+ * Note 4 : If srcSize is the exact size of the block,
+ * then targetOutputSize can be any value,
+ * including larger than the block's decompressed size.
+ * The function will, at most, generate block's decompressed size.
+ *
+ * Note 5 : If srcSize is _larger_ than block's compressed size,
+ * then targetOutputSize **MUST** be <= block's decompressed size.
+ * Otherwise, *silent corruption will occur*.
*/
LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);