summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Yann Collet <Cyan4973@users.noreply.github.com> 2020-08-27 18:00:28 (GMT)
committer GitHub <noreply@github.com> 2020-08-27 18:00:28 (GMT)
commit 440c8461d71a79ee927ce93077b58ad22d894d28 (patch)
tree 6e352872dcbe8543766d33612d12468c1bd8dd04
parent b73cd37baba01229fb67a7aaae9e95fcffd09059 (diff)
parent 8b75d403d86eaf9786da89a49ca02444916c462e (diff)
download lz4-440c8461d71a79ee927ce93077b58ad22d894d28.zip
lz4-440c8461d71a79ee927ce93077b58ad22d894d28.tar.gz
lz4-440c8461d71a79ee927ce93077b58ad22d894d28.tar.bz2
Merge pull request #910 from lz4/extraInput
Fix issue #783
-rw-r--r-- .travis.yml 4
-rw-r--r-- lib/lz4.c 39
-rw-r--r-- lib/lz4.h 34
-rw-r--r-- tests/Makefile 18
-rw-r--r-- tests/decompress-partial.c 49
-rw-r--r-- tests/fuzzer.c 33
6 files changed, 128 insertions, 49 deletions
diff --git a/.travis.yml b/.travis.yml
index 1474fad..6074f08 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -10,9 +10,7 @@ matrix:
script:
- make # test library build
- make clean
- - make -C tests test-lz4 MOREFLAGS='-Werror -Wconversion -Wno-sign-conversion' | tee # test scenario where `stdout` is not the console
- - make clean
- - CFLAGS=-m32 make -C tests test-lz4-contentSize
+ - make test MOREFLAGS='-Werror -Wconversion -Wno-sign-conversion' | tee # test scenario where `stdout` is not the console
# Container-based 12.04 LTS Server Edition 64 bit (doesn't support 32-bit includes)
- name: (Precise) benchmark test
diff --git a/lib/lz4.c b/lib/lz4.c
index 0ca7b21..0628eac 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -1813,7 +1813,8 @@ LZ4_decompress_generic(
if ((dict==usingExtDict) && (match < lowPrefix)) {
if (unlikely(op+length > oend-LASTLITERALS)) {
if (partialDecoding) {
- length = MIN(length, (size_t)(oend-op)); /* reach end of buffer */
+ DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
+ length = MIN(length, (size_t)(oend-op));
} else {
goto _output_error; /* end-of-block condition violated */
} }
@@ -1921,29 +1922,34 @@ LZ4_decompress_generic(
|| ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
{
/* We've either hit the input parsing restriction or the output parsing restriction.
- * If we've hit the input parsing condition then this must be the last sequence.
- * If we've hit the output parsing condition then we are either using partialDecoding
- * or we've hit the output parsing condition.
+ * In the normal scenario, decoding a full block, it must be the last sequence,
+ * otherwise it's an error (invalid input or dimensions).
+ * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
*/
if (partialDecoding) {
/* Since we are partial decoding we may be in this block because of the output parsing
* restriction, which is not valid since the output buffer is allowed to be undersized.
*/
assert(endOnInput);
- /* If we're in this block because of the input parsing condition, then we must be on the
- * last sequence (or invalid), so we must check that we exactly consume the input.
+ DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
+ DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
+ DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
+ DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
+ /* Finishing in the middle of a literals segment,
+ * due to lack of input.
*/
- if ((ip+length>iend-(2+1+LASTLITERALS)) && (ip+length != iend)) { goto _output_error; }
- assert(ip+length <= iend);
- /* We are finishing in the middle of a literals segment.
- * Break after the copy.
+ if (ip+length > iend) {
+ length = (size_t)(iend-ip);
+ cpy = op + length;
+ }
+ /* Finishing in the middle of a literals segment,
+ * due to lack of output space.
*/
if (cpy > oend) {
cpy = oend;
assert(op<=oend);
length = (size_t)(oend-op);
}
- assert(ip+length <= iend);
} else {
/* We must be on the last sequence because of the parsing limitations so check
* that we exactly regenerate the original size (must be exact when !endOnInput).
@@ -1954,14 +1960,15 @@ LZ4_decompress_generic(
*/
if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { goto _output_error; }
}
- memmove(op, ip, length); /* supports overlapping memory regions, which only matters for in-place decompression scenarios */
+ memmove(op, ip, length); /* supports overlapping memory regions; only matters for in-place decompression scenarios */
ip += length;
op += length;
- /* Necessarily EOF when !partialDecoding. When partialDecoding
- * it is EOF if we've either filled the output buffer or hit
- * the input parsing restriction.
+ /* Necessarily EOF when !partialDecoding.
+ * When partialDecoding, it is EOF if we've either
+ * filled the output buffer or
+ * can't proceed with reading an offset for following match.
*/
- if (!partialDecoding || (cpy == oend) || (ip == iend)) {
+ if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
break;
}
} else {
diff --git a/lib/lz4.h b/lib/lz4.h
index 5209c10..5d2475c 100644
--- a/lib/lz4.h
+++ b/lib/lz4.h
@@ -221,25 +221,35 @@ LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePt
* Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
* into destination buffer 'dst' of size 'dstCapacity'.
* Up to 'targetOutputSize' bytes will be decoded.
- * The function stops decoding on reaching this objective,
- * which can boost performance when only the beginning of a block is required.
+ * The function stops decoding on reaching this objective.
+ * This can be useful to boost performance
+ * whenever only the beginning of a block is required.
*
- * @return : the number of bytes decoded in `dst` (necessarily <= dstCapacity)
+ * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
* If source stream is detected malformed, function returns a negative result.
*
- * Note : @return can be < targetOutputSize, if compressed block contains less data.
+ * Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
*
- * Note 2 : this function features 2 parameters, targetOutputSize and dstCapacity,
- * and expects targetOutputSize <= dstCapacity.
- * It effectively stops decoding on reaching targetOutputSize,
+ * Note 2 : targetOutputSize must be <= dstCapacity
+ *
+ * Note 3 : this function effectively stops decoding on reaching targetOutputSize,
* so dstCapacity is kind of redundant.
- * This is because in a previous version of this function,
- * decoding operation would not "break" a sequence in the middle.
- * As a consequence, there was no guarantee that decoding would stop at exactly targetOutputSize,
+ * This is because in older versions of this function,
+ * decoding operation would still write complete sequences.
+ * Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize,
* it could write more bytes, though only up to dstCapacity.
* Some "margin" used to be required for this operation to work properly.
- * This is no longer necessary.
- * The function nonetheless keeps its signature, in an effort to not break API.
+ * Thankfully, this is no longer necessary.
+ * The function nonetheless keeps the same signature, in an effort to preserve API compatibility.
+ *
+ * Note 4 : If srcSize is the exact size of the block,
+ * then targetOutputSize can be any value,
+ * including larger than the block's decompressed size.
+ * The function will, at most, generate block's decompressed size.
+ *
+ * Note 5 : If srcSize is _larger_ than block's compressed size,
+ * then targetOutputSize **MUST** be <= block's decompressed size.
+ * Otherwise, *silent corruption will occur*.
*/
LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
diff --git a/tests/Makefile b/tests/Makefile
index 866ff5d..5a6ec59 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -55,7 +55,7 @@ NB_LOOPS ?= -i1
default: all
-all: fullbench fuzzer frametest roundTripTest datagen checkFrame
+all: fullbench fuzzer frametest roundTripTest datagen checkFrame decompress-partial
all32: CFLAGS+=-m32
all32: all
@@ -104,6 +104,9 @@ datagen : $(PRGDIR)/datagen.c datagencli.c
checkFrame : lz4frame.o lz4.o lz4hc.o xxhash.o checkFrame.c
$(CC) $(FLAGS) $^ -o $@$(EXT)
+decompress-partial: lz4.o decompress-partial.c
+ $(CC) $(FLAGS) $^ -o $@$(EXT)
+
clean:
@$(MAKE) -C $(LZ4DIR) $@ > $(VOID)
@$(MAKE) -C $(PRGDIR) $@ > $(VOID)
@@ -114,7 +117,8 @@ clean:
frametest$(EXT) frametest32$(EXT) \
fasttest$(EXT) roundTripTest$(EXT) \
datagen$(EXT) checkTag$(EXT) \
- frameTest$(EXT) lz4_all.c
+ frameTest$(EXT) decompress-partial$(EXT) \
+ lz4_all.c
@$(RM) -rf $(TESTDIR)
@echo Cleaning completed
@@ -158,7 +162,7 @@ list:
check: test-lz4-essentials
.PHONY: test
-test: test-lz4 test-lz4c test-frametest test-fullbench test-fuzzer test-install test-amalgamation listTest
+test: test-lz4 test-lz4c test-frametest test-fullbench test-fuzzer test-install test-amalgamation listTest test-decompress-partial
.PHONY: test32
test32: CFLAGS+=-m32
@@ -401,8 +405,8 @@ test-lz4-dict: lz4 datagen
test-lz4-hugefile: lz4 datagen
@echo "\n ---- test huge files compression/decompression ----"
- $(DATAGEN) -g6GB | $(LZ4) -vB5D | $(LZ4) -qt
- $(DATAGEN) -g5GB | $(LZ4) -v4BD | $(LZ4) -qt
+ ./datagen -g6GB | $(LZ4) -vB5D | $(LZ4) -qt
+ ./datagen -g4500MB | $(LZ4) -v3BD | $(LZ4) -qt
# test large file size [2-4] GB
@$(DATAGEN) -g3G -P100 | $(LZ4) -vv | $(LZ4) --decompress --force --sparse - tmphf1
@ls -ls tmphf1
@@ -530,4 +534,8 @@ test-mem: lz4 datagen fuzzer frametest fullbench
test-mem32: lz4c32 datagen
# unfortunately, valgrind doesn't seem to work with non-native binary...
+test-decompress-partial : decompress-partial
+ @echo "\n ---- test decompress-partial ----"
+ ./decompress-partial$(EXT)
+
endif
diff --git a/tests/decompress-partial.c b/tests/decompress-partial.c
new file mode 100644
index 0000000..4e124b7
--- /dev/null
+++ b/tests/decompress-partial.c
@@ -0,0 +1,49 @@
+#include "stdio.h"
+#include "string.h"
+#include "lz4.h"
+
+const char source[] = /* sample text that main() compresses, then decompresses and compares against */
+ "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod\n"
+ "tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim\n"
+ "veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea\n"
+ "commodo consequat. Duis aute irure dolor in reprehenderit in voluptate\n"
+ "velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat\n"
+ "cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id\n"
+ "est laborum.\n"
+ "\n"
+ "Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium\n"
+ "doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore\n"
+ "veritatis et quasi architecto beatae vitae dicta sunt explicabo. Nemo enim\n"
+ "ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia\n"
+ "consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque\n"
+ "porro quisquam est, qui dolorem ipsum quia dolor sit amet, consectetur,\n"
+ "adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore\n"
+ "et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis\n"
+ "nostrum exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid\n"
+ "ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea\n"
+ "voluptate velit esse quam nihil molestiae consequatur, vel illum qui\n"
+ "dolorem eum fugiat quo voluptas nulla pariatur?\n";
+
+#define BUFFER_SIZE 2048 /* capacity, in bytes, of both the compressed and the decoded buffer */
+
+int main(void) /* compresses `source`, then decodes it with srcSize >= cmpSize; returns 0 if every decode matches, -1 otherwise */
+{
+ int srcLen = (int)strlen(source); /* length of the text, terminating NUL excluded */
+ char cmpBuffer[BUFFER_SIZE]; /* receives the compressed block; bytes past cmpSize are never initialized */
+ char outBuffer[BUFFER_SIZE]; /* receives the decoded text */
+ int cmpSize; /* NOTE(review): never checked for a failure value before being used as the loop start */
+ int i; /* srcSize value handed to the decoder on each iteration */
+
+ cmpSize = LZ4_compress_default(source, cmpBuffer, srcLen, BUFFER_SIZE); /* compress the whole text in one call */
+
+ for (i = cmpSize; i < cmpSize + 10; ++i) { /* announce from 0 to 9 input bytes more than were produced */
+ int result = LZ4_decompress_safe_partial(cmpBuffer, outBuffer, i, srcLen, BUFFER_SIZE); /* targetOutputSize is the full original length */
+ if ((result < 0) || (result != srcLen) || memcmp(source, outBuffer, srcLen)) { /* fail on error code, wrong length, or differing content */
+ printf("test decompress-partial error \n");
+ return -1;
+ }
+ }
+
+ printf("test decompress-partial OK \n"); /* reached only if all 10 decodes matched */
+ return 0;
+}
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index 3d7456a..4658d79 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -618,13 +618,16 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
/* Test partial decoding => must work */
FUZ_DISPLAYTEST("test LZ4_decompress_safe_partial");
- { size_t const missingBytes = FUZ_rand(&randState) % (unsigned)blockSize;
- int const targetSize = (int)((size_t)blockSize - missingBytes);
+ { size_t const missingOutBytes = FUZ_rand(&randState) % (unsigned)blockSize;
+ int const targetSize = (int)((size_t)blockSize - missingOutBytes);
+ size_t const extraneousInBytes = FUZ_rand(&randState) % 2;
+ int const inCSize = (int)((size_t)compressedSize + extraneousInBytes);
char const sentinel = decodedBuffer[targetSize] = block[targetSize] ^ 0x5A;
- int const decResult = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, compressedSize, targetSize, blockSize);
+ int const decResult = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, inCSize, targetSize, blockSize);
FUZ_CHECKTEST(decResult<0, "LZ4_decompress_safe_partial failed despite valid input data (error:%i)", decResult);
FUZ_CHECKTEST(decResult != targetSize, "LZ4_decompress_safe_partial did not regenerated required amount of data (%i < %i <= %i)", decResult, targetSize, blockSize);
FUZ_CHECKTEST(decodedBuffer[targetSize] != sentinel, "LZ4_decompress_safe_partial overwrite beyond requested size (though %i <= %i <= %i)", decResult, targetSize, blockSize);
+ FUZ_CHECKTEST(memcmp(block, decodedBuffer, (size_t)targetSize), "LZ4_decompress_safe_partial: corruption detected in regenerated data");
}
/* Test Compression with limited output size */
@@ -856,12 +859,12 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_safe_usingDict overrun specified output buffer size");
FUZ_DISPLAYTEST("LZ4_decompress_safe_usingDict with a too small output buffer");
- { U32 const missingBytes = (FUZ_rand(&randState) & 0xF) + 2;
- if ((U32)blockSize > missingBytes) {
- decodedBuffer[(U32)blockSize-missingBytes] = 0;
+ { int const missingBytes = (FUZ_rand(&randState) & 0xF) + 2;
+ if (blockSize > missingBytes) {
+ decodedBuffer[blockSize-missingBytes] = 0;
ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize-missingBytes, dict, dictSize);
- FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe_usingDict should have failed : output buffer too small (-%u byte)", missingBytes);
- FUZ_CHECKTEST(decodedBuffer[blockSize-missingBytes], "LZ4_decompress_safe_usingDict overrun specified output buffer size (-%u byte) (blockSize=%i)", missingBytes, blockSize);
+ FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe_usingDict should have failed : output buffer too small (-%i byte)", missingBytes);
+ FUZ_CHECKTEST(decodedBuffer[blockSize-missingBytes], "LZ4_decompress_safe_usingDict overrun specified output buffer size (-%i byte) (blockSize=%i)", missingBytes, blockSize);
} }
/* Compress HC using External dictionary */
@@ -948,7 +951,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
/* Compress HC continue destSize */
FUZ_DISPLAYTEST();
- { int const availableSpace = (int)(FUZ_rand(&randState) % blockSize) + 5;
+ { int const availableSpace = (int)(FUZ_rand(&randState) % (U32)blockSize) + 5;
int consumedSize = blockSize;
FUZ_DISPLAYTEST();
LZ4_loadDictHC(LZ4dictHC, dict, dictSize);
@@ -974,10 +977,14 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
/* ***** End of tests *** */
/* Fill stats */
- bytes += blockSize;
- cbytes += compressedSize;
- hcbytes += HCcompressedSize;
- ccbytes += blockContinueCompressedSize;
+ assert(blockSize >= 0);
+ bytes += (unsigned)blockSize;
+ assert(compressedSize >= 0);
+ cbytes += (unsigned)compressedSize;
+ assert(HCcompressedSize >= 0);
+ hcbytes += (unsigned)HCcompressedSize;
+ assert(blockContinueCompressedSize >= 0);
+ ccbytes += (unsigned)blockContinueCompressedSize;
}
if (nbCycles<=1) nbCycles = cycleNb; /* end by time */