From bb83cad98fdb15a7ade4cde582b98e836fb8ef11 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 23 Apr 2018 13:14:19 -0700 Subject: Fix input size validation edge cases The bug is a read up to 2 bytes past the end of the buffer. There are three cases for this bug, one for each test case added. * An empty input causes `token = *ip++` to read one byte too far. * A one byte input with `(token >> ML_BITS) == RUN_MASK` causes one extra byte to be read without validation. This could be combined with the first bug to cause 2 extra bytes to be read. * The case pointed out in issue #508, where `ip == iend` at the beginning of the loop after taking the shortcut. Benchmarks show no regressions on clang or gcc-7 on both my mac and devserver. Fixes #508. --- lib/lz4.c | 8 ++++++-- tests/fuzzer.c | 26 ++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/lib/lz4.c b/lib/lz4.c index bb6b619..870ab5a 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -1520,6 +1520,7 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => just decode everything */ if ((endOnInput) && (unlikely(outputSize==0))) return ((srcSize==1) && (*ip==0)) ? 0 : -1; /* Empty output buffer */ if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1); + if ((endOnInput) && unlikely(srcSize==0)) return -1; /* Main Loop : decode sequences */ while (1) { @@ -1529,11 +1530,13 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( unsigned const token = *ip++; + assert(ip <= iend); /* ip < iend before the increment */ /* shortcut for common case : * in most circumstances, we expect to decode small matches (<= 18 bytes) separated by few literals (<= 14 bytes). * this shortcut was tested on x86 and x64, where it improves decoding speed. - * it has not yet been benchmarked on ARM, Power, mips, etc. */ - if (((ip + 14 /*maxLL*/ + 2 /*offset*/ <= iend) + * it has not yet been benchmarked on ARM, Power, mips, etc. + * NOTE: The loop begins with a read, so we must have one byte left at the end. */ + if (((ip + 14 /*maxLL*/ + 2 /*offset*/ < iend) & (op + 14 /*maxLL*/ + 18 /*maxML*/ <= oend)) & ((token < (15<> ML_BITS; @@ -1553,6 +1556,7 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( /* decode literal length */ if ((length=(token>>ML_BITS)) == RUN_MASK) { unsigned s; + if (unlikely(endOnInput ? ip >= iend-RUN_MASK : 0)) goto _output_error; /* overflow detection */ do { s = *ip++; length += s; diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 244cc4f..def5230 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -487,6 +487,32 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c ret = LZ4_decompress_fast(compressedBuffer, decodedBuffer, blockSize+1); FUZ_CHECKTEST(ret>=0, "LZ4_decompress_fast should have failed, due to Output Size being too large"); + /* Test decoding with empty input */ + FUZ_DISPLAYTEST("LZ4_decompress_safe() with empty input"); + LZ4_decompress_safe(NULL, decodedBuffer, 0, blockSize); + + /* Test decoding with a one byte input */ + FUZ_DISPLAYTEST("LZ4_decompress_safe() with one byte input"); + { char const tmp = 0xFF; + LZ4_decompress_safe(&tmp, decodedBuffer, 1, blockSize); + } + + /* Test decoding shortcut edge case */ + FUZ_DISPLAYTEST("LZ4_decompress_safe() with shortcut edge case"); + { char tmp[17]; + unsigned long i; + /* 14 bytes of literals, followed by a 14 byte match. + * Should not read beyond the end of the buffer. + * See https://github.com/lz4/lz4/issues/508. */ + *tmp = 0xEE; + memset(tmp + 1, 0, 14); + tmp[15] = 14; + tmp[16] = 0; + ret = LZ4_decompress_safe(tmp, decodedBuffer, sizeof(tmp), blockSize); + FUZ_CHECKTEST(ret >= 0, "LZ4_decompress_safe() should fail"); + } + + /* Test decoding with output size exactly what's necessary => must work */ FUZ_DISPLAYTEST(); decodedBuffer[blockSize] = 0; -- cgit v0.12