summary | refs | log | tree | commit | diff | stats
path: root/lib
diff options
context:
space:
mode:
author: Nick Terrell <terrelln@fb.com>, 2018-04-23 20:14:19 (GMT)
committer: Nick Terrell <terrelln@fb.com>, 2018-04-23 20:34:18 (GMT)
commit: bb83cad98fdb15a7ade4cde582b98e836fb8ef11 (patch)
tree: 17bd7c7197b46ca8b8f1dfcc8cd501ec9e36d3bd /lib
parent: 996d211aca5407c97b0c3736f20ae599f05f0d44 (diff)
download:
lz4-bb83cad98fdb15a7ade4cde582b98e836fb8ef11.zip
lz4-bb83cad98fdb15a7ade4cde582b98e836fb8ef11.tar.gz
lz4-bb83cad98fdb15a7ade4cde582b98e836fb8ef11.tar.bz2
Fix input size validation edge cases
The bug is a read of up to 2 bytes past the end of the input buffer. There are three cases for this bug, one for each test case added:

* An empty input causes `token = *ip++` to read one byte too far.
* A one-byte input with `(token >> ML_BITS) == RUN_MASK` causes one extra byte to be read without validation. This could be combined with the first bug to cause 2 extra bytes to be read.
* The case pointed out in issue #508, where `ip == iend` at the beginning of the loop after taking the shortcut.

Benchmarks show no regressions on clang or gcc-7 on both my mac and devserver.

Fixes #508.
Diffstat (limited to 'lib')
-rw-r--r--  lib/lz4.c  8
1 file changed, 6 insertions, 2 deletions
diff --git a/lib/lz4.c b/lib/lz4.c
index bb6b619..870ab5a 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -1520,6 +1520,7 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic(
if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => just decode everything */
if ((endOnInput) && (unlikely(outputSize==0))) return ((srcSize==1) && (*ip==0)) ? 0 : -1; /* Empty output buffer */
if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1);
+ if ((endOnInput) && unlikely(srcSize==0)) return -1;
/* Main Loop : decode sequences */
while (1) {
@@ -1529,11 +1530,13 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic(
unsigned const token = *ip++;
+ assert(ip <= iend); /* ip < iend before the increment */
/* shortcut for common case :
* in most circumstances, we expect to decode small matches (<= 18 bytes) separated by few literals (<= 14 bytes).
* this shortcut was tested on x86 and x64, where it improves decoding speed.
- * it has not yet been benchmarked on ARM, Power, mips, etc. */
- if (((ip + 14 /*maxLL*/ + 2 /*offset*/ <= iend)
+ * it has not yet been benchmarked on ARM, Power, mips, etc.
+ * NOTE: The loop begins with a read, so we must have one byte left at the end. */
+ if (((ip + 14 /*maxLL*/ + 2 /*offset*/ < iend)
& (op + 14 /*maxLL*/ + 18 /*maxML*/ <= oend))
& ((token < (15<<ML_BITS)) & ((token & ML_MASK) != 15)) ) {
size_t const ll = token >> ML_BITS;
@@ -1553,6 +1556,7 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic(
/* decode literal length */
if ((length=(token>>ML_BITS)) == RUN_MASK) {
unsigned s;
+ if (unlikely(endOnInput ? ip >= iend-RUN_MASK : 0)) goto _output_error; /* overflow detection */
do {
s = *ip++;
length += s;