From ae199124e5aef12f6e467bc8cf30bffece55d55c Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Thu, 18 Apr 2019 18:50:51 -0700
Subject: fixed read-after input in LZ4_decompress_safe()

---
 lib/lz4.c      |  57 ++++++++++-----------
 lib/lz4hc.c    |   2 +-
 tests/fuzzer.c | 156 ++++++++++++++++++++++++++++++---------------------------
 3 files changed, 112 insertions(+), 103 deletions(-)

diff --git a/lib/lz4.c b/lib/lz4.c
index c38932e..e614c45 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -136,7 +136,7 @@
 #endif /* LZ4_FORCE_INLINE */
 
 /* LZ4_FORCE_O2_GCC_PPC64LE and LZ4_FORCE_O2_INLINE_GCC_PPC64LE
- * Gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy,
+ * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
  * together with a simple 8-byte copy loop as a fall-back path.
  * However, this optimization hurts the decompression speed by >30%,
  * because the execution does not go to the optimized loop
@@ -144,10 +144,10 @@
  * before going to the fall-back path become useless overhead.
  * This optimization happens only with the -O3 flag, and -O2 generates
  * a simple 8-byte copy loop.
- * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy
+ * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
  * functions are annotated with __attribute__((optimize("O2"))),
- * and also LZ4_wildCopy is forcibly inlined, so that the O2 attribute
- * of LZ4_wildCopy does not affect the compression speed.
+ * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
+ * of LZ4_wildCopy8 does not affect the compression speed.
  */
 #if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
 #  define LZ4_FORCE_O2_GCC_PPC64LE __attribute__((optimize("O2")))
@@ -301,7 +301,7 @@ static void LZ4_writeLE16(void* memPtr, U16 value)
 
 /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
 LZ4_FORCE_O2_INLINE_GCC_PPC64LE
-void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
+void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
 {
     BYTE* d = (BYTE*)dstPtr;
     const BYTE* s = (const BYTE*)srcPtr;
@@ -342,7 +342,7 @@ LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, con
         srcPtr += 8;
     }
 
-    LZ4_wildCopy(dstPtr, srcPtr, dstEnd);
+    LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
 }
 
 /* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
@@ -946,7 +946,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
             else *token = (BYTE)(litLength<<ML_BITS);
 
             /* Copy Literals */
-            LZ4_wildCopy(op, anchor, op+litLength);
+            LZ4_wildCopy8(op, anchor, op+litLength);
             op+=litLength;
             DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
                         (int)(anchor-(const BYTE*)source), litLength, (int)(ip-(const BYTE*)source));
@@ -1642,14 +1642,16 @@ LZ4_decompress_generic(
 
 	/* Currently the fast loop shows a regression on qualcomm arm chips. */
 #if LZ4_FAST_DEC_LOOP
-        if ((oend - op) < FASTLOOP_SAFE_DISTANCE)
+        if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
+            DEBUGLOG(6, "skip fast decode loop");
             goto safe_decode;
+        }
 
         /* Fast loop : decode sequences as long as output < iend-FASTLOOP_SAFE_DISTANCE */
         while (1) {
             /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
             assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
-
+            if (endOnInput) assert(ip < iend);
             token = *ip++;
             length = token >> ML_BITS;  /* literal length */
 
@@ -1666,27 +1668,26 @@ LZ4_decompress_generic(
                 /* copy literals */
                 cpy = op+length;
                 LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
-                if ( ((endOnInput) && ((cpy>oend-FASTLOOP_SAFE_DISTANCE) || (ip+length>iend-(2+1+LASTLITERALS))) )
-                     || ((!endOnInput) && (cpy>oend-FASTLOOP_SAFE_DISTANCE)) )
-                    {
-                        goto safe_literal_copy;
-                    }
-                if (endOnInput)
+                if (endOnInput) {  /* LZ4_decompress_safe() */
+                    if ((cpy>oend-32) || (ip+length>iend-32)) goto safe_literal_copy;
                     LZ4_wildCopy32(op, ip, cpy);
-                else
-                    LZ4_wildCopy(op, ip, cpy);  /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time : it doesn't know input length, and only relies on end-of-block properties */
+                } else {   /* LZ4_decompress_fast() */
+                    if (cpy>oend-8) goto safe_literal_copy;
+                    LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
+                                                 * it doesn't know input length, and only relies on end-of-block properties */
+                }
                 ip += length; op = cpy;
             } else {
                 cpy = op+length;
-                /* We don't need to check oend, since we check it once for each loop below */
-                if ( ((endOnInput) && (ip+16>iend-(2+1+LASTLITERALS))))
-                    {
-                        goto safe_literal_copy;
-                    }
-                /* Literals can only be 14, but hope compilers optimize if we copy by a register size */
-                if (endOnInput)
+                if (endOnInput) {  /* LZ4_decompress_safe() */
+                    DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
+                    /* We don't need to check oend, since we check it once for each loop below */
+                    if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) goto safe_literal_copy;
+                    /* Literals can only be 14, but hope compilers optimize if we copy by a register size */
                     memcpy(op, ip, 16);
-                else {  /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time : it doesn't know input length, and only relies on end-of-block properties */
+                } else {  /* LZ4_decompress_fast() */
+                    /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
+                     * it doesn't know input length, and relies on end-of-block properties */
                     memcpy(op, ip, 8);
                     if (length > 8) memcpy(op+8, ip+8, 8);
                 }
@@ -1852,7 +1853,7 @@ LZ4_decompress_generic(
                 }
 
             } else {
-                LZ4_wildCopy(op, ip, cpy);   /* may overwrite up to WILDCOPYLENGTH beyond cpy */
+                LZ4_wildCopy8(op, ip, cpy);   /* may overwrite up to WILDCOPYLENGTH beyond cpy */
                 ip += length; op = cpy;
             }
 
@@ -1947,14 +1948,14 @@ LZ4_decompress_generic(
                 BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
                 if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
                 if (op < oCopyLimit) {
-                    LZ4_wildCopy(op, match, oCopyLimit);
+                    LZ4_wildCopy8(op, match, oCopyLimit);
                     match += oCopyLimit - op;
                     op = oCopyLimit;
                 }
                 while (op < cpy) *op++ = *match++;
             } else {
                 memcpy(op, match, 8);
-                if (length > 16) LZ4_wildCopy(op+8, match+8, cpy);
+                if (length > 16) LZ4_wildCopy8(op+8, match+8, cpy);
             }
             op = cpy;   /* wildcopy correction */
         }
diff --git a/lib/lz4hc.c b/lib/lz4hc.c
index 031df8f..936f739 100644
--- a/lib/lz4hc.c
+++ b/lib/lz4hc.c
@@ -442,7 +442,7 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
     }
 
     /* Copy Literals */
-    LZ4_wildCopy(*op, *anchor, (*op) + length);
+    LZ4_wildCopy8(*op, *anchor, (*op) + length);
     *op += length;
 
     /* Encode Offset */
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index ffbeefc..a5b5c93 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -494,9 +494,10 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
 
             /* Test decoding with output size exactly correct => must work */
             FUZ_DISPLAYTEST("LZ4_decompress_fast() with exact output buffer");
-            ret = LZ4_decompress_fast(cBuffer_exact, decodedBuffer, blockSize);
-            FUZ_CHECKTEST(ret<0, "LZ4_decompress_fast failed despite correct space");
-            FUZ_CHECKTEST(ret!=compressedSize, "LZ4_decompress_fast failed : did not fully read compressed data");
+            {   int const r = LZ4_decompress_fast(cBuffer_exact, decodedBuffer, blockSize);
+                FUZ_CHECKTEST(r<0, "LZ4_decompress_fast failed despite correct space");
+                FUZ_CHECKTEST(r!=compressedSize, "LZ4_decompress_fast failed : did not fully read compressed data");
+            }
             {   U32 const crcCheck = XXH32(decodedBuffer, (size_t)blockSize, 0);
                 FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_fast corrupted decoded data");
             }
@@ -504,92 +505,75 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
             /* Test decoding with one byte missing => must fail */
             FUZ_DISPLAYTEST("LZ4_decompress_fast() with output buffer 1-byte too short");
             decodedBuffer[blockSize-1] = 0;
-            ret = LZ4_decompress_fast(cBuffer_exact, decodedBuffer, blockSize-1);
-            FUZ_CHECKTEST(ret>=0, "LZ4_decompress_fast should have failed, due to Output Size being too small");
+            {   int const r = LZ4_decompress_fast(cBuffer_exact, decodedBuffer, blockSize-1);
+                FUZ_CHECKTEST(r>=0, "LZ4_decompress_fast should have failed, due to Output Size being too small");
+            }
             FUZ_CHECKTEST(decodedBuffer[blockSize-1]!=0, "LZ4_decompress_fast overrun specified output buffer");
 
             /* Test decoding with one byte too much => must fail */
             FUZ_DISPLAYTEST();
-            ret = LZ4_decompress_fast(cBuffer_exact, decodedBuffer, blockSize+1);
-            FUZ_CHECKTEST(ret>=0, "LZ4_decompress_fast should have failed, due to Output Size being too large");
-
-            free(cBuffer_exact);
-        }
-
-        /* Test decoding with empty input */
-        FUZ_DISPLAYTEST("LZ4_decompress_safe() with empty input");
-        LZ4_decompress_safe(compressedBuffer, decodedBuffer, 0, blockSize);
+            {   int const r = LZ4_decompress_fast(cBuffer_exact, decodedBuffer, blockSize+1);
+                FUZ_CHECKTEST(r>=0, "LZ4_decompress_fast should have failed, due to Output Size being too large");
+            }
 
-        /* Test decoding with a one byte input */
-        FUZ_DISPLAYTEST("LZ4_decompress_safe() with one byte input");
-        {   char const tmp = (char)0xFF;
-            LZ4_decompress_safe(&tmp, decodedBuffer, 1, blockSize);
-        }
+            /* Test decoding with output size exactly what's necessary => must work */
+            FUZ_DISPLAYTEST();
+            decodedBuffer[blockSize] = 0;
+            {   int const r = LZ4_decompress_safe(cBuffer_exact, decodedBuffer, compressedSize, blockSize);
+                FUZ_CHECKTEST(r<0, "LZ4_decompress_safe failed despite sufficient space");
+                FUZ_CHECKTEST(r!=blockSize, "LZ4_decompress_safe did not regenerate original data");
+            }
+            FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe overrun specified output buffer size");
+            {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
+                FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe corrupted decoded data");
+            }
 
-        /* Test decoding shortcut edge case */
-        FUZ_DISPLAYTEST("LZ4_decompress_safe() with shortcut edge case");
-        {   char tmp[17];
-            /* 14 bytes of literals, followed by a 14 byte match.
-             * Should not read beyond the end of the buffer.
-             * See https://github.com/lz4/lz4/issues/508. */
-            *tmp = (char)0xEE;
-            memset(tmp + 1, 0, 14);
-            tmp[15] = 14;
-            tmp[16] = 0;
-            ret = LZ4_decompress_safe(tmp, decodedBuffer, sizeof(tmp), blockSize);
-            FUZ_CHECKTEST(ret >= 0, "LZ4_decompress_safe() should fail");
-        }
+            /* Test decoding with more than enough output size => must work */
+            FUZ_DISPLAYTEST();
+            decodedBuffer[blockSize] = 0;
+            decodedBuffer[blockSize+1] = 0;
+            {   int const r = LZ4_decompress_safe(cBuffer_exact, decodedBuffer, compressedSize, blockSize+1);
+                FUZ_CHECKTEST(r<0, "LZ4_decompress_safe failed despite amply sufficient space");
+                FUZ_CHECKTEST(r!=blockSize, "LZ4_decompress_safe did not regenerate original data");
+            }
+            FUZ_CHECKTEST(decodedBuffer[blockSize+1], "LZ4_decompress_safe overrun specified output buffer size");
+            {   U32 const crcCheck = XXH32(decodedBuffer, (size_t)blockSize, 0);
+                FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe corrupted decoded data");
+            }
 
+            /* Test decoding with output size being one byte too short => must fail */
+            FUZ_DISPLAYTEST();
+            decodedBuffer[blockSize-1] = 0;
+            {   int const r = LZ4_decompress_safe(cBuffer_exact, decodedBuffer, compressedSize, blockSize-1);
+                FUZ_CHECKTEST(r>=0, "LZ4_decompress_safe should have failed, due to Output Size being one byte too short");
+            }
+            FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_safe overrun specified output buffer size");
 
-        /* Test decoding with output size exactly what's necessary => must work */
-        FUZ_DISPLAYTEST();
-        decodedBuffer[blockSize] = 0;
-        ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, blockSize);
-        FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe failed despite sufficient space");
-        FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe did not regenerate original data");
-        FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe overrun specified output buffer size");
-        {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
-            FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe corrupted decoded data");
-        }
+            /* Test decoding with output size being 10 bytes too short => must fail */
+            FUZ_DISPLAYTEST();
+            if (blockSize>10) {
+                decodedBuffer[blockSize-10] = 0;
+                {   int const r = LZ4_decompress_safe(cBuffer_exact, decodedBuffer, compressedSize, blockSize-10);
+                    FUZ_CHECKTEST(r>=0, "LZ4_decompress_safe should have failed, due to Output Size being 10 bytes too short");
+                }
+                FUZ_CHECKTEST(decodedBuffer[blockSize-10], "LZ4_decompress_safe overrun specified output buffer size");
+            }
 
-        // Test decoding with more than enough output size => must work
-        FUZ_DISPLAYTEST();
-        decodedBuffer[blockSize] = 0;
-        decodedBuffer[blockSize+1] = 0;
-        ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, blockSize+1);
-        FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe failed despite amply sufficient space");
-        FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe did not regenerate original data");
-        FUZ_CHECKTEST(decodedBuffer[blockSize+1], "LZ4_decompress_safe overrun specified output buffer size");
-        {   U32 const crcCheck = XXH32(decodedBuffer, (size_t)blockSize, 0);
-            FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe corrupted decoded data");
+            free(cBuffer_exact);
         }
 
-        // Test decoding with output size being one byte too short => must fail
+        /* Test decoding with input size being one byte too short => must fail */
         FUZ_DISPLAYTEST();
-        decodedBuffer[blockSize-1] = 0;
-        ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, blockSize-1);
-        FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to Output Size being one byte too short");
-        FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_safe overrun specified output buffer size");
-
-        // Test decoding with output size being 10 bytes too short => must fail
-        FUZ_DISPLAYTEST();
-        if (blockSize>10) {
-            decodedBuffer[blockSize-10] = 0;
-            ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, blockSize-10);
-            FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to Output Size being 10 bytes too short");
-            FUZ_CHECKTEST(decodedBuffer[blockSize-10], "LZ4_decompress_safe overrun specified output buffer size");
+        {   int const r = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize-1, blockSize);
+            FUZ_CHECKTEST(r>=0, "LZ4_decompress_safe should have failed, due to input size being one byte too short (blockSize=%i, ret=%i, compressedSize=%i)", blockSize, ret, compressedSize);
         }
 
-        // Test decoding with input size being one byte too short => must fail
-        FUZ_DISPLAYTEST();
-        ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize-1, blockSize);
-        FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to input size being one byte too short (blockSize=%i, ret=%i, compressedSize=%i)", blockSize, ret, compressedSize);
-
-        // Test decoding with input size being one byte too large => must fail
+        /* Test decoding with input size being one byte too large => must fail */
         FUZ_DISPLAYTEST();
         decodedBuffer[blockSize] = 0;
-        ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize+1, blockSize);
-        FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to input size being too large");
+        {   int const r = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize+1, blockSize);
+            FUZ_CHECKTEST(r>=0, "LZ4_decompress_safe should have failed, due to input size being too large");
+        }
         FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe overrun specified output buffer size");
 
         /* Test partial decoding => must work */
@@ -881,7 +865,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
 
         /* Compress HC using external dictionary stream */
         FUZ_DISPLAYTEST();
-        {   LZ4_streamHC_t* LZ4_streamHC = LZ4_createStreamHC();
+        {   LZ4_streamHC_t* const LZ4_streamHC = LZ4_createStreamHC();
 
             LZ4_loadDictHC(LZ4dictHC, dict, dictSize);
             LZ4_attach_HC_dictionary(LZ4_streamHC, LZ4dictHC);
@@ -1005,6 +989,30 @@ static void FUZ_unitTests(int compressionLevel)
     /* 32-bits address space overflow test */
     FUZ_AddressOverflow();
 
+    /* Test decoding with empty input */
+    DISPLAYLEVEL(3, "LZ4_decompress_safe() with empty input");
+    LZ4_decompress_safe(testCompressed, testVerify, 0, testInputSize);
+
+    /* Test decoding with a one byte input */
+    DISPLAYLEVEL(3, "LZ4_decompress_safe() with one byte input");
+    {   char const tmp = (char)0xFF;
+        LZ4_decompress_safe(&tmp, testVerify, 1, testInputSize);
+    }
+
+    /* Test decoding shortcut edge case */
+    DISPLAYLEVEL(3, "LZ4_decompress_safe() with shortcut edge case");
+    {   char tmp[17];
+        /* 14 bytes of literals, followed by a 14 byte match.
+         * Should not read beyond the end of the buffer.
+         * See https://github.com/lz4/lz4/issues/508. */
+        *tmp = (char)0xEE;
+        memset(tmp + 1, 0, 14);
+        tmp[15] = 14;
+        tmp[16] = 0;
+        {   int const r = LZ4_decompress_safe(tmp, testVerify, sizeof(tmp), testInputSize);
+            FUZ_CHECKTEST(r >= 0, "LZ4_decompress_safe() should fail");
+    }   }
+
     /* LZ4 streaming tests */
     {   LZ4_stream_t  streamingState;
         U64 crcOrig;
-- 
cgit v0.12