Merge pull request #756 from terrelln/destSize

[LZ4_compress_destSize + multi-blocks streaming] Fix rare data corruption bug
author: Yann Collet <Cyan4973@users.noreply.github.com> 2019-07-17 20:25:41 (GMT)
committer: GitHub <noreply@github.com> 2019-07-17 20:25:41 (GMT)
commit: 19b099986aeccc12bb46ad207fe8de5b36bdb7bc (patch)
tree: cbc38b9af41d1b2c21c56d019f0daf0f76b1093b /lib
parent: 7654a5a6d287e15e4c42b7d7386fbbd1b857f9e2 (diff)
parent: 13a2d9e34ffc4170720ce417c73e396d0ac1471a (diff)
download: lz4-19b099986aeccc12bb46ad207fe8de5b36bdb7bc.zip
lz4-19b099986aeccc12bb46ad207fe8de5b36bdb7bc.tar.gz
lz4-19b099986aeccc12bb46ad207fe8de5b36bdb7bc.tar.bz2
1 files changed, 32 insertions, 2 deletions
diff --git a/lib/lz4.c b/lib/lz4.c
index 5b03e3d..74a9247 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -648,6 +648,18 @@ LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tab
     return LZ4_hash4(LZ4_read32(p), tableType);
 }
 
+static void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)   /* Resets slot `h` of the hash table at `tableBase`; `tableType` selects the entry type used to view the table. Returns nothing. */
+{
+    switch (tableType)   /* each case casts tableBase to its own entry type before writing slot h */
+    {
+    default: /* fallthrough: any unlisted table type is handled like clearedTable */
+    case clearedTable: { /* illegal! trips the assert; if asserts are compiled out, returns without writing to the table */ assert(0); return; }
+    case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; }   /* pointer entries: slot is set to NULL */
+    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; }   /* 32-bit entries: slot is set to 0 */
+    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; }   /* 16-bit entries: slot is set to 0 */
+    }
+}
+
 static void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
 {
     switch (tableType)
@@ -848,6 +860,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
     for ( ; ; ) {
         const BYTE* match;
         BYTE* token;
+        const BYTE* filledIp;
 
         /* Find a match */
         if (tableType == byPtr) {
@@ -934,6 +947,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
         }
 
         /* Catch up */
+        filledIp = ip;
         while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
 
         /* Encode Literals */
@@ -1013,12 +1027,26 @@ _next_match:
             }
 
             if ((outputDirective) &&    /* Check output buffer overflow */
-                (unlikely(op + (1 + LASTLITERALS) + (matchCode>>8) > olimit)) ) {
+                (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) {
                 if (outputDirective == fillOutput) {
                     /* Match description too long : reduce it */
-                    U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 2 - 1 - LASTLITERALS) * 255;
+                    U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255;
                     ip -= matchCode - newMatchCode;
+                    assert(newMatchCode < matchCode);
                     matchCode = newMatchCode;
+                    if (unlikely(ip < filledIp)) {
+                        /* We have already filled up to filledIp so if ip ends up less than filledIp
+                         * we have positions in the hash table beyond the current position. This is
+                         * a problem if we reuse the hash table. So we have to remove these positions
+                         * from the hash table.
+                         */
+                        const BYTE* ptr;
+                        DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip));
+                        for (ptr = ip + 1; ptr <= filledIp; ++ptr) {
+                            U32 const h = LZ4_hashPosition(ptr, tableType);
+                            LZ4_clearHash(h, cctx->hashTable, tableType);
+                        }
+                    }
                 } else {
                     assert(outputDirective == limitedOutput);
                     return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
@@ -1038,6 +1066,8 @@ _next_match:
             } else
                 *token += (BYTE)(matchCode);
         }
+        /* Ensure we have enough space for the last literals. */
+        assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit));
 
         anchor = ip;
author	Yann Collet <Cyan4973@users.noreply.github.com>	2019-07-17 20:25:41 (GMT)
committer	GitHub <noreply@github.com>	2019-07-17 20:25:41 (GMT)
commit	19b099986aeccc12bb46ad207fe8de5b36bdb7bc (patch)
tree	cbc38b9af41d1b2c21c56d019f0daf0f76b1093b /lib
parent	7654a5a6d287e15e4c42b7d7386fbbd1b857f9e2 (diff)
parent	13a2d9e34ffc4170720ce417c73e396d0ac1471a (diff)
download	lz4-19b099986aeccc12bb46ad207fe8de5b36bdb7bc.zip lz4-19b099986aeccc12bb46ad207fe8de5b36bdb7bc.tar.gz lz4-19b099986aeccc12bb46ad207fe8de5b36bdb7bc.tar.bz2