From e450018588560537c2c4b4b2dd3515a9ef3a83f7 Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Tue, 21 Oct 2014 00:12:55 +0100
Subject: LZ4 HC : fixed small dictionary streaming compression

---
 lz4.c             |  6 +++---
 lz4hc.c           | 26 ++++++++++++------------
 programs/fuzzer.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 70 insertions(+), 21 deletions(-)
 mode change 100644 => 100755 lz4.c
 mode change 100644 => 100755 lz4hc.c
 mode change 100644 => 100755 programs/fuzzer.c

diff --git a/lz4.c b/lz4.c
old mode 100644
new mode 100755
index 39f176f..8adf96a
--- a/lz4.c
+++ b/lz4.c
@@ -876,8 +876,8 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
  * in order to remove useless branches during compilation optimization.
  */
 FORCE_INLINE int LZ4_decompress_generic(
-                 const char* source,
-                 char* dest,
+                 const char* const source,
+                 char* const dest,
                  int inputSize,
                  int outputSize,         /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer. */
 
@@ -885,7 +885,7 @@ FORCE_INLINE int LZ4_decompress_generic(
                  int partialDecoding,    /* full, partial */
                  int targetOutputSize,   /* only used if partialDecoding==partial */
                  int dict,               /* noDict, withPrefix64k, usingExtDict */
-                 const char* dictStart,  /* only if dict==usingExtDict */
+                 const char* const dictStart,  /* only if dict==usingExtDict */
                  int dictSize            /* note : = 0 if noDict */
                  )
 {
diff --git a/lz4hc.c b/lz4hc.c
old mode 100644
new mode 100755
index 72739a7..38e0ce4
--- a/lz4hc.c
+++ b/lz4hc.c
@@ -346,13 +346,13 @@ FORCE_INLINE void LZ4HC_init (LZ4HC_Data_Structure* hc4, const BYTE* base)
 {
     MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
     MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
-    hc4->nextToUpdate = 1;
-    hc4->base = base;
+    hc4->nextToUpdate = 64 KB;
+    hc4->base = base - 64 KB;
     hc4->inputBuffer = base;
     hc4->end = base;
-    hc4->dictBase = base;
-    hc4->dictLimit = 0;
-    hc4->lowLimit = 0;
+    hc4->dictBase = base - 64 KB;
+    hc4->dictLimit = 64 KB;
+    hc4->lowLimit = 64 KB;
 }
 
 
@@ -422,17 +422,17 @@ FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4,   // I
     const BYTE* const base = hc4->base;
     const BYTE* const dictBase = hc4->dictBase;
     const U32 dictLimit = hc4->dictLimit;
+	const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
     U32 matchIndex;
-    const U32 idxLow = (ip-base) > 64 KB ? (U32)(ip - base) - 64 KB : 0;
     const BYTE* match;
     int nbAttempts=maxNbAttempts;
     size_t ml=0;
 
-    /* HC4 match finder */
+	/* HC4 match finder */
     LZ4HC_Insert(hc4, ip);
     matchIndex = HashTable[LZ4HC_hashPtr(ip)];
 
-    while ((matchIndex>idxLow) && (nbAttempts))
+    while ((matchIndex>=lowLimit) && (nbAttempts))
     {
         nbAttempts--;
         if (matchIndex >= dictLimit)
@@ -480,19 +480,19 @@ FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
     U32* const HashTable = hc4->hashTable;
     const BYTE* const base = hc4->base;
     const U32 dictLimit = hc4->dictLimit;
-	const U32 dictLowLimit = hc4->lowLimit;
+	const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
     const BYTE* const dictBase = hc4->dictBase;
     const BYTE* match;
     U32   matchIndex;
-    const U32 idxLow = (ip-base) > 64 KB ? (U32)(ip-base) - 64 KB : 0;
     int nbAttempts = maxNbAttempts;
     int delta = (int)(ip-iLowLimit);
 
-    /* First Match */
+
+	/* First Match */
     LZ4HC_Insert(hc4, ip);
     matchIndex = HashTable[LZ4HC_hashPtr(ip)];
 
-    while ((matchIndex>idxLow) && (nbAttempts))
+    while ((matchIndex>=lowLimit) && (nbAttempts))
     {
         nbAttempts--;
         if (matchIndex >= dictLimit)
@@ -527,7 +527,7 @@ FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
                 mlt = LZ4HC_CommonLength(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
                 if ((ip+mlt == vLimit) && (vLimit < iHighLimit))
                     mlt += LZ4HC_CommonLength(ip+mlt, base+dictLimit, iHighLimit);
-				while ((ip+back > iLowLimit) && (matchIndex+back > dictLowLimit) && (ip[back-1] == match[back-1])) back--;
+				while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == match[back-1])) back--;
 				mlt -= back;
                 if ((int)mlt > longest) { longest = (int)mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; }
             }
diff --git a/programs/fuzzer.c b/programs/fuzzer.c
old mode 100644
new mode 100755
index cf56251..225361b
--- a/programs/fuzzer.c
+++ b/programs/fuzzer.c
@@ -673,7 +673,7 @@ static void FUZ_unitTests(void)
     const unsigned cycleNb= 0;
     char testInput[testInputSize];
     char testCompressed[testCompressedSize];
-    char testVerify[testCompressedSize];
+    char testVerify[testInputSize];
     U32 randState = 0;
 
     // Init
@@ -719,7 +719,7 @@ static void FUZ_unitTests(void)
         crcNew = XXH64(testVerify, testCompressedSize, 0);
         FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() dictionary decompression corruption");
 
-        // dictionary multi compression test
+        // multiple HC compression test with dictionary
         {
             int result1, result2;
             int segSize = testCompressedSize / 2;
@@ -727,9 +727,9 @@ static void FUZ_unitTests(void)
             LZ4_resetStreamHC(&sHC, 0);
             LZ4_loadDictHC(&sHC, testInput, segSize);
             result1 = LZ4_compressHC_limitedOutput_continue(&sHC, testInput + segSize, testCompressed, segSize, segSize -1);
-            FUZ_CHECKTEST(result1==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result);
-            result2 = LZ4_compressHC_limitedOutput_continue(&sHC, testInput + 2*segSize, testCompressed+result1, segSize, segSize -1);
-            FUZ_CHECKTEST(result2==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result);
+            FUZ_CHECKTEST(result1==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result1);
+            result2 = LZ4_compressHC_limitedOutput_continue(&sHC, testInput + 2*segSize, testCompressed+result1, segSize, segSize-1);
+            FUZ_CHECKTEST(result2==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result2);
 
             result = LZ4_decompress_safe_usingDict(testCompressed, testVerify, result1, segSize, testInput, segSize);
             FUZ_CHECKTEST(result!=segSize, "LZ4_decompress_safe() dictionary decompression part 1 failed");
@@ -750,6 +750,55 @@ static void FUZ_unitTests(void)
         FUZ_CHECKTEST(result!=(int)testCompressedSize, "LZ4_decompress_safe_usingDict() decompression failed following remote dictionary HC compression test");
         crcNew = XXH64(testVerify, testCompressedSize, 0);
         FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_usingDict() decompression corruption");
+
+        // multiple HC compression test with external dictionary
+        {
+            XXH64_state_t crcOrigState;
+            XXH64_state_t crcNewState;
+            const char* dict = testInput + 3;
+            int dictSize = (FUZ_rand(&randState) & 8191);
+            char* dst = testVerify;
+
+            size_t segStart = dictSize + 7;
+            int segSize = (FUZ_rand(&randState) & 8191);
+            int segNb = 1;
+
+            LZ4_resetStreamHC(&sHC, 0);
+            LZ4_loadDictHC(&sHC, dict, dictSize);
+
+            XXH64_reset(&crcOrigState, 0);
+            XXH64_reset(&crcNewState, 0);
+
+            while (segStart + segSize < testInputSize)
+            {
+                XXH64_update(&crcOrigState, testInput + segStart, segSize);
+                crcOrig = XXH64_digest(&crcOrigState);
+                result = LZ4_compressHC_limitedOutput_continue(&sHC, testInput + segStart, testCompressed, segSize, LZ4_compressBound(segSize));
+                FUZ_CHECKTEST(result==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result);
+
+                result = LZ4_decompress_safe_usingDict(testCompressed, dst, result, segSize, dict, dictSize);
+                FUZ_CHECKTEST(result!=segSize, "LZ4_decompress_safe_usingDict() dictionary decompression part %i failed", segNb);
+                XXH64_update(&crcNewState, dst, segSize);
+                crcNew = XXH64_digest(&crcNewState);
+				if (crcOrig!=crcNew)
+				{
+					size_t c=0;
+					while (dst[c] == testInput[segStart+c]) c++;
+					DISPLAY("Bad decompression at %u / %u \n", (U32)c, (U32)segSize);
+				}
+                FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_usingDict() part %i corruption", segNb);
+
+                dict = dst;
+                //dict = testInput + segStart;
+                dictSize = segSize;
+
+                dst += segSize + 1;
+                segNb ++;
+
+                segStart += segSize + (FUZ_rand(&randState) & 0xF) + 1;
+                segSize = (FUZ_rand(&randState) & 8191);
+            }
+        }
     }
 
     printf("All unit tests completed succesfully \n");
-- 
cgit v0.12