summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Yann Collet <yann.collet.73@gmail.com> 2014-10-20 23:12:55 (GMT)
committer: Yann Collet <yann.collet.73@gmail.com> 2014-10-20 23:12:55 (GMT)
commit: e450018588560537c2c4b4b2dd3515a9ef3a83f7 (patch)
tree: 2b1144d85ee09dcef56f81fde5a696bb0c027207
parent: 3dab5f476a2e5a0cd4cd9a859e94a5110abda23d (diff)
download: lz4-e450018588560537c2c4b4b2dd3515a9ef3a83f7.zip
lz4-e450018588560537c2c4b4b2dd3515a9ef3a83f7.tar.gz
lz4-e450018588560537c2c4b4b2dd3515a9ef3a83f7.tar.bz2
LZ4 HC : fixed small dictionary streaming compression
-rwxr-xr-x [-rw-r--r--] lz4.c | 6
-rwxr-xr-x [-rw-r--r--] lz4hc.c | 26
-rwxr-xr-x [-rw-r--r--] programs/fuzzer.c | 59
3 files changed, 70 insertions, 21 deletions
diff --git a/lz4.c b/lz4.c
index 39f176f..8adf96a 100644..100755
--- a/lz4.c
+++ b/lz4.c
@@ -876,8 +876,8 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
* in order to remove useless branches during compilation optimization.
*/
FORCE_INLINE int LZ4_decompress_generic(
- const char* source,
- char* dest,
+ const char* const source,
+ char* const dest,
int inputSize,
int outputSize, /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer. */
@@ -885,7 +885,7 @@ FORCE_INLINE int LZ4_decompress_generic(
int partialDecoding, /* full, partial */
int targetOutputSize, /* only used if partialDecoding==partial */
int dict, /* noDict, withPrefix64k, usingExtDict */
- const char* dictStart, /* only if dict==usingExtDict */
+ const char* const dictStart, /* only if dict==usingExtDict */
int dictSize /* note : = 0 if noDict */
)
{
diff --git a/lz4hc.c b/lz4hc.c
index 72739a7..38e0ce4 100644..100755
--- a/lz4hc.c
+++ b/lz4hc.c
@@ -346,13 +346,13 @@ FORCE_INLINE void LZ4HC_init (LZ4HC_Data_Structure* hc4, const BYTE* base)
{
MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
- hc4->nextToUpdate = 1;
- hc4->base = base;
+ hc4->nextToUpdate = 64 KB;
+ hc4->base = base - 64 KB;
hc4->inputBuffer = base;
hc4->end = base;
- hc4->dictBase = base;
- hc4->dictLimit = 0;
- hc4->lowLimit = 0;
+ hc4->dictBase = base - 64 KB;
+ hc4->dictLimit = 64 KB;
+ hc4->lowLimit = 64 KB;
}
@@ -422,17 +422,17 @@ FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, // I
const BYTE* const base = hc4->base;
const BYTE* const dictBase = hc4->dictBase;
const U32 dictLimit = hc4->dictLimit;
+ const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
U32 matchIndex;
- const U32 idxLow = (ip-base) > 64 KB ? (U32)(ip - base) - 64 KB : 0;
const BYTE* match;
int nbAttempts=maxNbAttempts;
size_t ml=0;
- /* HC4 match finder */
+ /* HC4 match finder */
LZ4HC_Insert(hc4, ip);
matchIndex = HashTable[LZ4HC_hashPtr(ip)];
- while ((matchIndex>idxLow) && (nbAttempts))
+ while ((matchIndex>=lowLimit) && (nbAttempts))
{
nbAttempts--;
if (matchIndex >= dictLimit)
@@ -480,19 +480,19 @@ FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
U32* const HashTable = hc4->hashTable;
const BYTE* const base = hc4->base;
const U32 dictLimit = hc4->dictLimit;
- const U32 dictLowLimit = hc4->lowLimit;
+ const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
const BYTE* const dictBase = hc4->dictBase;
const BYTE* match;
U32 matchIndex;
- const U32 idxLow = (ip-base) > 64 KB ? (U32)(ip-base) - 64 KB : 0;
int nbAttempts = maxNbAttempts;
int delta = (int)(ip-iLowLimit);
- /* First Match */
+
+ /* First Match */
LZ4HC_Insert(hc4, ip);
matchIndex = HashTable[LZ4HC_hashPtr(ip)];
- while ((matchIndex>idxLow) && (nbAttempts))
+ while ((matchIndex>=lowLimit) && (nbAttempts))
{
nbAttempts--;
if (matchIndex >= dictLimit)
@@ -527,7 +527,7 @@ FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
mlt = LZ4HC_CommonLength(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
if ((ip+mlt == vLimit) && (vLimit < iHighLimit))
mlt += LZ4HC_CommonLength(ip+mlt, base+dictLimit, iHighLimit);
- while ((ip+back > iLowLimit) && (matchIndex+back > dictLowLimit) && (ip[back-1] == match[back-1])) back--;
+ while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == match[back-1])) back--;
mlt -= back;
if ((int)mlt > longest) { longest = (int)mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; }
}
diff --git a/programs/fuzzer.c b/programs/fuzzer.c
index cf56251..225361b 100644..100755
--- a/programs/fuzzer.c
+++ b/programs/fuzzer.c
@@ -673,7 +673,7 @@ static void FUZ_unitTests(void)
const unsigned cycleNb= 0;
char testInput[testInputSize];
char testCompressed[testCompressedSize];
- char testVerify[testCompressedSize];
+ char testVerify[testInputSize];
U32 randState = 0;
// Init
@@ -719,7 +719,7 @@ static void FUZ_unitTests(void)
crcNew = XXH64(testVerify, testCompressedSize, 0);
FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() dictionary decompression corruption");
- // dictionary multi compression test
+ // multiple HC compression test with dictionary
{
int result1, result2;
int segSize = testCompressedSize / 2;
@@ -727,9 +727,9 @@ static void FUZ_unitTests(void)
LZ4_resetStreamHC(&sHC, 0);
LZ4_loadDictHC(&sHC, testInput, segSize);
result1 = LZ4_compressHC_limitedOutput_continue(&sHC, testInput + segSize, testCompressed, segSize, segSize -1);
- FUZ_CHECKTEST(result1==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result);
- result2 = LZ4_compressHC_limitedOutput_continue(&sHC, testInput + 2*segSize, testCompressed+result1, segSize, segSize -1);
- FUZ_CHECKTEST(result2==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result);
+ FUZ_CHECKTEST(result1==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result1);
+ result2 = LZ4_compressHC_limitedOutput_continue(&sHC, testInput + 2*segSize, testCompressed+result1, segSize, segSize-1);
+ FUZ_CHECKTEST(result2==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result2);
result = LZ4_decompress_safe_usingDict(testCompressed, testVerify, result1, segSize, testInput, segSize);
FUZ_CHECKTEST(result!=segSize, "LZ4_decompress_safe() dictionary decompression part 1 failed");
@@ -750,6 +750,55 @@ static void FUZ_unitTests(void)
FUZ_CHECKTEST(result!=(int)testCompressedSize, "LZ4_decompress_safe_usingDict() decompression failed following remote dictionary HC compression test");
crcNew = XXH64(testVerify, testCompressedSize, 0);
FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_usingDict() decompression corruption");
+
+ // multiple HC compression with ext. dictionary
+ {
+ XXH64_state_t crcOrigState;
+ XXH64_state_t crcNewState;
+ const char* dict = testInput + 3;
+ int dictSize = (FUZ_rand(&randState) & 8191);
+ char* dst = testVerify;
+
+ size_t segStart = dictSize + 7;
+ int segSize = (FUZ_rand(&randState) & 8191);
+ int segNb = 1;
+
+ LZ4_resetStreamHC(&sHC, 0);
+ LZ4_loadDictHC(&sHC, dict, dictSize);
+
+ XXH64_reset(&crcOrigState, 0);
+ XXH64_reset(&crcNewState, 0);
+
+ while (segStart + segSize < testInputSize)
+ {
+ XXH64_update(&crcOrigState, testInput + segStart, segSize);
+ crcOrig = XXH64_digest(&crcOrigState);
+ result = LZ4_compressHC_limitedOutput_continue(&sHC, testInput + segStart, testCompressed, segSize, LZ4_compressBound(segSize));
+ FUZ_CHECKTEST(result==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result);
+
+ result = LZ4_decompress_safe_usingDict(testCompressed, dst, result, segSize, dict, dictSize);
+ FUZ_CHECKTEST(result!=segSize, "LZ4_decompress_safe_usingDict() dictionary decompression part %i failed", segNb);
+ XXH64_update(&crcNewState, dst, segSize);
+ crcNew = XXH64_digest(&crcNewState);
+ if (crcOrig!=crcNew)
+ {
+ size_t c=0;
+ while (dst[c] == testInput[segStart+c]) c++;
+ DISPLAY("Bad decompression at %u / %u \n", (U32)c, (U32)segSize);
+ }
+ FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_usingDict() part %i corruption", segNb);
+
+ dict = dst;
+ //dict = testInput + segStart;
+ dictSize = segSize;
+
+ dst += segSize + 1;
+ segNb ++;
+
+ segStart += segSize + (FUZ_rand(&randState) & 0xF) + 1;
+ segSize = (FUZ_rand(&randState) & 8191);
+ }
+ }
}
printf("All unit tests completed succesfully \n");