diff options
-rw-r--r-- | lib/lz4.h | 14 | ||||
-rw-r--r-- | lib/lz4hc.c | 94 | ||||
-rw-r--r-- | programs/util.h | 2 | ||||
-rw-r--r-- | tests/fullbench.c | 34 | ||||
-rw-r--r-- | visual/.gitignore | 2 |
5 files changed, 79 insertions, 67 deletions
@@ -317,15 +317,19 @@ LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const * If less than 64KB of data has been decoded all the data must be present. * * Special : if application sets a ring buffer for decompression, it must respect one of the following conditions : - * - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions) - * In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB). - * - Larger than encoding buffer, by a minimum of maxBlockSize more bytes. - * maxBlockSize is implementation dependent. It's the maximum size of any single block. + * - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes. + * maxBlockSize is the maximum size of any single block. It is implementation dependent, and can have any value (presumed > 16 bytes). * In which case, encoding and decoding buffers do not need to be synchronized, * and encoding ring buffer can have any size, including small ones ( < 64 KB). - * - _At least_ 64 KB + 8 bytes + maxBlockSize. + * - Decompression buffer size is _at least_ 64 KB + 8 bytes + maxBlockSize. * In which case, encoding and decoding buffers do not need to be synchronized, * and encoding ring buffer can have any size, including larger than decoding buffer. + * - Decompression buffer size is exactly the same as compression buffer size, + * and follows exactly same update rule (block boundaries at same positions). + * If the decoding function is provided with the exact decompressed size of each block, + * then decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB). + * If the decoding function only knows the compressed size, + * then buffer size must be a minimum of 64 KB + 8 bytes + maxBlockSize. * Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer, * and indicate where it is saved using LZ4_setStreamDecode() before decompressing next block. */ diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 948d66d..bb1b1a6 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -138,6 +138,7 @@ int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match, { int back = 0; int const min = (int)MAX(iMin - ip, mMin - match); + assert(min <= 0); assert(ip >= iMin); assert((size_t)(ip-iMin) < (1U<<31)); assert(match >= mMin); assert((size_t)(match - mMin) < (1U<<31)); while ( (back > min) @@ -222,9 +223,9 @@ LZ4HC_InsertAndGetWiderMatch ( const U32 dictLimit = hc4->dictLimit; const BYTE* const lowPrefixPtr = base + dictLimit; const U32 ipIndex = (U32)(ip - base); - const U32 lowLimit = (hc4->lowLimit + 64 KB > ipIndex) ? hc4->lowLimit : ipIndex - MAX_DISTANCE; + const U32 lowestMatchIndex = (hc4->lowLimit + 64 KB > ipIndex) ? hc4->lowLimit : ipIndex - MAX_DISTANCE; const BYTE* const dictBase = hc4->dictBase; - int const delta = (int)(ip-iLowLimit); + int const lookBackLength = (int)(ip-iLowLimit); int nbAttempts = maxNbAttempts; U32 const pattern = LZ4_read32(ip); U32 matchIndex; @@ -236,10 +237,10 @@ LZ4HC_InsertAndGetWiderMatch ( /* First Match */ LZ4HC_Insert(hc4, ip); matchIndex = HashTable[LZ4HC_hashPtr(ip)]; - DEBUGLOG(7, "First match at index %u / %u (lowLimit)", - matchIndex, lowLimit); + DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)", + matchIndex, lowestMatchIndex); - while ((matchIndex>=lowLimit) && (nbAttempts)) { + while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) { DEBUGLOG(7, "remaining attempts : %i", nbAttempts); nbAttempts--; assert(matchIndex < ipIndex); @@ -247,34 +248,35 @@ LZ4HC_InsertAndGetWiderMatch ( /* do nothing */ } else if (matchIndex >= dictLimit) { const BYTE* const matchPtr = base + matchIndex; + assert(matchPtr >= lowPrefixPtr); + assert(matchPtr < ip); assert(longest >= 1); - if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - delta + longest - 1)) { + if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) { if (LZ4_read32(matchPtr) == pattern) { int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); - int const back = delta ? LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr) : 0; + int const back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr) : 0; mlt -= back; - if (mlt > longest) { longest = mlt; *matchpos = matchPtr+back; *startpos = ip+back; - } } - } + } } } } else { /* matchIndex < dictLimit */ const BYTE* const matchPtr = dictBase + matchIndex; if (LZ4_read32(matchPtr) == pattern) { + const BYTE* const dictStart = dictBase + hc4->lowLimit; int mlt; int back = 0; const BYTE* vLimit = ip + (dictLimit - matchIndex); if (vLimit > iHighLimit) vLimit = iHighLimit; mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; if ((ip+mlt == vLimit) && (vLimit < iHighLimit)) - mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit); - back = delta ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictBase+lowLimit) : 0; + mlt += LZ4_count(ip+mlt, lowPrefixPtr, iHighLimit); + back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictStart) : 0; mlt -= back; if (mlt > longest) { longest = mlt; - *matchpos = base + matchIndex + back; + *matchpos = base + matchIndex + back; /* virtual pos, relative to ip, to retrieve offset */ *startpos = ip + back; } } } @@ -306,13 +308,13 @@ LZ4HC_InsertAndGetWiderMatch ( matchIndex -= (U32)backLength; /* let's go to farthest segment position, will find a match of length currentSegmentLength + maybe some back */ } } } } } - } /* while ((matchIndex>=lowLimit) && (nbAttempts)) */ + } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */ - if (dict == usingDictCtx && nbAttempts && ipIndex - lowLimit < MAX_DISTANCE) { + if (dict == usingDictCtx && nbAttempts && ipIndex - lowestMatchIndex < MAX_DISTANCE) { size_t const dictEndOffset = dictCtx->end - dictCtx->base; assert(dictEndOffset <= 1 GB); dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)]; - matchIndex = dictMatchIndex + lowLimit - (U32)dictEndOffset; + matchIndex = dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset; while (ipIndex - matchIndex <= MAX_DISTANCE && nbAttempts--) { const BYTE* const matchPtr = dictCtx->base + dictMatchIndex; @@ -322,7 +324,7 @@ LZ4HC_InsertAndGetWiderMatch ( const BYTE* vLimit = ip + (dictEndOffset - dictMatchIndex); if (vLimit > iHighLimit) vLimit = iHighLimit; mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; - back = delta ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->base + dictCtx->dictLimit) : 0; + back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->base + dictCtx->dictLimit) : 0; mlt -= back; if (mlt > longest) { longest = mlt; @@ -459,14 +461,14 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain ( BYTE* op = (BYTE*) dest; BYTE* oend = op + maxOutputSize; - int ml, ml2, ml3, ml0; + int ml0, ml, ml2, ml3; + const BYTE* start0; + const BYTE* ref0; const BYTE* ref = NULL; const BYTE* start2 = NULL; const BYTE* ref2 = NULL; const BYTE* start3 = NULL; const BYTE* ref3 = NULL; - const BYTE* start0; - const BYTE* ref0; /* init */ *srcSizePtr = 0; @@ -479,31 +481,27 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain ( if (ml<MINMATCH) { ip++; continue; } /* saved, in case we would skip too much */ - start0 = ip; - ref0 = ref; - ml0 = ml; + start0 = ip; ref0 = ref; ml0 = ml; _Search2: - if (ip+ml <= mflimit) + if (ip+ml <= mflimit) { ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2, maxNbAttempts, patternAnalysis, dict, favorCompressionRatio); - else + } else { ml2 = ml; + } - if (ml2 == ml) { /* No better match */ + if (ml2 == ml) { /* No better match => encode ML1 */ optr = op; if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow; continue; } - if (start0 < ip) { - if (start2 < ip + ml0) { /* empirical */ - ip = start0; - ref = ref0; - ml = ml0; - } - } + if (start0 < ip) { /* first match was skipped at least once */ + if (start2 < ip + ml0) { /* squeezing ML1 between ML0(original ML1) and ML2 */ + ip = start0; ref = ref0; ml = ml0; /* restore initial ML1 */ + } } /* Here, start0==ip */ if ((start2 - ip) < 3) { /* First Match too small : removed */ @@ -531,14 +529,15 @@ _Search3: } /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */ - if (start2 + ml2 <= mflimit) + if (start2 + ml2 <= mflimit) { ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts, patternAnalysis, dict, favorCompressionRatio); - else + } else { ml3 = ml2; + } - if (ml3 == ml2) { /* No better match : 2 sequences to encode */ + if (ml3 == ml2) { /* No better match => encode ML1 and ML2 */ /* ip & ref are known; Now for ml */ if (start2 < ip+ml) ml = (int)(start2 - ip); /* Now, encode 2 sequences */ @@ -583,11 +582,12 @@ _Search3: } /* - * OK, now we have 3 ascending matches; let's write at least the first one - * ip & ref are known; Now for ml + * OK, now we have 3 ascending matches; + * let's write the first one ML1. + * ip & ref are known; Now decide ml. */ if (start2 < ip+ml) { - if ((start2 - ip) < (int)ML_MASK) { + if ((start2 - ip) < OPTIMAL_ML) { int correction; if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH; @@ -604,14 +604,13 @@ _Search3: optr = op; if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow; - ip = start2; - ref = ref2; - ml = ml2; + /* ML2 becomes ML1 */ + ip = start2; ref = ref2; ml = ml2; - start2 = start3; - ref2 = ref3; - ml2 = ml3; + /* ML3 becomes ML2 */ + start2 = start3; ref2 = ref3; ml2 = ml3; + /* let's find a new ML3 */ goto _Search3; } @@ -705,8 +704,6 @@ LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal ( ctx->end += *srcSizePtr; if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT; /* note : convention is different from lz4frame, maybe something to review */ cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel); - assert(cLevel >= 0); - assert(cLevel <= LZ4HC_CLEVEL_MAX); { cParams_t const cParam = clTable[cLevel]; HCfavor_e const favor = ctx->favorDecSpeed ? favorDecompressionSpeed : favorCompressionRatio; if (cParam.strat == lz4hc) @@ -905,7 +902,8 @@ void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock) { DEBUGLOG(4, "LZ4HC_setExternalDict(%p, %p)", ctxPtr, newBlock); - if (ctxPtr->end >= ctxPtr->base + 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */ + if (ctxPtr->end >= ctxPtr->base + ctxPtr->dictLimit + 4) + LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */ /* Only one memory segment for extDict, so any previous extDict is lost at this stage */ ctxPtr->lowLimit = ctxPtr->dictLimit; diff --git a/programs/util.h b/programs/util.h index ef6ca77..d74db0d 100644 --- a/programs/util.h +++ b/programs/util.h @@ -194,7 +194,7 @@ extern "C" { return ((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom); } -#elif (PLATFORM_POSIX_VERSION >= 200112L) && (defined __UCLIBC__ || ((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 17) || __GLIBC__ > 2)) +#elif (PLATFORM_POSIX_VERSION >= 200112L) && (defined __UCLIBC__ || (defined(__GLIBC__) && ((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 17) || __GLIBC__ > 2) ) ) #include <time.h> typedef struct timespec UTIL_time_t; diff --git a/tests/fullbench.c b/tests/fullbench.c index ee2966f..c06e230 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -267,13 +267,20 @@ static int local_LZ4_decompress_fast(const char* in, char* out, int inSize, int return outSize; } -static int local_LZ4_decompress_fast_usingDict(const char* in, char* out, int inSize, int outSize) +static int local_LZ4_decompress_fast_usingDict_prefix(const char* in, char* out, int inSize, int outSize) { (void)inSize; LZ4_decompress_fast_usingDict(in, out, outSize, out - 65536, 65536); return outSize; } +static int local_LZ4_decompress_fast_usingExtDict(const char* in, char* out, int inSize, int outSize) +{ + (void)inSize; + LZ4_decompress_fast_usingDict(in, out, outSize, out - 65536, 65535); + return outSize; +} + static int local_LZ4_decompress_safe_usingDict(const char* in, char* out, int inSize, int outSize) { (void)inSize; @@ -460,7 +467,7 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles) double averageTime; clock_t clockTime; - PROGRESS("%1i- %-28.28s :%9i ->\r", loopNb, compressorName, (int)benchedSize); + PROGRESS("%2i-%-34.34s :%10i ->\r", loopNb, compressorName, (int)benchedSize); { size_t i; for (i=0; i<benchedSize; i++) compressed_buff[i]=(char)i; } /* warming up memory */ nb_loops = 0; @@ -471,7 +478,8 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles) if (initFunction!=NULL) initFunction(); for (chunkNb=0; chunkNb<nbChunks; chunkNb++) { chunkP[chunkNb].compressedSize = compressionFunction(chunkP[chunkNb].origBuffer, chunkP[chunkNb].compressedBuffer, chunkP[chunkNb].origSize); - if (chunkP[chunkNb].compressedSize==0) DISPLAY("ERROR ! %s() = 0 !! \n", compressorName), exit(1); + if (chunkP[chunkNb].compressedSize==0) + DISPLAY("ERROR ! %s() = 0 !! \n", compressorName), exit(1); } nb_loops++; } @@ -482,13 +490,13 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles) if (averageTime < bestTime) bestTime = averageTime; cSize=0; for (chunkNb=0; chunkNb<nbChunks; chunkNb++) cSize += chunkP[chunkNb].compressedSize; ratio = (double)cSize/(double)benchedSize*100.; - PROGRESS("%1i- %-28.28s :%9i ->%9i (%5.2f%%),%7.1f MB/s\r", loopNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 1000000); + PROGRESS("%2i-%-34.34s :%10i ->%9i (%5.2f%%),%7.1f MB/s\r", loopNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 1000000); } if (ratio<100.) - DISPLAY("%2i-%-28.28s :%9i ->%9i (%5.2f%%),%7.1f MB/s\n", cAlgNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 1000000); + DISPLAY("%2i-%-34.34s :%10i ->%9i (%5.2f%%),%7.1f MB/s\n", cAlgNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 1000000); else - DISPLAY("%2i-%-28.28s :%9i ->%9i (%5.1f%%),%7.1f MB/s\n", cAlgNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 100000); + DISPLAY("%2i-%-34.34s :%10i ->%9i (%5.1f%%),%7.1f MB/s\n", cAlgNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 100000); } /* Prepare layout for decompression */ @@ -509,11 +517,12 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles) } for (chunkNb=0; chunkNb<nbChunks; chunkNb++) { chunkP[chunkNb].compressedSize = LZ4_compress_default(chunkP[chunkNb].origBuffer, chunkP[chunkNb].compressedBuffer, chunkP[chunkNb].origSize, maxCompressedChunkSize); - if (chunkP[chunkNb].compressedSize==0) DISPLAY("ERROR ! %s() = 0 !! \n", "LZ4_compress"), exit(1); + if (chunkP[chunkNb].compressedSize==0) + DISPLAY("ERROR ! %s() = 0 !! \n", "LZ4_compress"), exit(1); } /* Decompression Algorithms */ - for (dAlgNb=0; (dAlgNb <= NB_DECOMPRESSION_ALGORITHMS) && (g_decompressionTest); dAlgNb++) { + for (dAlgNb=0; (dAlgNb <= NB_DECOMPRESSION_ALGORITHMS) && g_decompressionTest; dAlgNb++) { const char* dName; int (*decompressionFunction)(const char*, char*, int, int); double bestTime = 100000000.; @@ -524,7 +533,8 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles) { case 0: DISPLAY("Decompression functions : \n"); continue; case 1: decompressionFunction = local_LZ4_decompress_fast; dName = "LZ4_decompress_fast"; break; - case 3: decompressionFunction = local_LZ4_decompress_fast_usingDict; dName = "LZ4_decompress_fast_usingDict"; break; + case 2: decompressionFunction = local_LZ4_decompress_fast_usingDict_prefix; dName = "LZ4_decompress_fast_usingDict(prefix)"; break; + case 3: decompressionFunction = local_LZ4_decompress_fast_usingExtDict; dName = "LZ4_decompress_fast_using(Ext)Dict"; break; case 4: decompressionFunction = LZ4_decompress_safe; dName = "LZ4_decompress_safe"; break; case 6: decompressionFunction = local_LZ4_decompress_safe_usingDict; dName = "LZ4_decompress_safe_usingDict"; break; case 7: decompressionFunction = local_LZ4_decompress_safe_partial; dName = "LZ4_decompress_safe_partial"; break; @@ -555,7 +565,7 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles) clock_t clockTime; U32 crcDecoded; - PROGRESS("%1i- %-29.29s :%10i ->\r", loopNb, dName, (int)benchedSize); + PROGRESS("%2i-%-34.34s :%10i ->\r", loopNb, dName, (int)benchedSize); nb_loops = 0; clockTime = clock(); @@ -574,14 +584,14 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles) averageTime = (double)clockTime / nb_loops / CLOCKS_PER_SEC; if (averageTime < bestTime) bestTime = averageTime; - PROGRESS("%1i- %-29.29s :%10i -> %7.1f MB/s\r", loopNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000000); + PROGRESS("%2i-%-34.34s :%10i -> %7.1f MB/s\r", loopNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000000); /* CRC Checking */ crcDecoded = XXH32(orig_buff, (int)benchedSize, 0); if (crcOriginal!=crcDecoded) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", inFileName, (unsigned)crcOriginal, (unsigned)crcDecoded); exit(1); } } - DISPLAY("%2i-%-29.29s :%10i -> %7.1f MB/s\n", dAlgNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000000); + DISPLAY("%2i-%-34.34s :%10i -> %7.1f MB/s\n", dAlgNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000000); } } free(orig_buff); diff --git a/visual/.gitignore b/visual/.gitignore index dea92fc..276f8f5 100644 --- a/visual/.gitignore +++ b/visual/.gitignore @@ -6,5 +6,5 @@ *.sdf *.suo *.user - +ver*/ VS2010/bin/ |