summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYann Collet <cyan@fb.com>2022-09-15 23:25:00 (GMT)
committerYann Collet <cyan@fb.com>2022-09-15 23:25:00 (GMT)
commitec0d3e6ec9feeac2ad535b97da70c0ce00a0b81e (patch)
treeace41bb515aa6dc7f09e14160032bddcaa8de2fb
parentc619263b1719b8fe7f5994e2ca39dff1a3fa4ebd (diff)
downloadlz4-ec0d3e6ec9feeac2ad535b97da70c0ce00a0b81e.zip
lz4-ec0d3e6ec9feeac2ad535b97da70c0ce00a0b81e.tar.gz
lz4-ec0d3e6ec9feeac2ad535b97da70c0ce00a0b81e.tar.bz2
fix benchmark more using Dictionary
benchmark dictionary mode, implemented in #808, is incorrect. It would desynchronize compression and decompression as soon as 2+ files are dictionary compressed. Also : slightly improved traces system, to also include __LINE__ number
-rw-r--r--lib/lz4.c29
-rw-r--r--lib/lz4hc.c13
-rw-r--r--programs/bench.c2
3 files changed, 35 insertions, 9 deletions
diff --git a/lib/lz4.c b/lib/lz4.c
index 8999137..3d5fba6 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -279,7 +279,7 @@ static const int LZ4_minLength = (MFLIMIT+1);
static int g_debuglog_enable = 1;
# define DEBUGLOG(l, ...) { \
if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \
- fprintf(stderr, __FILE__ ": "); \
+ fprintf(stderr, __FILE__ " %i: ", __LINE__); \
fprintf(stderr, __VA_ARGS__); \
fprintf(stderr, " \n"); \
} }
@@ -1991,6 +1991,7 @@ LZ4_decompress_generic(
}
/* Fast loop : decode sequences as long as output < oend-FASTLOOP_SAFE_DISTANCE */
+ DEBUGLOG(6, "using fast decode loop");
while (1) {
/* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
@@ -2001,7 +2002,10 @@ LZ4_decompress_generic(
/* decode literal length */
if (length == RUN_MASK) {
size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1);
- if (addl == rvl_error) { goto _output_error; }
+ if (addl == rvl_error) {
+ DEBUGLOG(6, "error reading long literal length");
+ goto _output_error;
+ }
length += addl;
if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
@@ -2024,6 +2028,7 @@ LZ4_decompress_generic(
/* get offset */
offset = LZ4_readLE16(ip); ip+=2;
+ DEBUGLOG(6, " offset = %zu", offset);
match = op - offset;
assert(match <= op); /* overflow check */
@@ -2032,11 +2037,17 @@ LZ4_decompress_generic(
if (length == ML_MASK) {
size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0);
- if (addl == rvl_error) { goto _output_error; }
+ if (addl == rvl_error) {
+ DEBUGLOG(6, "error reading long match length");
+ goto _output_error;
+ }
length += addl;
length += MINMATCH;
if (unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
- if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
+ if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) {
+ DEBUGLOG(6, "Error : offset outside buffers");
+ goto _output_error;
+ }
if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
goto safe_match_copy;
}
@@ -2060,7 +2071,10 @@ LZ4_decompress_generic(
continue;
} } }
- if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
+ if ( checkOffset && (unlikely(match + dictSize < lowPrefix)) ) {
+ DEBUGLOG(6, "Error : pos=%zi, offset=%zi => outside buffers", op-lowPrefix, op-match);
+ goto _output_error;
+ }
/* match starting within external dictionary */
if ((dict==usingExtDict) && (match < lowPrefix)) {
assert(dictEnd != NULL);
@@ -2069,7 +2083,8 @@ LZ4_decompress_generic(
DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
length = MIN(length, (size_t)(oend-op));
} else {
- goto _output_error; /* end-of-block condition violated */
+ DEBUGLOG(6, "end-of-block condition violated")
+ goto _output_error;
} }
if (length <= (size_t)(lowPrefix-match)) {
@@ -2109,6 +2124,7 @@ LZ4_decompress_generic(
#endif
/* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
+ DEBUGLOG(6, "using safe decode loop");
while (1) {
assert(ip < iend);
token = *ip++;
@@ -2416,6 +2432,7 @@ int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
int compressedSize, int maxOutputSize,
const void* dictStart, size_t dictSize)
{
+ DEBUGLOG(5, "LZ4_decompress_safe_forceExtDict");
return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
decode_full_block, usingExtDict,
(BYTE*)dest, (const BYTE*)dictStart, dictSize);
diff --git a/lib/lz4hc.c b/lib/lz4hc.c
index e83246b..fa3e014 100644
--- a/lib/lz4hc.c
+++ b/lib/lz4hc.c
@@ -102,6 +102,7 @@ static void LZ4HC_init_internal (LZ4HC_CCtx_internal* hc4, const BYTE* start)
{
size_t const bufferSize = (size_t)(hc4->end - hc4->prefixStart);
size_t newStartingOffset = bufferSize + hc4->dictLimit;
+ DEBUGLOG(5, "LZ4HC_init_internal");
assert(newStartingOffset >= bufferSize); /* check overflow */
if (newStartingOffset > 1 GB) {
LZ4HC_clearTables(hc4);
@@ -422,6 +423,7 @@ LZ4HC_InsertAndGetWiderMatch (
U32 dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)];
assert(dictEndOffset <= 1 GB);
matchIndex = dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset;
+ if (dictMatchIndex>0) DEBUGLOG(7, "dictEndOffset = %zu, dictMatchIndex = %u => relative matchIndex = %i", dictEndOffset, dictMatchIndex, (int)dictMatchIndex - (int)dictEndOffset);
while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) {
const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + dictMatchIndex;
@@ -437,6 +439,7 @@ LZ4HC_InsertAndGetWiderMatch (
longest = mlt;
*matchpos = prefixPtr - prefixIdx + matchIndex + back;
*startpos = ip + back;
+ DEBUGLOG(8, "found match of length %i at vPos=%i", longest, (int)matchIndex - (int)prefixIdx + back);
} }
{ U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, dictMatchIndex);
@@ -456,6 +459,7 @@ LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table wi
const dictCtx_directive dict)
{
const BYTE* uselessPtr = ip;
+ DEBUGLOG(7, "LZ4HC_InsertAndFindBestMatch");
/* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
* but this won't be the case here, as we define iLowLimit==ip,
* so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
@@ -585,6 +589,7 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
const BYTE* ref3 = NULL;
/* init */
+ DEBUGLOG(5, "LZ4HC_compress_hashChain (dict?=>%i)", dict);
*srcSizePtr = 0;
if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */
if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
@@ -831,8 +836,8 @@ LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal (
{ lz4opt,16384,LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */
};
- DEBUGLOG(4, "LZ4HC_compress_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)",
- ctx, src, *srcSizePtr, limit);
+ DEBUGLOG(5, "LZ4HC_compress_generic_internal(src=%p, srcSize=%d)",
+ src, *srcSizePtr);
if (limit == fillOutput && dstCapacity < 1) return 0; /* Impossible to store anything */
if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size (too large or negative) */
@@ -966,6 +971,7 @@ int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, in
LZ4_streamHC_t state;
LZ4_streamHC_t* const statePtr = &state;
#endif
+ DEBUGLOG(5, "LZ4_compress_HC")
cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, dstCapacity, compressionLevel);
#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
FREEMEM(statePtr);
@@ -1034,7 +1040,7 @@ void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
void LZ4_resetStreamHC_fast (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
{
LZ4HC_CCtx_internal* const s = &LZ4_streamHCPtr->internal_donotuse;
- DEBUGLOG(4, "LZ4_resetStreamHC_fast(%p, %d)", LZ4_streamHCPtr, compressionLevel);
+ DEBUGLOG(5, "LZ4_resetStreamHC_fast(%p, %d)", LZ4_streamHCPtr, compressionLevel);
if (s->dirty) {
LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr));
} else {
@@ -1150,6 +1156,7 @@ LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int srcSize, int dstCapacity)
{
+ DEBUGLOG(5, "LZ4_compress_HC_continue");
if (dstCapacity < LZ4_compressBound(srcSize))
return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, limitedOutput);
else
diff --git a/programs/bench.c b/programs/bench.c
index 4d35ef9..f7eb63d 100644
--- a/programs/bench.c
+++ b/programs/bench.c
@@ -242,6 +242,7 @@ LZ4_compressBlockStream(const struct compressionParameters* pThis,
int srcSize, int dstSize)
{
int const acceleration = (pThis->cLevel < 0) ? -pThis->cLevel + 1 : 1;
+ LZ4_compressResetStream(pThis);
return LZ4_compress_fast_continue(pThis->LZ4_stream, src, dst, srcSize, dstSize, acceleration);
}
@@ -250,6 +251,7 @@ LZ4_compressBlockStreamHC(const struct compressionParameters* pThis,
const char* src, char* dst,
int srcSize, int dstSize)
{
+ LZ4_compressResetStreamHC(pThis);
return LZ4_compress_HC_continue(pThis->LZ4_streamHC, src, dst, srcSize, dstSize);
}