From d8aafe2c52b4b16da9e8d289ad9bdb04b0bc4a05 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 10 Aug 2017 00:48:19 -0700 Subject: dictionary compression correctly uses compression level Not obvious : copying the state was copying cdict's compression level --- doc/lz4_manual.html | 36 ++++++++++++++++++++---------------- doc/lz4frame_manual.html | 2 +- lib/lz4.h | 36 ++++++++++++++++++------------------ lib/lz4frame.c | 23 +++++++++++++---------- lib/lz4frame.h | 4 +++- lib/lz4hc.c | 12 ++++++++++++ lib/lz4hc.h | 23 ++++++++++++++++------- tests/frametest.c | 24 ++++++++++++++++++++++++ 8 files changed, 107 insertions(+), 53 deletions(-) diff --git a/doc/lz4_manual.html b/doc/lz4_manual.html index 60b41a8..ecc985d 100644 --- a/doc/lz4_manual.html +++ b/doc/lz4_manual.html @@ -175,8 +175,8 @@ int LZ4_freeStream (LZ4_stream_t* streamPtr); Important : Previous data blocks are assumed to remain present and unmodified ! 'dst' buffer must be already allocated. If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster. - If not, and if compressed data cannot fit into 'dst' buffer size, compression stops, and function returns a zero. - After an error, the stream status is invalid, and it can only be reset or freed. + If not, and if compressed data cannot fit into 'dst' buffer size, compression stops, and function @return==0. + After an error, the stream status is invalid, it can only be reset or freed.


@@ -205,20 +205,20 @@ int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxDecompressedSize);
 int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize);
-

These decoding functions allow decompression of multiple blocks in "streaming" mode. - Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB) - In the case of a ring buffers, decoding buffer must be either : - - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions) - In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB). - - Larger than encoding buffer, by a minimum of maxBlockSize more bytes. - maxBlockSize is implementation dependent. It's the maximum size you intend to compress into a single block. - In which case, encoding and decoding buffers do not need to be synchronized, - and encoding ring buffer can have any size, including small ones ( < 64 KB). - - _At least_ 64 KB + 8 bytes + maxBlockSize. - In which case, encoding and decoding buffers do not need to be synchronized, - and encoding ring buffer can have any size, including larger than decoding buffer. - Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer, - and indicate where it is saved using LZ4_setStreamDecode() +

These decoding functions allow decompression of multiple blocks in "streaming" mode. + Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB) + In the case of a ring buffers, decoding buffer must be either : + - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions) + In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB). + - Larger than encoding buffer, by a minimum of maxBlockSize more bytes. + maxBlockSize is implementation dependent. It's the maximum size you intend to compress into a single block. + In which case, encoding and decoding buffers do not need to be synchronized, + and encoding ring buffer can have any size, including small ones ( < 64 KB). + - _At least_ 64 KB + 8 bytes + maxBlockSize. + In which case, encoding and decoding buffers do not need to be synchronized, + and encoding ring buffer can have any size, including larger than decoding buffer. + Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer, + and indicate where it is saved using LZ4_setStreamDecode()


int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize);
@@ -321,5 +321,9 @@ union LZ4_streamDecode_u {
    Otherwise, it's also possible to define LZ4_DISABLE_DEPRECATE_WARNINGS 
 


+

Experimental API section

#ifdef LZ4_HC_STATIC_LINKING_ONLY
+#ifndef LZ4_SLO_299387928743
+#define LZ4_SLO_299387928743
+

diff --git a/doc/lz4frame_manual.html b/doc/lz4frame_manual.html index dc8251e..a1a6e96 100644 --- a/doc/lz4frame_manual.html +++ b/doc/lz4frame_manual.html @@ -74,7 +74,7 @@ LZ4F_contentChecksum_t contentChecksumFlag;
/* noContentChecksum, contentChecksumEnabled ; 0 == default */ LZ4F_frameType_t frameType; /* LZ4F_frame, skippableFrame ; 0 == default */ unsigned long long contentSize; /* Size of uncompressed content ; 0 == unknown */ - unsigned dictID; /* Dictionary ID, sent by the compressor, to help the decoder select the right dictionary; 0 == no dictionary used */ + unsigned dictID; /* Dictionary ID, sent by the compressor to help decoder select the correct dictionary; 0 == no dictID provided */ unsigned reserved[1]; /* must be zero for forward compatibility */ } LZ4F_frameInfo_t;

makes it possible to supply detailed frame parameters to the stream interface. diff --git a/lib/lz4.h b/lib/lz4.h index c5b6103..86ca0d5 100644 --- a/lib/lz4.h +++ b/lib/lz4.h @@ -259,8 +259,8 @@ LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, in * Important : Previous data blocks are assumed to remain present and unmodified ! * 'dst' buffer must be already allocated. * If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster. - * If not, and if compressed data cannot fit into 'dst' buffer size, compression stops, and function returns a zero. - * After an error, the stream status is invalid, and it can only be reset or freed. + * If not, and if compressed data cannot fit into 'dst' buffer size, compression stops, and function @return==0. + * After an error, the stream status is invalid, it can only be reset or freed. */ LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); @@ -291,22 +291,21 @@ LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_str */ LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize); -/*! -LZ4_decompress_*_continue() : - These decoding functions allow decompression of multiple blocks in "streaming" mode. - Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB) - In the case of a ring buffers, decoding buffer must be either : - - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions) - In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB). - - Larger than encoding buffer, by a minimum of maxBlockSize more bytes. - maxBlockSize is implementation dependent. It's the maximum size you intend to compress into a single block. - In which case, encoding and decoding buffers do not need to be synchronized, - and encoding ring buffer can have any size, including small ones ( < 64 KB). - - _At least_ 64 KB + 8 bytes + maxBlockSize. - In which case, encoding and decoding buffers do not need to be synchronized, - and encoding ring buffer can have any size, including larger than decoding buffer. - Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer, - and indicate where it is saved using LZ4_setStreamDecode() +/*! LZ4_decompress_*_continue() : + * These decoding functions allow decompression of multiple blocks in "streaming" mode. + * Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB) + * In the case of a ring buffers, decoding buffer must be either : + * - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions) + * In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB). + * - Larger than encoding buffer, by a minimum of maxBlockSize more bytes. + * maxBlockSize is implementation dependent. It's the maximum size you intend to compress into a single block. + * In which case, encoding and decoding buffers do not need to be synchronized, + * and encoding ring buffer can have any size, including small ones ( < 64 KB). + * - _At least_ 64 KB + 8 bytes + maxBlockSize. + * In which case, encoding and decoding buffers do not need to be synchronized, + * and encoding ring buffer can have any size, including larger than decoding buffer. + * Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer, + * and indicate where it is saved using LZ4_setStreamDecode() */ LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxDecompressedSize); LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize); @@ -458,6 +457,7 @@ LZ4LIB_API LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") int LZ4 #endif /* LZ4_H_2983827168210 */ + #if defined (__cplusplus) } #endif diff --git a/lib/lz4frame.c b/lib/lz4frame.c index 180658d..9aeb456 100644 --- a/lib/lz4frame.c +++ b/lib/lz4frame.c @@ -61,6 +61,7 @@ You can contact the author at : **************************************/ #include "lz4frame_static.h" #include "lz4.h" +#define LZ4_HC_STATIC_LINKING_ONLY #include "lz4hc.h" #define XXH_STATIC_LINKING_ONLY #include "xxhash.h" @@ -217,6 +218,8 @@ static LZ4F_errorCode_t err0r(LZ4F_errorCodes code) unsigned LZ4F_getVersion(void) { return LZ4F_VERSION; } +int LZ4F_compressionLevel_max(void) { return LZ4HC_CLEVEL_MAX; } + /*-************************************ * Private functions @@ -308,7 +311,7 @@ size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* prefere * dstBuffer MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr). * The LZ4F_preferences_t structure is optional : you may provide NULL as argument, * however, it's the only way to provide a dictID, so it's not recommended. - * @return : number of bytes written into dstBuffer. + * @return : number of bytes written into dstBuffer, * or an error code if it fails (can be tested using LZ4F_isError()) */ size_t LZ4F_compressFrame_usingCDict(void* dstBuffer, size_t dstCapacity, @@ -535,8 +538,10 @@ size_t LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctxPtr, if (cdict) { if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) { memcpy(cctxPtr->lz4CtxPtr, cdict->fastCtx, sizeof(*cdict->fastCtx)); - } else + } else { memcpy(cctxPtr->lz4CtxPtr, cdict->HCCtx, sizeof(*cdict->HCCtx)); + LZ4_setCompressionLevel((LZ4_streamHC_t*)cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel); + } } else { if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) LZ4_resetStream((LZ4_stream_t*)(cctxPtr->lz4CtxPtr)); @@ -659,14 +664,12 @@ static int LZ4F_localSaveDict(LZ4F_cctx_t* cctxPtr) typedef enum { notDone, fromTmpBuffer, fromSrcBuffer } LZ4F_lastBlockStatus; /*! LZ4F_compressUpdate() : -* LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary. -* The most important rule is that dstBuffer MUST be large enough (dstCapacity) to ensure compression completion even in worst case. -* If this condition is not respected, LZ4F_compress() will fail (result is an errorCode) -* You can get the minimum value of dstCapacity by using LZ4F_compressBound() -* The LZ4F_compressOptions_t structure is optional : you can provide NULL as argument. -* The result of the function is the number of bytes written into dstBuffer : it can be zero, meaning input data was just buffered. -* The function outputs an error code if it fails (can be tested using LZ4F_isError()) -*/ + * LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary. + * dstBuffer MUST be >= LZ4F_compressBound(srcSize, preferencesPtr). + * LZ4F_compressOptions_t structure is optional : you can provide NULL as argument. + * @return : the number of bytes written into dstBuffer. It can be zero, meaning input data was just buffered. + * or an error code if it fails (which can be tested using LZ4F_isError()) + */ size_t LZ4F_compressUpdate(LZ4F_cctx* cctxPtr, void* dstBuffer, size_t dstCapacity, const void* srcBuffer, size_t srcSize, const LZ4F_compressOptions_t* compressOptionsPtr) { LZ4F_compressOptions_t cOptionsNull; diff --git a/lib/lz4frame.h b/lib/lz4frame.h index 42e2f53..307d7b2 100644 --- a/lib/lz4frame.h +++ b/lib/lz4frame.h @@ -162,7 +162,7 @@ typedef struct { LZ4F_contentChecksum_t contentChecksumFlag; /* noContentChecksum, contentChecksumEnabled ; 0 == default */ LZ4F_frameType_t frameType; /* LZ4F_frame, skippableFrame ; 0 == default */ unsigned long long contentSize; /* Size of uncompressed content ; 0 == unknown */ - unsigned dictID; /* Dictionary ID, sent by the compressor, to help the decoder select the right dictionary; 0 == no dictionary used */ + unsigned dictID; /* Dictionary ID, sent by the compressor to help decoder select the correct dictionary; 0 == no dictID provided */ unsigned reserved[1]; /* must be zero for forward compatibility */ } LZ4F_frameInfo_t; @@ -177,6 +177,8 @@ typedef struct { unsigned reserved[4]; /* must be zero for forward compatibility */ } LZ4F_preferences_t; +LZ4FLIB_API int LZ4F_compressionLevel_max(); + /*-********************************* * Simple compression function diff --git a/lib/lz4hc.c b/lib/lz4hc.c index b17760d..22eb071 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -69,6 +69,8 @@ /*=== Macros ===*/ +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) +#define MAX(a,b) ( (a) > (b) ? (a) : (b) ) #define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG)) #define DELTANEXTMAXD(p) chainTable[(p) & LZ4HC_MAXD_MASK] /* flexible, LZ4HC_MAXD dependent */ #define DELTANEXTU16(table, pos) table[(U16)(pos)] /* faster */ @@ -627,6 +629,16 @@ void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) LZ4_streamHCPtr->internal_donotuse.searchNum = LZ4HC_getSearchNum(compressionLevel); } +void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) +{ + int const currentCLevel = LZ4_streamHCPtr->internal_donotuse.compressionLevel; + int const minCLevel = currentCLevel < LZ4HC_CLEVEL_OPT_MIN ? 1 : LZ4HC_CLEVEL_OPT_MIN; + int const maxCLevel = currentCLevel < LZ4HC_CLEVEL_OPT_MIN ? LZ4HC_CLEVEL_OPT_MIN-1 : LZ4HC_CLEVEL_MAX; + compressionLevel = MIN(compressionLevel, minCLevel); + compressionLevel = MAX(compressionLevel, maxCLevel); + LZ4_streamHCPtr->internal_donotuse.compressionLevel = compressionLevel; +} + int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int dictSize) { LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; diff --git a/lib/lz4hc.h b/lib/lz4hc.h index b63c825..66d5636 100644 --- a/lib/lz4hc.h +++ b/lib/lz4hc.h @@ -161,7 +161,7 @@ typedef struct typedef struct { unsigned int hashTable[LZ4HC_HASHTABLESIZE]; - unsigned short chainTable[LZ4HC_MAXD]; + unsigned short chainTable[LZ4HC_MAXD]; const unsigned char* end; /* next block here to continue on current prefix */ const unsigned char* base; /* All index relative to this position */ const unsigned char* dictBase; /* alternate base for extDict */ @@ -245,25 +245,34 @@ LZ4LIB_API LZ4_DEPRECATED("use LZ4_resetStreamHC() instead") int LZ4_resetStr * or 0 if compression fails. * `srcSizePtr` : value will be updated to indicate how much bytes were read from `src` */ -LZ4LIB_API int LZ4_compress_HC_destSize(void* LZ4HC_Data, +int LZ4_compress_HC_destSize(void* LZ4HC_Data, const char* src, char* dst, int* srcSizePtr, int targetDstSize, int compressionLevel); -/*! LZ4_compress_HC_continue_destSize() : +/*! LZ4_compress_HC_continue_destSize() : v1.8.0 (experimental) * Similar as LZ4_compress_HC_continue(), * but will read a variable nb of bytes from `src` * to fit into `targetDstSize` budget. * Result is provided in 2 parts : * @return : the number of bytes written into 'dst' * or 0 if compression fails. - * `srcSizePtr` : value will be updated to indicate how much bytes were read from `src` - * Important : due to limitations, this prototype only works well up to cLevel < LZ4HC_CLEVEL_OPT_MIN - * beyond that level, compression performance will be much reduced due to internal incompatibilities + * `srcSizePtr` : value will be updated to indicate how much bytes were read from `src`. + * Important : due to limitations, this prototype only works well up to cLevel < LZ4HC_CLEVEL_OPT_MIN + * beyond that level, compression performance will be much reduced due to internal incompatibilities */ -LZ4LIB_API int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr, +int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int targetDstSize); +/*! LZ4_setCompressionLevel() : v1.8.0 (experimental) + * It's possible to change compression level after LZ4_resetStreamHC(), between 2 invocations of LZ4_compress_HC_continue*(), + * but that requires to stay in the same mode (aka 1-10 or 11-12). + * This function ensures this condition. + */ +void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel); + + + #endif /* LZ4_HC_SLO_098092834 */ #endif /* LZ4_HC_STATIC_LINKING_ONLY */ diff --git a/tests/frametest.c b/tests/frametest.c index 1ee0d96..4723aac 100644 --- a/tests/frametest.c +++ b/tests/frametest.c @@ -508,6 +508,30 @@ int basicTests(U32 seed, double compressibility) DISPLAYLEVEL(3, "%u bytes \n", (unsigned)cSizeWithDict); if (cSizeWithDict >= cSizeNoDict) goto _output_error; /* must be more efficient */ + DISPLAYLEVEL(3, "LZ4F_compressFrame_usingCDict, with dict, negative level : "); + { size_t cSizeLevelMax; + LZ4F_preferences_t cParams; + memset(&cParams, 0, sizeof(cParams)); + cParams.compressionLevel = -3; + CHECK_V(cSizeLevelMax, + LZ4F_compressFrame_usingCDict(compressedBuffer, dstCapacity, + CNBuffer, dictSize, + cdict, &cParams) ); + DISPLAYLEVEL(3, "%u bytes \n", (unsigned)cSizeLevelMax); + } + + DISPLAYLEVEL(3, "LZ4F_compressFrame_usingCDict, with dict, level max : "); + { size_t cSizeLevelMax; + LZ4F_preferences_t cParams; + memset(&cParams, 0, sizeof(cParams)); + cParams.compressionLevel = LZ4F_compressionLevel_max(); + CHECK_V(cSizeLevelMax, + LZ4F_compressFrame_usingCDict(compressedBuffer, dstCapacity, + CNBuffer, dictSize, + cdict, &cParams) ); + DISPLAYLEVEL(3, "%u bytes \n", (unsigned)cSizeLevelMax); + } + LZ4F_freeCDict(cdict); } -- cgit v0.12