From 31f2cdf4d214edcd9d4496058b0f9a4c4f8c0fad Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 9 Aug 2017 16:51:19 -0700 Subject: implemented dictionary compression in lz4frame note : only compression API is implemented and tested still to do : decompression API --- doc/lz4_manual.html | 2 +- doc/lz4frame_manual.html | 36 ++++---- lib/lz4.c | 7 +- lib/lz4.h | 2 +- lib/lz4frame.c | 227 +++++++++++++++++++++++++++++++++-------------- lib/lz4frame.h | 39 ++++---- lib/lz4frame_static.h | 44 +++++++++ lib/lz4hc.c | 12 ++- tests/frametest.c | 28 +++++- 9 files changed, 285 insertions(+), 112 deletions(-) diff --git a/doc/lz4_manual.html b/doc/lz4_manual.html index fecd8cd..60b41a8 100644 --- a/doc/lz4_manual.html +++ b/doc/lz4_manual.html @@ -277,7 +277,7 @@ union LZ4_stream_u { init this structure before first use. note : only use in association with static linking ! this definition is not API/ABI safe, - and may change in a future version ! + it may change in a future version !


diff --git a/doc/lz4frame_manual.html b/doc/lz4frame_manual.html index e6873c1..dc8251e 100644 --- a/doc/lz4frame_manual.html +++ b/doc/lz4frame_manual.html @@ -69,13 +69,13 @@ } LZ4F_frameType_t;
typedef struct {
-  LZ4F_blockSizeID_t     blockSizeID;           /* max64KB, max256KB, max1MB, max4MB ; 0 == default */
-  LZ4F_blockMode_t       blockMode;             /* blockLinked, blockIndependent ; 0 == default */
-  LZ4F_contentChecksum_t contentChecksumFlag;   /* noContentChecksum, contentChecksumEnabled ; 0 == default  */
-  LZ4F_frameType_t       frameType;             /* LZ4F_frame, skippableFrame ; 0 == default */
-  unsigned long long     contentSize;           /* Size of uncompressed (original) content ; 0 == unknown */
-  unsigned               dictID;                /* Dictionary ID, sent by the compressor, to help the decoder select the right dictionary; 0 == no dictionary used */
-  unsigned               reserved[1];           /* must be zero for forward compatibility */
+  LZ4F_blockSizeID_t     blockSizeID;          /* max64KB, max256KB, max1MB, max4MB ; 0 == default */
+  LZ4F_blockMode_t       blockMode;            /* blockLinked, blockIndependent ; 0 == default */
+  LZ4F_contentChecksum_t contentChecksumFlag;  /* noContentChecksum, contentChecksumEnabled ; 0 == default  */
+  LZ4F_frameType_t       frameType;            /* LZ4F_frame, skippableFrame ; 0 == default */
+  unsigned long long     contentSize;          /* Size of uncompressed content ; 0 == unknown */
+  unsigned               dictID;               /* Dictionary ID, sent by the compressor, to help the decoder select the right dictionary; 0 == no dictionary used */
+  unsigned               reserved[1];          /* must be zero for forward compatibility */
 } LZ4F_frameInfo_t;
 

makes it possible to supply detailed frame parameters to the stream interface. It's not required to set all fields, as long as the structure was initially memset() to zero. @@ -85,7 +85,7 @@

typedef struct {
   LZ4F_frameInfo_t frameInfo;
   int      compressionLevel;       /* 0 == default (fast mode); values above LZ4HC_CLEVEL_MAX count as LZ4HC_CLEVEL_MAX; values below 0 trigger "fast acceleration", proportional to value */
-  unsigned autoFlush;              /* 1 == always flush (reduce usage of tmp buffer) */
+  unsigned autoFlush;              /* 1 == always flush, to reduce usage of internal buffers */
   unsigned reserved[4];            /* must be zero for forward compatibility */
 } LZ4F_preferences_t;
 

makes it possible to supply detailed compression parameters to the stream interface. @@ -101,12 +101,12 @@


-
size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity, const void* srcBuffer, size_t srcSize, const LZ4F_preferences_t* preferencesPtr);
-

Compress an entire srcBuffer into a valid LZ4 frame, as defined by specification v1.5.1 - An important rule is that dstBuffer MUST be large enough (dstCapacity) to store the result in worst case situation. - This value is supplied by LZ4F_compressFrameBound(). - If this condition is not respected, LZ4F_compressFrame() will fail (result is an errorCode). - The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will be set to default. +

size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity,
+                                const void* srcBuffer, size_t srcSize,
+                                const LZ4F_preferences_t* preferencesPtr);
+

Compress an entire srcBuffer into a valid LZ4 frame. + dstCapacity MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr). + The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will be set to default. @return : number of bytes written into dstBuffer. or an error code if it fails (can be tested using LZ4F_isError()) @@ -134,9 +134,11 @@ LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx);

Compression


 
-
size_t LZ4F_compressBegin(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const LZ4F_preferences_t* prefsPtr);
-

will write the frame header into dstBuffer. - dstCapacity must be large enough to store the header. Maximum header size is LZ4F_HEADER_SIZE_MAX bytes. +

size_t LZ4F_compressBegin(LZ4F_cctx* cctx,
+                                      void* dstBuffer, size_t dstCapacity,
+                                      const LZ4F_preferences_t* prefsPtr);
+

will write the frame header into dstBuffer. + dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes. `prefsPtr` is optional : you can provide NULL as argument, all preferences will then be set to default. @return : number of bytes written into dstBuffer for the header or an error code (which can be tested using LZ4F_isError()) diff --git a/lib/lz4.c b/lib/lz4.c index 87ec6ab..69ee3eb 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -938,6 +938,7 @@ void LZ4_resetStream (LZ4_stream_t* LZ4_stream) int LZ4_freeStream (LZ4_stream_t* LZ4_stream) { + if (!LZ4_stream) return 0; /* support free on NULL */ FREEMEM(LZ4_stream); return (0); } @@ -1277,11 +1278,6 @@ int LZ4_decompress_fast(const char* source, char* dest, int originalSize) /*===== streaming decompression functions =====*/ -/* - * If you prefer dynamic allocation methods, - * LZ4_createStreamDecode() - * provides a pointer (void*) towards an initialized LZ4_streamDecode_t structure. - */ LZ4_streamDecode_t* LZ4_createStreamDecode(void) { LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOCATOR(1, sizeof(LZ4_streamDecode_t)); @@ -1290,6 +1286,7 @@ LZ4_streamDecode_t* LZ4_createStreamDecode(void) int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream) { + if (!LZ4_stream) return 0; /* support free on NULL */ FREEMEM(LZ4_stream); return 0; } diff --git a/lib/lz4.h b/lib/lz4.h index 5b6bc92..c5b6103 100644 --- a/lib/lz4.h +++ b/lib/lz4.h @@ -380,7 +380,7 @@ typedef struct { * init this structure before first use. * note : only use in association with static linking ! * this definition is not API/ABI safe, - * and may change in a future version ! + * it may change in a future version ! */ #define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4) #define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long)) diff --git a/lib/lz4frame.c b/lib/lz4frame.c index c2d6d5c..f9af991 100644 --- a/lib/lz4frame.c +++ b/lib/lz4frame.c @@ -170,6 +170,7 @@ typedef struct LZ4F_cctx_s LZ4F_preferences_t prefs; U32 version; U32 cStage; + int dictionary; size_t maxBlockSize; size_t maxBufferSize; BYTE* tmpBuff; @@ -178,7 +179,7 @@ typedef struct LZ4F_cctx_s U64 totalInSize; XXH32_state_t xxh; void* lz4CtxPtr; - U32 lz4CtxLevel; /* 0: unallocated; 1: LZ4_stream_t; 3: LZ4_streamHC_t */ + U32 lz4CtxLevel; /* 0: unallocated; 1: LZ4_stream_t; 3: LZ4_streamHC_t */ } LZ4F_cctx_t; @@ -256,7 +257,7 @@ static LZ4F_blockSizeID_t LZ4F_optimalBSID(const LZ4F_blockSizeID_t requestedBSI return requestedBSID; } -/* LZ4F_compressBound() : +/* LZ4F_compressBound_internal() : * Provides dstCapacity given a srcSize to guarantee operation success in worst case situations. * prefsPtr is optional : if NULL is provided, preferences will be set to cover worst case scenario. * Result is always the same for a srcSize and prefsPtr, so it can be relied upon to size reusable buffers. @@ -301,16 +302,19 @@ size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* prefere } -/*! LZ4F_compressFrame() : - * Compress an entire srcBuffer into a valid LZ4 frame, as defined by specification v1.5.0, in a single step. - * The most important rule is that dstBuffer MUST be large enough (dstCapacity) to ensure compression completion even in worst case. - * If this condition is not respected, LZ4F_compressFrame() will fail (result is an errorCode) - * Get the minimum value of dstCapacity by using LZ4F_compressFrameBound(). - * The LZ4F_preferences_t structure is optional : if NULL is provided as argument, preferences will be set to default. - * The result of the function is the number of bytes written into dstBuffer. - * The function outputs an error code if it fails (can be tested using LZ4F_isError()) +/*! LZ4F_compressFrame_usingCDict() : + * Compress srcBuffer using a dictionary, in a single step. + * cdict can be NULL, in which case, no dictionary is used. + * dstBuffer MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr). + * The LZ4F_preferences_t structure is optional : you may provide NULL as argument, + * however, it's the only way to provide a dictID, so it's not recommended. + * @return : number of bytes written into dstBuffer. + * or an error code if it fails (can be tested using LZ4F_isError()) */ -size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity, const void* srcBuffer, size_t srcSize, const LZ4F_preferences_t* preferencesPtr) +size_t LZ4F_compressFrame_usingCDict(void* dstBuffer, size_t dstCapacity, + const void* srcBuffer, size_t srcSize, + const LZ4F_CDict* cdict, + const LZ4F_preferences_t* preferencesPtr) { LZ4F_cctx_t cctxI; LZ4_stream_t lz4ctx; @@ -321,10 +325,12 @@ size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity, const void* srcBu BYTE* const dstEnd = dstStart + dstCapacity; memset(&cctxI, 0, sizeof(cctxI)); - memset(&options, 0, sizeof(options)); - cctxI.version = LZ4F_VERSION; - cctxI.maxBufferSize = 5 MB; /* mess with real buffer size to prevent dynamic allocation; works because autoflush==1 & stableSrc==1 */ + cctxI.maxBufferSize = 5 MB; /* mess with real buffer size to prevent dynamic allocation; works only because autoflush==1 & stableSrc==1 */ + if (prefs.compressionLevel < LZ4HC_CLEVEL_MIN) { + cctxI.lz4CtxPtr = &lz4ctx; + cctxI.lz4CtxLevel = 1; + } /* fast compression context pre-created on stack */ if (preferencesPtr!=NULL) prefs = *preferencesPtr; @@ -333,22 +339,18 @@ size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity, const void* srcBu if (prefs.frameInfo.contentSize != 0) prefs.frameInfo.contentSize = (U64)srcSize; /* auto-correct content size if selected (!=0) */ - if (prefs.compressionLevel < LZ4HC_CLEVEL_MIN) { - cctxI.lz4CtxPtr = &lz4ctx; - cctxI.lz4CtxLevel = 1; - } /* otherwise : will be created within LZ4F_compressBegin */ - prefs.frameInfo.blockSizeID = LZ4F_optimalBSID(prefs.frameInfo.blockSizeID, srcSize); prefs.autoFlush = 1; if (srcSize <= LZ4F_getBlockSize(prefs.frameInfo.blockSizeID)) - prefs.frameInfo.blockMode = LZ4F_blockIndependent; /* no need for linked blocks */ + prefs.frameInfo.blockMode = LZ4F_blockIndependent; /* only one block => no need for inter-block link */ + memset(&options, 0, sizeof(options)); options.stableSrc = 1; - if (dstCapacity < LZ4F_compressFrameBound(srcSize, &prefs)) /* condition to guarantee success */ + if (dstCapacity < LZ4F_compressFrameBound(srcSize, &prefs)) /* condition to guarantee success */ return err0r(LZ4F_ERROR_dstMaxSize_tooSmall); - { size_t const headerSize = LZ4F_compressBegin(&cctxI, dstBuffer, dstCapacity, &prefs); /* write header */ + { size_t const headerSize = LZ4F_compressBegin_usingCDict(&cctxI, dstBuffer, dstCapacity, cdict, &prefs); /* write header */ if (LZ4F_isError(headerSize)) return headerSize; dstPtr += headerSize; /* header size */ } @@ -360,24 +362,91 @@ size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity, const void* srcBu if (LZ4F_isError(tailSize)) return tailSize; dstPtr += tailSize; } - if (prefs.compressionLevel >= LZ4HC_CLEVEL_MIN) /* Ctx allocation only for lz4hc */ + if (prefs.compressionLevel >= LZ4HC_CLEVEL_MIN) /* Ctx allocation only for lz4hc */ FREEMEM(cctxI.lz4CtxPtr); return (dstPtr - dstStart); } +/*! LZ4F_compressFrame() : + * Compress an entire srcBuffer into a valid LZ4 frame, in a single step. + * dstBuffer MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr). + * The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will be set to default. + * @return : number of bytes written into dstBuffer. + * or an error code if it fails (can be tested using LZ4F_isError()) + */ +size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity, + const void* srcBuffer, size_t srcSize, + const LZ4F_preferences_t* preferencesPtr) +{ + return LZ4F_compressFrame_usingCDict(dstBuffer, dstCapacity, + srcBuffer, srcSize, + NULL, preferencesPtr); +} + + +/*-*************************************************** +* Dictionary compression +*****************************************************/ + +struct LZ4F_CDict_s { + void* dictContent; + LZ4_stream_t* fastCtx; + LZ4_streamHC_t* HCCtx; +}; /* typedef'd to LZ4F_CDict within lz4frame_static.h */ + +/*! LZ4_createCDict() : + * When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once. + * LZ4_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay. + * LZ4_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. + * `dictBuffer` can be released after LZ4_CDict creation, since its content is copied within CDict + * @return : digested dictionary for compression, or NULL if failed */ +LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize) +{ + const char* dictStart = (const char*)dictBuffer; + LZ4F_CDict* cdict = (LZ4F_CDict*) malloc(sizeof(*cdict)); + if (!cdict) return NULL; + if (dictSize > 64 KB) { + dictStart += dictSize - 64 KB; + dictSize = 64 KB; + } + cdict->dictContent = ALLOCATOR(dictSize); + cdict->fastCtx = LZ4_createStream(); + cdict->HCCtx = LZ4_createStreamHC(); + if (!cdict->dictContent || !cdict->fastCtx || !cdict->HCCtx) { + LZ4F_freeCDict(cdict); + return NULL; + } + memcpy(cdict->dictContent, dictStart, dictSize); + LZ4_resetStream(cdict->fastCtx); + LZ4_loadDict (cdict->fastCtx, cdict->dictContent, (int)dictSize); + LZ4_resetStreamHC(cdict->HCCtx, LZ4HC_CLEVEL_DEFAULT); + LZ4_loadDictHC(cdict->HCCtx, cdict->dictContent, (int)dictSize); + return cdict; +} + +void LZ4F_freeCDict(LZ4F_CDict* cdict) +{ + if (cdict==NULL) return; /* support free on NULL */ + FREEMEM(cdict->dictContent); + LZ4_freeStream(cdict->fastCtx); + LZ4_freeStreamHC(cdict->HCCtx); + FREEMEM(cdict); +} + + /*-********************************* * Advanced compression functions ***********************************/ /*! LZ4F_createCompressionContext() : - * The first thing to do is to create a compressionContext object, which will be used in all compression operations. - * This is achieved using LZ4F_createCompressionContext(), which takes as argument a version and an LZ4F_preferences_t structure. - * The version provided MUST be LZ4F_VERSION. It is intended to track potential version differences between different binaries. - * The function will provide a pointer to an allocated LZ4F_compressionContext_t object. - * If the result LZ4F_errorCode_t is not OK_NoError, there was an error during context creation. - * Object can release its memory using LZ4F_freeCompressionContext(); + * The first thing to do is to create a compressionContext object, which will be used in all compression operations. + * This is achieved using LZ4F_createCompressionContext(), which takes as argument a version and an LZ4F_preferences_t structure. + * The version provided MUST be LZ4F_VERSION. It is intended to track potential incompatible differences between different binaries. + * The function will provide a pointer to an allocated LZ4F_compressionContext_t object. + * If the result LZ4F_errorCode_t is not OK_NoError, there was an error during context creation. + * Object can release its memory using LZ4F_freeCompressionContext(); */ LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_compressionContext_t* LZ4F_compressionContextPtr, unsigned version) { @@ -385,7 +454,7 @@ LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_compressionContext_t* LZ4F_c if (cctxPtr==NULL) return err0r(LZ4F_ERROR_allocation_failed); cctxPtr->version = version; - cctxPtr->cStage = 0; /* Next stage : write header */ + cctxPtr->cStage = 0; /* Next stage : init stream */ *LZ4F_compressionContextPtr = (LZ4F_compressionContext_t)cctxPtr; @@ -397,8 +466,8 @@ LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_compressionContext_t LZ4F_comp { LZ4F_cctx_t* const cctxPtr = (LZ4F_cctx_t*)LZ4F_compressionContext; - if (cctxPtr != NULL) { /* null pointers can be safely provided to this function, like free() */ - FREEMEM(cctxPtr->lz4CtxPtr); + if (cctxPtr != NULL) { /* support free on NULL */ + FREEMEM(cctxPtr->lz4CtxPtr); /* works because LZ4_streamHC_t and LZ4_stream_t are simple POD types */ FREEMEM(cctxPtr->tmpBuff); FREEMEM(LZ4F_compressionContext); } @@ -407,22 +476,23 @@ LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_compressionContext_t LZ4F_comp } -/*! LZ4F_compressBegin() : - * will write the frame header into dstBuffer. - * dstBuffer must be large enough to accommodate a header (dstCapacity). Maximum header size is LZ4F_HEADER_SIZE_MAX bytes. - * @return : number of bytes written into dstBuffer for the header - * or an error code (can be tested using LZ4F_isError()) +/*! LZ4F_compressBegin_usingCDict() : + * init streaming compression and writes frame header into dstBuffer. + * dstBuffer must be >= LZ4F_HEADER_SIZE_MAX bytes. + * @return : number of bytes written into dstBuffer for the header + * or an error code (can be tested using LZ4F_isError()) */ -size_t LZ4F_compressBegin(LZ4F_cctx* cctxPtr, void* dstBuffer, size_t dstCapacity, const LZ4F_preferences_t* preferencesPtr) +size_t LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctxPtr, + void* dstBuffer, size_t dstCapacity, + const LZ4F_CDict* cdict, + const LZ4F_preferences_t* preferencesPtr) { LZ4F_preferences_t prefNull; BYTE* const dstStart = (BYTE*)dstBuffer; BYTE* dstPtr = dstStart; BYTE* headerStart; - size_t requiredBuffSize; if (dstCapacity < maxFHSize) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall); - if (cctxPtr->cStage != 0) return err0r(LZ4F_ERROR_GENERIC); memset(&prefNull, 0, sizeof(prefNull)); if (preferencesPtr == NULL) preferencesPtr = &prefNull; cctxPtr->prefs = *preferencesPtr; @@ -437,31 +507,41 @@ size_t LZ4F_compressBegin(LZ4F_cctx* cctxPtr, void* dstBuffer, size_t dstCapacit cctxPtr->lz4CtxPtr = (void*)LZ4_createStreamHC(); if (cctxPtr->lz4CtxPtr == NULL) return err0r(LZ4F_ERROR_allocation_failed); cctxPtr->lz4CtxLevel = tableID; - } - } + } } /* Buffer Management */ - if (cctxPtr->prefs.frameInfo.blockSizeID == 0) cctxPtr->prefs.frameInfo.blockSizeID = LZ4F_BLOCKSIZEID_DEFAULT; + if (cctxPtr->prefs.frameInfo.blockSizeID == 0) + cctxPtr->prefs.frameInfo.blockSizeID = LZ4F_BLOCKSIZEID_DEFAULT; cctxPtr->maxBlockSize = LZ4F_getBlockSize(cctxPtr->prefs.frameInfo.blockSizeID); - requiredBuffSize = cctxPtr->maxBlockSize + ((cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) * 128 KB); - if (preferencesPtr->autoFlush) - requiredBuffSize = (cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) * 64 KB; /* just needs dict */ + { size_t const requiredBuffSize = preferencesPtr->autoFlush ? + (cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) * 64 KB : /* only needs windows size */ + cctxPtr->maxBlockSize + ((cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) * 128 KB); - if (cctxPtr->maxBufferSize < requiredBuffSize) { - cctxPtr->maxBufferSize = 0; - FREEMEM(cctxPtr->tmpBuff); - cctxPtr->tmpBuff = (BYTE*)ALLOCATOR(requiredBuffSize); - if (cctxPtr->tmpBuff == NULL) return err0r(LZ4F_ERROR_allocation_failed); - cctxPtr->maxBufferSize = requiredBuffSize; - } + if (cctxPtr->maxBufferSize < requiredBuffSize) { + cctxPtr->maxBufferSize = 0; + FREEMEM(cctxPtr->tmpBuff); + cctxPtr->tmpBuff = (BYTE*)ALLOCATOR(requiredBuffSize); + if (cctxPtr->tmpBuff == NULL) return err0r(LZ4F_ERROR_allocation_failed); + cctxPtr->maxBufferSize = requiredBuffSize; + } } cctxPtr->tmpIn = cctxPtr->tmpBuff; cctxPtr->tmpInSize = 0; XXH32_reset(&(cctxPtr->xxh), 0); - if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) - LZ4_resetStream((LZ4_stream_t*)(cctxPtr->lz4CtxPtr)); - else - LZ4_resetStreamHC((LZ4_streamHC_t*)(cctxPtr->lz4CtxPtr), cctxPtr->prefs.compressionLevel); + + /* context init */ + cctxPtr->dictionary = (cdict!=NULL); + if (cdict) { + if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) { + memcpy(cctxPtr->lz4CtxPtr, cdict->fastCtx, sizeof(*cdict->fastCtx)); + } else + memcpy(cctxPtr->lz4CtxPtr, cdict->HCCtx, sizeof(*cdict->HCCtx)); + } else { + if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) + LZ4_resetStream((LZ4_stream_t*)(cctxPtr->lz4CtxPtr)); + else + LZ4_resetStreamHC((LZ4_streamHC_t*)(cctxPtr->lz4CtxPtr), cctxPtr->prefs.compressionLevel); + } /* Magic Number */ LZ4F_writeLE32(dstPtr, LZ4F_MAGICNUMBER); @@ -487,16 +567,31 @@ size_t LZ4F_compressBegin(LZ4F_cctx* cctxPtr, void* dstBuffer, size_t dstCapacit LZ4F_writeLE32(dstPtr, cctxPtr->prefs.frameInfo.dictID); dstPtr += 4; } - /* CRC Byte */ + /* Header CRC Byte */ *dstPtr = LZ4F_headerChecksum(headerStart, dstPtr - headerStart); dstPtr++; cctxPtr->cStage = 1; /* header written, now request input data block */ - return (dstPtr - dstStart); } +/*! LZ4F_compressBegin() : + * init streaming compression and writes frame header into dstBuffer. + * dstBuffer must be >= LZ4F_HEADER_SIZE_MAX bytes. + * preferencesPtr can be NULL, in which case default parameters are selected. + * @return : number of bytes written into dstBuffer for the header + * or an error code (can be tested using LZ4F_isError()) + */ +size_t LZ4F_compressBegin(LZ4F_cctx* cctxPtr, + void* dstBuffer, size_t dstCapacity, + const LZ4F_preferences_t* preferencesPtr) +{ + return LZ4F_compressBegin_usingCDict(cctxPtr, dstBuffer, dstCapacity, + NULL, preferencesPtr); +} + + /* LZ4F_compressBound() : * @ return size of Dst buffer given a srcSize to handle worst case situations. * The LZ4F_frameInfo_t structure is optional : if NULL, preferences will be set to cover worst case situations. @@ -537,19 +632,19 @@ static int LZ4F_localLZ4_compress_limitedOutput_continue(void* ctx, const char* return LZ4_compress_fast_continue((LZ4_stream_t*)ctx, src, dst, srcSize, dstCapacity, acceleration); } -static int LZ4F_localLZ4_compressHC_limitedOutput_continue(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level) +static int LZ4F_localLZ4_compressHC_limitedOutput_continue(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level) { (void) level; - return LZ4_compress_HC_continue((LZ4_streamHC_t*)ctx, src, dst, srcSize, dstSize); + return LZ4_compress_HC_continue((LZ4_streamHC_t*)ctx, src, dst, srcSize, dstCapacity); } -static compressFunc_t LZ4F_selectCompression(LZ4F_blockMode_t blockMode, int level) +static compressFunc_t LZ4F_selectCompression(LZ4F_blockMode_t blockMode, int dictionary, int level) { if (level < LZ4HC_CLEVEL_MIN) { - if (blockMode == LZ4F_blockIndependent) return LZ4F_localLZ4_compress_limitedOutput_withState; + if (blockMode == LZ4F_blockIndependent && !dictionary) return LZ4F_localLZ4_compress_limitedOutput_withState; return LZ4F_localLZ4_compress_limitedOutput_continue; } - if (blockMode == LZ4F_blockIndependent) return LZ4_compress_HC_extStateHC; + if (blockMode == LZ4F_blockIndependent && !dictionary) return LZ4_compress_HC_extStateHC; return LZ4F_localLZ4_compressHC_limitedOutput_continue; } @@ -580,7 +675,7 @@ size_t LZ4F_compressUpdate(LZ4F_cctx* cctxPtr, void* dstBuffer, size_t dstCapaci BYTE* const dstStart = (BYTE*)dstBuffer; BYTE* dstPtr = dstStart; LZ4F_lastBlockStatus lastBlockCompressed = notDone; - compressFunc_t const compress = LZ4F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->prefs.compressionLevel); + compressFunc_t const compress = LZ4F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->dictionary, cctxPtr->prefs.compressionLevel); if (cctxPtr->cStage != 1) return err0r(LZ4F_ERROR_GENERIC); @@ -679,7 +774,7 @@ size_t LZ4F_flush(LZ4F_cctx* cctxPtr, void* dstBuffer, size_t dstCapacity, const (void)compressOptionsPtr; /* not yet useful */ /* select compression function */ - compress = LZ4F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->prefs.compressionLevel); + compress = LZ4F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->dictionary, cctxPtr->prefs.compressionLevel); /* compress tmp buffer */ dstPtr += LZ4F_compressBlock(dstPtr, cctxPtr->tmpIn, cctxPtr->tmpInSize, compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel); diff --git a/lib/lz4frame.h b/lib/lz4frame.h index 0efe220..42e2f53 100644 --- a/lib/lz4frame.h +++ b/lib/lz4frame.h @@ -157,13 +157,13 @@ typedef LZ4F_contentChecksum_t contentChecksum_t; * It's not required to set all fields, as long as the structure was initially memset() to zero. * All reserved fields must be set to zero. */ typedef struct { - LZ4F_blockSizeID_t blockSizeID; /* max64KB, max256KB, max1MB, max4MB ; 0 == default */ - LZ4F_blockMode_t blockMode; /* blockLinked, blockIndependent ; 0 == default */ - LZ4F_contentChecksum_t contentChecksumFlag; /* noContentChecksum, contentChecksumEnabled ; 0 == default */ - LZ4F_frameType_t frameType; /* LZ4F_frame, skippableFrame ; 0 == default */ - unsigned long long contentSize; /* Size of uncompressed (original) content ; 0 == unknown */ - unsigned dictID; /* Dictionary ID, sent by the compressor, to help the decoder select the right dictionary; 0 == no dictionary used */ - unsigned reserved[1]; /* must be zero for forward compatibility */ + LZ4F_blockSizeID_t blockSizeID; /* max64KB, max256KB, max1MB, max4MB ; 0 == default */ + LZ4F_blockMode_t blockMode; /* blockLinked, blockIndependent ; 0 == default */ + LZ4F_contentChecksum_t contentChecksumFlag; /* noContentChecksum, contentChecksumEnabled ; 0 == default */ + LZ4F_frameType_t frameType; /* LZ4F_frame, skippableFrame ; 0 == default */ + unsigned long long contentSize; /* Size of uncompressed content ; 0 == unknown */ + unsigned dictID; /* Dictionary ID, sent by the compressor, to help the decoder select the right dictionary; 0 == no dictionary used */ + unsigned reserved[1]; /* must be zero for forward compatibility */ } LZ4F_frameInfo_t; /*! LZ4F_preferences_t : @@ -173,7 +173,7 @@ typedef struct { typedef struct { LZ4F_frameInfo_t frameInfo; int compressionLevel; /* 0 == default (fast mode); values above LZ4HC_CLEVEL_MAX count as LZ4HC_CLEVEL_MAX; values below 0 trigger "fast acceleration", proportional to value */ - unsigned autoFlush; /* 1 == always flush (reduce usage of tmp buffer) */ + unsigned autoFlush; /* 1 == always flush, to reduce usage of internal buffers */ unsigned reserved[4]; /* must be zero for forward compatibility */ } LZ4F_preferences_t; @@ -187,17 +187,16 @@ typedef struct { */ LZ4FLIB_API size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr); -/*!LZ4F_compressFrame() : - * Compress an entire srcBuffer into a valid LZ4 frame, as defined by specification v1.5.1 - * An important rule is that dstBuffer MUST be large enough (dstCapacity) to store the result in worst case situation. - * This value is supplied by LZ4F_compressFrameBound(). - * If this condition is not respected, LZ4F_compressFrame() will fail (result is an errorCode). - * The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will be set to default. +/*! LZ4F_compressFrame() : + * Compress an entire srcBuffer into a valid LZ4 frame. + * dstCapacity MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr). + * The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will be set to default. * @return : number of bytes written into dstBuffer. * or an error code if it fails (can be tested using LZ4F_isError()) */ -LZ4FLIB_API size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity, const void* srcBuffer, size_t srcSize, const LZ4F_preferences_t* preferencesPtr); - +LZ4FLIB_API size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity, + const void* srcBuffer, size_t srcSize, + const LZ4F_preferences_t* preferencesPtr); /*-*********************************** @@ -231,13 +230,15 @@ LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx); #define LZ4F_HEADER_SIZE_MAX 19 /*! LZ4F_compressBegin() : - * will write the frame header into dstBuffer. - * dstCapacity must be large enough to store the header. Maximum header size is LZ4F_HEADER_SIZE_MAX bytes. + * will write the frame header into dstBuffer. + * dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes. * `prefsPtr` is optional : you can provide NULL as argument, all preferences will then be set to default. * @return : number of bytes written into dstBuffer for the header * or an error code (which can be tested using LZ4F_isError()) */ -LZ4FLIB_API size_t LZ4F_compressBegin(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const LZ4F_preferences_t* prefsPtr); +LZ4FLIB_API size_t LZ4F_compressBegin(LZ4F_cctx* cctx, + void* dstBuffer, size_t dstCapacity, + const LZ4F_preferences_t* prefsPtr); /*! LZ4F_compressBound() : * Provides dstCapacity given a srcSize to guarantee operation success in worst case situations. diff --git a/lib/lz4frame_static.h b/lib/lz4frame_static.h index d3bae82..5f22aed 100644 --- a/lib/lz4frame_static.h +++ b/lib/lz4frame_static.h @@ -82,6 +82,50 @@ typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM) } LZ4F_errorCodes; LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult); + +/********************************** + * Bulk processing dictionary API + *********************************/ +typedef struct LZ4F_CDict_s LZ4F_CDict; + +/*! LZ4_createCDict() : + * When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once. + * LZ4_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay. + * LZ4_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. + * `dictBuffer` can be released after LZ4_CDict creation, since its content is copied within CDict */ +LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize); +void LZ4F_freeCDict(LZ4F_CDict* CDict); + +/*! LZ4_compressFrame_usingCDict() : + * Compress an entire srcBuffer into a valid LZ4 frame using a digested Dictionary. + * If cdict==NULL, compress without a dictionary. + * dstBuffer MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr). + * If this condition is not respected, function will fail (@return an errorCode). + * The LZ4F_preferences_t structure is optional : you may provide NULL as argument, + * but it's not recommended, as it's the only way to provide dictID in the frame header. + * @return : number of bytes written into dstBuffer. + * or an error code if it fails (can be tested using LZ4F_isError()) + */ +size_t LZ4F_compressFrame_usingCDict(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const LZ4F_CDict* cdict, + const LZ4F_preferences_t* preferencesPtr); + + +/*! LZ4F_compressBegin_usingCDict() : + * Inits streaming dictionary compression, and writes the frame header into dstBuffer. + * dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes. + * `prefsPtr` is optional : you may provide NULL as argument, + * however, it's the only way to provide dictID in the frame header. + * @return : number of bytes written into dstBuffer for the header, + * or an error code (which can be tested using LZ4F_isError()) + */ +size_t LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctx, + void* dstBuffer, size_t dstCapacity, + const LZ4F_CDict* cdict, + const LZ4F_preferences_t* prefsPtr); + + #if defined (__cplusplus) } #endif diff --git a/lib/lz4hc.c b/lib/lz4hc.c index ca9c2e6..b17760d 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -610,7 +610,11 @@ int LZ4_compress_HC_destSize(void* LZ4HC_Data, const char* source, char* dest, i **************************************/ /* allocation */ LZ4_streamHC_t* LZ4_createStreamHC(void) { return (LZ4_streamHC_t*)malloc(sizeof(LZ4_streamHC_t)); } -int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) { free(LZ4_streamHCPtr); return 0; } +int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) { + if (!LZ4_streamHCPtr) return 0; /* support free on NULL */ + free(LZ4_streamHCPtr); + return 0; +} /* initialization */ @@ -767,7 +771,11 @@ void* LZ4_createHC (char* inputBuffer) return hc4; } -int LZ4_freeHC (void* LZ4HC_Data) { FREEMEM(LZ4HC_Data); return 0; } +int LZ4_freeHC (void* LZ4HC_Data) { + if (!LZ4HC_Data) return 0; /* support free on NULL */ + FREEMEM(LZ4HC_Data); + return 0; +} int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel) { diff --git a/tests/frametest.c b/tests/frametest.c index 02d7e5b..1ee0d96 100644 --- a/tests/frametest.c +++ b/tests/frametest.c @@ -482,10 +482,36 @@ int basicTests(U32 seed, double compressibility) if (prefs.frameInfo.dictID != dictID) goto _output_error; DISPLAYLEVEL(3, "%u \n", (U32)prefs.frameInfo.dictID); - CHECK( LZ4F_freeCompressionContext(cctx) ); cctx = NULL; CHECK( LZ4F_freeDecompressionContext(dCtx) ); dCtx = NULL; + CHECK( LZ4F_freeCompressionContext(cctx) ); cctx = NULL; + } + + + /* Dictionary compression test */ + { size_t const dictSize = 63 KB; + size_t const dstCapacity = LZ4F_compressFrameBound(dictSize, NULL); + size_t cSizeNoDict, cSizeWithDict; + LZ4F_CDict* const cdict = LZ4F_createCDict(CNBuffer, dictSize); + if (cdict == NULL) goto _output_error; + DISPLAYLEVEL(3, "LZ4F_compressFrame_usingCDict, with NULL dict : "); + CHECK_V(cSizeNoDict, + LZ4F_compressFrame_usingCDict(compressedBuffer, dstCapacity, + CNBuffer, dictSize, + NULL, NULL) ); + DISPLAYLEVEL(3, "%u bytes \n", (unsigned)cSizeNoDict); + + DISPLAYLEVEL(3, "LZ4F_compressFrame_usingCDict, with dict : "); + CHECK_V(cSizeWithDict, + LZ4F_compressFrame_usingCDict(compressedBuffer, dstCapacity, + CNBuffer, dictSize, + cdict, NULL) ); + DISPLAYLEVEL(3, "%u bytes \n", (unsigned)cSizeWithDict); + if (cSizeWithDict >= cSizeNoDict) goto _output_error; /* must be more efficient */ + + LZ4F_freeCDict(cdict); } + DISPLAYLEVEL(3, "Skippable frame test : \n"); { size_t decodedBufferSize = COMPRESSIBLE_NOISE_LENGTH; unsigned maxBits = FUZ_highbit((U32)decodedBufferSize); -- cgit v0.12