diff options
Diffstat (limited to 'lib/lz4frame.h')
-rw-r--r-- | lib/lz4frame.h | 143 |
1 files changed, 104 insertions, 39 deletions
diff --git a/lib/lz4frame.h b/lib/lz4frame.h index 75f1fd9..ca20dc9 100644 --- a/lib/lz4frame.h +++ b/lib/lz4frame.h @@ -32,11 +32,14 @@ - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ -/* LZ4F is a stand-alone API to create LZ4-compressed frames - * conformant with specification v1.6.1. - * It also offers streaming capabilities. +/* LZ4F is a stand-alone API able to create and decode LZ4 frames + * conformant with specification v1.6.1 in doc/lz4_Frame_format.md . + * Generated frames are compatible with `lz4` CLI. + * + * LZ4F also offers streaming capabilities. + * * lz4.h is not required when using lz4frame.h, - * except to get constant such as LZ4_VERSION_NUMBER. + * except to extract common constant such as LZ4_VERSION_NUMBER. * */ #ifndef LZ4F_H_09782039843 @@ -195,7 +198,7 @@ typedef struct { * Simple compression function ***********************************/ -LZ4FLIB_API int LZ4F_compressionLevel_max(void); +LZ4FLIB_API int LZ4F_compressionLevel_max(void); /* v1.8.0+ */ /*! LZ4F_compressFrameBound() : * Returns the maximum possible compressed size with LZ4F_compressFrame() given srcSize and preferences. @@ -247,7 +250,9 @@ LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx); /*---- Compression ----*/ -#define LZ4F_HEADER_SIZE_MAX 19 /* LZ4 Frame header size can vary from 7 to 19 bytes */ +#define LZ4F_HEADER_SIZE_MIN 7 /* LZ4 Frame header size can vary, depending on selected paramaters */ +#define LZ4F_HEADER_SIZE_MAX 19 + /*! LZ4F_compressBegin() : * will write the frame header into dstBuffer. * dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes. @@ -260,15 +265,19 @@ LZ4FLIB_API size_t LZ4F_compressBegin(LZ4F_cctx* cctx, const LZ4F_preferences_t* prefsPtr); /*! LZ4F_compressBound() : - * Provides minimum dstCapacity required to guarantee compression success - * given a srcSize and preferences, covering worst case scenario. + * Provides minimum dstCapacity required to guarantee success of + * LZ4F_compressUpdate(), given a srcSize and preferences, for a worst case scenario. + * When srcSize==0, LZ4F_compressBound() provides an upper bound for LZ4F_flush() and LZ4F_compressEnd() instead. + * Note that the result is only valid for a single invocation of LZ4F_compressUpdate(). + * When invoking LZ4F_compressUpdate() multiple times, + * if the output buffer is gradually filled up instead of emptied and re-used from its start, + * one must check if there is enough remaining capacity before each invocation, using LZ4F_compressBound(). + * @return is always the same for a srcSize and prefsPtr. * prefsPtr is optional : when NULL is provided, preferences will be set to cover worst case scenario. - * Estimation is valid for either LZ4F_compressUpdate(), LZ4F_flush() or LZ4F_compressEnd(), - * Estimation includes the possibility that internal buffer might already be filled by up to (blockSize-1) bytes. - * It also includes frame footer (ending + checksum), which would have to be generated by LZ4F_compressEnd(). - * Estimation doesn't include frame header, as it was already generated by LZ4F_compressBegin(). - * Result is always the same for a srcSize and prefsPtr, so it can be trusted to size reusable buffers. - * When srcSize==0, LZ4F_compressBound() provides an upper bound for LZ4F_flush() and LZ4F_compressEnd() operations. + * tech details : + * @return includes the possibility that internal buffer might already be filled by up to (blockSize-1) bytes. + * It also includes frame footer (ending + checksum), since it might be generated by LZ4F_compressEnd(). + * @return doesn't include frame header, as it was already generated by LZ4F_compressBegin(). */ LZ4FLIB_API size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* prefsPtr); @@ -295,6 +304,7 @@ LZ4FLIB_API size_t LZ4F_compressUpdate(LZ4F_cctx* cctx, * `cOptPtr` is optional : it's possible to provide NULL, all options will be set to default. * @return : nb of bytes written into dstBuffer (can be zero, when there is no data stored within cctx) * or an error code if it fails (which can be tested using LZ4F_isError()) + * Note : LZ4F_flush() is guaranteed to be successful when dstCapacity >= LZ4F_compressBound(0, prefsPtr). */ LZ4FLIB_API size_t LZ4F_flush(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, @@ -307,6 +317,7 @@ LZ4FLIB_API size_t LZ4F_flush(LZ4F_cctx* cctx, * `cOptPtr` is optional : NULL can be provided, in which case all options will be set to default. * @return : nb of bytes written into dstBuffer, necessarily >= 4 (endMark), * or an error code if it fails (which can be tested using LZ4F_isError()) + * Note : LZ4F_compressEnd() is guaranteed to be successful when dstCapacity >= LZ4F_compressBound(0, prefsPtr). * A successful call to LZ4F_compressEnd() makes `cctx` available again for another compression task. */ LZ4FLIB_API size_t LZ4F_compressEnd(LZ4F_cctx* cctx, @@ -345,23 +356,58 @@ LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_dctx* dctx); * Streaming decompression functions *************************************/ +#define LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH 5 + +/*! LZ4F_headerSize() : v1.9.0+ + * Provide the header size of a frame starting at `src`. + * `srcSize` must be >= LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH, + * which is enough to decode the header length. + * @return : size of frame header + * or an error code, which can be tested using LZ4F_isError() + * note : Frame header size is variable, but is guaranteed to be + * >= LZ4F_HEADER_SIZE_MIN bytes, and <= LZ4F_HEADER_SIZE_MAX bytes. + */ +size_t LZ4F_headerSize(const void* src, size_t srcSize); + /*! LZ4F_getFrameInfo() : * This function extracts frame parameters (max blockSize, dictID, etc.). - * Its usage is optional. - * Extracted information is typically useful for allocation and dictionary. - * This function works in 2 situations : - * - At the beginning of a new frame, in which case - * it will decode information from `srcBuffer`, starting the decoding process. - * Input size must be large enough to successfully decode the entire frame header. - * Frame header size is variable, but is guaranteed to be <= LZ4F_HEADER_SIZE_MAX bytes. - * It's allowed to provide more input data than this minimum. - * - After decoding has been started. - * In which case, no input is read, frame parameters are extracted from dctx. - * - If decoding has barely started, but not yet extracted information from header, + * Its usage is optional: user can call LZ4F_decompress() directly. + * + * Extracted information will fill an existing LZ4F_frameInfo_t structure. + * This can be useful for allocation and dictionary identification purposes. + * + * LZ4F_getFrameInfo() can work in the following situations : + * + * 1) At the beginning of a new frame, before any invocation of LZ4F_decompress(). + * It will decode header from `srcBuffer`, + * consuming the header and starting the decoding process. + * + * Input size must be large enough to contain the full frame header. + * Frame header size can be known beforehand by LZ4F_headerSize(). + * Frame header size is variable, but is guaranteed to be >= LZ4F_HEADER_SIZE_MIN bytes, + * and not more than <= LZ4F_HEADER_SIZE_MAX bytes. + * Hence, blindly providing LZ4F_HEADER_SIZE_MAX bytes or more will always work. + * It's allowed to provide more input data than the header size, + * LZ4F_getFrameInfo() will only consume the header. + * + * If input size is not large enough, + * aka if it's smaller than header size, + * function will fail and return an error code. + * + * 2) After decoding has been started, + * it's possible to invoke LZ4F_getFrameInfo() anytime + * to extract already decoded frame parameters stored within dctx. + * + * Note that, if decoding has barely started, + * and not yet read enough information to decode the header, * LZ4F_getFrameInfo() will fail. - * The number of bytes consumed from srcBuffer will be updated within *srcSizePtr (necessarily <= original value). - * Decompression must resume from (srcBuffer + *srcSizePtr). - * @return : an hint about how many srcSize bytes LZ4F_decompress() expects for next call, + * + * The number of bytes consumed from srcBuffer will be updated in *srcSizePtr (necessarily <= original value). + * LZ4F_getFrameInfo() only consumes bytes when decoding has not yet started, + * and when decoding the header has been successful. + * Decompression must then resume from (srcBuffer + *srcSizePtr). + * + * @return : a hint about how many srcSize bytes LZ4F_decompress() expects for next call, * or an error code which can be tested using LZ4F_isError(). * note 1 : in case of error, dctx is not modified. Decoding operation can resume from beginning safely. * note 2 : frame parameters are *copied into* an already allocated LZ4F_frameInfo_t structure. @@ -427,15 +473,15 @@ LZ4FLIB_API void LZ4F_resetDecompressionContext(LZ4F_dctx* dctx); /* always su extern "C" { #endif -/* These declarations are not stable and may change in the future. They are - * therefore only safe to depend on when the caller is statically linked - * against the library. To access their declarations, define - * LZ4F_STATIC_LINKING_ONLY. +/* These declarations are not stable and may change in the future. + * They are therefore only safe to depend on + * when the caller is statically linked against the library. + * To access their declarations, define LZ4F_STATIC_LINKING_ONLY. * - * There is a further protection mechanism where these symbols aren't published - * into shared/dynamic libraries. You can override this behavior and force - * them to be published by defining LZ4F_PUBLISH_STATIC_FUNCTIONS. Use at - * your own risk. + * By default, these symbols aren't published into shared/dynamic libraries. + * You can override this behavior and force them to be published + * by defining LZ4F_PUBLISH_STATIC_FUNCTIONS. + * Use at your own risk. */ #ifdef LZ4F_PUBLISH_STATIC_FUNCTIONS #define LZ4FLIB_STATIC_API LZ4FLIB_API @@ -471,19 +517,38 @@ extern "C" { #define LZ4F_GENERATE_ENUM(ENUM) LZ4F_##ENUM, /* enum list is exposed, to handle specific errors */ -typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM) } LZ4F_errorCodes; +typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM) + _LZ4F_dummy_error_enum_for_c89_never_used } LZ4F_errorCodes; LZ4FLIB_STATIC_API LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult); - +LZ4FLIB_STATIC_API size_t LZ4F_getBlockSize(unsigned); /********************************** * Bulk processing dictionary API *********************************/ + +/* A Dictionary is useful for the compression of small messages (KB range). + * It dramatically improves compression efficiency. + * + * LZ4 can ingest any input as dictionary, though only the last 64 KB are useful. + * Best results are generally achieved by using Zstandard's Dictionary Builder + * to generate a high-quality dictionary from a set of samples. + * + * Loading a dictionary has a cost, since it involves construction of tables. + * The Bulk processing dictionary API makes it possible to share this cost + * over an arbitrary number of compression jobs, even concurrently, + * markedly improving compression latency for these cases. + * + * The same dictionary will have to be used on the decompression side + * for decoding to be successful. + * To help identify the correct dictionary at decoding stage, + * the frame header allows optional embedding of a dictID field. + */ typedef struct LZ4F_CDict_s LZ4F_CDict; /*! LZ4_createCDict() : - * When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once. + * When compressing multiple messages / blocks using the same dictionary, it's recommended to load it just once. * LZ4_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay. * LZ4_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. * `dictBuffer` can be released after LZ4_CDict creation, since its content is copied within CDict */ |