From b17f578a919b7e6b078cede2d52be29dd48c8e8c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 29 May 2019 12:06:13 -0700 Subject: added comments and macros for in-place (de)compression --- doc/lz4_manual.html | 38 ++++++++++++++++++++++++++++++++++++-- lib/lz4.c | 4 ---- lib/lz4.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++-- tests/fuzzer.c | 3 +-- 4 files changed, 83 insertions(+), 10 deletions(-) diff --git a/doc/lz4_manual.html b/doc/lz4_manual.html index 3a9e0db..0530966 100644 --- a/doc/lz4_manual.html +++ b/doc/lz4_manual.html @@ -40,9 +40,9 @@ Blocks are different from Frames (doc/lz4_Frame_format.md). Frames bundle both blocks and metadata in a specified manner. - This are required for compressed data to be self-contained and portable. + Embedding metadata is required for compressed data to be self-contained and portable. Frame format is delivered through a companion API, declared in lz4frame.h. - Note that the `lz4` CLI can only manage frames. + The `lz4` CLI can only manage frames.

Version


@@ -357,6 +357,40 @@ int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
  
 


+

+ It's possible to have input and output sharing the same buffer, + for highly contrained memory environments. + In both cases, it requires input to lay at the end of the buffer, + and buffer must have some margin, hence be larger than final size. + + This technique is more useful for decompression, + since decompressed size is typically larger, + and margin is mostly required to avoid stripe overflow, so it's short. + + For compression though, margin must be able to cope with both + history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX, + and data expansion, which can happen when input is not compressible. + As a consequence, buffer size requirements are much higher than average compressed size, + hence memory savings are limited. + + There are ways to limit this cost for compression : + - Reduce history size, by modifying LZ4_DISTANCE_MAX. + Lower values will also reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX, + so it's a reasonable trick when inputs are known to be small. + - Require the compressor to deliver a "maximum compressed size". + When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail, + in which case, the return code will be 0 (zero). + The caller must be ready for these cases to happen, + and typically design a backup scheme to send data uncompressed. + The combination of both techniques can significantly reduce + the amount of margin required for in-place compression. + +


+ +
#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize)   ( (decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN)  /**< note: presumes that compressedSize < decompressedSize */
+

+
#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize)   ( (maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN)  /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */
+

PRIVATE DEFINITIONS

  Do not use these definitions directly.
  They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
diff --git a/lib/lz4.c b/lib/lz4.c
index 070dd7e..19070c2 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -412,10 +412,6 @@ static const int LZ4_minLength = (MFLIMIT+1);
 #define MB *(1 <<20)
 #define GB *(1U<<30)
 
-#ifndef LZ4_DISTANCE_MAX   /* can be user - defined at compile time */
-#  define LZ4_DISTANCE_MAX 65535
-#endif
-
 #if (LZ4_DISTANCE_MAX > 65535)   /* max supported by LZ4 format */
 #  error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
 #endif
diff --git a/lib/lz4.h b/lib/lz4.h
index 6064967..ae7f52c 100644
--- a/lib/lz4.h
+++ b/lib/lz4.h
@@ -65,9 +65,9 @@ extern "C" {
 
   Blocks are different from Frames (doc/lz4_Frame_format.md).
   Frames bundle both blocks and metadata in a specified manner.
-  This are required for compressed data to be self-contained and portable.
+  Embedding metadata is required for compressed data to be self-contained and portable.
   Frame format is delivered through a companion API, declared in lz4frame.h.
-  Note that the `lz4` CLI can only manage frames.
+  The `lz4` CLI can only manage frames.
 */
 
 /*^***************************************************************
@@ -462,8 +462,51 @@ LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const c
  */
 LZ4LIB_STATIC_API void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream);
 
+
+/*! In-place compression and decompression
+ *
+ * It's possible to have input and output sharing the same buffer,
+ * for highly contrained memory environments.
+ * In both cases, it requires input to lay at the end of the buffer,
+ * and buffer must have some margin, hence be larger than final size.
+ *
+ * This technique is more useful for decompression,
+ * since decompressed size is typically larger,
+ * and margin is mostly required to avoid stripe overflow, so it's short.
+ *
+ * For compression though, margin must be able to cope with both
+ * history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX,
+ * and data expansion, which can happen when input is not compressible.
+ * As a consequence, buffer size requirements are much higher than average compressed size,
+ * hence memory savings are limited.
+ *
+ * There are ways to limit this cost for compression :
+ * - Reduce history size, by modifying LZ4_DISTANCE_MAX.
+ *   Lower values will also reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX,
+ *   so it's a reasonable trick when inputs are known to be small.
+ * - Require the compressor to deliver a "maximum compressed size".
+ *   When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail,
+ *   in which case, the return code will be 0 (zero).
+ *   The caller must be ready for these cases to happen,
+ *   and typically design a backup scheme to send data uncompressed.
+ * The combination of both techniques can significantly reduce
+ * the amount of margin required for in-place compression.
+ */
+
+#define LZ4_DECOMPRESS_INPLACE_MARGIN 32
+#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize)   ( (decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN)  /**< note: presumes that compressedSize < decompressedSize */
+
+#ifndef LZ4_DISTANCE_MAX   /* history window size; can be user-defined at compile time */
+#  define LZ4_DISTANCE_MAX 65535   /* set to maximum value by default */
 #endif
 
+#define LZ4_COMPRESS_INPLACE_MARGIN (LZ4_DISTANCE_MAX + 32)
+#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize)   ( (maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN)  /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */
+
+
+#endif   /* LZ4_STATIC_LINKING_ONLY */
+
+
 
 /*-************************************************************
  *  PRIVATE DEFINITIONS
@@ -567,6 +610,7 @@ union LZ4_streamDecode_u {
 } ;   /* previously typedef'd to LZ4_streamDecode_t */
 
 
+
 /*-************************************
 *  Obsolete Functions
 **************************************/
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index 26e25eb..e04caa5 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -1017,8 +1017,7 @@ static void FUZ_unitTests(int compressionLevel)
     DISPLAYLEVEL(3, "in-place compression using LZ4_compress_default() :");
     {   size_t const sampleSize = 65 KB;
         size_t const maxCSize = LZ4_COMPRESSBOUND(sampleSize);
-        size_t const margin = 64 KB;
-        size_t const outSize = maxCSize + margin;
+        size_t const outSize = LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCSize);
         size_t const startIndex = outSize - sampleSize;
         char*  const startInput = testCompressed + startIndex;
         XXH32_hash_t const crcOrig = XXH32(testInput, sampleSize, 0);
-- 
cgit v0.12