From 757497ae3db93a78d146ff573ae267f54e49c9b6 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 10 Aug 2017 16:53:57 -0700 Subject: implemented lz4frame decompression API --- doc/lz4frame_manual.html | 10 ++--- lib/lz4frame.c | 106 +++++++++++++++++++++++------------------------ lib/lz4frame.h | 10 ++--- lib/lz4frame_static.h | 20 ++++++--- tests/frametest.c | 67 ++++++++++++++++++++++++++++-- 5 files changed, 141 insertions(+), 72 deletions(-) diff --git a/doc/lz4frame_manual.html b/doc/lz4frame_manual.html index 9593181..2a625a6 100644 --- a/doc/lz4frame_manual.html +++ b/doc/lz4frame_manual.html @@ -176,13 +176,13 @@ LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx);


size_t LZ4F_compressEnd(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const LZ4F_compressOptions_t* cOptPtr);
-

To properly finish an LZ4 frame, invoke LZ4F_compressEnd(). - It will flush whatever data remained within `cctx` (like LZ4_flush()) - and properly finalize the frame, with an endMark and a checksum. +

To properly finish an LZ4 frame, invoke LZ4F_compressEnd(). + It will flush whatever data remained within `cctx` (like LZ4_flush()) + and properly finalize the frame, with an endMark and a checksum. `cOptPtr` is optional : NULL can be provided, in which case all options will be set to default. @return : number of bytes written into dstBuffer (necessarily >= 4 (endMark), or 8 if optional frame checksum is enabled) or an error code if it fails (which can be tested using LZ4F_isError()) - A successful call to LZ4F_compressEnd() makes `cctx` available again for another compression task. + A successful call to LZ4F_compressEnd() makes `cctx` available again for another compression task.


@@ -190,7 +190,7 @@ LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx);
typedef struct {
   unsigned stableDst;    /* pledge that at least 64KB+64Bytes of previously decompressed data remain unmodifed where it was decoded. This optimization skips storage operations in tmp buffers */
-  unsigned reserved[3];
+  unsigned reserved[3];  /* must be set to zero for forward compatibility */
 } LZ4F_decompressOptions_t;
 

LZ4F_errorCode_t LZ4F_createDecompressionContext(LZ4F_dctx** dctxPtr, unsigned version);
diff --git a/lib/lz4frame.c b/lib/lz4frame.c
index fbccc9d..c27fc51 100644
--- a/lib/lz4frame.c
+++ b/lib/lz4frame.c
@@ -926,6 +926,8 @@ typedef enum {
 void LZ4F_resetDecompressionContext(LZ4F_dctx* dctx)
 {
     dctx->dStage = dstage_getHeader;
+    dctx->dict = NULL;
+    dctx->dictSize = 0;
 }
 
 
@@ -1101,19 +1103,6 @@ LZ4F_errorCode_t LZ4F_getFrameInfo(LZ4F_dctx* dctxPtr, LZ4F_frameInfo_t* frameIn
 }
 
 
-/* trivial redirector, for common prototype */
-static int LZ4F_decompress_safe (const char* src,
-                                 char* dst,
-                                 int compressedSize,
-                                 int dstCapacity,
-                                 const char* dictStart,
-                                 int dictSize)
-{
-    (void)dictStart; (void)dictSize;
-    return LZ4_decompress_safe (src, dst, compressedSize, dstCapacity);
-}
-
-
 static void LZ4F_updateDict(LZ4F_dctx* dctxPtr, const BYTE* dstPtr, size_t dstSize, const BYTE* dstPtr0, unsigned withinTmp)
 {
     if (dctxPtr->dictSize==0)
@@ -1174,21 +1163,22 @@ static void LZ4F_updateDict(LZ4F_dctx* dctxPtr, const BYTE* dstPtr, size_t dstSi
 
 
 /*! LZ4F_decompress() :
- * Call this function repetitively to regenerate data compressed within srcBuffer.
- * The function will attempt to decode up to *srcSizePtr bytes from srcBuffer, into dstBuffer of capacity *dstSizePtr.
+ *  Call this function repetitively to regenerate compressed data in srcBuffer.
+ *  The function will attempt to decode up to *srcSizePtr bytes from srcBuffer
+ *  into dstBuffer of capacity *dstSizePtr.
  *
- * The number of bytes regenerated into dstBuffer will be provided within *dstSizePtr (necessarily <= original value).
+ *  The number of bytes regenerated into dstBuffer will be provided within *dstSizePtr (necessarily <= original value).
  *
- * The number of bytes effectively read from srcBuffer will be provided within *srcSizePtr (necessarily <= original value).
- * If the number of bytes read is < number of bytes provided, then the decompression operation is not complete.
- * Remaining data will have to be presented again in a subsequent invocation.
+ *  The number of bytes effectively read from srcBuffer will be provided within *srcSizePtr (necessarily <= original value).
+ *  If number of bytes read is < number of bytes provided, then decompression operation is not complete.
+ *  Remaining data will have to be presented again in a subsequent invocation.
  *
- * The function result is an hint of the better srcSize to use for next call to LZ4F_decompress.
- * Basically, it's the size of the current (or remaining) compressed block + header of next block.
- * Respecting the hint provides some boost to performance, since it allows less buffer shuffling.
- * Note that this is just a hint, it's always possible to any srcSize value.
- * When a frame is fully decoded, @return will be 0.
- * If decompression failed, @return is an error code which can be tested using LZ4F_isError().
+ *  The function result is an hint of the better srcSize to use for next call to LZ4F_decompress.
+ *  Schematically, it's the size of the current (or remaining) compressed block + header of next block.
+ *  Respecting the hint provides a small boost to performance, since it allows less buffer shuffling.
+ *  Note that this is just a hint, and it's always possible to any srcSize value.
+ *  When a frame is fully decoded, @return will be 0.
+ *  If decompression failed, @return is an error code which can be tested using LZ4F_isError().
  */
 size_t LZ4F_decompress(LZ4F_dctx* dctxPtr,
                        void* dstBuffer, size_t* dstSizePtr,
@@ -1265,8 +1255,6 @@ size_t LZ4F_decompress(LZ4F_dctx* dctxPtr,
             }   }
             dctxPtr->tmpInSize = 0;
             dctxPtr->tmpInTarget = 0;
-            dctxPtr->dict = dctxPtr->tmpOutBuffer;
-            dctxPtr->dictSize = 0;
             dctxPtr->tmpOut = dctxPtr->tmpOutBuffer;
             dctxPtr->tmpOutStart = 0;
             dctxPtr->tmpOutSize = 0;
@@ -1381,15 +1369,8 @@ size_t LZ4F_decompress(LZ4F_dctx* dctxPtr,
             break;
 
         case dstage_decodeCBlock_intoDst:
-            {   int (*decoder)(const char*, char*, int, int, const char*, int);
-                int decodedSize;
-
-                if (dctxPtr->frameInfo.blockMode == LZ4F_blockLinked)
-                    decoder = LZ4_decompress_safe_usingDict;
-                else
-                    decoder = LZ4F_decompress_safe;
-
-                decodedSize = decoder((const char*)selectedIn, (char*)dstPtr,
+            {   int const decodedSize = LZ4_decompress_safe_usingDict(
+                        (const char*)selectedIn, (char*)dstPtr,
                         (int)dctxPtr->tmpInTarget, (int)dctxPtr->maxBlockSize,
                         (const char*)dctxPtr->dict, (int)dctxPtr->dictSize);
                 if (decodedSize < 0) return err0r(LZ4F_ERROR_GENERIC);   /* decompression failed */
@@ -1409,13 +1390,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctxPtr,
 
         case dstage_decodeCBlock_intoTmp:
             /* not enough place into dst : decode into tmpOut */
-            {   int (*decoder)(const char*, char*, int, int, const char*, int);
-                int decodedSize;
-
-                if (dctxPtr->frameInfo.blockMode == LZ4F_blockLinked)
-                    decoder = LZ4_decompress_safe_usingDict;
-                else
-                    decoder = LZ4F_decompress_safe;
+            {   int decodedSize;
 
                 /* ensure enough place for tmpOut */
                 if (dctxPtr->frameInfo.blockMode == LZ4F_blockLinked) {
@@ -1433,7 +1408,8 @@ size_t LZ4F_decompress(LZ4F_dctx* dctxPtr,
                 }
 
                 /* Decode */
-                decodedSize = decoder((const char*)selectedIn, (char*)dctxPtr->tmpOut,
+                decodedSize = LZ4_decompress_safe_usingDict(
+                        (const char*)selectedIn, (char*)dctxPtr->tmpOut,
                         (int)dctxPtr->tmpInTarget, (int)dctxPtr->maxBlockSize,
                         (const char*)dctxPtr->dict, (int)dctxPtr->dictSize);
                 if (decodedSize < 0)
@@ -1476,7 +1452,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctxPtr,
                     return err0r(LZ4F_ERROR_frameSize_wrong);   /* incorrect frame size decoded */
                 if (suffixSize == 0) {  /* frame completed */
                     nextSrcSizeHint = 0;
-                    dctxPtr->dStage = dstage_getHeader;
+                    LZ4F_resetDecompressionContext(dctxPtr);
                     doAnotherStage = 0;
                     break;
                 }
@@ -1510,7 +1486,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctxPtr,
                 U32 const resultCRC = XXH32_digest(&(dctxPtr->xxh));
                 if (readCRC != resultCRC) return err0r(LZ4F_ERROR_contentChecksum_invalid);
                 nextSrcSizeHint = 0;
-                dctxPtr->dStage = dstage_getHeader;
+                LZ4F_resetDecompressionContext(dctxPtr);
                 doAnotherStage = 0;
                 break;
             }
@@ -1530,7 +1506,8 @@ size_t LZ4F_decompress(LZ4F_dctx* dctxPtr,
         case dstage_storeSFrameSize:
             {
                 size_t sizeToCopy = dctxPtr->tmpInTarget - dctxPtr->tmpInSize;
-                if (sizeToCopy > (size_t)(srcEnd - srcPtr)) sizeToCopy = srcEnd - srcPtr;
+                if (sizeToCopy > (size_t)(srcEnd - srcPtr))
+                    sizeToCopy = srcEnd - srcPtr;
                 memcpy(dctxPtr->header + dctxPtr->tmpInSize, srcPtr, sizeToCopy);
                 srcPtr += sizeToCopy;
                 dctxPtr->tmpInSize += sizeToCopy;
@@ -1553,24 +1530,25 @@ size_t LZ4F_decompress(LZ4F_dctx* dctxPtr,
 
         case dstage_skipSkippable:
             {   size_t skipSize = dctxPtr->tmpInTarget;
-                if (skipSize > (size_t)(srcEnd-srcPtr)) skipSize = srcEnd-srcPtr;
+                if (skipSize > (size_t)(srcEnd-srcPtr))
+                    skipSize = srcEnd-srcPtr;
                 srcPtr += skipSize;
                 dctxPtr->tmpInTarget -= skipSize;
                 doAnotherStage = 0;
                 nextSrcSizeHint = dctxPtr->tmpInTarget;
                 if (nextSrcSizeHint) break;
-                dctxPtr->dStage = dstage_getHeader;
+                LZ4F_resetDecompressionContext(dctxPtr);
                 break;
             }
         }
     }
 
-    /* preserve dictionary within tmp if necessary */
+    /* preserve history within tmp if necessary */
     if ( (dctxPtr->frameInfo.blockMode==LZ4F_blockLinked)
-        &&(dctxPtr->dict != dctxPtr->tmpOutBuffer)
-        &&(!decompressOptionsPtr->stableDst)
-        &&((unsigned)(dctxPtr->dStage-1) < (unsigned)(dstage_getSuffix-1))
-        )
+      && (dctxPtr->dict != dctxPtr->tmpOutBuffer)
+      && (dctxPtr->dStage != dstage_getHeader)
+      && (!decompressOptionsPtr->stableDst)
+      && ((unsigned)(dctxPtr->dStage-1) < (unsigned)(dstage_getSuffix-1)) )
     {
         if (dctxPtr->dStage == dstage_flushOut) {
             size_t preserveSize = dctxPtr->tmpOut - dctxPtr->tmpOutBuffer;
@@ -1600,3 +1578,23 @@ size_t LZ4F_decompress(LZ4F_dctx* dctxPtr,
     *dstSizePtr = (dstPtr - dstStart);
     return nextSrcSizeHint;
 }
+
+/*! LZ4F_decompress_usingDict() :
+ *  Same as LZ4F_decompress(), using a predefined dictionary.
+ *  Dictionary is used "in place", without any preprocessing.
+ *  It must remain accessible throughout the entire frame decoding.
+ */
+size_t LZ4F_decompress_usingDict(LZ4F_dctx* dctxPtr,
+                       void* dstBuffer, size_t* dstSizePtr,
+                       const void* srcBuffer, size_t* srcSizePtr,
+                       const void* dict, size_t dictSize,
+                       const LZ4F_decompressOptions_t* decompressOptionsPtr)
+{
+    if (dctxPtr->dStage <= dstage_init) {
+        dctxPtr->dict = (const BYTE*)dict;
+        dctxPtr->dictSize = dictSize;
+    }
+    return LZ4F_decompress(dctxPtr, dstBuffer, dstSizePtr,
+                           srcBuffer, srcSizePtr,
+                           decompressOptionsPtr);
+}
diff --git a/lib/lz4frame.h b/lib/lz4frame.h
index dd76194..844255b 100644
--- a/lib/lz4frame.h
+++ b/lib/lz4frame.h
@@ -273,13 +273,13 @@ LZ4FLIB_API size_t LZ4F_compressUpdate(LZ4F_cctx* cctx, void* dstBuffer, size_t
 LZ4FLIB_API size_t LZ4F_flush(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const LZ4F_compressOptions_t* cOptPtr);
 
 /*! LZ4F_compressEnd() :
- * To properly finish an LZ4 frame, invoke LZ4F_compressEnd().
- * It will flush whatever data remained within `cctx` (like LZ4_flush())
- * and properly finalize the frame, with an endMark and a checksum.
+ *  To properly finish an LZ4 frame, invoke LZ4F_compressEnd().
+ *  It will flush whatever data remained within `cctx` (like LZ4_flush())
+ *  and properly finalize the frame, with an endMark and a checksum.
  * `cOptPtr` is optional : NULL can be provided, in which case all options will be set to default.
  * @return : number of bytes written into dstBuffer (necessarily >= 4 (endMark), or 8 if optional frame checksum is enabled)
  *           or an error code if it fails (which can be tested using LZ4F_isError())
- * A successful call to LZ4F_compressEnd() makes `cctx` available again for another compression task.
+ *  A successful call to LZ4F_compressEnd() makes `cctx` available again for another compression task.
  */
 LZ4FLIB_API size_t LZ4F_compressEnd(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const LZ4F_compressOptions_t* cOptPtr);
 
@@ -292,7 +292,7 @@ typedef LZ4F_dctx* LZ4F_decompressionContext_t;   /* compatibility with previous
 
 typedef struct {
   unsigned stableDst;    /* pledge that at least 64KB+64Bytes of previously decompressed data remain unmodifed where it was decoded. This optimization skips storage operations in tmp buffers */
-  unsigned reserved[3];
+  unsigned reserved[3];  /* must be set to zero for forward compatibility */
 } LZ4F_decompressOptions_t;
 
 
diff --git a/lib/lz4frame_static.h b/lib/lz4frame_static.h
index 5f22aed..b585b72 100644
--- a/lib/lz4frame_static.h
+++ b/lib/lz4frame_static.h
@@ -92,10 +92,11 @@ typedef struct LZ4F_CDict_s LZ4F_CDict;
  *  When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
  *  LZ4_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
  *  LZ4_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
- *  `dictBuffer` can be released after LZ4_CDict creation, since its content is copied within CDict */
+ * `dictBuffer` can be released after LZ4_CDict creation, since its content is copied within CDict */
 LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize);
 void        LZ4F_freeCDict(LZ4F_CDict* CDict);
 
+
 /*! LZ4_compressFrame_usingCDict() :
  *  Compress an entire srcBuffer into a valid LZ4 frame using a digested Dictionary.
  *  If cdict==NULL, compress without a dictionary.
@@ -104,8 +105,7 @@ void        LZ4F_freeCDict(LZ4F_CDict* CDict);
  *  The LZ4F_preferences_t structure is optional : you may provide NULL as argument,
  *  but it's not recommended, as it's the only way to provide dictID in the frame header.
  * @return : number of bytes written into dstBuffer.
- *           or an error code if it fails (can be tested using LZ4F_isError())
- */
+ *           or an error code if it fails (can be tested using LZ4F_isError()) */
 size_t LZ4F_compressFrame_usingCDict(void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize,
                                const LZ4F_CDict* cdict,
@@ -118,14 +118,24 @@ size_t LZ4F_compressFrame_usingCDict(void* dst, size_t dstCapacity,
  * `prefsPtr` is optional : you may provide NULL as argument,
  *  however, it's the only way to provide dictID in the frame header.
  * @return : number of bytes written into dstBuffer for the header,
- *           or an error code (which can be tested using LZ4F_isError())
- */
+ *           or an error code (which can be tested using LZ4F_isError()) */
 size_t LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctx,
                                      void* dstBuffer, size_t dstCapacity,
                                      const LZ4F_CDict* cdict,
                                      const LZ4F_preferences_t* prefsPtr);
 
 
+/*! LZ4F_decompress_usingDict() :
+ *  Same as LZ4F_decompress(), using a predefined dictionary.
+ *  Dictionary is used "in place", without any preprocessing.
+ *  It must remain accessible throughout the entire frame decoding. */
+size_t LZ4F_decompress_usingDict(LZ4F_dctx* dctxPtr,
+                       void* dstBuffer, size_t* dstSizePtr,
+                       const void* srcBuffer, size_t* srcSizePtr,
+                       const void* dict, size_t dictSize,
+                       const LZ4F_decompressOptions_t* decompressOptionsPtr);
+
+
 #if defined (__cplusplus)
 }
 #endif
diff --git a/tests/frametest.c b/tests/frametest.c
index a30089f..d0665c5 100644
--- a/tests/frametest.c
+++ b/tests/frametest.c
@@ -505,8 +505,28 @@ int basicTests(U32 seed, double compressibility)
                 LZ4F_compressFrame_usingCDict(compressedBuffer, dstCapacity,
                                               CNBuffer, dictSize,
                                               cdict, NULL) );
-        DISPLAYLEVEL(3, "%u bytes \n", (unsigned)cSizeWithDict);
+        DISPLAYLEVEL(3, "compressed %u bytes into %u bytes \n",
+                        (unsigned)dictSize, (unsigned)cSizeWithDict);
         if (cSizeWithDict >= cSizeNoDict) goto _output_error;  /* must be more efficient */
+        crcOrig = XXH64(CNBuffer, dictSize, 0);
+
+        DISPLAYLEVEL(3, "LZ4F_decompress_usingDict : ");
+        {   LZ4F_dctx* dctx;
+            size_t decodedSize = COMPRESSIBLE_NOISE_LENGTH;
+            size_t compressedSize = cSizeWithDict;
+            CHECK( LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION) );
+            CHECK( LZ4F_decompress_usingDict(dctx,
+                                        decodedBuffer, &decodedSize,
+                                        compressedBuffer, &compressedSize,
+                                        CNBuffer, dictSize,
+                                        NULL) );
+            if (compressedSize != cSizeWithDict) goto _output_error;
+            if (decodedSize != dictSize) goto _output_error;
+            { U64 const crcDest = XXH64(decodedBuffer, decodedSize, 0);
+              if (crcDest != crcOrig) goto _output_error; }
+            DISPLAYLEVEL(3, "Regenerated %u bytes \n", (U32)decodedSize);
+            CHECK( LZ4F_freeDecompressionContext(dctx) );
+        }
 
         DISPLAYLEVEL(3, "LZ4F_compressFrame_usingCDict, with dict, negative level : ");
         {   size_t cSizeLevelMax;
@@ -544,9 +564,30 @@ int basicTests(U32 seed, double compressibility)
                 LZ4F_compressFrame_usingCDict(compressedBuffer, outCapacity,
                                               CNBuffer, inSize,
                                               cdict, &cParams) );
-            DISPLAYLEVEL(3, "%u bytes \n", (unsigned)cSizeContiguous);
+            DISPLAYLEVEL(3, "compressed %u bytes into %u bytes \n",
+                        (unsigned)inSize, (unsigned)cSizeContiguous);
+
+            DISPLAYLEVEL(3, "LZ4F_decompress_usingDict on multiple linked blocks : ");
+            {   LZ4F_dctx* dctx;
+                size_t decodedSize = COMPRESSIBLE_NOISE_LENGTH;
+                size_t compressedSize = cSizeContiguous;
+                CHECK( LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION) );
+                CHECK( LZ4F_decompress_usingDict(dctx,
+                                            decodedBuffer, &decodedSize,
+                                            compressedBuffer, &compressedSize,
+                                            CNBuffer, dictSize,
+                                            NULL) );
+                if (compressedSize != cSizeContiguous) goto _output_error;
+                if (decodedSize != inSize) goto _output_error;
+                crcOrig = XXH64(CNBuffer, inSize, 0);
+                { U64 const crcDest = XXH64(decodedBuffer, decodedSize, 0);
+                  if (crcDest != crcOrig) goto _output_error; }
+                DISPLAYLEVEL(3, "Regenerated %u bytes \n", (U32)decodedSize);
+                CHECK( LZ4F_freeDecompressionContext(dctx) );
+            }
         }
 
+
         DISPLAYLEVEL(3, "LZ4F_compressFrame_usingCDict, multiple independent blocks : ");
         {   size_t cSizeIndep;
             size_t const inSize = dictSize * 3;
@@ -559,7 +600,27 @@ int basicTests(U32 seed, double compressibility)
                 LZ4F_compressFrame_usingCDict(compressedBuffer, outCapacity,
                                               CNBuffer, inSize,
                                               cdict, &cParams) );
-            DISPLAYLEVEL(3, "%u bytes \n", (unsigned)cSizeIndep);
+            DISPLAYLEVEL(3, "compressed %u bytes into %u bytes \n",
+                        (unsigned)inSize, (unsigned)cSizeIndep);
+
+            DISPLAYLEVEL(3, "LZ4F_decompress_usingDict on multiple independent blocks : ");
+            {   LZ4F_dctx* dctx;
+                size_t decodedSize = COMPRESSIBLE_NOISE_LENGTH;
+                size_t compressedSize = cSizeIndep;
+                CHECK( LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION) );
+                CHECK( LZ4F_decompress_usingDict(dctx,
+                                            decodedBuffer, &decodedSize,
+                                            compressedBuffer, &compressedSize,
+                                            CNBuffer, dictSize,
+                                            NULL) );
+                if (compressedSize != cSizeIndep) goto _output_error;
+                if (decodedSize != inSize) goto _output_error;
+                crcOrig = XXH64(CNBuffer, inSize, 0);
+                { U64 const crcDest = XXH64(decodedBuffer, decodedSize, 0);
+                  if (crcDest != crcOrig) goto _output_error; }
+                DISPLAYLEVEL(3, "Regenerated %u bytes \n", (U32)decodedSize);
+                CHECK( LZ4F_freeDecompressionContext(dctx) );
+            }
         }
 
         LZ4F_freeCDict(cdict);
-- 
cgit v0.12