From 3a6832497ba02607dd4ede3a3fd30911e06f037e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 16 Mar 2015 17:52:14 +0100 Subject: skippable frames support --- NEWS | 2 + lib/lz4.h | 2 +- lib/lz4frame.c | 148 +++++++++++++++++++++++++++++++++++++------------- lib/lz4frame.h | 4 +- lib/lz4frame_static.h | 1 + programs/frametest.c | 106 +++++++++++++++++++++++++++--------- 6 files changed, 199 insertions(+), 64 deletions(-) diff --git a/NEWS b/NEWS index 89aa0e6..6da779c 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,8 @@ r128: New : lz4 cli sparse file support New : command -m, to compress multiple files in a single command Fixed : Restored lz4hc compression ratio (slightly lower since r124) +New : lz4frame supports skippable frames +Changed:Default "make install" directory is /usr/local New : g++ compatibility tests New : datagen can generate sparse files New : scan-build tests diff --git a/lib/lz4.h b/lib/lz4.h index df1d839..f04344b 100644 --- a/lib/lz4.h +++ b/lib/lz4.h @@ -49,7 +49,7 @@ extern "C" { **************************************/ #define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */ #define LZ4_VERSION_MINOR 5 /* for new (non-breaking) interface capabilities */ -#define LZ4_VERSION_RELEASE 0 /* for tweaks, bug-fixes, or development */ +#define LZ4_VERSION_RELEASE 1 /* for tweaks, bug-fixes, or development */ #define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE) int LZ4_versionNumber (void); diff --git a/lib/lz4frame.c b/lib/lz4frame.c index 0c8dc53..99953c1 100644 --- a/lib/lz4frame.c +++ b/lib/lz4frame.c @@ -97,6 +97,7 @@ typedef unsigned long long U64; #define _4BITS 0x0F #define _8BITS 0xFF +#define LZ4F_MAGIC_SKIPPABLE_START 0x184D2A50U #define LZ4F_MAGICNUMBER 0x184D2204U #define LZ4F_BLOCKUNCOMPRESSED_FLAG 0x80000000U #define LZ4F_MAXHEADERFRAME_SIZE 7 @@ -213,8 +214,9 @@ size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* prefere size_t headerSize; size_t streamSize; 
- memset(&prefs, 0, sizeof(prefs)); if (preferencesPtr!=NULL) prefs = *preferencesPtr; + else memset(&prefs, 0, sizeof(prefs)); + { blockSizeID_t proposedBSID = max64KB; size_t maxBlockSize = 64 KB; @@ -258,13 +260,14 @@ size_t LZ4F_compressFrame(void* dstBuffer, size_t dstMaxSize, const void* srcBuf BYTE* const dstEnd = dstStart + dstMaxSize; memset(&cctxI, 0, sizeof(cctxI)); /* works because no allocation */ - memset(&prefs, 0, sizeof(prefs)); memset(&options, 0, sizeof(options)); cctxI.version = LZ4F_VERSION; cctxI.maxBufferSize = 5 MB; /* mess with real buffer size to prevent allocation; works because autoflush==1 & stableSrc==1 */ if (preferencesPtr!=NULL) prefs = *preferencesPtr; + else memset(&prefs, 0, sizeof(prefs)); + { blockSizeID_t proposedBSID = max64KB; size_t maxBlockSize = 64 KB; @@ -625,7 +628,6 @@ size_t LZ4F_compressUpdate(LZ4F_compressionContext_t compressionContext, void* d */ size_t LZ4F_flush(LZ4F_compressionContext_t compressionContext, void* dstBuffer, size_t dstMaxSize, const LZ4F_compressOptions_t* compressOptionsPtr) { - LZ4F_compressOptions_t cOptionsNull; LZ4F_cctx_internal_t* cctxPtr = (LZ4F_cctx_internal_t*)compressionContext; BYTE* const dstStart = (BYTE*)dstBuffer; BYTE* dstPtr = dstStart; @@ -635,8 +637,6 @@ size_t LZ4F_flush(LZ4F_compressionContext_t compressionContext, void* dstBuffer, if (cctxPtr->tmpInSize == 0) return 0; /* nothing to flush */ if (cctxPtr->cStage != 1) return (size_t)-ERROR_GENERIC; if (dstMaxSize < (cctxPtr->tmpInSize + 16)) return (size_t)-ERROR_dstMaxSize_tooSmall; - memset(&cOptionsNull, 0, sizeof(cOptionsNull)); - if (compressOptionsPtr == NULL) compressOptionsPtr = &cOptionsNull; (void)compressOptionsPtr; /* not yet useful */ /* select compression function */ @@ -694,9 +694,9 @@ size_t LZ4F_compressEnd(LZ4F_compressionContext_t compressionContext, void* dstB } -/*********************************** -* Decompression functions -* *********************************/ +/********************************** +* 
Decompression functions +**********************************/ /* Resource management */ @@ -729,8 +729,28 @@ LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_compressionContext_t LZ4F_de } -/* Decompression */ +/* ******************************************************************** */ +/* ********************* Decompression ******************************** */ +/* ******************************************************************** */ + +typedef enum { dstage_getHeader=0, dstage_storeHeader, + dstage_getCBlockSize, dstage_storeCBlockSize, + dstage_copyDirect, + dstage_getCBlock, dstage_storeCBlock, dstage_decodeCBlock, + dstage_decodeCBlock_intoDst, dstage_decodeCBlock_intoTmp, dstage_flushOut, + dstage_getSuffix, dstage_storeSuffix, + dstage_getSBlockSize, dstage_storeSBlockSize, + dstage_skipSkippable +} dStage_t; + + +/* LZ4F_decodeHeader + return : nb Bytes read from srcVoidPtr (necessarily <= srcSize) + or an error code (testable with LZ4F_isError()) + output : set internal values of dctx, such as + dctxPtr->frameInfo and dctxPtr->dStage. 
+*/ static size_t LZ4F_decodeHeader(LZ4F_dctx_internal_t* dctxPtr, const void* srcVoidPtr, size_t srcSize) { BYTE FLG, BD, HC; @@ -741,9 +761,19 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx_internal_t* dctxPtr, const void* srcVo /* need to decode header to get frameInfo */ if (srcSize < 7) return (size_t)-ERROR_GENERIC; /* minimal header size */ + /* skippable frames */ + if ((LZ4F_readLE32(srcPtr) & 0xFFFFFFF0U) == LZ4F_MAGIC_SKIPPABLE_START) + { + memset(&(dctxPtr->frameInfo), 0, sizeof(dctxPtr->frameInfo)); + dctxPtr->frameInfo.frameType = skippableFrame; + dctxPtr->dStage = dstage_getSBlockSize; + return 4; + } + /* control magic number */ if (LZ4F_readLE32(srcPtr) != LZ4F_MAGICNUMBER) return (size_t)-ERROR_GENERIC; srcPtr += 4; + dctxPtr->frameInfo.frameType = LZ4F_frame; /* Flags */ FLG = srcPtr[0]; @@ -799,19 +829,12 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx_internal_t* dctxPtr, const void* srcVo dctxPtr->tmpOutStart = 0; dctxPtr->tmpOutSize = 0; + dctxPtr->dStage = dstage_getCBlockSize; + return 7; } -typedef enum { dstage_getHeader=0, dstage_storeHeader, dstage_decodeHeader, - dstage_getCBlockSize, dstage_storeCBlockSize, - dstage_copyDirect, - dstage_getCBlock, dstage_storeCBlock, dstage_decodeCBlock, - dstage_decodeCBlock_intoDst, dstage_decodeCBlock_intoTmp, dstage_flushOut, - dstage_getSuffix, dstage_storeSuffix -} dStage_t; - - /* LZ4F_getFrameInfo() * This function decodes frame header information, such as blockSize. * It is optional : you could start by calling directly LZ4F_decompress() instead. 
@@ -830,11 +853,10 @@ LZ4F_errorCode_t LZ4F_getFrameInfo(LZ4F_decompressionContext_t decompressionCont { LZ4F_errorCode_t errorCode = LZ4F_decodeHeader(dctxPtr, srcBuffer, *srcSizePtr); if (LZ4F_isError(errorCode)) return errorCode; - *srcSizePtr = errorCode; + *srcSizePtr = errorCode; /* nb Bytes consumed */ *frameInfoPtr = dctxPtr->frameInfo; dctxPtr->srcExpect = NULL; - dctxPtr->dStage = dstage_getCBlockSize; - return 4; + return 4; /* nextSrcSizeHint : 4 == block header size */ } /* frameInfo already decoded */ @@ -962,7 +984,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, /* expect to continue decoding src buffer where it left previously */ if (dctxPtr->srcExpect != NULL) { - if (srcStart != dctxPtr->srcExpect) return (size_t)-ERROR_GENERIC; + if (srcStart != dctxPtr->srcExpect) return (size_t)-ERROR_wrongSrcPtr; } /* programmed as a state machine */ @@ -977,14 +999,13 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, { if (srcEnd-srcPtr >= 7) { - selectedIn = srcPtr; - srcPtr += 7; - dctxPtr->dStage = dstage_decodeHeader; + LZ4F_errorCode_t errorCode = LZ4F_decodeHeader(dctxPtr, srcPtr, srcEnd-srcPtr); + if (LZ4F_isError(errorCode)) return errorCode; + srcPtr += errorCode; break; } dctxPtr->tmpInSize = 0; dctxPtr->dStage = dstage_storeHeader; - break; } case dstage_storeHeader: @@ -997,19 +1018,17 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, if (dctxPtr->tmpInSize < 7) { nextSrcSizeHint = (7 - dctxPtr->tmpInSize) + 4; - doAnotherStage = 0; /* no enough src, wait to get some more */ + doAnotherStage = 0; /* not enough src data, wait to get some more */ break; } - selectedIn = dctxPtr->header; - dctxPtr->dStage = dstage_decodeHeader; - break; - } - - case dstage_decodeHeader: - { - LZ4F_errorCode_t errorCode = LZ4F_decodeHeader(dctxPtr, selectedIn, 7); + LZ4F_errorCode_t errorCode = LZ4F_decodeHeader(dctxPtr, dctxPtr->header, 7); if (LZ4F_isError(errorCode)) return errorCode; 
- dctxPtr->dStage = dstage_getCBlockSize; + if (errorCode==4) + { + memcpy(dctxPtr->header, dctxPtr->header + 4, 3); + dctxPtr->tmpInSize = 3; + dctxPtr->dStage = dstage_storeSBlockSize; + } break; } @@ -1272,6 +1291,59 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, doAnotherStage = 0; break; } + + case dstage_getSBlockSize: + { + if ((srcEnd - srcPtr) >= 4) + { + selectedIn = srcPtr; + srcPtr += 4; + } + else + { + /* not enough input to read cBlockSize field */ + dctxPtr->tmpInSize = 0; + dctxPtr->dStage = dstage_storeSBlockSize; + } + } + + if (dctxPtr->dStage == dstage_storeSBlockSize) + case dstage_storeSBlockSize: + { + size_t sizeToCopy = 4 - dctxPtr->tmpInSize; + if (sizeToCopy > (size_t)(srcEnd - srcPtr)) sizeToCopy = srcEnd - srcPtr; + memcpy(dctxPtr->header + dctxPtr->tmpInSize, srcPtr, sizeToCopy); + srcPtr += sizeToCopy; + dctxPtr->tmpInSize += sizeToCopy; + if (dctxPtr->tmpInSize < 4) /* not enough input to get full sBlockSize; wait for more */ + { + nextSrcSizeHint = 4 - dctxPtr->tmpInSize; + doAnotherStage=0; + break; + } + selectedIn = dctxPtr->header; + } + + /* case dstage_decodeSBlockSize: */ /* no direct access */ + { + size_t nextSBlockSize = LZ4F_readLE32(selectedIn); + dctxPtr->tmpInTarget = nextSBlockSize; + dctxPtr->dStage = dstage_skipSkippable; + break; + } + + case dstage_skipSkippable: + { + size_t skipSize = dctxPtr->tmpInTarget; + if (skipSize > (size_t)(srcEnd-srcPtr)) skipSize = srcEnd-srcPtr; + srcPtr += skipSize; + dctxPtr->tmpInTarget -= skipSize; + doAnotherStage = 0; + nextSrcSizeHint = dctxPtr->tmpInTarget; + if (nextSrcSizeHint) break; + dctxPtr->dStage = dstage_getHeader; + break; + } } } @@ -1309,10 +1381,12 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, } } - if (srcPtrsrcExpect = srcPtr; else dctxPtr->srcExpect = NULL; + *srcSizePtr = (srcPtr - srcStart); *dstSizePtr = (dstPtr - dstStart); return nextSrcSizeHint; diff --git a/lib/lz4frame.h b/lib/lz4frame.h index 
61e461b..71814b0 100644 --- a/lib/lz4frame.h +++ b/lib/lz4frame.h @@ -65,12 +65,14 @@ const char* LZ4F_getErrorName(LZ4F_errorCode_t code); /* return error code str typedef enum { LZ4F_default=0, max64KB=4, max256KB=5, max1MB=6, max4MB=7 } blockSizeID_t; typedef enum { blockLinked=0, blockIndependent} blockMode_t; typedef enum { noContentChecksum=0, contentChecksumEnabled } contentChecksum_t; +typedef enum { LZ4F_frame=0, skippableFrame } frameType_t; typedef struct { blockSizeID_t blockSizeID; /* max64KB, max256KB, max1MB, max4MB ; 0 == default */ blockMode_t blockMode; /* blockLinked, blockIndependent ; 0 == default */ contentChecksum_t contentChecksumFlag; /* noContentChecksum, contentChecksumEnabled ; 0 == default */ - unsigned reserved[5]; + frameType_t frameType; /* LZ4F_frame, skippableFrame : 0 == default */ + unsigned reserved[4]; } LZ4F_frameInfo_t; typedef struct { diff --git a/lib/lz4frame_static.h b/lib/lz4frame_static.h index 4c34c6c..db9d167 100644 --- a/lib/lz4frame_static.h +++ b/lib/lz4frame_static.h @@ -52,6 +52,7 @@ extern "C" { ITEM(ERROR_compressionLevel_invalid) \ ITEM(ERROR_allocation_failed) \ ITEM(ERROR_srcSize_tooLarge) ITEM(ERROR_dstMaxSize_tooSmall) \ + ITEM(ERROR_wrongSrcPtr) \ ITEM(ERROR_decompressionFailed) \ ITEM(ERROR_checksum_invalid) \ ITEM(ERROR_maxCode) diff --git a/programs/frametest.c b/programs/frametest.c index 24cf921..be2be76 100644 --- a/programs/frametest.c +++ b/programs/frametest.c @@ -26,16 +26,10 @@ /************************************** * Compiler specific **************************************/ -#define _CRT_SECURE_NO_WARNINGS // fgets #ifdef _MSC_VER /* Visual Studio */ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ # pragma warning(disable : 4146) /* disable: C4146: minus unsigned expression */ #endif -#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) -#ifdef __GNUC__ -# pragma GCC diagnostic ignored "-Wmissing-braces" /* GCC bug 53119 : doesn't accept { 0 } as 
initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */ -# pragma GCC diagnostic ignored "-Wmissing-field-initializers" /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */ -#endif /* S_ISREG & gettimeofday() are not supported by MSVC */ #if defined(_MSC_VER) || defined(_WIN32) @@ -46,11 +40,11 @@ /************************************** * Includes **************************************/ -#include <stdlib.h> // free -#include <stdio.h> // fgets, sscanf -#include <string.h> // strcmp +#include <stdlib.h> /* malloc, free */ +#include <stdio.h> /* fprintf */ +#include <string.h> /* strcmp */ #include "lz4frame_static.h" -#include "xxhash.h" // XXH64 +#include "xxhash.h" /* XXH64 */ /* Use ftime() if gettimeofday() is not available on your target */ #if defined(FUZ_LEGACY_TIMER) @@ -79,6 +73,17 @@ typedef unsigned long long U64; #endif +/* unoptimized version; solves endianess & alignment issues */ +static void FUZ_writeLE32 (void* dstVoidPtr, U32 value32) +{ + BYTE* dstPtr = (BYTE*)dstVoidPtr; + dstPtr[0] = (BYTE)value32; + dstPtr[1] = (BYTE)(value32 >> 8); + dstPtr[2] = (BYTE)(value32 >> 16); + dstPtr[3] = (BYTE)(value32 >> 24); +} + + /************************************** * Constants **************************************/ @@ -86,6 +91,8 @@ typedef unsigned long long U64; # define LZ4_VERSION "" #endif +#define LZ4F_MAGIC_SKIPPABLE_START 0x184D2A50U + #define KB *(1U<<10) #define MB *(1U<<20) #define GB *(1U<<30) @@ -99,7 +106,7 @@ static const U32 prime2 = 2246822519U; /************************************** - Macros +* Macros **************************************/ #define DISPLAY(...) fprintf(stderr, __VA_ARGS__) #define DISPLAYLEVEL(l, ...) 
if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } @@ -112,7 +119,7 @@ static U32 g_time = 0; /***************************************** - Local Parameters +* Local Parameters *****************************************/ static U32 no_prompt = 0; static char* programName; @@ -178,15 +185,15 @@ static void FUZ_fillCompressibleNoiseBuffer(void* buffer, unsigned bufferSize, d unsigned pos = 0; U32 P32 = (U32)(32768 * proba); - // First Byte + /* First Byte */ BBuffer[pos++] = (BYTE)(FUZ_rand(seed)); while (pos < bufferSize) { - // Select : Literal (noise) or copy (within 64K) + /* Select : Literal (noise) or copy (within 64K) */ if (FUZ_RAND15BITS < P32) { - // Copy (within 64K) + /* Copy (within 64K) */ unsigned match, end; unsigned length = FUZ_RANDLENGTH + 4; unsigned offset = FUZ_RAND15BITS + 1; @@ -198,7 +205,7 @@ static void FUZ_fillCompressibleNoiseBuffer(void* buffer, unsigned bufferSize, d } else { - // Literal (noise) + /* Literal (noise) */ unsigned end; unsigned length = FUZ_RANDLENGTH; if (pos + length > bufferSize) length = bufferSize - pos; @@ -374,6 +381,40 @@ int basicTests(U32 seed, double compressibility) if (LZ4F_isError(cSize)) goto _output_error; DISPLAYLEVEL(3, "Compressed %i bytes into a %i bytes frame \n", (int)testSize, (int)cSize); + DISPLAYLEVEL(3, "Skippable frame test : \n"); + { + size_t decodedBufferSize = COMPRESSIBLE_NOISE_LENGTH; + unsigned maxBits = FUZ_highbit((U32)decodedBufferSize); + BYTE* op = (BYTE*)decodedBuffer; + BYTE* const oend = (BYTE*)decodedBuffer + COMPRESSIBLE_NOISE_LENGTH; + BYTE* ip = (BYTE*)compressedBuffer; + BYTE* const iend = (BYTE*)compressedBuffer + cSize + 8; + + LZ4F_errorCode_t errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION); + if (LZ4F_isError(errorCode)) goto _output_error; + + /* generate skippable frame */ + FUZ_writeLE32(ip, LZ4F_MAGIC_SKIPPABLE_START); + FUZ_writeLE32(ip+4, (U32)cSize); + + DISPLAYLEVEL(3, "random segment sizes : \n"); + while (ip < iend) + { + unsigned nbBits = 
FUZ_rand(&randState) % maxBits; + size_t iSize = (FUZ_rand(&randState) & ((1<<nbBits)-1)) + 1; + size_t oSize = oend-op; + if (iSize > (size_t)(iend-ip)) iSize = iend-ip; + errorCode = LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL); + if (LZ4F_isError(errorCode)) goto _output_error; + op += oSize; + ip += iSize; + } + DISPLAYLEVEL(3, "Skipped %i bytes \n", (int)decodedBufferSize); + + errorCode = LZ4F_freeDecompressionContext(dCtx); + if (LZ4F_isError(errorCode)) goto _output_error; + } + DISPLAY("Basic tests completed \n"); _end: free(CNBuffer); @@ -445,8 +486,8 @@ int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, double compressi unsigned CCflag = FUZ_rand(&randState) & 1; unsigned autoflush = (FUZ_rand(&randState) & 7) == 2; LZ4F_preferences_t prefs; - LZ4F_compressOptions_t cOptions = { 0 }; - LZ4F_decompressOptions_t dOptions = { 0 }; + LZ4F_compressOptions_t cOptions; + LZ4F_decompressOptions_t dOptions; unsigned nbBits = (FUZ_rand(&randState) % (FUZ_highbit(srcDataLength-1) - 1)) + 1; size_t srcSize = (FUZ_rand(&randState) & ((1<0; 1=>1,2 */ @@ -531,15 +583,19 @@ int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, double compressi if (result == (size_t)-ERROR_checksum_invalid) locateBuffDiff((BYTE*)srcBuffer+srcStart, decodedBuffer, srcSize, nonContiguousDst); CHECK(LZ4F_isError(result), "Decompression failed (error %i:%s)", (int)result, LZ4F_getErrorName((LZ4F_errorCode_t)result)); XXH64_update(&xxh64, op, (U32)oSize); + totalOut += oSize; op += oSize; ip += iSize; op += nonContiguousDst; if (nonContiguousDst==2) op = (BYTE*)decodedBuffer; /* overwritten destination */ } CHECK(result != 0, "Frame decompression failed (error %i)", (int)result); - crcDecoded = XXH64_digest(&xxh64); - if (crcDecoded != crcOrig) locateBuffDiff((BYTE*)srcBuffer+srcStart, decodedBuffer, srcSize, nonContiguousDst); - CHECK(crcDecoded != crcOrig, "Decompression corruption"); + if (totalOut) /* otherwise, it's a skippable frame */ + { + crcDecoded = XXH64_digest(&xxh64); + if (crcDecoded != crcOrig) 
locateBuffDiff((BYTE*)srcBuffer+srcStart, decodedBuffer, srcSize, nonContiguousDst); + CHECK(crcDecoded != crcOrig, "Decompression corruption"); + } } } -- cgit v0.12