From f7f67e778c9261fa5843811068f3eb3cc4e70509 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 1 Sep 2014 22:44:02 +0100 Subject: Added : preliminary frame decompression function --- Makefile | 2 +- lz4frame.c | 244 ++++++++++++++++++++++++++++++++++++++++++++++++++- lz4frame.h | 24 ++--- programs/Makefile | 5 +- programs/frametest.c | 36 +++----- 5 files changed, 273 insertions(+), 38 deletions(-) diff --git a/Makefile b/Makefile index 30ecbb7..b21ae2f 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ # ################################################################ # Version numbers -VERSION=122 +VERSION=123 export RELEASE=r$(VERSION) LIBVER_MAJOR=`sed -n '/define LZ4_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < lz4.h` LIBVER_MINOR=`sed -n '/define LZ4_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < lz4.h` diff --git a/lz4frame.c b/lz4frame.c index a094fd0..c604866 100644 --- a/lz4frame.c +++ b/lz4frame.c @@ -151,8 +151,17 @@ static void LZ4F_writeLE32 (BYTE* dstPtr, U32 value32) dstPtr[3] = (BYTE)(value32 >> 24); } +static U32 LZ4F_readLE32 (const BYTE* srcPtr) +{ + U32 value32 = srcPtr[0]; + value32 += (srcPtr[1]<<8); + value32 += (srcPtr[2]<<16); + value32 += (srcPtr[3]<<24); + return value32; +} -static BYTE LZ4F_headerChecksum (BYTE* header, size_t length) + +static BYTE LZ4F_headerChecksum (const BYTE* header, size_t length) { U32 xxh = XXH32(header, length, 0); return (BYTE)(xxh >> 8); @@ -553,3 +562,236 @@ size_t LZ4F_compressEnd(LZ4F_compressionContext_t compressionContext, void* dstB return dstPtr - dstStart; } + + +/*********************************** + * Decompression functions + * *********************************/ + +typedef struct { + LZ4F_frameInfo_t frameInfo; + unsigned dStage; + BYTE* tmpInputBuffer; + size_t tmpInputFilled; + size_t tmpInputTarget; +} LZ4F_dctx_internal_t; + + + +/* Resource management */ + +/* LZ4F_createDecompressionContext() : + * The first thing to do is to create a decompressionContext object, which will be used in all decompression operations. + * This is achieved using LZ4F_createDecompressionContext(). + * The function will provide a pointer to a fully allocated and initialised LZ4F_decompressionContext object. + * If the result LZ4F_errorCode_t is not zero, there was an error during context creation. + * Object can release its memory using LZ4F_freeDecompressionContext(); + */ +LZ4F_errorCode_t LZ4F_createDecompressionContext(LZ4F_compressionContext_t* LZ4F_decompressionContextPtr); +LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_compressionContext_t LZ4F_decompressionContext); + +/* Decompression */ + +static size_t LZ4F_decodeHeader(LZ4F_frameInfo_t* frameInfoPtr, const BYTE* srcPtr, size_t srcSize) +{ + BYTE FLG, BD, HC; + unsigned version, blockMode, blockChecksumFlag, contentSizeFlag, contentChecksumFlag, dictFlag, blockSizeID; + + /* need to decode header to get frameInfo */ + if (srcSize < 7) return -ERROR_GENERIC; /* minimal decodable size */ + + /* control magic number */ + if (LZ4F_readLE32(srcPtr) != LZ4F_MAGICNUMBER) return -ERROR_GENERIC; + srcPtr += 4; + + /* Flags */ + FLG = srcPtr[0]; + version = (FLG>>6)&_2BITS; + blockMode = (FLG>>5) & _1BIT; + blockChecksumFlag = (FLG>>4) & _1BIT; + contentSizeFlag = (FLG>>3) & _1BIT; + contentChecksumFlag = (FLG>>2) & _1BIT; + dictFlag = (FLG>>0) & _1BIT; + BD = srcPtr[1]; + blockSizeID = (BD>>4) & _3BITS; + + /* check */ + HC = LZ4F_headerChecksum(srcPtr, 2); + if (HC != srcPtr[2]) return -ERROR_GENERIC; /* Bad header checksum error */ + + /* validate */ + if (version != 1) return -ERROR_GENERIC; /* Version Number, only supported value */ + if (blockMode != blockIndependent) return -ERROR_GENERIC; /* Only supported blockMode for the time being */ + if (blockChecksumFlag != 0) return -ERROR_GENERIC; /* Only supported value for the time being */ + if (contentSizeFlag != 0) return -ERROR_GENERIC; /* Only supported value for the time being */ + if (((FLG>>1)&_1BIT) != 0) return -ERROR_GENERIC; /* Reserved bit */ + if (dictFlag != 0) return -ERROR_GENERIC; /* Only supported value for the time being */ + if (((BD>>7)&_1BIT) != 0) return -ERROR_GENERIC; /* Reserved bit */ + if (blockSizeID < 4) return -ERROR_GENERIC; /* Only supported values for the time being */ + if (((BD>>0)&_4BITS) != 0) return -ERROR_GENERIC; /* Reserved bits */ + + /* save */ + frameInfoPtr->blockMode = blockMode; + frameInfoPtr->contentChecksumFlag = contentChecksumFlag; + frameInfoPtr->blockSizeID = blockSizeID; + + return 7; +} + + +/* LZ4F_getFrameInfo() + * This function decodes frame header information, such as blockSize. + * It is optional : you could start by calling directly LZ4F_decompress() instead. + * The objective is to extract header information without starting decompression, typically for allocation purposes. + * LZ4F_getFrameInfo() can also be used *after* starting decompression, on a valid LZ4F_decompressionContext_t. + * The number of bytes read from srcBuffer will be provided within *srcSize (necessarily <= original value). + * The function result is an error code which can be tested using LZ4F_isError(). + */ +LZ4F_errorCode_t LZ4F_getFrameInfo(LZ4F_decompressionContext_t decompressionContext, LZ4F_frameInfo_t* frameInfoPtr, const void* srcBuffer, size_t* srcSize) +{ + LZ4F_dctx_internal_t* dctxPtr = (LZ4F_dctx_internal_t*)decompressionContext; + + if (dctxPtr->dStage==0) + { + LZ4F_errorCode_t errorCode = LZ4F_decodeHeader(frameInfoPtr, srcBuffer, *srcSize); + if (LZ4F_isError(errorCode)) return errorCode; + *srcSize = errorCode; + return OK_NoError; + } + + /* frameInfo already decoded */ + *srcSize = 0; + *frameInfoPtr = dctxPtr->frameInfo; + return OK_NoError; +} + +/* LZ4F_decompress() + * Call this function repetitively to regenerate data compressed within srcBuffer. + * The function will attempt to decode *srcSize from srcBuffer, into dstBuffer of maximum size *dstSize. + * + * The number of bytes generated into dstBuffer will be provided within *dstSize (necessarily <= original value). + * + * The number of bytes effectively read from srcBuffer will be provided within *srcSize (necessarily <= original value). + * If the number of bytes read is < number of bytes provided, then the decompression operation is not complete. + * You will have to call it again, using the same src arguments (but eventually different dst arguments). + * + * The function result is an error code which can be tested using LZ4F_isError(). + * When the frame is fully decoded, the function result will be OK_FrameEnd(=1). + */ +LZ4F_errorCode_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, void* dstBuffer, size_t* dstSize, const void* srcBuffer, size_t* srcSize, const LZ4F_decompressOptions_t* decompressOptionsPtr) +{ + LZ4F_dctx_internal_t* dctxPtr = (LZ4F_dctx_internal_t*)decompressionContext; + LZ4F_decompressOptions_t optionsNull = { 0 }; + const BYTE* const srcStart = (const BYTE*)srcBuffer; + const BYTE* const srcEnd = srcStart + *srcSize; + const BYTE* srcPtr = srcStart; + BYTE* const dstStart = (BYTE*)dstBuffer; + BYTE* const dstEnd = dstStart + *dstSize; + BYTE* dstPtr = dstStart; + size_t nextCBlockSize=0; + unsigned nextBlockUncompressedFlag=0; + + if (decompressOptionsPtr==NULL) decompressOptionsPtr = &optionsNull; + *srcSize = 0; + + if (dctxPtr->dStage == 0) /* header must be decoded */ + { + LZ4F_errorCode_t errorCode = LZ4F_decodeHeader(&(dctxPtr->frameInfo), srcBuffer, *srcSize); + if (LZ4F_isError(errorCode)) return errorCode; + srcPtr += errorCode; + dctxPtr->dStage = 1; + } + + if (dctxPtr->tmpInputTarget > 0) /* finalize what's already saved*/ + { + size_t sizeToCopy = dctxPtr->tmpInputTarget - dctxPtr->tmpInputFilled; + if (sizeToCopy < (size_t)(srcEnd - srcPtr)) sizeToCopy = srcEnd - srcPtr; + memcpy(dctxPtr->tmpInputBuffer + dctxPtr->tmpInputFilled, srcPtr, sizeToCopy); + srcPtr += sizeToCopy; + if (dctxPtr->tmpInputFilled < dctxPtr->tmpInputTarget) goto _end; + + switch (dctxPtr->dStage) + { + default: + return -ERROR_GENERIC; /* impossible case */ + case 1: + nextCBlockSize = LZ4F_readLE32(dctxPtr->tmpInputBuffer) & 0x7FFFFFFFU; + nextBlockUncompressedFlag = LZ4F_readLE32(dctxPtr->tmpInputBuffer) & 0x80000000U; + dctxPtr->tmpInputFilled = 0; + dctxPtr->tmpInputTarget = 0; + dctxPtr->dStage = 2; + break; + case 2: + if (nextBlockUncompressedFlag) + { + memcpy(dstPtr, dctxPtr->tmpInputBuffer, nextCBlockSize); + srcPtr += nextCBlockSize; + dstPtr += nextCBlockSize; + } + else + { + int origSize = LZ4_decompress_safe((const char*)(dctxPtr->tmpInputBuffer), (char*)dstPtr, (int)nextCBlockSize, (int)(dstEnd - dstPtr)); /* blindly attempt to decompress, in case there is enough space left */ + srcPtr += nextCBlockSize; + dstPtr += origSize; + } + dctxPtr->tmpInputFilled = 0; + dctxPtr->tmpInputTarget = 0; + dctxPtr->dStage = 1; + break; + } + } + + while (srcPtr < srcEnd) /* can still read */ + { + if (dctxPtr->dStage == 1) /* read next block size */ + { + if ((srcEnd - srcPtr) < 4) + { + size_t nbBytesToCopy = (srcEnd - srcPtr); + memcpy(dctxPtr->tmpInputBuffer, srcPtr, nbBytesToCopy); + dctxPtr->tmpInputFilled = nbBytesToCopy; + dctxPtr->tmpInputTarget = 4; + srcPtr = srcEnd; + break; + } + nextCBlockSize = LZ4F_readLE32(srcPtr) & 0x7FFFFFFFU; + nextBlockUncompressedFlag = LZ4F_readLE32(srcPtr) & 0x80000000U; + srcPtr += 4; + dctxPtr->dStage = 2; + } + + if (dctxPtr->dStage == 2) /* compressed block */ + { + if ((size_t)(srcEnd - srcPtr) < nextCBlockSize) + { + memcpy(dctxPtr->tmpInputBuffer, srcPtr, srcEnd-srcPtr); + dctxPtr->tmpInputFilled = srcEnd - srcPtr; + dctxPtr->tmpInputTarget = nextCBlockSize; + break; + } + if (nextBlockUncompressedFlag) + { + memcpy(dstPtr, srcPtr, nextCBlockSize); + srcPtr += nextCBlockSize; + dstPtr += nextCBlockSize; + } + else + { + int origSize = LZ4_decompress_safe((const char*)srcPtr, (char*)dstPtr, (int)nextCBlockSize, (int)(dstEnd - dstPtr)); /* blindly attempt to decompress, in case there is enough space left */ + srcPtr += nextCBlockSize; + dstPtr += origSize; + } + } + } + + if (srcPtr < srcEnd) + { + dctxPtr->tmpInputFilled = srcEnd-srcPtr; + memcpy(dctxPtr->tmpInputBuffer, srcPtr, dctxPtr->tmpInputFilled); /* save remaining input */ + } + +_end: + *srcSize = (srcPtr - srcStart); + *dstSize = (dstPtr - dstStart); + return OK_NoError; +} diff --git a/lz4frame.h b/lz4frame.h index 851de45..ae44a3a 100644 --- a/lz4frame.h +++ b/lz4frame.h @@ -37,7 +37,7 @@ * All related operations, including memory management, are handled by the library. * You don't need lz4.h when using lz4frame.h. * */ - + #pragma once #if defined (__cplusplus) @@ -56,13 +56,13 @@ extern "C" { **************************************/ typedef size_t LZ4F_errorCode_t; typedef enum { OK_FrameEnd = 1 } LZ4F_successCodes; -typedef enum { OK_NoError = 0, ERROR_GENERIC = 1, +typedef enum { OK_NoError = 0, ERROR_GENERIC = 1, ERROR_maxBlockSize_invalid, ERROR_blockMode_invalid, ERROR_contentChecksumFlag_invalid, ERROR_srcSize_tooLarge, ERROR_dstMaxSize_tooSmall, ERROR_allocation_failed, ERROR_compressionLevel_invalid, ERROR_maxCode - } LZ4F_errorCodes; /* error codes are negative unsigned values. + } LZ4F_errorCodes; /* error codes are negative unsigned values. Compare function result to (-specificCode) */ int LZ4F_isError(LZ4F_errorCode_t code); /* Basically : code > -ERROR_maxCode */ @@ -109,7 +109,7 @@ size_t LZ4F_compressFrame(void* dstBuffer, size_t dstMaxSize, const void* srcBuf /********************************** - * Advanced compression functions + * Advanced compression functions * *********************************/ typedef void* LZ4F_compressionContext_t; @@ -163,17 +163,17 @@ size_t LZ4F_compress(LZ4F_compressionContext_t compressionContext, void* dstBuff * The result of the function is the number of bytes written into dstBuffer (it can be zero, meaning input data is just stored within compressionContext for a future block to complete) * The function outputs an error code if it fails (can be tested using LZ4F_isError()) */ - + size_t LZ4F_flush(LZ4F_compressionContext_t compressionContext, void* dstBuffer, size_t dstMaxSize, const LZ4F_compressOptions_t* compressOptionsPtr); /* LZ4F_flush() * Should you need to create compressed data immediately, without waiting for a block to be filled, * you can call LZ4_flush(), which will immediately compress any remaining data stored within compressionContext. - * The result of the function is the number of bytes written into dstBuffer + * The result of the function is the number of bytes written into dstBuffer * (it can be zero, this means there was no data left within compressionContext) * The function outputs an error code if it fails (can be tested using LZ4F_isError()) * The LZ4F_compressOptions_t structure is optional : you can provide NULL as argument. */ - + size_t LZ4F_compressEnd(LZ4F_compressionContext_t compressionContext, void* dstBuffer, size_t dstMaxSize, const LZ4F_compressOptions_t* compressOptionsPtr); /* LZ4F_compressEnd() * When you want to properly finish the compressed frame, just call LZ4F_compressEnd(). @@ -186,8 +186,8 @@ size_t LZ4F_compressEnd(LZ4F_compressionContext_t compressionContext, void* dstB */ -/********************************** - * Decompression functions +/*********************************** + * Decompression functions * *********************************/ typedef void* LZ4F_decompressionContext_t; @@ -207,7 +207,7 @@ LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_compressionContext_t LZ4F_de * If the result LZ4F_errorCode_t is not zero, there was an error during context creation. * Object can release its memory using LZ4F_freeDecompressionContext(); */ - + /* Decompression */ LZ4F_errorCode_t LZ4F_getFrameInfo(LZ4F_decompressionContext_t decompressionContext, LZ4F_frameInfo_t* frameInfoPtr, const void* srcBuffer, size_t* srcSize); @@ -226,11 +226,11 @@ LZ4F_errorCode_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContex * The function will attempt to decode *srcSize from srcBuffer, into dstBuffer of maximum size *dstSize. * * The number of bytes generated into dstBuffer will be provided within *dstSize (necessarily <= original value). - * + * * The number of bytes effectively read from srcBuffer will be provided within *srcSize (necessarily <= original value). * If the number of bytes read is < number of bytes provided, then the decompression operation is not complete. * You will have to call it again, using the same src arguments (but eventually different dst arguments). - * + * * The function result is an error code which can be tested using LZ4F_isError(). * When the frame is fully decoded, the function result will be OK_FrameEnd(=1). */ diff --git a/programs/Makefile b/programs/Makefile index 21c7514..82a5e66 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -132,7 +132,7 @@ uninstall: [ -f $(DESTDIR)$(MANDIR)/lz4cat.1 ] && rm -f $(DESTDIR)$(MANDIR)/lz4cat.1 @echo lz4 successfully uninstalled -test-native: test-lz4 test-lz4c test-fullbench test-fuzzer test-mem +test-native: test-lz4 test-lz4c test-frame test-fullbench test-fuzzer test-mem test-force32: test-lz4c32 test-fullbench32 test-fuzzer32 test-mem32 @@ -169,6 +169,9 @@ test-fuzzer: fuzzer test-fuzzer32: fuzzer32 ./fuzzer32 --no-prompt +test-frame: frametest + ./frametest + test-mem: lz4 datagen ./datagen -g16KB > tmp valgrind ./lz4 -9 -BD -f tmp /dev/null diff --git a/programs/frametest.c b/programs/frametest.c index ad93182..e61205a 100644 --- a/programs/frametest.c +++ b/programs/frametest.c @@ -73,18 +73,19 @@ # define LZ4_VERSION "" #endif -#define NB_ATTEMPTS (1<<16) -#define COMPRESSIBLE_NOISE_LENGTH (1 << 21) -#define FUZ_MAX_BLOCK_SIZE (1 << 17) -#define FUZ_MAX_DICT_SIZE (1 << 15) +#define KB *(1U<<10) +#define MB *(1U<<20) +#define GB *(1U<<30) + +#define NB_ATTEMPTS (64 KB) +#define COMPRESSIBLE_NOISE_LENGTH (2 MB) +#define FUZ_MAX_BLOCK_SIZE (128 KB) +#define FUZ_MAX_DICT_SIZE (32 KB) #define FUZ_COMPRESSIBILITY_DEFAULT 50 #define PRIME1 2654435761U #define PRIME2 2246822519U #define PRIME3 3266489917U -#define KB *(1U<<10) -#define MB *(1U<<20) -#define GB *(1U<<30) /************************************** @@ -114,7 +115,7 @@ static int FUZ_GetMilliStart(void) return nCount; } - +/* static int FUZ_GetMilliSpan( int nTimeStart ) { int nSpan = FUZ_GetMilliStart() - nTimeStart; @@ -122,6 +123,7 @@ static int FUZ_GetMilliSpan( int nTimeStart ) nSpan += 0x100000 * 1000; return nSpan; } +*/ # define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r))) @@ -176,8 +178,6 @@ static void FUZ_fillCompressibleNoiseBuffer(void* buffer, unsigned bufferSize, d -#define FUZ_MAX(a,b) (a>b?a:b) - int frameTest(U32 seed, int nbCycles, int startCycle, double compressibility) { int testResult = 0; @@ -283,8 +283,8 @@ int FUZ_usage(void) } -int main(int argc, char** argv) { - U32 timestamp = FUZ_GetMilliStart(); +int main(int argc, char** argv) +{ U32 seed=0; int seedset=0; int argNb; @@ -367,17 +367,7 @@ int main(int argc, char** argv) { // Get Seed printf("Starting lz4frame tester (%i-bits, %s)\n", (int)(sizeof(size_t)*8), LZ4_VERSION); - if (!seedset) - { - char userInput[50] = {0}; - printf("Select an Initialisation number (default : random) : "); - fflush(stdout); - if ( no_prompt || fgets(userInput, sizeof userInput, stdin) ) - { - if ( sscanf(userInput, "%u", &seed) == 1 ) {} - else seed = FUZ_GetMilliSpan(timestamp); - } - } + if (!seedset) seed = FUZ_GetMilliStart() % 10000; printf("Seed = %u\n", seed); if (proba!=FUZ_COMPRESSIBILITY_DEFAULT) printf("Compressibility : %i%%\n", proba); -- cgit v0.12