From 4c227a487e25c175d98a320386c96dbea6628216 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 30 Mar 2015 21:32:25 +0100 Subject: Added LZ4_compress_fast() --- lib/lz4.c | 90 +++++++++++++++++++++++++++++++++++++--------------- lib/lz4.h | 10 ++++++ programs/fullbench.c | 68 ++++++++++++++++++++------------------- 3 files changed, 109 insertions(+), 59 deletions(-) diff --git a/lib/lz4.c b/lib/lz4.c index 2a1ae57..c8bde3a 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -44,6 +44,12 @@ #define HEAPMODE 0 /* + * ACCELERATION_DEFAULT : + * Select the value of "acceleration" for LZ4_compress_fast() when parameter == 0 + */ +#define ACCELERATION_DEFAULT 17 + +/* * CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS : * By default, the source code expects the compiler to correctly optimize * 4-bytes and 8-bytes read on architectures able to handle it efficiently. @@ -513,15 +519,16 @@ static const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t t } static int LZ4_compress_generic( - void* ctx, - const char* source, - char* dest, - int inputSize, - int maxOutputSize, - limitedOutput_directive outputLimited, - tableType_t const tableType, - dict_directive dict, - dictIssue_directive dictIssue) + void* const ctx, + const char* const source, + char* const dest, + const int inputSize, + const int maxOutputSize, + const limitedOutput_directive outputLimited, + const tableType_t tableType, + const dict_directive dict, + const dictIssue_directive dictIssue, + const U32 acceleration) { LZ4_stream_t_internal* const dictPtr = (LZ4_stream_t_internal*)ctx; @@ -544,7 +551,7 @@ static int LZ4_compress_generic( size_t refDelta=0; /* Init conditions */ - if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */ + if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */ switch(dict) { case noDict: @@ -576,14 +583,14 @@ static int LZ4_compress_generic( { const BYTE* forwardIp = ip; unsigned step=1; - unsigned searchMatchNb = (1U << LZ4_skipTrigger); + unsigned searchMatchNb = ((1+acceleration) << LZ4_skipTrigger); /* Find a match */ do { U32 h = forwardH; ip = forwardIp; forwardIp += step; - step = searchMatchNb++ >> LZ4_skipTrigger; + step = (searchMatchNb++ >> LZ4_skipTrigger); if (unlikely(forwardIp > mflimit)) goto _last_literals; @@ -724,7 +731,7 @@ _last_literals: } -int LZ4_compress(const char* source, char* dest, int inputSize) +int LZ4_compress(const char* source, char* dest, const int inputSize) { #if (HEAPMODE) void* ctx = ALLOCATOR(LZ4_STREAMSIZE_U64, 8); /* Aligned on 8-bytes boundaries */ @@ -734,9 +741,9 @@ int LZ4_compress(const char* source, char* dest, int inputSize) int result; if (inputSize < LZ4_64Klimit) - result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, byU16, noDict, noDictIssue); + result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, byU16, noDict, noDictIssue, 0); else - result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue); + result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, 0); #if (HEAPMODE) FREEMEM(ctx); @@ -754,9 +761,40 @@ int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, in int result; if (inputSize < LZ4_64Klimit) - result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue); + result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, 0); else - result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue); + result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, 0); + +#if (HEAPMODE) + FREEMEM(ctx); +#endif + return result; +} + + +int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, unsigned acceleration) +{ +#if (HEAPMODE) + void* ctx = ALLOCATOR(LZ4_STREAMSIZE_U64, 8); /* Aligned on 8-bytes boundaries */ +#else + U64 ctx[LZ4_STREAMSIZE_U64] = {0}; /* Ensure data is aligned on 8-bytes boundaries */ +#endif + int result; + + if (acceleration == 0) + { + if (inputSize < LZ4_64Klimit) + result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, ACCELERATION_DEFAULT); + else + result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, ACCELERATION_DEFAULT); + } + else + { + if (inputSize < LZ4_64Klimit) + result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); + else + result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration); + } #if (HEAPMODE) FREEMEM(ctx); @@ -875,9 +913,9 @@ FORCE_INLINE int LZ4_compress_continue_generic (void* LZ4_stream, const char* so { int result; if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) - result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, withPrefix64k, dictSmall); + result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, withPrefix64k, dictSmall, 0); else - result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, withPrefix64k, noDictIssue); + result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, withPrefix64k, noDictIssue, 0); streamPtr->dictSize += (U32)inputSize; streamPtr->currentOffset += (U32)inputSize; return result; @@ -887,9 +925,9 @@ FORCE_INLINE int LZ4_compress_continue_generic (void* LZ4_stream, const char* so { int result; if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) - result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, usingExtDict, dictSmall); + result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, usingExtDict, dictSmall, 0); else - result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, usingExtDict, noDictIssue); + result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, usingExtDict, noDictIssue, 0); streamPtr->dictionary = (const BYTE*)source; streamPtr->dictSize = (U32)inputSize; streamPtr->currentOffset += (U32)inputSize; @@ -919,7 +957,7 @@ int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* if (smallest > (const BYTE*) source) smallest = (const BYTE*) source; LZ4_renormDictT((LZ4_stream_t_internal*)LZ4_dict, smallest); - result = LZ4_compress_generic(LZ4_dict, source, dest, inputSize, 0, notLimited, byU32, usingExtDict, noDictIssue); + result = LZ4_compress_generic(LZ4_dict, source, dest, inputSize, 0, notLimited, byU32, usingExtDict, noDictIssue, 0); streamPtr->dictionary = (const BYTE*)source; streamPtr->dictSize = (U32)inputSize; @@ -1352,9 +1390,9 @@ int LZ4_compress_withState (void* state, const char* source, char* dest, int inp MEM_INIT(state, 0, LZ4_STREAMSIZE); if (inputSize < LZ4_64Klimit) - return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, byU16, noDict, noDictIssue); + return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, byU16, noDict, noDictIssue, 0); else - return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue); + return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, 0); } int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize) @@ -1363,9 +1401,9 @@ int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* MEM_INIT(state, 0, LZ4_STREAMSIZE); if (inputSize < LZ4_64Klimit) - return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue); + return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, 0); else - return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue); + return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, 0); } /* Obsolete streaming decompression functions */ diff --git a/lib/lz4.h b/lib/lz4.h index de43fc0..eabc40f 100644 --- a/lib/lz4.h +++ b/lib/lz4.h @@ -130,6 +130,16 @@ int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, /* +LZ4_compress_fast() : + Same as LZ4_compress_limitedOutput, but allows to select an "acceleration" factor. + The larger the value, the faster the algorithm, but also the lesser the compression. + So it's a trade-off, which can be fine tuned, selecting whichever value you want. + An acceleration value of "0" means "use Default value", which is typically about 15 (see lz4.c source code). +*/ +int LZ4_compress_fast (const char* source, char* dest, int sourceSize, int maxOutputSize, unsigned acceleration); + + +/* LZ4_compress_withState() : Same compression functions, but using an externally allocated memory space to store compression state. Use LZ4_sizeofState() to know how much memory must be allocated, diff --git a/programs/fullbench.c b/programs/fullbench.c index 6c42ed0..cd6e3c8 100644 --- a/programs/fullbench.c +++ b/programs/fullbench.c @@ -146,9 +146,9 @@ struct chunkParameters -//************************************** -// Benchmark Parameters -//************************************** +/************************************** +* Benchmark Parameters +**************************************/ static int chunkSize = DEFAULT_CHUNKSIZE; static int nbIterations = NBLOOPS; static int BMK_pause = 0; @@ -175,17 +175,17 @@ void BMK_SetPause(void) BMK_pause = 1; } -//********************************************************* -// Private functions -//********************************************************* +/********************************************************* +* Private functions +*********************************************************/ #if defined(BMK_LEGACY_TIMER) static int BMK_GetMilliStart(void) { - // Based on Legacy ftime() - // Rolls over every ~ 12.1 days (0x100000/24/60/60) - // Use GetMilliSpan to correct for rollover + /* Based on Legacy ftime() + * Rolls over every ~ 12.1 days (0x100000/24/60/60) + * Use GetMilliSpan to correct for rollover */ struct timeb tb; int nCount; ftime( &tb ); @@ -197,8 +197,8 @@ static int BMK_GetMilliStart(void) static int BMK_GetMilliStart(void) { - // Based on newer gettimeofday() - // Use GetMilliSpan to correct for rollover + /* Based on newer gettimeofday() + * Use GetMilliSpan to correct for rollover */ struct timeval tv; int nCount; gettimeofday(&tv, NULL); @@ -386,6 +386,11 @@ static int local_LZ4_compress_limitedOutput(const char* in, char* out, int inSiz return LZ4_compress_limitedOutput(in, out, inSize, LZ4_compressBound(inSize)); } +static int local_LZ4_compress_fast(const char* in, char* out, int inSize) +{ + return LZ4_compress_fast(in, out, inSize, LZ4_compressBound(inSize), 0); +} + static void* stateLZ4; static int local_LZ4_compress_withState(const char* in, char* out, int inSize) { @@ -530,7 +535,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) { int fileIdx=0; char* orig_buff; -# define NB_COMPRESSION_ALGORITHMS 16 +# define NB_COMPRESSION_ALGORITHMS 17 double totalCTime[NB_COMPRESSION_ALGORITHMS+1] = {0}; double totalCSize[NB_COMPRESSION_ALGORITHMS+1] = {0}; # define NB_DECOMPRESSION_ALGORITHMS 9 @@ -568,22 +573,18 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) return 11; } - /* Init */ - stateLZ4 = LZ4_createStream(); - stateLZ4HC = LZ4_createStreamHC(); - - /* Memory allocation & restrictions */ + /* Memory size adjustments */ inFileSize = BMK_GetFileSize(inFileName); if (inFileSize==0) { DISPLAY( "file is empty\n"); return 11; } - benchedSize = (size_t) BMK_findMaxMem(inFileSize); + benchedSize = (size_t) BMK_findMaxMem(inFileSize*2) / 2; /* because 2 buffers */ if (benchedSize==0) { DISPLAY( "not enough memory\n"); return 11; } if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize; if (benchedSize < inFileSize) - { DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize>>20)); - } - // Alloc + /* Allocation */ + stateLZ4 = LZ4_createStream(); + stateLZ4HC = LZ4_createStreamHC(); chunkP = (struct chunkParameters*) malloc(((benchedSize / (size_t)chunkSize)+1) * sizeof(struct chunkParameters)); orig_buff = (char*) malloc((size_t)benchedSize); nbChunks = (int) (((int)benchedSize + (chunkSize-1))/ chunkSize); @@ -602,7 +603,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) return 12; } - // Fill input buffer + /* Fill in src buffer */ DISPLAY("Loading %s... \r", inFileName); readSize = fread(orig_buff, 1, benchedSize, inFile); fclose(inFile); @@ -664,20 +665,21 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) case 4 : compressionFunction = local_LZ4_compress_limitedOutput_withState; compressorName = "LZ4_compress_limitedOutput_withState"; break; case 5 : compressionFunction = local_LZ4_compress_continue; initFunction = LZ4_create; compressorName = "LZ4_compress_continue"; break; case 6 : compressionFunction = local_LZ4_compress_limitedOutput_continue; initFunction = LZ4_create; compressorName = "LZ4_compress_limitedOutput_continue"; break; - case 7 : compressionFunction = LZ4_compressHC; compressorName = "LZ4_compressHC"; break; - case 8 : compressionFunction = local_LZ4_compressHC_limitedOutput; compressorName = "LZ4_compressHC_limitedOutput"; break; - case 9 : compressionFunction = local_LZ4_compressHC_withStateHC; compressorName = "LZ4_compressHC_withStateHC"; break; - case 10: compressionFunction = local_LZ4_compressHC_limitedOutput_withStateHC; compressorName = "LZ4_compressHC_limitedOutput_withStateHC"; break; - case 11: compressionFunction = local_LZ4_compressHC_continue; initFunction = LZ4_createHC; compressorName = "LZ4_compressHC_continue"; break; - case 12: compressionFunction = local_LZ4_compressHC_limitedOutput_continue; initFunction = LZ4_createHC; compressorName = "LZ4_compressHC_limitedOutput_continue"; break; - case 13: compressionFunction = local_LZ4_compress_forceDict; initFunction = local_LZ4_resetDictT; compressorName = "LZ4_compress_forceDict"; break; - case 14: compressionFunction = local_LZ4F_compressFrame; compressorName = "LZ4F_compressFrame"; + case 7 : compressionFunction = local_LZ4_compress_fast; compressorName = "LZ4_compress_fast"; break; + case 8 : compressionFunction = LZ4_compressHC; compressorName = "LZ4_compressHC"; break; + case 9 : compressionFunction = local_LZ4_compressHC_limitedOutput; compressorName = "LZ4_compressHC_limitedOutput"; break; + case 10 : compressionFunction = local_LZ4_compressHC_withStateHC; compressorName = "LZ4_compressHC_withStateHC"; break; + case 11: compressionFunction = local_LZ4_compressHC_limitedOutput_withStateHC; compressorName = "LZ4_compressHC_limitedOutput_withStateHC"; break; + case 12: compressionFunction = local_LZ4_compressHC_continue; initFunction = LZ4_createHC; compressorName = "LZ4_compressHC_continue"; break; + case 13: compressionFunction = local_LZ4_compressHC_limitedOutput_continue; initFunction = LZ4_createHC; compressorName = "LZ4_compressHC_limitedOutput_continue"; break; + case 14: compressionFunction = local_LZ4_compress_forceDict; initFunction = local_LZ4_resetDictT; compressorName = "LZ4_compress_forceDict"; break; + case 15: compressionFunction = local_LZ4F_compressFrame; compressorName = "LZ4F_compressFrame"; chunkP[0].origSize = (int)benchedSize; nbChunks=1; break; - case 15: compressionFunction = local_LZ4_saveDict; compressorName = "LZ4_saveDict"; + case 16: compressionFunction = local_LZ4_saveDict; compressorName = "LZ4_saveDict"; LZ4_loadDict(&LZ4_dict, chunkP[0].origBuffer, chunkP[0].origSize); break; - case 16: compressionFunction = local_LZ4_saveDictHC; compressorName = "LZ4_saveDictHC"; + case 17: compressionFunction = local_LZ4_saveDictHC; compressorName = "LZ4_saveDictHC"; LZ4_loadDictHC(&LZ4_dictHC, chunkP[0].origBuffer, chunkP[0].origSize); break; default : DISPLAY("ERROR ! Bad algorithm Id !! \n"); free(chunkP); return 1; @@ -724,7 +726,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) totalCSize[cAlgNb] += cSize; } - // Prepare layout for decompression + /* Prepare layout for decompression */ // Init data chunks { int i; -- cgit v0.12