From cc91777c98d71194c9d29544f84880742ffdf86c Mon Sep 17 00:00:00 2001 From: Reto Koradi Date: Mon, 4 Nov 2019 11:51:41 -0800 Subject: Make benchmark compatible with dictionary compression Support the -D command line option for running benchmarks. The benchmark code was slightly restructured to factor out the calls that need to be different for each benchmark scenario. Since there are now 4 scenarios (all combinations of fast/HC and with/without dictionary), the logic was getting somewhat convoluted otherwise. This was done by extending the compressionParameters struct that previously contained just a single function pointer. It now contains 4 function pointers for init/reset/compress/cleanup, with the related state. The functions get a pointer to the structure as their first argument (inspired by C++), so that they can access the state values in the struct. --- programs/bench.c | 276 +++++++++++++++++++++++++++++++++++++++++++++--------- programs/bench.h | 3 +- programs/lz4cli.c | 18 ++-- 3 files changed, 244 insertions(+), 53 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index 5934935..3357d14 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -45,17 +45,176 @@ #include "datagen.h" /* RDG_genBuffer */ #include "xxhash.h" +#include "bench.h" - +#define LZ4_STATIC_LINKING_ONLY #include "lz4.h" -#define COMPRESSOR0 LZ4_compress_local -static int LZ4_compress_local(const char* src, char* dst, int srcSize, int dstSize, int clevel) { - int const acceleration = (clevel < 0) ? -clevel + 1 : 1; - return LZ4_compress_fast(src, dst, srcSize, dstSize, acceleration); -} +#define LZ4_HC_STATIC_LINKING_ONLY #include "lz4hc.h" -#define COMPRESSOR1 LZ4_compress_HC -#define DEFAULTCOMPRESSOR COMPRESSOR0 + + +/* ************************************* +* Compression parameters and functions +***************************************/ + +struct compressionParameters +{ + int cLevel; + const char* dictBuf; + int dictSize; + + LZ4_stream_t* LZ4_stream; + LZ4_stream_t* LZ4_dictStream; + LZ4_streamHC_t* LZ4_streamHC; + LZ4_streamHC_t* LZ4_dictStreamHC; + + void (*initFunction)( + struct compressionParameters* pThis); + void (*resetFunction)( + const struct compressionParameters* pThis); + int (*blockFunction)( + const struct compressionParameters* pThis, + const char* src, char* dst, int srcSize, int dstSize); + void (*cleanupFunction)( + const struct compressionParameters* pThis); +}; + +static void LZ4_compressInitNoStream( + struct compressionParameters* pThis) +{ + pThis->LZ4_stream = NULL; + pThis->LZ4_dictStream = NULL; + pThis->LZ4_streamHC = NULL; + pThis->LZ4_dictStreamHC = NULL; +} + +static void LZ4_compressInitStream( + struct compressionParameters* pThis) +{ + pThis->LZ4_stream = LZ4_createStream(); + pThis->LZ4_dictStream = LZ4_createStream(); + pThis->LZ4_streamHC = NULL; + pThis->LZ4_dictStreamHC = NULL; + LZ4_loadDict(pThis->LZ4_dictStream, pThis->dictBuf, pThis->dictSize); +} + +static void LZ4_compressInitStreamHC( + struct compressionParameters* pThis) +{ + pThis->LZ4_stream = NULL; + pThis->LZ4_dictStream = NULL; + pThis->LZ4_streamHC = LZ4_createStreamHC(); + pThis->LZ4_dictStreamHC = LZ4_createStreamHC(); + LZ4_loadDictHC(pThis->LZ4_dictStreamHC, pThis->dictBuf, pThis->dictSize); +} + +static void LZ4_compressResetNoStream( + const struct compressionParameters* pThis) +{ + (void)pThis; +} + +static void LZ4_compressResetStream( + const struct compressionParameters* pThis) +{ + LZ4_resetStream_fast(pThis->LZ4_stream); + LZ4_attach_dictionary(pThis->LZ4_stream, pThis->LZ4_dictStream); +} + +static void LZ4_compressResetStreamHC( + const struct compressionParameters* pThis) +{ + LZ4_resetStreamHC_fast(pThis->LZ4_streamHC, pThis->cLevel); + LZ4_attach_HC_dictionary(pThis->LZ4_streamHC, pThis->LZ4_dictStreamHC); +} + +static int LZ4_compressBlockNoStream( + const struct compressionParameters* pThis, + const char* src, char* dst, + int srcSize, int dstSize) +{ + int const acceleration = (pThis->cLevel < 0) ? -pThis->cLevel + 1 : 1; + return LZ4_compress_fast(src, dst, srcSize, dstSize, acceleration); +} + +static int LZ4_compressBlockNoStreamHC( + const struct compressionParameters* pThis, + const char* src, char* dst, + int srcSize, int dstSize) +{ + return LZ4_compress_HC(src, dst, srcSize, dstSize, pThis->cLevel); +} + +static int LZ4_compressBlockStream( + const struct compressionParameters* pThis, + const char* src, char* dst, + int srcSize, int dstSize) +{ + int const acceleration = (pThis->cLevel < 0) ? -pThis->cLevel + 1 : 1; + return LZ4_compress_fast_continue(pThis->LZ4_stream, src, dst, srcSize, dstSize, acceleration); +} + +static int LZ4_compressBlockStreamHC( + const struct compressionParameters* pThis, + const char* src, char* dst, + int srcSize, int dstSize) +{ + return LZ4_compress_HC_continue(pThis->LZ4_streamHC, src, dst, srcSize, dstSize); +} + +static void LZ4_compressCleanupNoStream( + const struct compressionParameters* pThis) +{ + (void)pThis; +} + +static void LZ4_compressCleanupStream( + const struct compressionParameters* pThis) +{ + LZ4_freeStream(pThis->LZ4_stream); + LZ4_freeStream(pThis->LZ4_dictStream); +} + +static void LZ4_compressCleanupStreamHC( + const struct compressionParameters* pThis) +{ + LZ4_freeStreamHC(pThis->LZ4_streamHC); + LZ4_freeStreamHC(pThis->LZ4_dictStreamHC); +} + +static void LZ4_buildCompressionParameters( + struct compressionParameters* pParams, + int cLevel, const char* dictBuf, int dictSize) +{ + pParams->cLevel = cLevel; + pParams->dictBuf = dictBuf; + pParams->dictSize = dictSize; + + if (dictSize) { + if (cLevel < LZ4HC_CLEVEL_MIN) { + pParams->initFunction = LZ4_compressInitStream; + pParams->resetFunction = LZ4_compressResetStream; + pParams->blockFunction = LZ4_compressBlockStream; + pParams->cleanupFunction = LZ4_compressCleanupStream; + } else { + pParams->initFunction = LZ4_compressInitStreamHC; + pParams->resetFunction = LZ4_compressResetStreamHC; + pParams->blockFunction = LZ4_compressBlockStreamHC; + pParams->cleanupFunction = LZ4_compressCleanupStreamHC; + } + } else { + pParams->initFunction = LZ4_compressInitNoStream; + pParams->resetFunction = LZ4_compressResetNoStream; + pParams->cleanupFunction = LZ4_compressCleanupNoStream; + + if (cLevel < LZ4HC_CLEVEL_MIN) { + pParams->blockFunction = LZ4_compressBlockNoStream; + } else { + pParams->blockFunction = LZ4_compressBlockNoStreamHC; + } + } +} + #define LZ4_isError(errcode) (errcode==0) @@ -79,6 +238,8 @@ static int LZ4_compress_local(const char* src, char* dst, int srcSize, int dstSi #define MB *(1 <<20) #define GB *(1U<<30) +#define LZ4_MAX_DICT_SIZE (64 KB) + static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31)); static U32 g_compressibilityDefault = 50; @@ -152,17 +313,13 @@ typedef struct { size_t resSize; } blockParam_t; -struct compressionParameters -{ - int (*compressionFunction)(const char* src, char* dst, int srcSize, int dstSize, int cLevel); -}; - #define MIN(a,b) ((a)<(b) ? (a) : (b)) #define MAX(a,b) ((a)>(b) ? (a) : (b)) static int BMK_benchMem(const void* srcBuffer, size_t srcSize, const char* displayName, int cLevel, - const size_t* fileSizes, U32 nbFiles) + const size_t* fileSizes, U32 nbFiles, + const char* dictBuf, int dictSize) { size_t const blockSize = (g_blockSize>=32 ? g_blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ; U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; @@ -172,27 +329,16 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, void* const resultBuffer = malloc(srcSize); U32 nbBlocks; struct compressionParameters compP; - int cfunctionId; /* checks */ if (!compressedBuffer || !resultBuffer || !blockTable) EXM_THROW(31, "allocation error : not enough memory"); - /* init */ if (strlen(displayName)>17) displayName += strlen(displayName)-17; /* can only display 17 characters */ - /* Init */ - if (cLevel < LZ4HC_CLEVEL_MIN) cfunctionId = 0; else cfunctionId = 1; - switch (cfunctionId) - { -#ifdef COMPRESSOR0 - case 0 : compP.compressionFunction = COMPRESSOR0; break; -#endif -#ifdef COMPRESSOR1 - case 1 : compP.compressionFunction = COMPRESSOR1; break; -#endif - default : compP.compressionFunction = DEFAULTCOMPRESSOR; - } + /* init */ + LZ4_buildCompressionParameters(&compP, cLevel, dictBuf, dictSize); + compP.initFunction(&compP); /* Init blockTable data */ { const char* srcPtr = (const char*)srcBuffer; @@ -256,8 +402,12 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, U32 nbLoops; for (nbLoops=0; nbLoops < nbCompressionLoops; nbLoops++) { U32 blockNb; + compP.resetFunction(&compP); for (blockNb=0; blockNb 1) ? mfName : fileNamesTable[0]; BMK_benchCLevel(srcBuffer, benchedSize, displayName, cLevel, cLevelLast, - fileSizes, nbFiles); + fileSizes, nbFiles, + dictBuf, dictSize); } /* clean up */ @@ -497,7 +655,8 @@ static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles, } -static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility) +static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility, + const char* dictBuf, int dictSize) { char name[20] = {0}; size_t benchedSize = 10000000; @@ -511,7 +670,7 @@ static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility /* Bench */ snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100)); - BMK_benchCLevel(srcBuffer, benchedSize, name, cLevel, cLevelLast, &benchedSize, 1); + BMK_benchCLevel(srcBuffer, benchedSize, name, cLevel, cLevelLast, &benchedSize, 1, dictBuf, dictSize); /* clean up */ free(srcBuffer); @@ -519,7 +678,8 @@ static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility int BMK_benchFilesSeparately(const char** fileNamesTable, unsigned nbFiles, - int cLevel, int cLevelLast) + int cLevel, int cLevelLast, + const char* dictBuf, int dictSize) { unsigned fileNb; if (cLevel > LZ4HC_CLEVEL_MAX) cLevel = LZ4HC_CLEVEL_MAX; @@ -528,29 +688,59 @@ int BMK_benchFilesSeparately(const char** fileNamesTable, unsigned nbFiles, if (cLevelLast > cLevel) DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast); for (fileNb=0; fileNb LZ4HC_CLEVEL_MAX) cLevel = LZ4HC_CLEVEL_MAX; if (cLevelLast > LZ4HC_CLEVEL_MAX) cLevelLast = LZ4HC_CLEVEL_MAX; if (cLevelLast < cLevel) cLevelLast = cLevel; if (cLevelLast > cLevel) DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast); + if (dictFileName) { + FILE* dictFile = NULL; + U64 dictFileSize = UTIL_getFileSize(dictFileName); + if (!dictFileSize) EXM_THROW(25, "Dictionary error : could not stat dictionary file"); + + dictFile = fopen(dictFileName, "rb"); + if (!dictFile) EXM_THROW(25, "Dictionary error : could not open dictionary file"); + + if (dictFileSize > LZ4_MAX_DICT_SIZE) { + dictSize = LZ4_MAX_DICT_SIZE; + if (UTIL_fseek(dictFile, dictFileSize - dictSize, SEEK_SET)) + EXM_THROW(25, "Dictionary error : could not seek dictionary file"); + } else { + dictSize = (int)dictFileSize; + } + + dictBuf = (char *)malloc(dictSize); + if (!dictBuf) EXM_THROW(25, "Allocation error : not enough memory"); + + if (fread(dictBuf, 1, dictSize, dictFile) != (size_t)dictSize) + EXM_THROW(25, "Dictionary error : could not read dictionary file"); + + fclose(dictFile); + } + if (nbFiles == 0) - BMK_syntheticTest(cLevel, cLevelLast, compressibility); + BMK_syntheticTest(cLevel, cLevelLast, compressibility, dictBuf, dictSize); else { if (g_benchSeparately) - BMK_benchFilesSeparately(fileNamesTable, nbFiles, cLevel, cLevelLast); + BMK_benchFilesSeparately(fileNamesTable, nbFiles, cLevel, cLevelLast, dictBuf, dictSize); else - BMK_benchFileTable(fileNamesTable, nbFiles, cLevel, cLevelLast); + BMK_benchFileTable(fileNamesTable, nbFiles, cLevel, cLevelLast, dictBuf, dictSize); } + + free(dictBuf); return 0; } diff --git a/programs/bench.h b/programs/bench.h index bb67bee..22ebf60 100644 --- a/programs/bench.h +++ b/programs/bench.h @@ -26,7 +26,8 @@ #include int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, - int cLevel, int cLevelLast); + int cLevel, int cLevelLast, + const char* dictFileName); /* Set Parameters */ void BMK_setNbSeconds(unsigned nbLoops); diff --git a/programs/lz4cli.c b/programs/lz4cli.c index 5da7654..e95050b 100644 --- a/programs/lz4cli.c +++ b/programs/lz4cli.c @@ -625,10 +625,18 @@ int main(int argc, const char** argv) #endif } + if (dictionary_filename) { + if (!strcmp(dictionary_filename, stdinmark) && IS_CONSOLE(stdin)) { + DISPLAYLEVEL(1, "refusing to read from a console\n"); + exit(1); + } + LZ4IO_setDictionaryFilename(prefs, dictionary_filename); + } + /* benchmark and test modes */ if (mode == om_bench) { BMK_setNotificationLevel(displayLevel); - operationResult = BMK_benchFiles(inFileNames, ifnIdx, cLevel, cLevelLast); + operationResult = BMK_benchFiles(inFileNames, ifnIdx, cLevel, cLevelLast, dictionary_filename); goto _cleanup; } @@ -638,14 +646,6 @@ int main(int argc, const char** argv) mode = om_decompress; /* defer to decompress */ } - if (dictionary_filename) { - if (!strcmp(dictionary_filename, stdinmark) && IS_CONSOLE(stdin)) { - DISPLAYLEVEL(1, "refusing to read from a console\n"); - exit(1); - } - LZ4IO_setDictionaryFilename(prefs, dictionary_filename); - } - /* compress or decompress */ if (!input_filename) input_filename = stdinmark; /* Check if input is defined as console; trigger an error in this case */ -- cgit v0.12