diff options
Diffstat (limited to 'lib/dictBuilder/zdict.c')
-rw-r--r-- | lib/dictBuilder/zdict.c | 59 |
1 files changed, 29 insertions, 30 deletions
diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index 6d0b042..459cbe4 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -23,9 +23,13 @@ /* Unix Large Files support (>4GB) */ #define _FILE_OFFSET_BITS 64 #if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */ +# ifndef _LARGEFILE_SOURCE # define _LARGEFILE_SOURCE +# endif #elif ! defined(__LP64__) /* No point defining Large file for 64 bit */ +# ifndef _LARGEFILE64_SOURCE # define _LARGEFILE64_SOURCE +# endif #endif @@ -37,18 +41,19 @@ #include <stdio.h> /* fprintf, fopen, ftello64 */ #include <time.h> /* clock */ +#ifndef ZDICT_STATIC_LINKING_ONLY +# define ZDICT_STATIC_LINKING_ONLY +#endif +#define HUF_STATIC_LINKING_ONLY + #include "../common/mem.h" /* read */ #include "../common/fse.h" /* FSE_normalizeCount, FSE_writeNCount */ -#define HUF_STATIC_LINKING_ONLY #include "../common/huf.h" /* HUF_buildCTable, HUF_writeCTable */ #include "../common/zstd_internal.h" /* includes zstd.h */ #include "../common/xxhash.h" /* XXH64 */ -#include "divsufsort.h" -#ifndef ZDICT_STATIC_LINKING_ONLY -# define ZDICT_STATIC_LINKING_ONLY -#endif -#include "zdict.h" #include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */ +#include "../zdict.h" +#include "divsufsort.h" /*-************************************* @@ -62,14 +67,15 @@ #define NOISELENGTH 32 -static const int g_compressionLevel_default = 3; static const U32 g_selectivity_default = 9; /*-************************************* * Console display ***************************************/ +#undef DISPLAY #define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); } +#undef DISPLAYLEVEL #define DISPLAYLEVEL(l, ...) 
if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; } @@ -105,20 +111,17 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize) size_t headerSize; if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted); - { unsigned offcodeMaxValue = MaxOff; - ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t)); + { ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t)); U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE); - short* offcodeNCount = (short*)malloc((MaxOff+1)*sizeof(short)); - if (!bs || !wksp || !offcodeNCount) { + if (!bs || !wksp) { headerSize = ERROR(memory_allocation); } else { ZSTD_reset_compressedBlockState(bs); - headerSize = ZSTD_loadCEntropy(bs, wksp, offcodeNCount, &offcodeMaxValue, dictBuffer, dictSize); + headerSize = ZSTD_loadCEntropy(bs, wksp, dictBuffer, dictSize); } free(bs); free(wksp); - free(offcodeNCount); } return headerSize; @@ -532,6 +535,7 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize, clock_t displayClock = 0; clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10; +# undef DISPLAYUPDATE # define DISPLAYUPDATE(l, ...) 
if (notificationLevel>=l) { \ if (ZDICT_clockSpan(displayClock) > refreshRate) \ { displayClock = clock(); DISPLAY(__VA_ARGS__); \ @@ -706,7 +710,7 @@ static void ZDICT_flatLit(unsigned* countLit) #define OFFCODE_MAX 30 /* only applicable to first block */ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, - unsigned compressionLevel, + int compressionLevel, const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles, const void* dictBuffer, size_t dictBufferSize, unsigned notificationLevel) @@ -741,7 +745,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, memset(repOffset, 0, sizeof(repOffset)); repOffset[1] = repOffset[4] = repOffset[8] = 1; memset(bestRepOffset, 0, sizeof(bestRepOffset)); - if (compressionLevel==0) compressionLevel = g_compressionLevel_default; + if (compressionLevel==0) compressionLevel = ZSTD_CLEVEL_DEFAULT; params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize); esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem); @@ -786,7 +790,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, /* note : the result of this phase should be used to better appreciate the impact on statistics */ total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u]; - errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax); + errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax, /* useLowProbCount */ 1); if (FSE_isError(errorCode)) { eSize = errorCode; DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n"); @@ -795,7 +799,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, Offlog = (U32)errorCode; total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u]; - errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML); + errorCode = 
FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML, /* useLowProbCount */ 1); if (FSE_isError(errorCode)) { eSize = errorCode; DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n"); @@ -804,7 +808,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, mlLog = (U32)errorCode; total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u]; - errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL); + errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL, /* useLowProbCount */ 1); if (FSE_isError(errorCode)) { eSize = errorCode; DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n"); @@ -893,7 +897,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity, size_t hSize; #define HBUFFSIZE 256 /* should prove large enough for all entropy headers */ BYTE header[HBUFFSIZE]; - int const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel; + int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel; U32 const notificationLevel = params.notificationLevel; /* check conditions */ @@ -939,7 +943,7 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced( const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, ZDICT_params_t params) { - int const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel; + int const compressionLevel = (params.compressionLevel == 0) ? 
ZSTD_CLEVEL_DEFAULT : params.compressionLevel; U32 const notificationLevel = params.notificationLevel; size_t hSize = 8; @@ -968,16 +972,11 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced( return MIN(dictBufferCapacity, hSize+dictContentSize); } -/* Hidden declaration for dbio.c */ -size_t ZDICT_trainFromBuffer_unsafe_legacy( - void* dictBuffer, size_t maxDictSize, - const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, - ZDICT_legacy_params_t params); /*! ZDICT_trainFromBuffer_unsafe_legacy() : -* Warning : `samplesBuffer` must be followed by noisy guard band. +* Warning : `samplesBuffer` must be followed by noisy guard band !!! * @return : size of dictionary, or an error code which can be tested with ZDICT_isError() */ -size_t ZDICT_trainFromBuffer_unsafe_legacy( +static size_t ZDICT_trainFromBuffer_unsafe_legacy( void* dictBuffer, size_t maxDictSize, const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, ZDICT_legacy_params_t params) @@ -1114,8 +1113,8 @@ size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, memset(&params, 0, sizeof(params)); params.d = 8; params.steps = 4; - /* Default to level 6 since no compression level information is available */ - params.zParams.compressionLevel = 3; + /* Use default level since no compression level information is available */ + params.zParams.compressionLevel = ZSTD_CLEVEL_DEFAULT; #if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1) params.zParams.notificationLevel = DEBUGLEVEL; #endif |