summaryrefslogtreecommitdiffstats
path: root/Utilities/cmzstd/lib/dictBuilder
diff options
context:
space:
mode:
Diffstat (limited to 'Utilities/cmzstd/lib/dictBuilder')
-rw-r--r--Utilities/cmzstd/lib/dictBuilder/cover.c82
-rw-r--r--Utilities/cmzstd/lib/dictBuilder/cover.h13
-rw-r--r--Utilities/cmzstd/lib/dictBuilder/divsufsort.c2
-rw-r--r--Utilities/cmzstd/lib/dictBuilder/fastcover.c62
-rw-r--r--Utilities/cmzstd/lib/dictBuilder/zdict.c59
-rw-r--r--Utilities/cmzstd/lib/dictBuilder/zdict.h305
6 files changed, 115 insertions, 408 deletions
diff --git a/Utilities/cmzstd/lib/dictBuilder/cover.c b/Utilities/cmzstd/lib/dictBuilder/cover.c
index da54ef1..8364444 100644
--- a/Utilities/cmzstd/lib/dictBuilder/cover.c
+++ b/Utilities/cmzstd/lib/dictBuilder/cover.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -26,47 +26,57 @@
#include <string.h> /* memset */
#include <time.h> /* clock */
+#ifndef ZDICT_STATIC_LINKING_ONLY
+# define ZDICT_STATIC_LINKING_ONLY
+#endif
+
#include "../common/mem.h" /* read */
#include "../common/pool.h"
#include "../common/threading.h"
-#include "cover.h"
#include "../common/zstd_internal.h" /* includes zstd.h */
-#ifndef ZDICT_STATIC_LINKING_ONLY
-#define ZDICT_STATIC_LINKING_ONLY
-#endif
-#include "zdict.h"
+#include "../zdict.h"
+#include "cover.h"
/*-*************************************
* Constants
***************************************/
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
-#define DEFAULT_SPLITPOINT 1.0
+#define COVER_DEFAULT_SPLITPOINT 1.0
/*-*************************************
* Console display
***************************************/
+#ifndef LOCALDISPLAYLEVEL
static int g_displayLevel = 2;
+#endif
+#undef DISPLAY
#define DISPLAY(...) \
{ \
fprintf(stderr, __VA_ARGS__); \
fflush(stderr); \
}
+#undef LOCALDISPLAYLEVEL
#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
if (displayLevel >= l) { \
DISPLAY(__VA_ARGS__); \
} /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
+#undef DISPLAYLEVEL
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
+#ifndef LOCALDISPLAYUPDATE
+static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
+static clock_t g_time = 0;
+#endif
+#undef LOCALDISPLAYUPDATE
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
if (displayLevel >= l) { \
- if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
+ if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
g_time = clock(); \
DISPLAY(__VA_ARGS__); \
} \
}
+#undef DISPLAYUPDATE
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
-static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
-static clock_t g_time = 0;
/*-*************************************
* Hash table
@@ -120,9 +130,9 @@ static int COVER_map_init(COVER_map_t *map, U32 size) {
/**
* Internal hash function
*/
-static const U32 prime4bytes = 2654435761U;
+static const U32 COVER_prime4bytes = 2654435761U;
static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
- return (key * prime4bytes) >> (32 - map->sizeLog);
+ return (key * COVER_prime4bytes) >> (32 - map->sizeLog);
}
/**
@@ -215,7 +225,7 @@ typedef struct {
} COVER_ctx_t;
/* We need a global context for qsort... */
-static COVER_ctx_t *g_ctx = NULL;
+static COVER_ctx_t *g_coverCtx = NULL;
/*-*************************************
* Helper functions
@@ -258,11 +268,11 @@ static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
/**
* Same as COVER_cmp() except ties are broken by pointer value
- * NOTE: g_ctx must be set to call this function. A global is required because
+ * NOTE: g_coverCtx must be set to call this function. A global is required because
* qsort doesn't take an opaque pointer.
*/
-static int COVER_strict_cmp(const void *lp, const void *rp) {
- int result = COVER_cmp(g_ctx, lp, rp);
+static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) {
+ int result = COVER_cmp(g_coverCtx, lp, rp);
if (result == 0) {
result = lp < rp ? -1 : 1;
}
@@ -271,8 +281,8 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
/**
* Faster version for d <= 8.
*/
-static int COVER_strict_cmp8(const void *lp, const void *rp) {
- int result = COVER_cmp8(g_ctx, lp, rp);
+static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) {
+ int result = COVER_cmp8(g_coverCtx, lp, rp);
if (result == 0) {
result = lp < rp ? -1 : 1;
}
@@ -603,7 +613,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
/* qsort doesn't take an opaque pointer, so pass as a global.
* On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is.
*/
- g_ctx = ctx;
+ g_coverCtx = ctx;
#if defined(__OpenBSD__)
mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
(ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
@@ -946,7 +956,7 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection){
free(selection.dictContent);
}
-COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
+COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
@@ -954,8 +964,8 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
size_t largestCompressed = 0;
BYTE* customDictContentEnd = customDictContent + dictContentSize;
- BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize);
- BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize);
+ BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity);
+ BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity);
double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
if (!largestDictbuffer || !candidateDictBuffer) {
@@ -967,7 +977,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
/* Initial dictionary size and compressed size */
memcpy(largestDictbuffer, customDictContent, dictContentSize);
dictContentSize = ZDICT_finalizeDictionary(
- largestDictbuffer, dictContentSize, customDictContent, dictContentSize,
+ largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize,
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
if (ZDICT_isError(dictContentSize)) {
@@ -1001,7 +1011,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
while (dictContentSize < largestDict) {
memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
dictContentSize = ZDICT_finalizeDictionary(
- candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize,
+ candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize,
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
if (ZDICT_isError(dictContentSize)) {
@@ -1053,18 +1063,19 @@ typedef struct COVER_tryParameters_data_s {
* This function is thread safe if zstd is compiled with multithreaded support.
* It takes its parameters as an *OWNING* opaque pointer to support threading.
*/
-static void COVER_tryParameters(void *opaque) {
+static void COVER_tryParameters(void *opaque)
+{
/* Save parameters as local variables */
- COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
+ COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque;
const COVER_ctx_t *const ctx = data->ctx;
const ZDICT_cover_params_t parameters = data->parameters;
size_t dictBufferCapacity = data->dictBufferCapacity;
size_t totalCompressedSize = ERROR(GENERIC);
/* Allocate space for hash table, dict, and freqs */
COVER_map_t activeDmers;
- BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
+ BYTE* const dict = (BYTE*)malloc(dictBufferCapacity);
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
- U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
+ U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32));
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
goto _cleanup;
@@ -1079,7 +1090,7 @@ static void COVER_tryParameters(void *opaque) {
{
const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
dictBufferCapacity, parameters);
- selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
+ selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
totalCompressedSize);
@@ -1094,19 +1105,18 @@ _cleanup:
free(data);
COVER_map_destroy(&activeDmers);
COVER_dictSelectionFree(selection);
- if (freqs) {
- free(freqs);
- }
+ free(freqs);
}
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
- const size_t *samplesSizes, unsigned nbSamples,
- ZDICT_cover_params_t *parameters) {
+ void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer,
+ const size_t* samplesSizes, unsigned nbSamples,
+ ZDICT_cover_params_t* parameters)
+{
/* constants */
const unsigned nbThreads = parameters->nbThreads;
const double splitPoint =
- parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
+ parameters->splitPoint <= 0.0 ? COVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
diff --git a/Utilities/cmzstd/lib/dictBuilder/cover.h b/Utilities/cmzstd/lib/dictBuilder/cover.h
index f2aa0e3..1aacddd 100644
--- a/Utilities/cmzstd/lib/dictBuilder/cover.h
+++ b/Utilities/cmzstd/lib/dictBuilder/cover.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -8,6 +8,10 @@
* You may select, at your option, one of the above-listed licenses.
*/
+#ifndef ZDICT_STATIC_LINKING_ONLY
+# define ZDICT_STATIC_LINKING_ONLY
+#endif
+
#include <stdio.h> /* fprintf */
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memset */
@@ -16,10 +20,7 @@
#include "../common/pool.h"
#include "../common/threading.h"
#include "../common/zstd_internal.h" /* includes zstd.h */
-#ifndef ZDICT_STATIC_LINKING_ONLY
-#define ZDICT_STATIC_LINKING_ONLY
-#endif
-#include "zdict.h"
+#include "../zdict.h"
/**
* COVER_best_t is used for two purposes:
@@ -152,6 +153,6 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection);
* smallest dictionary within a specified regression of the compressed size
* from the largest dictionary.
*/
- COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
+ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
diff --git a/Utilities/cmzstd/lib/dictBuilder/divsufsort.c b/Utilities/cmzstd/lib/dictBuilder/divsufsort.c
index ead9220..a2870fb 100644
--- a/Utilities/cmzstd/lib/dictBuilder/divsufsort.c
+++ b/Utilities/cmzstd/lib/dictBuilder/divsufsort.c
@@ -1576,7 +1576,7 @@ note:
/* Construct the inverse suffix array of type B* suffixes using trsort. */
trsort(ISAb, SA, m, 1);
- /* Set the sorted order of tyoe B* suffixes. */
+ /* Set the sorted order of type B* suffixes. */
for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
if(0 <= i) {
diff --git a/Utilities/cmzstd/lib/dictBuilder/fastcover.c b/Utilities/cmzstd/lib/dictBuilder/fastcover.c
index 485c333..ed789f9 100644
--- a/Utilities/cmzstd/lib/dictBuilder/fastcover.c
+++ b/Utilities/cmzstd/lib/dictBuilder/fastcover.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -16,15 +16,17 @@
#include <string.h> /* memset */
#include <time.h> /* clock */
+#ifndef ZDICT_STATIC_LINKING_ONLY
+# define ZDICT_STATIC_LINKING_ONLY
+#endif
+
#include "../common/mem.h" /* read */
#include "../common/pool.h"
#include "../common/threading.h"
-#include "cover.h"
#include "../common/zstd_internal.h" /* includes zstd.h */
-#ifndef ZDICT_STATIC_LINKING_ONLY
-#define ZDICT_STATIC_LINKING_ONLY
-#endif
-#include "zdict.h"
+#include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */
+#include "../zdict.h"
+#include "cover.h"
/*-*************************************
@@ -33,7 +35,7 @@
#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
#define FASTCOVER_MAX_F 31
#define FASTCOVER_MAX_ACCEL 10
-#define DEFAULT_SPLITPOINT 0.75
+#define FASTCOVER_DEFAULT_SPLITPOINT 0.75
#define DEFAULT_F 20
#define DEFAULT_ACCEL 1
@@ -41,50 +43,50 @@
/*-*************************************
* Console display
***************************************/
+#ifndef LOCALDISPLAYLEVEL
static int g_displayLevel = 2;
+#endif
+#undef DISPLAY
#define DISPLAY(...) \
{ \
fprintf(stderr, __VA_ARGS__); \
fflush(stderr); \
}
+#undef LOCALDISPLAYLEVEL
#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
if (displayLevel >= l) { \
DISPLAY(__VA_ARGS__); \
} /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
+#undef DISPLAYLEVEL
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
+#ifndef LOCALDISPLAYUPDATE
+static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
+static clock_t g_time = 0;
+#endif
+#undef LOCALDISPLAYUPDATE
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
if (displayLevel >= l) { \
- if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
+ if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
g_time = clock(); \
DISPLAY(__VA_ARGS__); \
} \
}
+#undef DISPLAYUPDATE
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
-static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
-static clock_t g_time = 0;
/*-*************************************
* Hash Functions
***************************************/
-static const U64 prime6bytes = 227718039650203ULL;
-static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
-static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
-
-static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
-static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
-static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
-
-
/**
- * Hash the d-byte value pointed to by p and mod 2^f
+ * Hash the d-byte value pointed to by p and mod 2^f into the frequency vector
*/
-static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 h, unsigned d) {
+static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 f, unsigned d) {
if (d == 6) {
- return ZSTD_hash6Ptr(p, h) & ((1 << h) - 1);
+ return ZSTD_hash6Ptr(p, f);
}
- return ZSTD_hash8Ptr(p, h) & ((1 << h) - 1);
+ return ZSTD_hash8Ptr(p, f);
}
@@ -461,20 +463,20 @@ typedef struct FASTCOVER_tryParameters_data_s {
* This function is thread safe if zstd is compiled with multithreaded support.
* It takes its parameters as an *OWNING* opaque pointer to support threading.
*/
-static void FASTCOVER_tryParameters(void *opaque)
+static void FASTCOVER_tryParameters(void* opaque)
{
/* Save parameters as local variables */
- FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t *)opaque;
+ FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque;
const FASTCOVER_ctx_t *const ctx = data->ctx;
const ZDICT_cover_params_t parameters = data->parameters;
size_t dictBufferCapacity = data->dictBufferCapacity;
size_t totalCompressedSize = ERROR(GENERIC);
/* Initialize array to keep track of frequency of dmer within activeSegment */
- U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16));
+ U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16));
/* Allocate space for hash table, dict, and freqs */
- BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
+ BYTE *const dict = (BYTE*)malloc(dictBufferCapacity);
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
- U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
+ U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
if (!segmentFreqs || !dict || !freqs) {
DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
goto _cleanup;
@@ -486,7 +488,7 @@ static void FASTCOVER_tryParameters(void *opaque)
parameters, segmentFreqs);
const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
- selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
+ selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
totalCompressedSize);
@@ -617,7 +619,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
/* constants */
const unsigned nbThreads = parameters->nbThreads;
const double splitPoint =
- parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
+ parameters->splitPoint <= 0.0 ? FASTCOVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
diff --git a/Utilities/cmzstd/lib/dictBuilder/zdict.c b/Utilities/cmzstd/lib/dictBuilder/zdict.c
index 6d0b042..459cbe4 100644
--- a/Utilities/cmzstd/lib/dictBuilder/zdict.c
+++ b/Utilities/cmzstd/lib/dictBuilder/zdict.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -23,9 +23,13 @@
/* Unix Large Files support (>4GB) */
#define _FILE_OFFSET_BITS 64
#if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */
+# ifndef _LARGEFILE_SOURCE
# define _LARGEFILE_SOURCE
+# endif
#elif ! defined(__LP64__) /* No point defining Large file for 64 bit */
+# ifndef _LARGEFILE64_SOURCE
# define _LARGEFILE64_SOURCE
+# endif
#endif
@@ -37,18 +41,19 @@
#include <stdio.h> /* fprintf, fopen, ftello64 */
#include <time.h> /* clock */
+#ifndef ZDICT_STATIC_LINKING_ONLY
+# define ZDICT_STATIC_LINKING_ONLY
+#endif
+#define HUF_STATIC_LINKING_ONLY
+
#include "../common/mem.h" /* read */
#include "../common/fse.h" /* FSE_normalizeCount, FSE_writeNCount */
-#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h" /* HUF_buildCTable, HUF_writeCTable */
#include "../common/zstd_internal.h" /* includes zstd.h */
#include "../common/xxhash.h" /* XXH64 */
-#include "divsufsort.h"
-#ifndef ZDICT_STATIC_LINKING_ONLY
-# define ZDICT_STATIC_LINKING_ONLY
-#endif
-#include "zdict.h"
#include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
+#include "../zdict.h"
+#include "divsufsort.h"
/*-*************************************
@@ -62,14 +67,15 @@
#define NOISELENGTH 32
-static const int g_compressionLevel_default = 3;
static const U32 g_selectivity_default = 9;
/*-*************************************
* Console display
***************************************/
+#undef DISPLAY
#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
+#undef DISPLAYLEVEL
#define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
@@ -105,20 +111,17 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
size_t headerSize;
if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted);
- { unsigned offcodeMaxValue = MaxOff;
- ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t));
+ { ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t));
U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE);
- short* offcodeNCount = (short*)malloc((MaxOff+1)*sizeof(short));
- if (!bs || !wksp || !offcodeNCount) {
+ if (!bs || !wksp) {
headerSize = ERROR(memory_allocation);
} else {
ZSTD_reset_compressedBlockState(bs);
- headerSize = ZSTD_loadCEntropy(bs, wksp, offcodeNCount, &offcodeMaxValue, dictBuffer, dictSize);
+ headerSize = ZSTD_loadCEntropy(bs, wksp, dictBuffer, dictSize);
}
free(bs);
free(wksp);
- free(offcodeNCount);
}
return headerSize;
@@ -532,6 +535,7 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
clock_t displayClock = 0;
clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
+# undef DISPLAYUPDATE
# define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
if (ZDICT_clockSpan(displayClock) > refreshRate) \
{ displayClock = clock(); DISPLAY(__VA_ARGS__); \
@@ -706,7 +710,7 @@ static void ZDICT_flatLit(unsigned* countLit)
#define OFFCODE_MAX 30 /* only applicable to first block */
static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
- unsigned compressionLevel,
+ int compressionLevel,
const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles,
const void* dictBuffer, size_t dictBufferSize,
unsigned notificationLevel)
@@ -741,7 +745,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
memset(repOffset, 0, sizeof(repOffset));
repOffset[1] = repOffset[4] = repOffset[8] = 1;
memset(bestRepOffset, 0, sizeof(bestRepOffset));
- if (compressionLevel==0) compressionLevel = g_compressionLevel_default;
+ if (compressionLevel==0) compressionLevel = ZSTD_CLEVEL_DEFAULT;
params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem);
@@ -786,7 +790,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
/* note : the result of this phase should be used to better appreciate the impact on statistics */
total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
- errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
+ errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax, /* useLowProbCount */ 1);
if (FSE_isError(errorCode)) {
eSize = errorCode;
DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
@@ -795,7 +799,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
Offlog = (U32)errorCode;
total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
- errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
+ errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML, /* useLowProbCount */ 1);
if (FSE_isError(errorCode)) {
eSize = errorCode;
DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
@@ -804,7 +808,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
mlLog = (U32)errorCode;
total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
- errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
+ errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL, /* useLowProbCount */ 1);
if (FSE_isError(errorCode)) {
eSize = errorCode;
DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
@@ -893,7 +897,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
size_t hSize;
#define HBUFFSIZE 256 /* should prove large enough for all entropy headers */
BYTE header[HBUFFSIZE];
- int const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel;
+ int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
U32 const notificationLevel = params.notificationLevel;
/* check conditions */
@@ -939,7 +943,7 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_params_t params)
{
- int const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel;
+ int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
U32 const notificationLevel = params.notificationLevel;
size_t hSize = 8;
@@ -968,16 +972,11 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
return MIN(dictBufferCapacity, hSize+dictContentSize);
}
-/* Hidden declaration for dbio.c */
-size_t ZDICT_trainFromBuffer_unsafe_legacy(
- void* dictBuffer, size_t maxDictSize,
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
- ZDICT_legacy_params_t params);
/*! ZDICT_trainFromBuffer_unsafe_legacy() :
-* Warning : `samplesBuffer` must be followed by noisy guard band.
+* Warning : `samplesBuffer` must be followed by noisy guard band !!!
* @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
*/
-size_t ZDICT_trainFromBuffer_unsafe_legacy(
+static size_t ZDICT_trainFromBuffer_unsafe_legacy(
void* dictBuffer, size_t maxDictSize,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_legacy_params_t params)
@@ -1114,8 +1113,8 @@ size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
memset(&params, 0, sizeof(params));
params.d = 8;
params.steps = 4;
- /* Default to level 6 since no compression level information is available */
- params.zParams.compressionLevel = 3;
+ /* Use default level since no compression level information is available */
+ params.zParams.compressionLevel = ZSTD_CLEVEL_DEFAULT;
#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1)
params.zParams.notificationLevel = DEBUGLEVEL;
#endif
diff --git a/Utilities/cmzstd/lib/dictBuilder/zdict.h b/Utilities/cmzstd/lib/dictBuilder/zdict.h
deleted file mode 100644
index ff2e77f..0000000
--- a/Utilities/cmzstd/lib/dictBuilder/zdict.h
+++ /dev/null
@@ -1,305 +0,0 @@
-/*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#ifndef DICTBUILDER_H_001
-#define DICTBUILDER_H_001
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/*====== Dependencies ======*/
-#include <stddef.h> /* size_t */
-
-
-/* ===== ZDICTLIB_API : control library symbols visibility ===== */
-#ifndef ZDICTLIB_VISIBILITY
-# if defined(__GNUC__) && (__GNUC__ >= 4)
-# define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
-# else
-# define ZDICTLIB_VISIBILITY
-# endif
-#endif
-#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
-# define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
-#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
-# define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
-#else
-# define ZDICTLIB_API ZDICTLIB_VISIBILITY
-#endif
-
-
-/*! ZDICT_trainFromBuffer():
- * Train a dictionary from an array of samples.
- * Redirect towards ZDICT_optimizeTrainFromBuffer_fastCover() single-threaded, with d=8, steps=4,
- * f=20, and accel=1.
- * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
- * The resulting dictionary will be saved into `dictBuffer`.
- * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- * or an error code, which can be tested with ZDICT_isError().
- * Note: Dictionary training will fail if there are not enough samples to construct a
- * dictionary, or if most of the samples are too small (< 8 bytes being the lower limit).
- * If dictionary training fails, you should use zstd without a dictionary, as the dictionary
- * would've been ineffective anyways. If you believe your samples would benefit from a dictionary
- * please open an issue with details, and we can look into it.
- * Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB.
- * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
- * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
- * In general, it's recommended to provide a few thousands samples, though this can vary a lot.
- * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
- */
-ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
- const void* samplesBuffer,
- const size_t* samplesSizes, unsigned nbSamples);
-
-typedef struct {
- int compressionLevel; /*< optimize for a specific zstd compression level; 0 means default */
- unsigned notificationLevel; /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
- unsigned dictID; /*< force dictID value; 0 means auto mode (32-bits random value) */
-} ZDICT_params_t;
-
-/*! ZDICT_finalizeDictionary():
- * Given a custom content as a basis for dictionary, and a set of samples,
- * finalize dictionary by adding headers and statistics according to the zstd
- * dictionary format.
- *
- * Samples must be stored concatenated in a flat buffer `samplesBuffer`,
- * supplied with an array of sizes `samplesSizes`, providing the size of each
- * sample in order. The samples are used to construct the statistics, so they
- * should be representative of what you will compress with this dictionary.
- *
- * The compression level can be set in `parameters`. You should pass the
- * compression level you expect to use in production. The statistics for each
- * compression level differ, so tuning the dictionary for the compression level
- * can help quite a bit.
- *
- * You can set an explicit dictionary ID in `parameters`, or allow us to pick
- * a random dictionary ID for you, but we can't guarantee no collisions.
- *
- * The dstDictBuffer and the dictContent may overlap, and the content will be
- * appended to the end of the header. If the header + the content doesn't fit in
- * maxDictSize the beginning of the content is truncated to make room, since it
- * is presumed that the most profitable content is at the end of the dictionary,
- * since that is the cheapest to reference.
- *
- * `dictContentSize` must be >= ZDICT_CONTENTSIZE_MIN bytes.
- * `maxDictSize` must be >= max(dictContentSize, ZSTD_DICTSIZE_MIN).
- *
- * @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),
- * or an error code, which can be tested by ZDICT_isError().
- * Note: ZDICT_finalizeDictionary() will push notifications into stderr if
- * instructed to, using notificationLevel>0.
- * NOTE: This function currently may fail in several edge cases including:
- * * Not enough samples
- * * Samples are uncompressible
- * * Samples are all exactly the same
- */
-ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize,
- const void* dictContent, size_t dictContentSize,
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
- ZDICT_params_t parameters);
-
-
-/*====== Helper functions ======*/
-ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */
-ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns a ZSTD error code on failure */
-ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode);
-ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
-
-
-
-#ifdef ZDICT_STATIC_LINKING_ONLY
-
-/* ====================================================================================
- * The definitions in this section are considered experimental.
- * They should never be used with a dynamic library, as they may change in the future.
- * They are provided for advanced usages.
- * Use them only in association with static linking.
- * ==================================================================================== */
-
-#define ZDICT_CONTENTSIZE_MIN 128
-#define ZDICT_DICTSIZE_MIN 256
-
-/*! ZDICT_cover_params_t:
- * k and d are the only required parameters.
- * For others, value 0 means default.
- */
-typedef struct {
- unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
- unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
- unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
- unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
- double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
- unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */
- unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */
- ZDICT_params_t zParams;
-} ZDICT_cover_params_t;
-
-typedef struct {
- unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
- unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
- unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 1 means default(20)*/
- unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
- unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
- double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */
- unsigned accel; /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */
- unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */
- unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */
-
- ZDICT_params_t zParams;
-} ZDICT_fastCover_params_t;
-
-/*! ZDICT_trainFromBuffer_cover():
- * Train a dictionary from an array of samples using the COVER algorithm.
- * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
- * The resulting dictionary will be saved into `dictBuffer`.
- * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- * or an error code, which can be tested with ZDICT_isError().
- * See ZDICT_trainFromBuffer() for details on failure modes.
- * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
- * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
- * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
- * In general, it's recommended to provide a few thousands samples, though this can vary a lot.
- * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
- */
-ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
- void *dictBuffer, size_t dictBufferCapacity,
- const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
- ZDICT_cover_params_t parameters);
-
-/*! ZDICT_optimizeTrainFromBuffer_cover():
- * The same requirements as above hold for all the parameters except `parameters`.
- * This function tries many parameter combinations and picks the best parameters.
- * `*parameters` is filled with the best parameters found,
- * dictionary constructed with those parameters is stored in `dictBuffer`.
- *
- * All of the parameters d, k, steps are optional.
- * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
- * if steps is zero it defaults to its default value.
- * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000].
- *
- * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- * or an error code, which can be tested with ZDICT_isError().
- * On success `*parameters` contains the parameters selected.
- * See ZDICT_trainFromBuffer() for details on failure modes.
- * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
- */
-ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
- void* dictBuffer, size_t dictBufferCapacity,
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
- ZDICT_cover_params_t* parameters);
-
-/*! ZDICT_trainFromBuffer_fastCover():
- * Train a dictionary from an array of samples using a modified version of COVER algorithm.
- * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
- * d and k are required.
- * All other parameters are optional, will use default values if not provided
- * The resulting dictionary will be saved into `dictBuffer`.
- * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- * or an error code, which can be tested with ZDICT_isError().
- * See ZDICT_trainFromBuffer() for details on failure modes.
- * Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory.
- * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
- * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
- * In general, it's recommended to provide a few thousands samples, though this can vary a lot.
- * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
- */
-ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
- size_t dictBufferCapacity, const void *samplesBuffer,
- const size_t *samplesSizes, unsigned nbSamples,
- ZDICT_fastCover_params_t parameters);
-
-/*! ZDICT_optimizeTrainFromBuffer_fastCover():
- * The same requirements as above hold for all the parameters except `parameters`.
- * This function tries many parameter combinations (specifically, k and d combinations)
- * and picks the best parameters. `*parameters` is filled with the best parameters found,
- * dictionary constructed with those parameters is stored in `dictBuffer`.
- * All of the parameters d, k, steps, f, and accel are optional.
- * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
- * if steps is zero it defaults to its default value.
- * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000].
- * If f is zero, default value of 20 is used.
- * If accel is zero, default value of 1 is used.
- *
- * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- * or an error code, which can be tested with ZDICT_isError().
- * On success `*parameters` contains the parameters selected.
- * See ZDICT_trainFromBuffer() for details on failure modes.
- * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread.
- */
-ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
- size_t dictBufferCapacity, const void* samplesBuffer,
- const size_t* samplesSizes, unsigned nbSamples,
- ZDICT_fastCover_params_t* parameters);
-
-typedef struct {
- unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
- ZDICT_params_t zParams;
-} ZDICT_legacy_params_t;
-
-/*! ZDICT_trainFromBuffer_legacy():
- * Train a dictionary from an array of samples.
- * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
- * The resulting dictionary will be saved into `dictBuffer`.
- * `parameters` is optional and can be provided with values set to 0 to mean "default".
- * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- * or an error code, which can be tested with ZDICT_isError().
- * See ZDICT_trainFromBuffer() for details on failure modes.
- * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
- * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
- * In general, it's recommended to provide a few thousands samples, though this can vary a lot.
- * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
- * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
- */
-ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
- void *dictBuffer, size_t dictBufferCapacity,
- const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
- ZDICT_legacy_params_t parameters);
-
-/* Deprecation warnings */
-/* It is generally possible to disable deprecation warnings from compiler,
- for example with -Wno-deprecated-declarations for gcc
- or _CRT_SECURE_NO_WARNINGS in Visual.
- Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
-#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
-# define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */
-#else
-# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
-# define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API
-# elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__)
-# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
-# elif (ZDICT_GCC_VERSION >= 301)
-# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
-# elif defined(_MSC_VER)
-# define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message))
-# else
-# pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
-# define ZDICT_DEPRECATED(message) ZDICTLIB_API
-# endif
-#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
-
-ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead")
-size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
-
-
-#endif /* ZDICT_STATIC_LINKING_ONLY */
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* DICTBUILDER_H_001 */