diff options
author | Cristian Adam <cristian.adam@qt.io> | 2020-09-23 16:49:33 (GMT) |
---|---|---|
committer | Cristian Adam <cristian.adam@qt.io> | 2020-09-23 16:49:33 (GMT) |
commit | 0b3e9259dd4f36000ded4240dcd39f60d5b600d7 (patch) | |
tree | 54ef4f3f66eb0971af4e56a9529887df0c7fed5b /Utilities/cmzstd/lib/dictBuilder/cover.h | |
parent | 145730c74630f09e0cb7a0167059ec7ee470d245 (diff) | |
parent | 4676ad8c32d279c159895372f2bd15769197bf09 (diff) | |
download | CMake-0b3e9259dd4f36000ded4240dcd39f60d5b600d7.zip CMake-0b3e9259dd4f36000ded4240dcd39f60d5b600d7.tar.gz CMake-0b3e9259dd4f36000ded4240dcd39f60d5b600d7.tar.bz2 |
Merge branch 'upstream-zstd'
# By zstd upstream
* upstream-zstd:
zstd 2020-05-21 (b706286a)
Diffstat (limited to 'Utilities/cmzstd/lib/dictBuilder/cover.h')
-rw-r--r-- | Utilities/cmzstd/lib/dictBuilder/cover.h | 88 |
1 files changed, 81 insertions, 7 deletions
diff --git a/Utilities/cmzstd/lib/dictBuilder/cover.h b/Utilities/cmzstd/lib/dictBuilder/cover.h index 82e2e1c..f2aa0e3 100644 --- a/Utilities/cmzstd/lib/dictBuilder/cover.h +++ b/Utilities/cmzstd/lib/dictBuilder/cover.h @@ -1,11 +1,21 @@ +/* + * Copyright (c) 2017-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + #include <stdio.h> /* fprintf */ #include <stdlib.h> /* malloc, free, qsort */ #include <string.h> /* memset */ #include <time.h> /* clock */ -#include "mem.h" /* read */ -#include "pool.h" -#include "threading.h" -#include "zstd_internal.h" /* includes zstd.h */ +#include "../common/mem.h" /* read */ +#include "../common/pool.h" +#include "../common/threading.h" +#include "../common/zstd_internal.h" /* includes zstd.h */ #ifndef ZDICT_STATIC_LINKING_ONLY #define ZDICT_STATIC_LINKING_ONLY #endif @@ -39,6 +49,44 @@ typedef struct { } COVER_segment_t; /** + *Number of epochs and size of each epoch. + */ +typedef struct { + U32 num; + U32 size; +} COVER_epoch_info_t; + +/** + * Struct used for the dictionary selection function. + */ +typedef struct COVER_dictSelection { + BYTE* dictContent; + size_t dictSize; + size_t totalCompressedSize; +} COVER_dictSelection_t; + +/** + * Computes the number of epochs and the size of each epoch. + * We will make sure that each epoch gets at least 10 * k bytes. + * + * The COVER algorithms divide the data up into epochs of equal size and + * select one segment from each epoch. + * + * @param maxDictSize The maximum allowed dictionary size. + * @param nbDmers The number of dmers we are training on. + * @param k The parameter k (segment size). + * @param passes The target number of passes over the dmer corpus. + * More passes means a better dictionary. + */ +COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers, + U32 k, U32 passes); + +/** + * Warns the user when their corpus is too small. + */ +void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel); + +/** * Checks total compressed size of a dictionary */ size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, @@ -78,6 +126,32 @@ void COVER_best_start(COVER_best_t *best); * Decrements liveJobs and signals any waiting threads if liveJobs == 0. * If this dictionary is the best so far save it and its parameters. */ -void COVER_best_finish(COVER_best_t *best, size_t compressedSize, - ZDICT_cover_params_t parameters, void *dict, - size_t dictSize); +void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, + COVER_dictSelection_t selection); +/** + * Error function for COVER_selectDict function. Checks if the return + * value is an error. + */ +unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection); + + /** + * Error function for COVER_selectDict function. Returns a struct where + * return.totalCompressedSize is a ZSTD error. + */ +COVER_dictSelection_t COVER_dictSelectionError(size_t error); + +/** + * Always call after selectDict is called to free up used memory from + * newly created dictionary. + */ +void COVER_dictSelectionFree(COVER_dictSelection_t selection); + +/** + * Called to finalize the dictionary and select one based on whether or not + * the shrink-dict flag was enabled. If enabled the dictionary used is the + * smallest dictionary within a specified regression of the compressed size + * from the largest dictionary. + */ + COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, + size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, + size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize); |