summaryrefslogtreecommitdiffstats
path: root/Utilities/cmzstd/lib/compress/zstd_ldm.c
diff options
context:
space:
mode:
Diffstat (limited to 'Utilities/cmzstd/lib/compress/zstd_ldm.c')
-rw-r--r--Utilities/cmzstd/lib/compress/zstd_ldm.c44
1 files changed, 33 insertions, 11 deletions
diff --git a/Utilities/cmzstd/lib/compress/zstd_ldm.c b/Utilities/cmzstd/lib/compress/zstd_ldm.c
index 58eb2ff..8c47948 100644
--- a/Utilities/cmzstd/lib/compress/zstd_ldm.c
+++ b/Utilities/cmzstd/lib/compress/zstd_ldm.c
@@ -1,15 +1,16 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
*/
#include "zstd_ldm.h"
-#include "debug.h"
+#include "../common/debug.h"
#include "zstd_fast.h" /* ZSTD_fillHashTable() */
#include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
@@ -49,9 +50,9 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
{
size_t const ldmHSize = ((size_t)1) << params.hashLog;
size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
- size_t const ldmBucketSize =
- ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
- size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
+ size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
+ size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
+ + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
return params.enableLdm ? totalSize : 0;
}
@@ -223,6 +224,20 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
return rollingHash;
}
+void ZSTD_ldm_fillHashTable(
+ ldmState_t* state, const BYTE* ip,
+ const BYTE* iend, ldmParams_t const* params)
+{
+ DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
+ if ((size_t)(iend - ip) >= params->minMatchLength) {
+ U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
+ ZSTD_ldm_fillLdmHashTable(
+ state, startingHash, ip, iend - params->minMatchLength, state->window.base,
+ params->hashLog - params->bucketSizeLog,
+ *params);
+ }
+}
+
/** ZSTD_ldm_limitTableUpdate() :
*
@@ -429,7 +444,7 @@ size_t ZSTD_ldm_generateSequences(
*/
assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
/* The input could be very large (in zstdmt), so it must be broken up into
- * chunks to enforce the maximmum distance and handle overflow correction.
+ * chunks to enforce the maximum distance and handle overflow correction.
*/
assert(sequences->pos <= sequences->size);
assert(sequences->size <= sequences->capacity);
@@ -447,8 +462,10 @@ size_t ZSTD_ldm_generateSequences(
if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
U32 const ldmHSize = 1U << params->hashLog;
U32 const correction = ZSTD_window_correctOverflow(
- &ldmState->window, /* cycleLog */ 0, maxDist, src);
+ &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
+ /* invalidate dictionaries on overflow correction */
+ ldmState->loadedDictEnd = 0;
}
/* 2. We enforce the maximum offset allowed.
*
@@ -457,8 +474,14 @@ size_t ZSTD_ldm_generateSequences(
* TODO: * Test the chunk size.
* * Try invalidation after the sequence generation and test the
* the offset against maxDist directly.
+ *
+ * NOTE: Because of dictionaries + sequence splitting we MUST make sure
+ * that any offset used is valid at the END of the sequence, since it may
+ * be split into two sequences. This condition holds when using
+ * ZSTD_window_enforceMaxDist(), but if we move to checking offsets
+ * against maxDist directly, we'll have to carefully handle that case.
*/
- ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
+ ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
/* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
newLeftoverSize = ZSTD_ldm_generateSequences_internal(
ldmState, sequences, params, chunkStart, chunkSize);
@@ -566,14 +589,13 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
if (sequence.offset == 0)
break;
- assert(sequence.offset <= (1U << cParams->windowLog));
assert(ip + sequence.litLength + sequence.matchLength <= iend);
/* Fill tables for block compressor */
ZSTD_ldm_limitTableUpdate(ms, ip);
ZSTD_ldm_fillFastTables(ms, ip);
/* Run the block compressor */
- DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
+ DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
{
size_t const newLitLength =
blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
@@ -583,7 +605,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
rep[i] = rep[i-1];
rep[0] = sequence.offset;
/* Store the sequence */
- ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
+ ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
sequence.offset + ZSTD_REP_MOVE,
sequence.matchLength - MINMATCH);
ip += sequence.matchLength;