summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorYann Collet <Cyan4973@users.noreply.github.com>2019-07-17 20:25:41 (GMT)
committerGitHub <noreply@github.com>2019-07-17 20:25:41 (GMT)
commit19b099986aeccc12bb46ad207fe8de5b36bdb7bc (patch)
treecbc38b9af41d1b2c21c56d019f0daf0f76b1093b /lib
parent7654a5a6d287e15e4c42b7d7386fbbd1b857f9e2 (diff)
parent13a2d9e34ffc4170720ce417c73e396d0ac1471a (diff)
downloadlz4-19b099986aeccc12bb46ad207fe8de5b36bdb7bc.zip
lz4-19b099986aeccc12bb46ad207fe8de5b36bdb7bc.tar.gz
lz4-19b099986aeccc12bb46ad207fe8de5b36bdb7bc.tar.bz2
Merge pull request #756 from terrelln/destSize
[LZ4_compress_destSize + multi-blocks streaming] Fix rare data corruption bug
Diffstat (limited to 'lib')
-rw-r--r--lib/lz4.c34
1 files changed, 32 insertions, 2 deletions
diff --git a/lib/lz4.c b/lib/lz4.c
index 5b03e3d..74a9247 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -648,6 +648,18 @@ LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tab
return LZ4_hash4(LZ4_read32(p), tableType);
}
+static void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
+{
+ switch (tableType)
+ {
+ default: /* fallthrough */
+ case clearedTable: { /* illegal! */ assert(0); return; }
+ case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; }
+ case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; }
+ case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; }
+ }
+}
+
static void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
{
switch (tableType)
@@ -848,6 +860,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
for ( ; ; ) {
const BYTE* match;
BYTE* token;
+ const BYTE* filledIp;
/* Find a match */
if (tableType == byPtr) {
@@ -934,6 +947,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
}
/* Catch up */
+ filledIp = ip;
while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
/* Encode Literals */
@@ -1013,12 +1027,26 @@ _next_match:
}
if ((outputDirective) && /* Check output buffer overflow */
- (unlikely(op + (1 + LASTLITERALS) + (matchCode>>8) > olimit)) ) {
+ (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) {
if (outputDirective == fillOutput) {
/* Match description too long : reduce it */
- U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 2 - 1 - LASTLITERALS) * 255;
+ U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255;
ip -= matchCode - newMatchCode;
+ assert(newMatchCode < matchCode);
matchCode = newMatchCode;
+ if (unlikely(ip < filledIp)) {
+ /* We have already filled up to filledIp so if ip ends up less than filledIp
+ * we have positions in the hash table beyond the current position. This is
+ * a problem if we reuse the hash table. So we have to remove these positions
+ * from the hash table.
+ */
+ const BYTE* ptr;
+ DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip));
+ for (ptr = ip + 1; ptr <= filledIp; ++ptr) {
+ U32 const h = LZ4_hashPosition(ptr, tableType);
+ LZ4_clearHash(h, cctx->hashTable, tableType);
+ }
+ }
} else {
assert(outputDirective == limitedOutput);
return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
@@ -1038,6 +1066,8 @@ _next_match:
} else
*token += (BYTE)(matchCode);
}
+ /* Ensure we have enough space for the last literals. */
+ assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit));
anchor = ip;