summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorW. Felix Handte <w@felixhandte.com>2019-08-06 19:24:51 (GMT)
committerW. Felix Handte <w@felixhandte.com>2019-08-06 22:50:33 (GMT)
commit918269a4e395d88364a517b889473922ac5521ec (patch)
treeffc36dc63f0d0ddeab7bba587cd3df6871f50dda
parentb5b9760c80d70aaa5805c460cb61faad31cbf234 (diff)
downloadlz4-918269a4e395d88364a517b889473922ac5521ec.zip
lz4-918269a4e395d88364a517b889473922ac5521ec.tar.gz
lz4-918269a4e395d88364a517b889473922ac5521ec.tar.bz2
Make Attaching an Empty Dict Behave the Same as Using it Directly
When using an empty dictionary, we bail out of loading or attaching it in ways that leave the working context in potentially slightly different states. In particular, in some paths, we will cause the currentOffset to be non-zero, while in others we would allow it to remain 0. This difference in behavior is perfectly harmless, but in some situations, it can produce slight differences in the compressed output. For sanity's sake, we currently try to maintain a strict correspondence between the behavior of the dict attachment and the dict loading paths. This patch restores them to behaving identically. This shouldn't have any negative side-effects, as far as I can tell. When writing the dict attachment code, I tried to preserve zeroed currentOffsets when possible, since they benchmarked as very slightly faster. However, the case of attaching an empty dictionary is probably rare enough that it's acceptable to minusculely degrade performance in that corner case.
-rw-r--r--lib/lz4.c29
1 files changed, 15 insertions, 14 deletions
diff --git a/lib/lz4.c b/lib/lz4.c
index 0849505..147a8d6 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -1408,18 +1408,18 @@ int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
* there are only valid offsets in the window, which allows an optimization
* in LZ4_compress_fast_continue() where it uses noDictIssue even when the
* dictionary isn't a full 64k. */
-
- if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
- base = dictEnd - 64 KB - dict->currentOffset;
- dict->dictionary = p;
- dict->dictSize = (U32)(dictEnd - p);
dict->currentOffset += 64 KB;
- dict->tableType = tableType;
if (dictSize < (int)HASH_UNIT) {
return 0;
}
+ if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
+ base = dictEnd - dict->currentOffset;
+ dict->dictionary = p;
+ dict->dictSize = (U32)(dictEnd - p);
+ dict->tableType = tableType;
+
while (p <= dictEnd-HASH_UNIT) {
LZ4_putPosition(p, dict->hashTable, tableType, base);
p+=3;
@@ -1435,15 +1435,16 @@ void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dict
*/
LZ4_resetStream_fast(workingStream);
+ /* If the current offset is zero, we will never look in the
+ * external dictionary context, since there is no value a table
+ * entry can take that indicate a miss. In that case, we need
+ * to bump the offset to something non-zero.
+ */
+ if (workingStream->internal_donotuse.currentOffset == 0) {
+ workingStream->internal_donotuse.currentOffset = 64 KB;
+ }
+
if (dictionaryStream != NULL && dictionaryStream->internal_donotuse.dictSize > 0) {
- /* If the current offset is zero, we will never look in the
- * external dictionary context, since there is no value a table
- * entry can take that indicate a miss. In that case, we need
- * to bump the offset to something non-zero.
- */
- if (workingStream->internal_donotuse.currentOffset == 0) {
- workingStream->internal_donotuse.currentOffset = 64 KB;
- }
workingStream->internal_donotuse.dictCtx = &(dictionaryStream->internal_donotuse);
} else {
workingStream->internal_donotuse.dictCtx = NULL;