diff options
author | W. Felix Handte <w@felixhandte.com> | 2018-02-12 17:18:24 (GMT) |
---|---|---|
committer | W. Felix Handte <w@felixhandte.com> | 2018-03-12 18:58:43 (GMT) |
commit | 73cc39327e3abc28a360323c4f26c3c34d87ff07 (patch) | |
tree | 0289a085b99724966c11aefcdcc03ec3498c4d45 /lib | |
parent | 62cb52b3410bdfd696669910de010ab03666edc8 (diff) | |
download | lz4-73cc39327e3abc28a360323c4f26c3c34d87ff07.zip lz4-73cc39327e3abc28a360323c4f26c3c34d87ff07.tar.gz lz4-73cc39327e3abc28a360323c4f26c3c34d87ff07.tar.bz2 |
Lookup Matches in Separate Dictionary Context
Diffstat (limited to 'lib')
-rw-r--r-- | lib/lz4.c | 70 | ||||
-rw-r--r-- | lib/lz4.h | 14 |
2 files changed, 65 insertions, 19 deletions
```diff
@@ -448,7 +448,7 @@ static const U32 LZ4_skipTrigger = 6; /* Increase this value ==> compression ru
 typedef enum { notLimited = 0, limitedOutput = 1 } limitedOutput_directive;
 typedef enum { unusedTable = 0, byPtr = 1, byU32 = 2, byU16 = 3 } tableType_t;
 
-typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
+typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingExtDictCtx } dict_directive;
 typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
 
 typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
@@ -533,7 +533,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
                  const int maxOutputSize,
                  const limitedOutput_directive outputLimited,
                  const tableType_t tableType,
-                 const dict_directive dict,
+                 const dict_directive dictDirective,
                  const dictIssue_directive dictIssue,
                  const U32 acceleration)
 {
@@ -553,15 +553,27 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
                               !resetTable) ? cctx->currentOffset : 1;
     const BYTE* base = (const BYTE*) source - currentOffset;
     const BYTE* lowLimit;
-    const BYTE* const lowRefLimit = ip - cctx->dictSize;
-    const BYTE* const dictionary = cctx->dictionary;
-    const BYTE* const dictEnd = dictionary + cctx->dictSize;
-    const ptrdiff_t dictDelta = dictEnd - (const BYTE*)source;
+
+    const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx;
+    const BYTE* const dictionary =
+        (dictDirective == usingExtDictCtx ? dictCtx : cctx)->dictionary;
+    const U32 dictSize =
+        (dictDirective == usingExtDictCtx ? dictCtx : cctx)->dictSize;
+
+    const BYTE* const lowRefLimit = (const BYTE*) source - dictSize;
+    const BYTE* const dictEnd = dictionary + dictSize;
     const BYTE* anchor = (const BYTE*) source;
     const BYTE* const iend = ip + inputSize;
     const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1;
     const BYTE* const matchlimit = iend - LASTLITERALS;
 
+    /* the dictCtx currentOffset is indexed on the start of the dictionary,
+     * while a dictionary in the current context precedes the currentOffset */
+    const BYTE* dictBase = dictDirective == usingExtDictCtx ?
+                           (const BYTE*) source - dictCtx->currentOffset :
+                           (const BYTE*) source - dictSize - currentOffset;
+    const ptrdiff_t dictDelta = dictionary ? dictEnd - (const BYTE*) source : 0;
+
     BYTE* op = (BYTE*) dest;
     BYTE* const olimit = op + maxOutputSize;
 
@@ -574,17 +586,19 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
 
     /* Init conditions */
     if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported inputSize, too large (or negative) */
-    switch(dict)
+    switch(dictDirective)
     {
     case noDict:
     default:
         lowLimit = (const BYTE*)source;
         break;
     case withPrefix64k:
-        lowLimit = (const BYTE*)source - cctx->dictSize;
+        lowLimit = (const BYTE*)source - dictSize;
         break;
     case usingExtDict:
-        base = (const BYTE*)source - cctx->currentOffset;
+        lowLimit = (const BYTE*)source;
+        break;
+    case usingExtDictCtx:
         lowLimit = (const BYTE*)source;
         break;
     }
@@ -622,8 +636,19 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
                 assert(ip < mflimitPlusOne);
 
                 match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base);
-                if (dict==usingExtDict) {
+                if (dictDirective == usingExtDictCtx) {
                     if (match < (const BYTE*)source) {
+                        /* there was no match, try the dictionary */
+                        /* TODO: use precalc-ed hash? */
+                        match = LZ4_getPosition(ip, dictCtx->hashTable, byU32, dictBase);
+                        refDelta = dictDelta;
+                        lowLimit = dictionary;
+                    } else {
+                        refDelta = 0;
+                        lowLimit = (const BYTE*)source;
+                    }
+                } else if (dictDirective==usingExtDict) {
+                    if (match < (const BYTE*)source && dictionary != NULL) {
                         refDelta = dictDelta;
                         lowLimit = dictionary;
                     } else {
@@ -667,7 +692,7 @@ _next_match:
         /* Encode MatchLength */
         {   unsigned matchCode;
 
-            if ((dict==usingExtDict) && (lowLimit==dictionary)) {
+            if ((dictDirective==usingExtDict || dictDirective==usingExtDictCtx) && (dictionary != NULL) && (lowLimit==dictionary)) {
                 const BYTE* limit;
                 match += refDelta;
                 limit = ip + (dictEnd-match);
@@ -712,8 +737,19 @@ _next_match:
 
         /* Test next position */
         match = LZ4_getPosition(ip, cctx->hashTable, tableType, base);
-        if (dict==usingExtDict) {
+        if (dictDirective == usingExtDictCtx) {
             if (match < (const BYTE*)source) {
+                /* there was no match, try the dictionary */
+                /* TODO: use precalc-ed hash? */
+                match = LZ4_getPosition(ip, dictCtx->hashTable, byU32, dictBase);
+                refDelta = dictDelta;
+                lowLimit = dictionary;
+            } else {
+                refDelta = 0;
+                lowLimit = (const BYTE*)source;
+            }
+        } else if (dictDirective==usingExtDict) {
+            if (match < (const BYTE*)source && dictionary != NULL) {
                 refDelta = dictDelta;
                 lowLimit = dictionary;
             } else {
@@ -751,8 +787,15 @@ _last_literals:
     retval = (((char*)op)-dest);
 
 _clean_up:
+    if (dictDirective == usingExtDictCtx) {
+        /* Subsequent linked blocks can't use the dictionary. */
+        /* Instead, they use the block we just compressed. */
+        cctx->dictCtx = NULL;
+        cctx->dictSize = (U32)inputSize;
+    } else {
+        cctx->dictSize += (U32)inputSize;
+    }
     cctx->currentOffset += (U32)inputSize;
-    cctx->dictSize += (U32)inputSize;
     cctx->tableType = tableType;
 
     /* End */
@@ -773,6 +816,7 @@ int LZ4_compress_fast_safeExtState(void* state, const char* source, char* dest,
     if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
     ctx->dictionary = NULL;
     ctx->dictSize = 0;
+    ctx->dictCtx = NULL;
 
     if (maxOutputSize >= LZ4_compressBound(inputSize)) {
         if (inputSize < LZ4_64Klimit)
@@ -365,15 +365,16 @@ LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int or
 #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
 #include <stdint.h>
 
-typedef struct {
+typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
+struct LZ4_stream_t_internal {
     uint32_t hashTable[LZ4_HASH_SIZE_U32];
     uint32_t currentOffset;
     uint16_t initCheck;
     uint16_t tableType;
     const uint8_t* dictionary;
-    uint8_t* bufferStart; /* obsolete, used for slideInputBuffer */
+    const LZ4_stream_t_internal* dictCtx;
     uint32_t dictSize;
-} LZ4_stream_t_internal;
+};
 
 typedef struct {
     const uint8_t* externalDict;
@@ -384,15 +385,16 @@ typedef struct {
 
 #else
 
-typedef struct {
+typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
+struct LZ4_stream_t_internal {
     unsigned int hashTable[LZ4_HASH_SIZE_U32];
     unsigned int currentOffset;
     unsigned short initCheck;
     unsigned short tableType;
     const unsigned char* dictionary;
-    unsigned char* bufferStart; /* obsolete, used for slideInputBuffer */
+    const LZ4_stream_t_internal* dictCtx;
     unsigned int dictSize;
-} LZ4_stream_t_internal;
+};
 
 typedef struct {
     const unsigned char* externalDict;
```