From e2a985f52444ad2e1438d2a71575185f31a08540 Mon Sep 17 00:00:00 2001
From: Yann Collet
Date: Sun, 19 Oct 2014 17:41:42 +0100
Subject: minor refactoring of LZ4 HC, to prepare for external dictionaries

---
 lz4hc.c | 72 ++++++++++++++++++++++++++++++++++++++++++++---------------------
 lz4hc.h |  7 +++----
 2 files changed, 52 insertions(+), 27 deletions(-)

diff --git a/lz4hc.c b/lz4hc.c
index 2cf494e..5e204c6 100644
--- a/lz4hc.c
+++ b/lz4hc.c
@@ -253,8 +253,11 @@ typedef struct
     const BYTE* inputBuffer;
     const BYTE* base;
     const BYTE* end;
+    const BYTE* dictBase;
+    U32 dictLimit;
     U32 nextToUpdate;
     U32 compressionLevel;
+    U32 lowLimit;
 } LZ4HC_Data_Structure;
@@ -348,6 +351,9 @@ FORCE_INLINE void LZ4HC_init (LZ4HC_Data_Structure* hc4, const BYTE* base)
     hc4->base = base;
     hc4->inputBuffer = base;
     hc4->end = base;
+    hc4->dictBase = base;
+    hc4->dictLimit = 0;
+    hc4->lowLimit = 0;
 }
@@ -374,31 +380,36 @@ FORCE_INLINE void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)
 }
-static size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit)
+static size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const p1Limit)
 {
-    const BYTE* p1t = p1;
+    const BYTE* const p1Start = p1;
-    while (p1t
     U16* const chainTable = hc4->chainTable;
     U32* const HashTable = hc4->hashTable;
     const BYTE* const base = hc4->base;
-    int matchIndex;
-    const int idxLow = (ip-base) > 64 KB ? (ip - base) - 64 KB : 0;
+    const BYTE* const dictBase = hc4->dictBase;
+    const U32 dictLimit = hc4->dictLimit;
+    U32 matchIndex;
+    const U32 idxLow = (ip-base) > 64 KB ? (ip - base) - 64 KB : 0;
     const BYTE* match;
     int nbAttempts=maxNbAttempts;
     size_t ml=0;
@@ -410,11 +421,11 @@ FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const
     while ((matchIndex>idxLow) && (nbAttempts))
     {
         nbAttempts--;
-        match = base + matchIndex;
-        if (*(match+ml) == *(ip+ml))
-        if (A32(match) == A32(ip))
+        match = ((matchIndex
             if (mlt > ml) { ml = mlt; *matchpos = match; }
         }
         matchIndex -= chainTable[matchIndex & 0xFFFF];
@@ -474,6 +485,19 @@ FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
 }
+static void LZ4HC_setExternalDict(LZ4HC_Data_Structure* ctxPtr, const BYTE* newBlock)
+{
+    LZ4HC_Insert (ctxPtr, ctxPtr->end-3);   // finish referencing dictionary content
+    // Use only one memory segment for dict, so any previous External Dict is lost at this stage
+    ctxPtr->lowLimit  = ctxPtr->dictLimit;
+    ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
+    ctxPtr->dictBase  = ctxPtr->base;
+    ctxPtr->base = newBlock - ctxPtr->dictLimit;
+    ctxPtr->end  = newBlock;
+    ctxPtr->nextToUpdate = ctxPtr->dictLimit;   // reference table must skip to from beginning of block
+}
+
+
 typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;
 FORCE_INLINE int LZ4HC_encodeSequence (
@@ -488,10 +512,12 @@ FORCE_INLINE int LZ4HC_encodeSequence (
     int length;
     BYTE* token;
+    //printf("literal : %u -- match : %u \n", (U32)(*ip - *anchor), (U32)matchLength);   // debug
+
     /* Encode Literal length */
     length = (int)(*ip - *anchor);
     token = (*op)++;
-    if ((limitedOutputBuffer) && ((*op + length + (2 + 1 + LASTLITERALS) + (length>>8)) > oend)) return 1;   /* Check output limit */
+    if ((limitedOutputBuffer) && ((*op + (length>>8) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1;   /* Check output limit */
     if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; }
     else *token = (BYTE)(length<<ML_BITS);
-    if ((limitedOutputBuffer) && (*op + (1 + LASTLITERALS) + (length>>8) > oend)) return 1;   /* Check output limit */
+    if ((limitedOutputBuffer) && (*op + (length>>8) + (1 + LASTLITERALS) > oend)) return 1;   /* Check output limit */
     if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; }
     else *token += (BYTE)(length);
@@ -551,8 +577,8 @@ static int LZ4HC_compress_generic (
     if (compressionLevel > MAX_COMPRESSION_LEVEL) compressionLevel = MAX_COMPRESSION_LEVEL;
     if (compressionLevel == 0) compressionLevel = LZ4HC_DEFAULT_COMPRESSIONLEVEL;
     maxNbAttempts = 1 << compressionLevel;
-    /* Ensure blocks follow each other */
-    if (ip != ctx->end) return 0;
+    /* check if blocks follow each other */
+    if (ip != ctx->end) LZ4HC_setExternalDict(ctx, ip);
     ctx->end += inputSize;
     ip++;
@@ -747,7 +773,7 @@ int LZ4_compressHC_limitedOutput(const char* source, char* dest, int inputSize,
 /*****************************
    Using external allocation
 *****************************/
-int LZ4_sizeofStateHC() { return sizeof(LZ4HC_Data_Structure); }
+int LZ4_sizeofStateHC(void) { return sizeof(LZ4HC_Data_Structure); }
 int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel)
diff --git a/lz4hc.h b/lz4hc.h
index 29a05f5..72979b5 100644
--- a/lz4hc.h
+++ b/lz4hc.h
@@ -95,18 +95,17 @@ int LZ4_sizeofStateHC();
 Note that tables must be aligned for pointer (32 or 64 bits), otherwise compression will fail (return code 0).
-The allocated memory can be provided to the compressions functions using 'void* state' parameter.
+The allocated memory can be provided to the compression functions using 'void* state' parameter.
 LZ4_compress_withStateHC() and LZ4_compress_limitedOutput_withStateHC() are equivalent to previously described functions.
-They just use the externally allocated memory area instead of allocating their own (on stack, or on heap).
+They just use the externally allocated memory for state instead of allocating their own (on stack, or on heap).
 */
-
 /**************************************
   Experimental Streaming Functions
 **************************************/
-#define LZ4_STREAMHCSIZE_U32 65546
+#define LZ4_STREAMHCSIZE_U32 65548
 #define LZ4_STREAMHCSIZE     (LZ4_STREAMHCSIZE_U32 * sizeof(unsigned int))
 typedef struct { unsigned int table[LZ4_STREAMHCSIZE_U32]; } LZ4_streamHC_t;
--
cgit v0.12
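
For reference, the external-allocation entry points touched by this patch, LZ4_sizeofStateHC() and LZ4_compressHC2_withStateHC(), are used along the following lines. This is a minimal illustrative sketch, not code from the commit: it additionally assumes LZ4_compressBound() from lz4.h, relies on malloc() to obtain the pointer-aligned state that the lz4hc.h comments require, and the input string is a placeholder.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lz4.h"     /* LZ4_compressBound() */
#include "lz4hc.h"   /* LZ4_sizeofStateHC(), LZ4_compressHC2_withStateHC() */

int main(void)
{
    const char* src = "yann collet refactored lz4hc / yann collet refactored lz4hc";  /* placeholder input */
    int srcSize = (int)strlen(src) + 1;
    int cSize;

    /* malloc() returns pointer-aligned memory, as required for the state */
    void* state = malloc((size_t)LZ4_sizeofStateHC());
    char* dst   = (char*)malloc((size_t)LZ4_compressBound(srcSize));
    if ((state==NULL) || (dst==NULL)) { free(state); free(dst); return 1; }

    /* compression level 0 selects LZ4HC_DEFAULT_COMPRESSIONLEVEL (see the hunk at -551 above) */
    cSize = LZ4_compressHC2_withStateHC(state, src, dst, srcSize, 0);
    printf("compressed %d bytes into %d bytes\n", srcSize, cSize);

    free(dst);
    free(state);
    return (cSize > 0) ? 0 : 1;
}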