From 002ec60f0feadc07a25a6f18a7b2b4ace3c1b718 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 30 Mar 2015 15:57:26 +0100 Subject: restored lz4hc compression ratio --- lib/lz4hc.c | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index a03c511..c7a94a0 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -37,7 +37,7 @@ You can contact the author at : /************************************** Tuning Parameter **************************************/ -static const int LZ4HC_compressionLevel_default = 8; +static const int LZ4HC_compressionLevel_default = 9; /************************************** @@ -206,9 +206,9 @@ FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, /* I FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( LZ4HC_Data_Structure* hc4, - const BYTE* ip, - const BYTE* iLowLimit, - const BYTE* iHighLimit, + const BYTE* const ip, + const BYTE* const iLowLimit, + const BYTE* const iHighLimit, int longest, const BYTE** matchpos, const BYTE** startpos, @@ -218,9 +218,9 @@ FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( U32* const HashTable = hc4->hashTable; const BYTE* const base = hc4->base; const U32 dictLimit = hc4->dictLimit; + const BYTE* const lowPrefixPtr = base + dictLimit; const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? 
hc4->lowLimit : (U32)(ip - base) - (64 KB - 1); const BYTE* const dictBase = hc4->dictBase; - const BYTE* match; U32 matchIndex; int nbAttempts = maxNbAttempts; int delta = (int)(ip-iLowLimit); @@ -235,37 +235,41 @@ FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( nbAttempts--; if (matchIndex >= dictLimit) { - match = base + matchIndex; - if (*(iLowLimit + longest) == *(match - delta + longest)) - if (LZ4_read32(match) == LZ4_read32(ip)) + const BYTE* matchPtr = base + matchIndex; + if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) + if (LZ4_read32(matchPtr) == LZ4_read32(ip)) { - const BYTE* startt = ip; - const BYTE* tmpMatch = match; - const BYTE* const matchEnd = ip + MINMATCH + LZ4_count(ip+MINMATCH, match+MINMATCH, iHighLimit); + int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); + int back = 0; + + while ((ip+back>iLowLimit) + && (matchPtr+back > lowPrefixPtr) + && (ip[back-1] == matchPtr[back-1])) + back--; - while ((startt>iLowLimit) && (tmpMatch > iLowLimit) && (startt[-1] == tmpMatch[-1])) {startt--; tmpMatch--;} + mlt -= back; - if ((matchEnd-startt) > longest) + if (mlt > longest) { - longest = (int)(matchEnd-startt); - *matchpos = tmpMatch; - *startpos = startt; + longest = (int)mlt; + *matchpos = matchPtr+back; + *startpos = ip+back; } } } else { - match = dictBase + matchIndex; - if (LZ4_read32(match) == LZ4_read32(ip)) + const BYTE* matchPtr = dictBase + matchIndex; + if (LZ4_read32(matchPtr) == LZ4_read32(ip)) { size_t mlt; int back=0; const BYTE* vLimit = ip + (dictLimit - matchIndex); if (vLimit > iHighLimit) vLimit = iHighLimit; - mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; + mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; if ((ip+mlt == vLimit) && (vLimit < iHighLimit)) mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit); - while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == match[back-1])) back--; + while ((ip+back > iLowLimit) && 
(matchIndex+back > lowLimit) && (ip[back-1] == matchPtr[back-1])) back--; mlt -= back; if ((int)mlt > longest) { longest = (int)mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; } } -- cgit v0.12 From eeb8bea34c2df279307eaed922f3a89cc420316c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 30 Mar 2015 16:36:57 +0100 Subject: Updated comments on LZ4F_getFrameInfo() --- lib/lz4frame.c | 8 ++++---- lib/lz4frame.h | 4 ++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/lib/lz4frame.c b/lib/lz4frame.c index 5683eee..5f69c95 100644 --- a/lib/lz4frame.c +++ b/lib/lz4frame.c @@ -918,10 +918,10 @@ LZ4F_errorCode_t LZ4F_getFrameInfo(LZ4F_decompressionContext_t decompressionCont if (dctxPtr->dStage == dstage_getHeader) { - LZ4F_errorCode_t errorCode = LZ4F_decodeHeader(dctxPtr, srcBuffer, *srcSizePtr); - if (LZ4F_isError(errorCode)) return errorCode; - *srcSizePtr = errorCode; /* nb Bytes consumed */ - *frameInfoPtr = dctxPtr->frameInfo; + size_t frameHeaderSize = LZ4F_decodeHeader(dctxPtr, srcBuffer, *srcSizePtr); + if (LZ4F_isError(frameHeaderSize)) return frameHeaderSize; + *srcSizePtr = frameHeaderSize; /* nb Bytes consumed */ + *frameInfoPtr = dctxPtr->frameInfo; /* copy into */ dctxPtr->srcExpect = NULL; return 4; /* nextSrcSizeHint : 4 == block header size */ } diff --git a/lib/lz4frame.h b/lib/lz4frame.h index 01a756a..9d12c7d 100644 --- a/lib/lz4frame.h +++ b/lib/lz4frame.h @@ -214,8 +214,12 @@ size_t LZ4F_getFrameInfo(LZ4F_decompressionContext_t dctx, * This function decodes frame header information, such as blockSize. * It is optional : you could start by calling directly LZ4F_decompress() instead. * The objective is to extract header information without starting decompression, typically for allocation purposes. + * The function will work only if srcBuffer points at the beginning of the frame, + * and *srcSizePtr is large enough to decode the whole header (typically, between 7 & 15 bytes). 
+ * The result is copied into an LZ4F_frameInfo_t structure, which is pointed to by frameInfoPtr, and must already be allocated. * LZ4F_getFrameInfo() can also be used *after* starting decompression, on a valid LZ4F_decompressionContext_t. * The number of bytes read from srcBuffer will be provided within *srcSizePtr (necessarily <= original value). + * It is basically the frame header size. * You are expected to resume decompression from where it stopped (srcBuffer + *srcSizePtr) * The function result is an hint of how many srcSize bytes LZ4F_decompress() expects for next call, * or an error code which can be tested using LZ4F_isError(). -- cgit v0.12