summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/lz4.h14
-rw-r--r--lib/lz4hc.c94
-rw-r--r--programs/util.h2
-rw-r--r--tests/fullbench.c34
-rw-r--r--visual/.gitignore2
5 files changed, 79 insertions, 67 deletions
diff --git a/lib/lz4.h b/lib/lz4.h
index db6353c..2745260 100644
--- a/lib/lz4.h
+++ b/lib/lz4.h
@@ -317,15 +317,19 @@ LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const
* If less than 64KB of data has been decoded all the data must be present.
*
* Special : if application sets a ring buffer for decompression, it must respect one of the following conditions :
- * - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions)
- * In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB).
- * - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
- * maxBlockSize is implementation dependent. It's the maximum size of any single block.
+ * - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ * maxBlockSize is the maximum size of any single block. It is implementation dependent, and can have any value (presumed > 16 bytes).
* In which case, encoding and decoding buffers do not need to be synchronized,
* and encoding ring buffer can have any size, including small ones ( < 64 KB).
- * - _At least_ 64 KB + 8 bytes + maxBlockSize.
+ * - Decompression buffer size is _at least_ 64 KB + 8 bytes + maxBlockSize.
* In which case, encoding and decoding buffers do not need to be synchronized,
* and encoding ring buffer can have any size, including larger than decoding buffer.
+ * - Decompression buffer size is exactly the same as compression buffer size,
+ * and follows exactly same update rule (block boundaries at same positions).
+ * If the decoding function is provided with the exact decompressed size of each block,
+ * then decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB).
+ * If the decoding function only knows the compressed size,
+ * then buffer size must be a minimum of 64 KB + 8 bytes + maxBlockSize.
* Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer,
* and indicate where it is saved using LZ4_setStreamDecode() before decompressing next block.
*/
diff --git a/lib/lz4hc.c b/lib/lz4hc.c
index 948d66d..bb1b1a6 100644
--- a/lib/lz4hc.c
+++ b/lib/lz4hc.c
@@ -138,6 +138,7 @@ int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
{
int back = 0;
int const min = (int)MAX(iMin - ip, mMin - match);
+ assert(min <= 0);
assert(ip >= iMin); assert((size_t)(ip-iMin) < (1U<<31));
assert(match >= mMin); assert((size_t)(match - mMin) < (1U<<31));
while ( (back > min)
@@ -222,9 +223,9 @@ LZ4HC_InsertAndGetWiderMatch (
const U32 dictLimit = hc4->dictLimit;
const BYTE* const lowPrefixPtr = base + dictLimit;
const U32 ipIndex = (U32)(ip - base);
- const U32 lowLimit = (hc4->lowLimit + 64 KB > ipIndex) ? hc4->lowLimit : ipIndex - MAX_DISTANCE;
+ const U32 lowestMatchIndex = (hc4->lowLimit + 64 KB > ipIndex) ? hc4->lowLimit : ipIndex - MAX_DISTANCE;
const BYTE* const dictBase = hc4->dictBase;
- int const delta = (int)(ip-iLowLimit);
+ int const lookBackLength = (int)(ip-iLowLimit);
int nbAttempts = maxNbAttempts;
U32 const pattern = LZ4_read32(ip);
U32 matchIndex;
@@ -236,10 +237,10 @@ LZ4HC_InsertAndGetWiderMatch (
/* First Match */
LZ4HC_Insert(hc4, ip);
matchIndex = HashTable[LZ4HC_hashPtr(ip)];
- DEBUGLOG(7, "First match at index %u / %u (lowLimit)",
- matchIndex, lowLimit);
+ DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)",
+ matchIndex, lowestMatchIndex);
- while ((matchIndex>=lowLimit) && (nbAttempts)) {
+ while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) {
DEBUGLOG(7, "remaining attempts : %i", nbAttempts);
nbAttempts--;
assert(matchIndex < ipIndex);
@@ -247,34 +248,35 @@ LZ4HC_InsertAndGetWiderMatch (
/* do nothing */
} else if (matchIndex >= dictLimit) {
const BYTE* const matchPtr = base + matchIndex;
+ assert(matchPtr >= lowPrefixPtr);
+ assert(matchPtr < ip);
assert(longest >= 1);
- if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - delta + longest - 1)) {
+ if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) {
if (LZ4_read32(matchPtr) == pattern) {
int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
- int const back = delta ? LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr) : 0;
+ int const back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr) : 0;
mlt -= back;
-
if (mlt > longest) {
longest = mlt;
*matchpos = matchPtr+back;
*startpos = ip+back;
- } }
- }
+ } } }
} else { /* matchIndex < dictLimit */
const BYTE* const matchPtr = dictBase + matchIndex;
if (LZ4_read32(matchPtr) == pattern) {
+ const BYTE* const dictStart = dictBase + hc4->lowLimit;
int mlt;
int back = 0;
const BYTE* vLimit = ip + (dictLimit - matchIndex);
if (vLimit > iHighLimit) vLimit = iHighLimit;
mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
if ((ip+mlt == vLimit) && (vLimit < iHighLimit))
- mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit);
- back = delta ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictBase+lowLimit) : 0;
+ mlt += LZ4_count(ip+mlt, lowPrefixPtr, iHighLimit);
+ back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictStart) : 0;
mlt -= back;
if (mlt > longest) {
longest = mlt;
- *matchpos = base + matchIndex + back;
+ *matchpos = base + matchIndex + back; /* virtual pos, relative to ip, to retrieve offset */
*startpos = ip + back;
} } }
@@ -306,13 +308,13 @@ LZ4HC_InsertAndGetWiderMatch (
matchIndex -= (U32)backLength; /* let's go to farthest segment position, will find a match of length currentSegmentLength + maybe some back */
}
} } } }
- } /* while ((matchIndex>=lowLimit) && (nbAttempts)) */
+ } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */
- if (dict == usingDictCtx && nbAttempts && ipIndex - lowLimit < MAX_DISTANCE) {
+ if (dict == usingDictCtx && nbAttempts && ipIndex - lowestMatchIndex < MAX_DISTANCE) {
size_t const dictEndOffset = dictCtx->end - dictCtx->base;
assert(dictEndOffset <= 1 GB);
dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)];
- matchIndex = dictMatchIndex + lowLimit - (U32)dictEndOffset;
+ matchIndex = dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset;
while (ipIndex - matchIndex <= MAX_DISTANCE && nbAttempts--) {
const BYTE* const matchPtr = dictCtx->base + dictMatchIndex;
@@ -322,7 +324,7 @@ LZ4HC_InsertAndGetWiderMatch (
const BYTE* vLimit = ip + (dictEndOffset - dictMatchIndex);
if (vLimit > iHighLimit) vLimit = iHighLimit;
mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
- back = delta ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->base + dictCtx->dictLimit) : 0;
+ back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->base + dictCtx->dictLimit) : 0;
mlt -= back;
if (mlt > longest) {
longest = mlt;
@@ -459,14 +461,14 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
BYTE* op = (BYTE*) dest;
BYTE* oend = op + maxOutputSize;
- int ml, ml2, ml3, ml0;
+ int ml0, ml, ml2, ml3;
+ const BYTE* start0;
+ const BYTE* ref0;
const BYTE* ref = NULL;
const BYTE* start2 = NULL;
const BYTE* ref2 = NULL;
const BYTE* start3 = NULL;
const BYTE* ref3 = NULL;
- const BYTE* start0;
- const BYTE* ref0;
/* init */
*srcSizePtr = 0;
@@ -479,31 +481,27 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
if (ml<MINMATCH) { ip++; continue; }
/* saved, in case we would skip too much */
- start0 = ip;
- ref0 = ref;
- ml0 = ml;
+ start0 = ip; ref0 = ref; ml0 = ml;
_Search2:
- if (ip+ml <= mflimit)
+ if (ip+ml <= mflimit) {
ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2,
maxNbAttempts, patternAnalysis, dict, favorCompressionRatio);
- else
+ } else {
ml2 = ml;
+ }
- if (ml2 == ml) { /* No better match */
+ if (ml2 == ml) { /* No better match => encode ML1 */
optr = op;
if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow;
continue;
}
- if (start0 < ip) {
- if (start2 < ip + ml0) { /* empirical */
- ip = start0;
- ref = ref0;
- ml = ml0;
- }
- }
+ if (start0 < ip) { /* first match was skipped at least once */
+ if (start2 < ip + ml0) { /* squeezing ML1 between ML0(original ML1) and ML2 */
+ ip = start0; ref = ref0; ml = ml0; /* restore initial ML1 */
+ } }
/* Here, start0==ip */
if ((start2 - ip) < 3) { /* First Match too small : removed */
@@ -531,14 +529,15 @@ _Search3:
}
/* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
- if (start2 + ml2 <= mflimit)
+ if (start2 + ml2 <= mflimit) {
ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3,
maxNbAttempts, patternAnalysis, dict, favorCompressionRatio);
- else
+ } else {
ml3 = ml2;
+ }
- if (ml3 == ml2) { /* No better match : 2 sequences to encode */
+ if (ml3 == ml2) { /* No better match => encode ML1 and ML2 */
/* ip & ref are known; Now for ml */
if (start2 < ip+ml) ml = (int)(start2 - ip);
/* Now, encode 2 sequences */
@@ -583,11 +582,12 @@ _Search3:
}
/*
- * OK, now we have 3 ascending matches; let's write at least the first one
- * ip & ref are known; Now for ml
+ * OK, now we have 3 ascending matches;
+ * let's write the first one ML1.
+ * ip & ref are known; Now decide ml.
*/
if (start2 < ip+ml) {
- if ((start2 - ip) < (int)ML_MASK) {
+ if ((start2 - ip) < OPTIMAL_ML) {
int correction;
if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
@@ -604,14 +604,13 @@ _Search3:
optr = op;
if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow;
- ip = start2;
- ref = ref2;
- ml = ml2;
+ /* ML2 becomes ML1 */
+ ip = start2; ref = ref2; ml = ml2;
- start2 = start3;
- ref2 = ref3;
- ml2 = ml3;
+ /* ML3 becomes ML2 */
+ start2 = start3; ref2 = ref3; ml2 = ml3;
+ /* let's find a new ML3 */
goto _Search3;
}
@@ -705,8 +704,6 @@ LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal (
ctx->end += *srcSizePtr;
if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT; /* note : convention is different from lz4frame, maybe something to review */
cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel);
- assert(cLevel >= 0);
- assert(cLevel <= LZ4HC_CLEVEL_MAX);
{ cParams_t const cParam = clTable[cLevel];
HCfavor_e const favor = ctx->favorDecSpeed ? favorDecompressionSpeed : favorCompressionRatio;
if (cParam.strat == lz4hc)
@@ -905,7 +902,8 @@ void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC
static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock)
{
DEBUGLOG(4, "LZ4HC_setExternalDict(%p, %p)", ctxPtr, newBlock);
- if (ctxPtr->end >= ctxPtr->base + 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
+ if (ctxPtr->end >= ctxPtr->base + ctxPtr->dictLimit + 4)
+ LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
/* Only one memory segment for extDict, so any previous extDict is lost at this stage */
ctxPtr->lowLimit = ctxPtr->dictLimit;
diff --git a/programs/util.h b/programs/util.h
index ef6ca77..d74db0d 100644
--- a/programs/util.h
+++ b/programs/util.h
@@ -194,7 +194,7 @@ extern "C" {
return ((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom);
}
-#elif (PLATFORM_POSIX_VERSION >= 200112L) && (defined __UCLIBC__ || ((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 17) || __GLIBC__ > 2))
+#elif (PLATFORM_POSIX_VERSION >= 200112L) && (defined __UCLIBC__ || (defined(__GLIBC__) && ((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 17) || __GLIBC__ > 2) ) )
#include <time.h>
typedef struct timespec UTIL_time_t;
diff --git a/tests/fullbench.c b/tests/fullbench.c
index ee2966f..c06e230 100644
--- a/tests/fullbench.c
+++ b/tests/fullbench.c
@@ -267,13 +267,20 @@ static int local_LZ4_decompress_fast(const char* in, char* out, int inSize, int
return outSize;
}
-static int local_LZ4_decompress_fast_usingDict(const char* in, char* out, int inSize, int outSize)
+static int local_LZ4_decompress_fast_usingDict_prefix(const char* in, char* out, int inSize, int outSize)
{
(void)inSize;
LZ4_decompress_fast_usingDict(in, out, outSize, out - 65536, 65536);
return outSize;
}
+static int local_LZ4_decompress_fast_usingExtDict(const char* in, char* out, int inSize, int outSize)
+{
+ (void)inSize;
+ LZ4_decompress_fast_usingDict(in, out, outSize, out - 65536, 65535);
+ return outSize;
+}
+
static int local_LZ4_decompress_safe_usingDict(const char* in, char* out, int inSize, int outSize)
{
(void)inSize;
@@ -460,7 +467,7 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles)
double averageTime;
clock_t clockTime;
- PROGRESS("%1i- %-28.28s :%9i ->\r", loopNb, compressorName, (int)benchedSize);
+ PROGRESS("%2i-%-34.34s :%10i ->\r", loopNb, compressorName, (int)benchedSize);
{ size_t i; for (i=0; i<benchedSize; i++) compressed_buff[i]=(char)i; } /* warming up memory */
nb_loops = 0;
@@ -471,7 +478,8 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles)
if (initFunction!=NULL) initFunction();
for (chunkNb=0; chunkNb<nbChunks; chunkNb++) {
chunkP[chunkNb].compressedSize = compressionFunction(chunkP[chunkNb].origBuffer, chunkP[chunkNb].compressedBuffer, chunkP[chunkNb].origSize);
- if (chunkP[chunkNb].compressedSize==0) DISPLAY("ERROR ! %s() = 0 !! \n", compressorName), exit(1);
+ if (chunkP[chunkNb].compressedSize==0)
+ DISPLAY("ERROR ! %s() = 0 !! \n", compressorName), exit(1);
}
nb_loops++;
}
@@ -482,13 +490,13 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles)
if (averageTime < bestTime) bestTime = averageTime;
cSize=0; for (chunkNb=0; chunkNb<nbChunks; chunkNb++) cSize += chunkP[chunkNb].compressedSize;
ratio = (double)cSize/(double)benchedSize*100.;
- PROGRESS("%1i- %-28.28s :%9i ->%9i (%5.2f%%),%7.1f MB/s\r", loopNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 1000000);
+ PROGRESS("%2i-%-34.34s :%10i ->%9i (%5.2f%%),%7.1f MB/s\r", loopNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 1000000);
}
if (ratio<100.)
- DISPLAY("%2i-%-28.28s :%9i ->%9i (%5.2f%%),%7.1f MB/s\n", cAlgNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 1000000);
+ DISPLAY("%2i-%-34.34s :%10i ->%9i (%5.2f%%),%7.1f MB/s\n", cAlgNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 1000000);
else
- DISPLAY("%2i-%-28.28s :%9i ->%9i (%5.1f%%),%7.1f MB/s\n", cAlgNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 100000);
+ DISPLAY("%2i-%-34.34s :%10i ->%9i (%5.1f%%),%7.1f MB/s\n", cAlgNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 100000);
}
/* Prepare layout for decompression */
@@ -509,11 +517,12 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles)
}
for (chunkNb=0; chunkNb<nbChunks; chunkNb++) {
chunkP[chunkNb].compressedSize = LZ4_compress_default(chunkP[chunkNb].origBuffer, chunkP[chunkNb].compressedBuffer, chunkP[chunkNb].origSize, maxCompressedChunkSize);
- if (chunkP[chunkNb].compressedSize==0) DISPLAY("ERROR ! %s() = 0 !! \n", "LZ4_compress"), exit(1);
+ if (chunkP[chunkNb].compressedSize==0)
+ DISPLAY("ERROR ! %s() = 0 !! \n", "LZ4_compress"), exit(1);
}
/* Decompression Algorithms */
- for (dAlgNb=0; (dAlgNb <= NB_DECOMPRESSION_ALGORITHMS) && (g_decompressionTest); dAlgNb++) {
+ for (dAlgNb=0; (dAlgNb <= NB_DECOMPRESSION_ALGORITHMS) && g_decompressionTest; dAlgNb++) {
const char* dName;
int (*decompressionFunction)(const char*, char*, int, int);
double bestTime = 100000000.;
@@ -524,7 +533,8 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles)
{
case 0: DISPLAY("Decompression functions : \n"); continue;
case 1: decompressionFunction = local_LZ4_decompress_fast; dName = "LZ4_decompress_fast"; break;
- case 3: decompressionFunction = local_LZ4_decompress_fast_usingDict; dName = "LZ4_decompress_fast_usingDict"; break;
+ case 2: decompressionFunction = local_LZ4_decompress_fast_usingDict_prefix; dName = "LZ4_decompress_fast_usingDict(prefix)"; break;
+ case 3: decompressionFunction = local_LZ4_decompress_fast_usingExtDict; dName = "LZ4_decompress_fast_using(Ext)Dict"; break;
case 4: decompressionFunction = LZ4_decompress_safe; dName = "LZ4_decompress_safe"; break;
case 6: decompressionFunction = local_LZ4_decompress_safe_usingDict; dName = "LZ4_decompress_safe_usingDict"; break;
case 7: decompressionFunction = local_LZ4_decompress_safe_partial; dName = "LZ4_decompress_safe_partial"; break;
@@ -555,7 +565,7 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles)
clock_t clockTime;
U32 crcDecoded;
- PROGRESS("%1i- %-29.29s :%10i ->\r", loopNb, dName, (int)benchedSize);
+ PROGRESS("%2i-%-34.34s :%10i ->\r", loopNb, dName, (int)benchedSize);
nb_loops = 0;
clockTime = clock();
@@ -574,14 +584,14 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles)
averageTime = (double)clockTime / nb_loops / CLOCKS_PER_SEC;
if (averageTime < bestTime) bestTime = averageTime;
- PROGRESS("%1i- %-29.29s :%10i -> %7.1f MB/s\r", loopNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000000);
+ PROGRESS("%2i-%-34.34s :%10i -> %7.1f MB/s\r", loopNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000000);
/* CRC Checking */
crcDecoded = XXH32(orig_buff, (int)benchedSize, 0);
if (crcOriginal!=crcDecoded) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", inFileName, (unsigned)crcOriginal, (unsigned)crcDecoded); exit(1); }
}
- DISPLAY("%2i-%-29.29s :%10i -> %7.1f MB/s\n", dAlgNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000000);
+ DISPLAY("%2i-%-34.34s :%10i -> %7.1f MB/s\n", dAlgNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000000);
}
}
free(orig_buff);
diff --git a/visual/.gitignore b/visual/.gitignore
index dea92fc..276f8f5 100644
--- a/visual/.gitignore
+++ b/visual/.gitignore
@@ -6,5 +6,5 @@
*.sdf
*.suo
*.user
-
+ver*/
VS2010/bin/