summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Makefile6
-rw-r--r--lib/lz4.c62
2 files changed, 53 insertions, 15 deletions
diff --git a/Makefile b/Makefile
index da485a1..76fc9a8 100644
--- a/Makefile
+++ b/Makefile
@@ -57,12 +57,12 @@ all: allmost manuals
.PHONY: allmost
allmost: lib lz4 examples
-.PHONY: lib lib-release
-lib lib-release:
+.PHONY: lib lib-release liblz4.a
+lib lib-release liblz4.a:
@$(MAKE) -C $(LZ4DIR) $@
.PHONY: lz4 lz4-release
-lz4 : lib
+lz4 : liblz4.a
lz4-release : lib-release
lz4 lz4-release :
@$(MAKE) -C $(PRGDIR) $@
diff --git a/lib/lz4.c b/lib/lz4.c
index e21822d..cc76eba 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -85,6 +85,17 @@
#endif
+/*
+ * `register` is ignored when the code is built with a C++17 compiler.
+ * Remove the keyword when building as C++17 to silence the warning.
+ */
+#if defined(__cplusplus) && __cplusplus > 201402L
+# define REGISTER
+#else
+# define REGISTER register
+#endif
+
+
/*-************************************
* Dependency
**************************************/
@@ -339,7 +350,7 @@ static int g_debuglog_enable = 1;
/*-************************************
* Common functions
**************************************/
-static unsigned LZ4_NbCommonBytes (register reg_t val)
+static unsigned LZ4_NbCommonBytes (REGISTER reg_t val)
{
if (LZ4_isLittleEndian()) {
if (sizeof(val)==8) {
@@ -401,11 +412,20 @@ static unsigned LZ4_NbCommonBytes (register reg_t val)
}
#define STEPSIZE sizeof(reg_t)
-static unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+LZ4_FORCE_INLINE
+unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
{
const BYTE* const pStart = pIn;
- while (likely(pIn<pInLimit-(STEPSIZE-1))) {
+ if (likely(pIn < pInLimit-(STEPSIZE-1))) {
+ reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+ if (!diff) {
+ pIn+=STEPSIZE; pMatch+=STEPSIZE;
+ } else {
+ return LZ4_NbCommonBytes(diff);
+ } }
+
+ while (likely(pIn < pInLimit-(STEPSIZE-1))) {
reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
pIn += LZ4_NbCommonBytes(diff);
@@ -1156,7 +1176,7 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic(
int partialDecoding, /* full, partial */
int targetOutputSize, /* only used if partialDecoding==partial */
int dict, /* noDict, withPrefix64k, usingExtDict */
- const BYTE* const lowPrefix, /* == dst when no prefix */
+ const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */
const BYTE* const dictStart, /* only if dict==usingExtDict */
const size_t dictSize /* note : = 0 if noDict */
)
@@ -1170,15 +1190,15 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic(
BYTE* oexit = op + targetOutputSize;
const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
- const unsigned dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};
- const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
+ const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
+ const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
const int safeDecode = (endOnInput==endOnInputSize);
const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
/* Special cases */
- if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => decode everything */
+ if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => just decode everything */
if ((endOnInput) && (unlikely(outputSize==0))) return ((srcSize==1) && (*ip==0)) ? 0 : -1; /* Empty output buffer */
if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1);
@@ -1188,8 +1208,27 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic(
const BYTE* match;
size_t offset;
- /* get literal length */
unsigned const token = *ip++;
+
+ /* shortcut for common case :
+ * in most circumstances, we expect to decode small matches (<= 18 bytes) separated by few literals (<= 14 bytes).
+ * this shortcut was tested on x86 and x64, where it improves decoding speed.
+ * it has not yet been benchmarked on ARM, Power, mips, etc. */
+ if (((ip + 14 /*maxLL*/ + 2 /*offset*/ <= iend)
+ & (op + 14 /*maxLL*/ + 18 /*maxML*/ <= oend))
+ & ((token < (15<<ML_BITS)) & ((token & ML_MASK) != 15)) ) {
+ size_t const ll = token >> ML_BITS;
+ size_t const off = LZ4_readLE16(ip+ll);
+ const BYTE* const matchPtr = op + ll - off; /* pointer underflow risk ? */
+ if ((off >= 18) /* do not deal with overlapping matches */ & (matchPtr >= lowPrefix)) {
+ size_t const ml = (token & ML_MASK) + MINMATCH;
+ memcpy(op, ip, 16); op += ll; ip += ll + 2 /*offset*/;
+ memcpy(op, matchPtr, 18); op += ml;
+ continue;
+ }
+ }
+
+ /* decode literal length */
if ((length=(token>>ML_BITS)) == RUN_MASK) {
unsigned s;
do {
@@ -1267,14 +1306,13 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic(
/* copy match within block */
cpy = op + length;
if (unlikely(offset<8)) {
- const int dec64 = dec64table[offset];
op[0] = match[0];
op[1] = match[1];
op[2] = match[2];
op[3] = match[3];
- match += dec32table[offset];
+ match += inc32table[offset];
memcpy(op+4, match, 4);
- match -= dec64;
+ match -= dec64table[offset];
} else { LZ4_copy8(op, match); match+=8; }
op += 8;
@@ -1291,7 +1329,7 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic(
LZ4_copy8(op, match);
if (length>16) LZ4_wildCopy(op+8, match+8, cpy);
}
- op=cpy; /* correction */
+ op = cpy; /* correction */
}
/* end of decoding */