author     Yann Collet <cyan@fb.com>    2018-01-14 05:09:02 (GMT)
committer  Yann Collet <cyan@fb.com>    2018-01-14 05:09:02 (GMT)
commit     151a50e16e558b9619b979ef1c0a525acc13ed92 (patch)
tree       de2eace172cbef23f0f12392487104aafeee5960 /lib/lz4.c
parent     7628d9d15a399404d3ee4c0c07fcf0f6a2f39ede (diff)
parent     e0e29cf09e12aa8e98275a736e98fba7eed93b3f (diff)
Merge branch 'master' into coverity_scan
Diffstat (limited to 'lib/lz4.c')
-rw-r--r--  lib/lz4.c  240
1 file changed, 171 insertions, 69 deletions
diff --git a/lib/lz4.c b/lib/lz4.c
index fc19493..213b085 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -1,6 +1,6 @@
/*
LZ4 - Fast LZ compression algorithm
- Copyright (C) 2011-2016, Yann Collet.
+ Copyright (C) 2011-2017, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
@@ -37,12 +37,12 @@
* Tuning parameters
**************************************/
/*
- * HEAPMODE :
+ * LZ4_HEAPMODE :
* Select how default compression functions will allocate memory for their hash table,
* on the stack (0: default, fastest), or on the heap (1: requires malloc()).
*/
-#ifndef HEAPMODE
-# define HEAPMODE 0
+#ifndef LZ4_HEAPMODE
+# define LZ4_HEAPMODE 0
#endif
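The renamed macro is meant to be set at build time; a minimal sketch (build command illustrative, not part of this patch):

    /* Build-time override, e.g. on the compiler command line:
     *   cc -O2 -DLZ4_HEAPMODE=1 -c lib/lz4.c
     * With LZ4_HEAPMODE=1, the one-shot compressors below allocate their
     * LZ4_stream_t via ALLOCATOR()/FREEMEM() instead of using the stack. */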
/*
@@ -63,16 +63,15 @@
* Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
* This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
* Method 2 : direct access. This method is portable but violates the C standard.
- * It can generate buggy code on targets which generate assembly depending on alignment.
+ * It can generate buggy code on targets where assembly generation depends on alignment.
* But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
* See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
* Prefer these methods in priority order (0 > 1 > 2)
*/
-#ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
+#ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
# define LZ4_FORCE_MEMORY_ACCESS 2
-# elif defined(__INTEL_COMPILER) || \
- (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+# elif defined(__INTEL_COMPILER) || defined(__GNUC__)
# define LZ4_FORCE_MEMORY_ACCESS 1
# endif
#endif
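For reference, the three access methods the comment above describes, as a hedged standalone sketch (helper names are hypothetical; lz4.c's real readers are LZ4_read32/LZ4_read_ARCH):

    #include <stdint.h>
    #include <string.h>

    /* Method 0 : memcpy-based, always standard-compliant */
    static uint32_t read32_safe(const void* p) { uint32_t v; memcpy(&v, p, sizeof(v)); return v; }

    /* Method 1 : packed type, relies on a compiler extension (GCC syntax shown) */
    typedef struct { uint32_t v; } __attribute__((packed)) unalign32;
    static uint32_t read32_packed(const void* p) { return ((const unalign32*)p)->v; }

    /* Method 2 : direct cast, fastest on some targets but violates the C standard */
    static uint32_t read32_direct(const void* p) { return *(const uint32_t*)p; }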
@@ -86,6 +85,7 @@
#endif
+
/*-************************************
* Dependency
**************************************/
@@ -97,20 +97,49 @@
* Compiler Options
**************************************/
#ifdef _MSC_VER /* Visual Studio */
-# define FORCE_INLINE static __forceinline
# include <intrin.h>
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
# pragma warning(disable : 4293) /* disable: C4293: too large shift (32-bits) */
-#else
-# if defined(__GNUC__) || defined(__clang__)
-# define FORCE_INLINE static inline __attribute__((always_inline))
-# elif defined(__cplusplus) || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# define FORCE_INLINE static inline
-# else
-# define FORCE_INLINE static
-# endif
#endif /* _MSC_VER */
+#ifndef LZ4_FORCE_INLINE
+# ifdef _MSC_VER /* Visual Studio */
+# define LZ4_FORCE_INLINE static __forceinline
+# else
+# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+# ifdef __GNUC__
+# define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define LZ4_FORCE_INLINE static inline
+# endif
+# else
+# define LZ4_FORCE_INLINE static
+# endif /* __STDC_VERSION__ */
+# endif /* _MSC_VER */
+#endif /* LZ4_FORCE_INLINE */
+
+/* LZ4_FORCE_O2_GCC_PPC64LE and LZ4_FORCE_O2_INLINE_GCC_PPC64LE
+ * Gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy,
+ * together with a simple 8-byte copy loop as a fall-back path.
+ * However, this optimization hurts the decompression speed by >30%,
+ * because the execution does not go to the optimized loop
+ * for typical compressible data, and all of the preamble checks
+ * before going to the fall-back path become useless overhead.
+ * This optimization happens only with the -O3 flag, and -O2 generates
+ * a simple 8-byte copy loop.
+ * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy
+ * functions are annotated with __attribute__((optimize("O2"))),
+ * and also LZ4_wildCopy is forcibly inlined, so that the O2 attribute
+ * of LZ4_wildCopy does not affect the compression speed.
+ */
+#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__)
+# define LZ4_FORCE_O2_GCC_PPC64LE __attribute__((optimize("O2")))
+# define LZ4_FORCE_O2_INLINE_GCC_PPC64LE __attribute__((optimize("O2"))) LZ4_FORCE_INLINE
+#else
+# define LZ4_FORCE_O2_GCC_PPC64LE
+# define LZ4_FORCE_O2_INLINE_GCC_PPC64LE static
+#endif
+
#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
# define expect(expr,value) (__builtin_expect ((expr),(value)) )
#else
@@ -247,7 +276,8 @@ static void LZ4_copy8(void* dst, const void* src)
}
/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
-static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
+LZ4_FORCE_O2_INLINE_GCC_PPC64LE
+void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
{
BYTE* d = (BYTE*)dstPtr;
const BYTE* s = (const BYTE*)srcPtr;
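The wild-copy idea in isolation, as a minimal sketch (the real function uses LZ4_copy8; the caller guarantees 8 bytes of slack beyond dstEnd):

    #include <string.h>
    static void wildCopy_sketch(void* dstPtr, const void* srcPtr, void* dstEnd)
    {
        unsigned char* d = (unsigned char*)dstPtr;
        const unsigned char* s = (const unsigned char*)srcPtr;
        unsigned char* const e = (unsigned char*)dstEnd;
        do { memcpy(d, s, 8); d += 8; s += 8; } while (d < e);  /* last stride may write past e */
    }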
@@ -281,15 +311,36 @@ static const int LZ4_minLength = (MFLIMIT+1);
/*-************************************
-* Common Utils
+* Error detection
**************************************/
-#define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
+# include <assert.h>
+#else
+# ifndef assert
+# define assert(condition) ((void)0)
+# endif
+#endif
+
+#define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
+
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
+# include <stdio.h>
+static int g_debuglog_enable = 1;
+# define DEBUGLOG(l, ...) { \
+ if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \
+ fprintf(stderr, __FILE__ ": "); \
+ fprintf(stderr, __VA_ARGS__); \
+ fprintf(stderr, " \n"); \
+ } }
+#else
+# define DEBUGLOG(l, ...) {} /* disabled */
+#endif
/*-************************************
* Common functions
**************************************/
-static unsigned LZ4_NbCommonBytes (register reg_t val)
+static unsigned LZ4_NbCommonBytes (reg_t val)
{
if (LZ4_isLittleEndian()) {
if (sizeof(val)==8) {
@@ -300,7 +351,14 @@ static unsigned LZ4_NbCommonBytes (register reg_t val)
# elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
return (__builtin_ctzll((U64)val) >> 3);
# else
- static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+ static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
+ 0, 3, 1, 3, 1, 4, 2, 7,
+ 0, 2, 3, 6, 1, 5, 3, 5,
+ 1, 3, 4, 4, 2, 5, 6, 7,
+ 7, 0, 1, 2, 3, 3, 4, 6,
+ 2, 6, 5, 5, 3, 4, 5, 6,
+ 7, 1, 2, 4, 6, 4, 4, 5,
+ 7, 2, 6, 5, 7, 6, 7, 7 };
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
# endif
} else /* 32 bits */ {
@@ -311,12 +369,15 @@ static unsigned LZ4_NbCommonBytes (register reg_t val)
# elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
return (__builtin_ctz((U32)val) >> 3);
# else
- static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+ static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
+ 3, 2, 2, 1, 3, 2, 0, 1,
+ 3, 3, 1, 2, 2, 2, 2, 0,
+ 3, 1, 2, 0, 1, 0, 1, 1 };
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
# endif
}
} else /* Big Endian CPU */ {
- if (sizeof(val)==8) {
+ if (sizeof(val)==8) { /* 64-bits */
# if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
unsigned long r = 0;
_BitScanReverse64( &r, val );
@@ -324,8 +385,11 @@ static unsigned LZ4_NbCommonBytes (register reg_t val)
# elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
return (__builtin_clzll((U64)val) >> 3);
# else
+ static const U32 by32 = sizeof(val)*4; /* 32 on 64 bits (goal), 16 on 32 bits.
+ Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
+ Note that this code path is never triggered in 32-bits mode. */
unsigned r;
- if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
+ if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
r += (!val);
return r;
@@ -348,11 +412,20 @@ static unsigned LZ4_NbCommonBytes (register reg_t val)
}
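How the table fallback works: val & -val isolates the lowest set bit, and multiplying by a De Bruijn constant puts a unique index in the top bits, which selects the byte position. A standalone sketch of the 32-bit little-endian path, mirroring the code above:

    #include <stdint.h>
    static unsigned lowestSetBytePos32(uint32_t val)
    {
        static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
                                                 3, 2, 2, 1, 3, 2, 0, 1,
                                                 3, 3, 1, 2, 2, 2, 2, 0,
                                                 3, 1, 2, 0, 1, 0, 1, 1 };
        return DeBruijnBytePos[((uint32_t)((val & -(int32_t)val) * 0x077CB531U)) >> 27];
    }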
#define STEPSIZE sizeof(reg_t)
-static unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+LZ4_FORCE_INLINE
+unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
{
const BYTE* const pStart = pIn;
- while (likely(pIn<pInLimit-(STEPSIZE-1))) {
+ if (likely(pIn < pInLimit-(STEPSIZE-1))) {
+ reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+ if (!diff) {
+ pIn+=STEPSIZE; pMatch+=STEPSIZE;
+ } else {
+ return LZ4_NbCommonBytes(diff);
+ } }
+
+ while (likely(pIn < pInLimit-(STEPSIZE-1))) {
reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
pIn += LZ4_NbCommonBytes(diff);
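What LZ4_count computes, by example (buffers hypothetical): the length in bytes of the common prefix of pIn and pMatch, never reading past pInLimit. The peeled first iteration above gives the common case, an early mismatch, a shorter path.

    /* sketch :
     *   const BYTE a[] = "abcdefgh123";
     *   const BYTE b[] = "abcdefgh456";
     *   LZ4_count(a, b, a + 11);   => returns 8
     */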
@@ -391,6 +464,7 @@ typedef enum { full = 0, partial = 1 } earlyEnd_directive;
* Local Utils
**************************************/
int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; }
+const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; }
int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); }
int LZ4_sizeofState() { return LZ4_STREAMSIZE; }
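The new LZ4_versionString() pairs with the existing number; e.g. (illustrative, assumes <stdio.h>):

    printf("lz4 %s (%i)\n", LZ4_versionString(), LZ4_versionNumber());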
@@ -417,7 +491,7 @@ static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
}
-FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType)
+LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType)
{
if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType);
return LZ4_hash4(LZ4_read32(p), tableType);
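The hash is multiplicative: multiply the input bytes by a large odd constant so the high bits mix, then keep the top hashLog bits. A simplified sketch of the 32-bit variant (the constant matches LZ4_hash4; hashLog selection elided):

    static uint32_t hash4_sketch(uint32_t sequence, unsigned hashLog)
    {
        return (sequence * 2654435761U) >> (32 - hashLog);
    }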
@@ -433,7 +507,7 @@ static void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableTy
}
}
-FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
{
U32 const h = LZ4_hashPosition(p, tableType);
LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
@@ -446,7 +520,7 @@ static const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tab
{ const U16* const hashTable = (U16*) tableBase; return hashTable[h] + srcBase; } /* default, to ensure a return */
}
-FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+LZ4_FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
{
U32 const h = LZ4_hashPosition(p, tableType);
return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
@@ -455,7 +529,7 @@ FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableTy
/** LZ4_compress_generic() :
inlined, to ensure branches are decided at compilation time */
-FORCE_INLINE int LZ4_compress_generic(
+LZ4_FORCE_INLINE int LZ4_compress_generic(
LZ4_stream_t_internal* const cctx,
const char* const source,
char* const dest,
@@ -597,7 +671,11 @@ _next_match:
*token += ML_MASK;
matchCode -= ML_MASK;
LZ4_write32(op, 0xFFFFFFFF);
- while (matchCode >= 4*255) op+=4, LZ4_write32(op, 0xFFFFFFFF), matchCode -= 4*255;
+ while (matchCode >= 4*255) {
+ op+=4;
+ LZ4_write32(op, 0xFFFFFFFF);
+ matchCode -= 4*255;
+ }
op += matchCode / 255;
*op++ = (BYTE)(matchCode % 255);
} else
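For context, the loop above emits LZ4's length continuation: a nibble of 15 means "add the following bytes, each worth up to 255, until one is < 255". The decoder-side mirror, as a sketch (bounds checks omitted):

    static size_t readLongLength(const BYTE** ip)
    {
        size_t length = ML_MASK;   /* 15, from the token nibble */
        BYTE s;
        do { s = *(*ip)++; length += s; } while (s == 255);
        return length;
    }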
@@ -677,7 +755,7 @@ int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int
int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
{
-#if (HEAPMODE)
+#if (LZ4_HEAPMODE)
void* ctxPtr = ALLOCATOR(1, sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */
#else
LZ4_stream_t ctx;
@@ -686,7 +764,7 @@ int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutp
int const result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration);
-#if (HEAPMODE)
+#if (LZ4_HEAPMODE)
FREEMEM(ctxPtr);
#endif
return result;
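A typical call into this entry point (sizes and `src` hypothetical):

    char dst[LZ4_COMPRESSBOUND(1024)];
    int const csize = LZ4_compress_fast(src, dst, 1024, (int)sizeof(dst), 1 /* acceleration */);
    if (csize <= 0) { /* handle error */ }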
@@ -890,7 +968,7 @@ static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src,
int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize)
{
-#if (HEAPMODE)
+#if (LZ4_HEAPMODE)
LZ4_stream_t* ctx = (LZ4_stream_t*)ALLOCATOR(1, sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */
#else
LZ4_stream_t ctxBody;
@@ -899,7 +977,7 @@ int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targe
int result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize);
-#if (HEAPMODE)
+#if (LZ4_HEAPMODE)
FREEMEM(ctx);
#endif
return result;
@@ -921,11 +999,13 @@ LZ4_stream_t* LZ4_createStream(void)
void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
{
+ DEBUGLOG(4, "LZ4_resetStream");
MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t));
}
int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
{
+ if (!LZ4_stream) return 0; /* support free on NULL */
FREEMEM(LZ4_stream);
return (0);
}
@@ -1080,47 +1160,46 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
* Decompression functions
*******************************/
/*! LZ4_decompress_generic() :
- * This generic decompression function cover all use cases.
- * It shall be instantiated several times, using different sets of directives
- * Note that it is important this generic function is really inlined,
+ * This generic decompression function covers all use cases.
+ * It shall be instantiated several times, using different sets of directives.
+ * Note that it is important for performance that this function really gets inlined,
* in order to remove useless branches during compilation optimization.
*/
-FORCE_INLINE int LZ4_decompress_generic(
- const char* const source,
- char* const dest,
- int inputSize,
- int outputSize, /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer. */
+LZ4_FORCE_O2_GCC_PPC64LE
+LZ4_FORCE_INLINE int LZ4_decompress_generic(
+ const char* const src,
+ char* const dst,
+ int srcSize,
+ int outputSize, /* If endOnInput==endOnInputSize, this value is `dstCapacity` */
int endOnInput, /* endOnOutputSize, endOnInputSize */
int partialDecoding, /* full, partial */
int targetOutputSize, /* only used if partialDecoding==partial */
int dict, /* noDict, withPrefix64k, usingExtDict */
- const BYTE* const lowPrefix, /* == dest when no prefix */
+ const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */
const BYTE* const dictStart, /* only if dict==usingExtDict */
const size_t dictSize /* note : = 0 if noDict */
)
{
- /* Local Variables */
- const BYTE* ip = (const BYTE*) source;
- const BYTE* const iend = ip + inputSize;
+ const BYTE* ip = (const BYTE*) src;
+ const BYTE* const iend = ip + srcSize;
- BYTE* op = (BYTE*) dest;
+ BYTE* op = (BYTE*) dst;
BYTE* const oend = op + outputSize;
BYTE* cpy;
BYTE* oexit = op + targetOutputSize;
- const BYTE* const lowLimit = lowPrefix - dictSize;
const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
- const unsigned dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};
- const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
+ const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
+ const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
const int safeDecode = (endOnInput==endOnInputSize);
const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
/* Special cases */
- if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => decode everything */
- if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 0 : -1; /* Empty output buffer */
+ if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => just decode everything */
+ if ((endOnInput) && (unlikely(outputSize==0))) return ((srcSize==1) && (*ip==0)) ? 0 : -1; /* Empty output buffer */
if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1);
/* Main Loop : decode sequences */
@@ -1129,8 +1208,27 @@ FORCE_INLINE int LZ4_decompress_generic(
const BYTE* match;
size_t offset;
- /* get literal length */
unsigned const token = *ip++;
+
+ /* shortcut for common case :
+ * in most circumstances, we expect to decode small matches (<= 18 bytes) separated by few literals (<= 14 bytes).
+ * this shortcut was tested on x86 and x64, where it improves decoding speed.
+ * it has not yet been benchmarked on ARM, Power, mips, etc. */
+ if (((ip + 14 /*maxLL*/ + 2 /*offset*/ <= iend)
+ & (op + 14 /*maxLL*/ + 18 /*maxML*/ <= oend))
+ & ((token < (15<<ML_BITS)) & ((token & ML_MASK) != 15)) ) {
+ size_t const ll = token >> ML_BITS;
+ size_t const off = LZ4_readLE16(ip+ll);
+ const BYTE* const matchPtr = op + ll - off; /* pointer underflow risk ? */
+ if ((off >= 18) /* do not deal with overlapping matches */ & (matchPtr >= lowPrefix)) {
+ size_t const ml = (token & ML_MASK) + MINMATCH;
+ memcpy(op, ip, 16); op += ll; ip += ll + 2 /*offset*/;
+ memcpy(op, matchPtr, 18); op += ml;
+ continue;
+ }
+ }
+
+ /* decode literal length */
if ((length=(token>>ML_BITS)) == RUN_MASK) {
unsigned s;
do {
@@ -1164,7 +1262,7 @@ FORCE_INLINE int LZ4_decompress_generic(
/* get offset */
offset = LZ4_readLE16(ip); ip+=2;
match = op - offset;
- if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error; /* Error : offset outside buffers */
+ if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */
LZ4_write32(op, (U32)offset); /* costs ~1%; silence an msan warning when offset==0 */
/* get matchlength */
@@ -1208,14 +1306,13 @@ FORCE_INLINE int LZ4_decompress_generic(
/* copy match within block */
cpy = op + length;
if (unlikely(offset<8)) {
- const int dec64 = dec64table[offset];
op[0] = match[0];
op[1] = match[1];
op[2] = match[2];
op[3] = match[3];
- match += dec32table[offset];
+ match += inc32table[offset];
memcpy(op+4, match, 4);
- match -= dec64;
+ match -= dec64table[offset];
} else { LZ4_copy8(op, match); match+=8; }
op += 8;
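Why the table change: for offset < 8 the match overlaps the bytes being written, so a plain 8-byte copy would read bytes not produced yet. The first four bytes go one at a time; inc32table[offset] advances match so the 4-byte memcpy continues the repeating pattern, and dec64table[offset] repositions match for the next 8-byte stride. Extracted as a standalone sketch:

    static void copyOverlap8(BYTE* op, size_t const offset)   /* sketch, offset in [1,7] */
    {
        static const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
        static const int      dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
        const BYTE* match = op - offset;
        op[0] = match[0]; op[1] = match[1]; op[2] = match[2]; op[3] = match[3];
        match += inc32table[offset];
        memcpy(op+4, match, 4);
        match -= dec64table[offset];   /* match now continues the pattern from op+8 */
    }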
@@ -1232,31 +1329,34 @@ FORCE_INLINE int LZ4_decompress_generic(
LZ4_copy8(op, match);
if (length>16) LZ4_wildCopy(op+8, match+8, cpy);
}
- op=cpy; /* correction */
+ op = cpy; /* correction */
}
/* end of decoding */
if (endOnInput)
- return (int) (((char*)op)-dest); /* Nb of output bytes decoded */
+ return (int) (((char*)op)-dst); /* Nb of output bytes decoded */
else
- return (int) (((const char*)ip)-source); /* Nb of input bytes read */
+ return (int) (((const char*)ip)-src); /* Nb of input bytes read */
/* Overflow error detected */
_output_error:
- return (int) (-(((const char*)ip)-source))-1;
+ return (int) (-(((const char*)ip)-src))-1;
}
+LZ4_FORCE_O2_GCC_PPC64LE
int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
{
return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, full, 0, noDict, (BYTE*)dest, NULL, 0);
}
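Typical call (OUT_CAP and buffers hypothetical); a negative return means malformed input:

    char out[OUT_CAP];
    int const dsize = LZ4_decompress_safe(comp, out, csize, OUT_CAP);
    if (dsize < 0) { /* invalid or truncated input */ }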
+LZ4_FORCE_O2_GCC_PPC64LE
int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize)
{
return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, partial, targetOutputSize, noDict, (BYTE*)dest, NULL, 0);
}
+LZ4_FORCE_O2_GCC_PPC64LE
int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
{
return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)(dest - 64 KB), NULL, 64 KB);
@@ -1265,11 +1365,6 @@ int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
/*===== streaming decompression functions =====*/
-/*
- * If you prefer dynamic allocation methods,
- * LZ4_createStreamDecode()
- * provides a pointer (void*) towards an initialized LZ4_streamDecode_t structure.
- */
LZ4_streamDecode_t* LZ4_createStreamDecode(void)
{
LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOCATOR(1, sizeof(LZ4_streamDecode_t));
@@ -1278,6 +1373,7 @@ LZ4_streamDecode_t* LZ4_createStreamDecode(void)
int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
{
+ if (!LZ4_stream) return 0; /* support free on NULL */
FREEMEM(LZ4_stream);
return 0;
}
@@ -1306,6 +1402,7 @@ int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dicti
If it's not possible, save the relevant part of decoded data into a safe buffer,
and indicate where it stands using LZ4_setStreamDecode()
*/
+LZ4_FORCE_O2_GCC_PPC64LE
int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
{
LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
@@ -1332,6 +1429,7 @@ int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch
return result;
}
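A hedged sketch of driving this entry point: blocks compressed with LZ4_compress_fast_continue are decoded back-to-back, so each block can reference the previous ones as its prefix (framing via sizes[]/nbBlocks is assumed known to the caller):

    #include "lz4.h"
    int decodeBlocks(const char* src, const int* sizes, int nbBlocks,
                     char* dst, int dstCapacity)
    {
        LZ4_streamDecode_t sd;
        int spos = 0, dpos = 0, i;
        LZ4_setStreamDecode(&sd, NULL, 0);   /* fresh stream, no dictionary */
        for (i = 0; i < nbBlocks; i++) {
            int const n = LZ4_decompress_safe_continue(&sd, src + spos, dst + dpos,
                                                       sizes[i], dstCapacity - dpos);
            if (n < 0) return -1;            /* corrupt block */
            spos += sizes[i];
            dpos += n;
        }
        return dpos;                         /* total decompressed size */
    }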
+LZ4_FORCE_O2_GCC_PPC64LE
int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)
{
LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
@@ -1366,7 +1464,8 @@ Advanced decoding functions :
the dictionary must be explicitly provided within parameters
*/
-FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char* dest, int compressedSize, int maxOutputSize, int safe, const char* dictStart, int dictSize)
+LZ4_FORCE_O2_GCC_PPC64LE
+LZ4_FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char* dest, int compressedSize, int maxOutputSize, int safe, const char* dictStart, int dictSize)
{
if (dictSize==0)
return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest, NULL, 0);
@@ -1378,17 +1477,20 @@ FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char* dest
return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
}
+LZ4_FORCE_O2_GCC_PPC64LE
int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
{
return LZ4_decompress_usingDict_generic(source, dest, compressedSize, maxOutputSize, 1, dictStart, dictSize);
}
+LZ4_FORCE_O2_GCC_PPC64LE
int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
{
return LZ4_decompress_usingDict_generic(source, dest, 0, originalSize, 0, dictStart, dictSize);
}
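And the dictionary variant in use (buffers hypothetical):

    int const n = LZ4_decompress_safe_usingDict(comp, out, csize, outCap, dictBuf, dictSize);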
/* debug function */
+LZ4_FORCE_O2_GCC_PPC64LE
int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
{
return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);