Diffstat (limited to 'lib')
-rw-r--r--  lib/lz4.c    72
-rw-r--r--  lib/lz4hc.c  57
2 files changed, 68 insertions(+), 61 deletions(-)
diff --git a/lib/lz4.c b/lib/lz4.c
index c902654..9d547e5 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -143,7 +143,7 @@
# endif /* _MSC_VER */
#endif /* LZ4_FORCE_INLINE */
-/* LZ4_FORCE_O2_GCC_PPC64LE and LZ4_FORCE_O2_INLINE_GCC_PPC64LE
+/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
* gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
* together with a simple 8-byte copy loop as a fall-back path.
* However, this optimization hurts the decompression speed by >30%,
@@ -158,11 +158,11 @@
* of LZ4_wildCopy8 does not affect the compression speed.
*/
#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
-# define LZ4_FORCE_O2_GCC_PPC64LE __attribute__((optimize("O2")))
-# define LZ4_FORCE_O2_INLINE_GCC_PPC64LE __attribute__((optimize("O2"))) LZ4_FORCE_INLINE
+# define LZ4_FORCE_O2 __attribute__((optimize("O2")))
+# undef LZ4_FORCE_INLINE
+# define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline))
#else
-# define LZ4_FORCE_O2_GCC_PPC64LE
-# define LZ4_FORCE_O2_INLINE_GCC_PPC64LE static
+# define LZ4_FORCE_O2
#endif
#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
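
For readers outside the diff: a minimal, self-contained sketch of the macro pattern the commit renames, assuming gcc on ppc64le on one side and any other toolchain on the other. FORCE_O2 and FORCE_INLINE are placeholder names, not the library's.

/* Illustrative reduction of the conditional above. */
#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
#  define FORCE_O2     __attribute__((optimize("O2")))
#  define FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline))
#else
#  define FORCE_O2
#  define FORCE_INLINE static __inline
#endif

FORCE_INLINE unsigned add_one(unsigned x) { return x + 1; }  /* always inlined on gcc/ppc64le */

FORCE_O2 int entry(int x)  /* whole function pinned at -O2 on gcc/ppc64le, default elsewhere */
{
    return (int)add_one((unsigned)x);
}
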
@@ -310,7 +310,7 @@ typedef enum {
#define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
#endif
-static unsigned LZ4_isLittleEndian(void)
+LZ4_FORCE_INLINE unsigned LZ4_isLittleEndian(void)
{
const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
return one.c[0];
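
The union above is the standard portable endianness probe: storing 1 into a 4-byte integer puts the low-order byte first on little-endian hardware, so c[0] is 1 there and 0 on big-endian targets. With the function now force-inlined, compilers can constant-fold the result and drop the dead branch at each call site. A standalone sketch:

#include <stdio.h>

/* Illustration, not the library's code: overlay an int with its bytes. */
static unsigned is_little_endian(void)
{
    const union { unsigned u; unsigned char c[4]; } one = { 1 };
    return one.c[0];
}

int main(void)
{
    printf("little-endian: %u\n", is_little_endian());
    return 0;
}
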
@@ -320,12 +320,12 @@ static unsigned LZ4_isLittleEndian(void)
#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
/* lie to the compiler about data alignment; use with caution */
-static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
-static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }
-static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; }
+LZ4_FORCE_INLINE U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
+LZ4_FORCE_INLINE U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }
+LZ4_FORCE_INLINE reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; }
-static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
-static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
+LZ4_FORCE_INLINE void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+LZ4_FORCE_INLINE void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)
@@ -333,36 +333,36 @@ static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
/* currently only defined for gcc and icc */
typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign;
-static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
-static U32 LZ4_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
-static reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArch; }
+LZ4_FORCE_INLINE U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+LZ4_FORCE_INLINE U32 LZ4_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+LZ4_FORCE_INLINE reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArch; }
-static void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
-static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
+LZ4_FORCE_INLINE void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
+LZ4_FORCE_INLINE void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
#else /* safe and portable access using memcpy() */
-static U16 LZ4_read16(const void* memPtr)
+LZ4_FORCE_INLINE U16 LZ4_read16(const void* memPtr)
{
U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
}
-static U32 LZ4_read32(const void* memPtr)
+LZ4_FORCE_INLINE U32 LZ4_read32(const void* memPtr)
{
U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
}
-static reg_t LZ4_read_ARCH(const void* memPtr)
+LZ4_FORCE_INLINE reg_t LZ4_read_ARCH(const void* memPtr)
{
reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
}
-static void LZ4_write16(void* memPtr, U16 value)
+LZ4_FORCE_INLINE void LZ4_write16(void* memPtr, U16 value)
{
LZ4_memcpy(memPtr, &value, sizeof(value));
}
-static void LZ4_write32(void* memPtr, U32 value)
+LZ4_FORCE_INLINE void LZ4_write32(void* memPtr, U32 value)
{
LZ4_memcpy(memPtr, &value, sizeof(value));
}
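
This third branch is the safe default: a memcpy with a compile-time-constant size is recognized by gcc, clang, and MSVC and lowered to a single unaligned load or store, with none of the alignment or strict-aliasing hazards of a raw pointer cast. A minimal illustration of the pattern (not the library's code):

#include <string.h>
#include <stdint.h>

static inline uint32_t read32(const void* p)
{
    uint32_t v;
    memcpy(&v, p, sizeof(v));   /* typically becomes one 32-bit load */
    return v;
}

static inline void write32(void* p, uint32_t v)
{
    memcpy(p, &v, sizeof(v));   /* typically becomes one 32-bit store */
}
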
@@ -370,7 +370,7 @@ static void LZ4_write32(void* memPtr, U32 value)
#endif /* LZ4_FORCE_MEMORY_ACCESS */
-static U16 LZ4_readLE16(const void* memPtr)
+LZ4_FORCE_INLINE U16 LZ4_readLE16(const void* memPtr)
{
if (LZ4_isLittleEndian()) {
return LZ4_read16(memPtr);
@@ -380,7 +380,7 @@ static U16 LZ4_readLE16(const void* memPtr)
}
}
-static void LZ4_writeLE16(void* memPtr, U16 value)
+LZ4_FORCE_INLINE void LZ4_writeLE16(void* memPtr, U16 value)
{
if (LZ4_isLittleEndian()) {
LZ4_write16(memPtr, value);
@@ -392,7 +392,7 @@ static void LZ4_writeLE16(void* memPtr, U16 value)
}
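
The diff context elides the big-endian branches of LZ4_readLE16/LZ4_writeLE16. A sketch of what they presumably do, based on the usual lz4.c sources: assemble or split the 16-bit value byte by byte in little-endian order. Since LZ4_isLittleEndian() is now force-inlined and constant-foldable, the branch costs nothing at run time.

/* Sketch (assumption) of the elided big-endian paths. */
static unsigned short readLE16_bigendian(const unsigned char* p)
{
    return (unsigned short)(p[0] | (p[1] << 8));
}

static void writeLE16_bigendian(unsigned char* p, unsigned short v)
{
    p[0] = (unsigned char) v;
    p[1] = (unsigned char)(v >> 8);
}
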
/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
-LZ4_FORCE_O2_INLINE_GCC_PPC64LE
+LZ4_FORCE_INLINE
void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
{
BYTE* d = (BYTE*)dstPtr;
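
The hunk truncates the body of LZ4_wildCopy8. A sketch of the presumed loop, mirroring the upstream sources: copy in fixed 8-byte blocks until the cursor passes dstEnd, relying on the caller's guarantee that up to 8 bytes past dstEnd are writable.

#include <string.h>

/* Sketch (assumption): the final block may overshoot dstEnd, by contract. */
static void wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
{
    unsigned char*       d = (unsigned char*)dstPtr;
    const unsigned char* s = (const unsigned char*)srcPtr;
    unsigned char* const e = (unsigned char*)dstEnd;
    do { memcpy(d, s, 8); d += 8; s += 8; } while (d < e);
}
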
@@ -421,7 +421,7 @@ static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
#if LZ4_FAST_DEC_LOOP
-LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
+LZ4_FORCE_INLINE void
LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
{
if (offset < 8) {
@@ -445,7 +445,7 @@ LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, con
/* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
* this version copies two times 16 bytes (instead of one time 32 bytes)
* because it must be compatible with offsets >= 16. */
-LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
+LZ4_FORCE_INLINE void
LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
{
BYTE* d = (BYTE*)dstPtr;
@@ -458,7 +458,7 @@ LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
/* LZ4_memcpy_using_offset() presumes :
* - dstEnd >= dstPtr + MINMATCH
* - there is at least 8 bytes available to write after dstEnd */
-LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
+LZ4_FORCE_INLINE void
LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
{
BYTE v[8];
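
The rest of LZ4_memcpy_using_offset is elided here. The idea, sketched under assumptions for the simplest case: when offset < 8 the copy overlaps itself, so the 8-byte buffer v is first filled with one full period of the repeating pattern, then stamped across the output. Shown for offset == 1 (a run-length match):

#include <string.h>

/* Sketch (assumption, simplified to period 1): a plain memcpy would read
 * bytes it has not written yet, so the pattern is materialized once in v. */
static void copy_overlap1(unsigned char* dst, const unsigned char* src, unsigned char* dstEnd)
{
    unsigned char v[8];
    memset(v, src[0], 8);        /* one period of the pattern */
    for (; dst < dstEnd; dst += 8)
        memcpy(dst, v, 8);       /* may write past dstEnd, by contract */
}
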
@@ -2152,7 +2152,7 @@ LZ4_decompress_generic(
/*===== Instantiate the API decoding functions. =====*/
-LZ4_FORCE_O2_GCC_PPC64LE
+LZ4_FORCE_O2
int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
{
return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
@@ -2160,7 +2160,7 @@ int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int
(BYTE*)dest, NULL, 0);
}
-LZ4_FORCE_O2_GCC_PPC64LE
+LZ4_FORCE_O2
int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity)
{
dstCapacity = MIN(targetOutputSize, dstCapacity);
@@ -2169,7 +2169,7 @@ int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize,
noDict, (BYTE*)dst, NULL, 0);
}
-LZ4_FORCE_O2_GCC_PPC64LE
+LZ4_FORCE_O2
int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
{
return LZ4_decompress_generic(source, dest, 0, originalSize,
@@ -2179,7 +2179,7 @@ int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
/*===== Instantiate a few more decoding cases, used more than once. =====*/
-LZ4_FORCE_O2_GCC_PPC64LE /* Exported, an obsolete API function. */
+LZ4_FORCE_O2 /* Exported, an obsolete API function. */
int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
{
return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
@@ -2195,7 +2195,7 @@ int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int origin
return LZ4_decompress_fast(source, dest, originalSize);
}
-LZ4_FORCE_O2_GCC_PPC64LE
+LZ4_FORCE_O2
static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
size_t prefixSize)
{
@@ -2204,7 +2204,7 @@ static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, i
(BYTE*)dest-prefixSize, NULL, 0);
}
-LZ4_FORCE_O2_GCC_PPC64LE
+LZ4_FORCE_O2
int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
int compressedSize, int maxOutputSize,
const void* dictStart, size_t dictSize)
@@ -2214,7 +2214,7 @@ int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
(BYTE*)dest, (const BYTE*)dictStart, dictSize);
}
-LZ4_FORCE_O2_GCC_PPC64LE
+LZ4_FORCE_O2
static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize,
const void* dictStart, size_t dictSize)
{
@@ -2303,7 +2303,7 @@ int LZ4_decoderRingBufferSize(int maxBlockSize)
If it's not possible, save the relevant part of decoded data into a safe buffer,
and indicate where it stands using LZ4_setStreamDecode()
*/
-LZ4_FORCE_O2_GCC_PPC64LE
+LZ4_FORCE_O2
int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
{
LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
@@ -2343,7 +2343,7 @@ int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch
return result;
}
-LZ4_FORCE_O2_GCC_PPC64LE
+LZ4_FORCE_O2
int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)
{
LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
diff --git a/lib/lz4hc.c b/lib/lz4hc.c
index 29b6073..15bedec 100644
--- a/lib/lz4hc.c
+++ b/lib/lz4hc.c
@@ -159,7 +159,7 @@ int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
#endif
-static U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern)
+LZ4_FORCE_INLINE U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern)
{
size_t const bitsToRotate = (rotate & (sizeof(pattern) - 1)) << 3;
if (bitsToRotate == 0) return pattern;
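
The return statement of LZ4HC_rotatePattern is cut off by the hunk. Based on the usual lz4hc.c sources it is a left rotation by whole bytes, which realigns a 4-byte repeating pattern with a shifted match position. A self-contained sketch with a worked value:

#include <stdint.h>

/* Sketch (assumption): e.g. rotatePattern(1, 0x04030201) == 0x03020104. */
static uint32_t rotatePattern(size_t rotate, uint32_t pattern)
{
    size_t const bits = (rotate & (sizeof(pattern) - 1)) << 3;  /* bytes -> bits */
    if (bits == 0) return pattern;
    return (pattern << bits) | (pattern >> (32 - bits));        /* rotate left */
}
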
@@ -223,7 +223,7 @@ LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern)
* 4 byte MINMATCH would overflow.
* @returns true if the match index is okay.
*/
-static int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex)
+LZ4_FORCE_INLINE int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex)
{
return ((U32)((dictLimit - 1) - matchIndex) >= 3);
}
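
The single unsigned comparison above packs two checks into one: subtracting in U32 wraps around for matchIndex >= dictLimit, producing a huge value that passes, while the three indices just below dictLimit, where a 4-byte MINMATCH read would straddle the dictionary end, yield 0..2 and fail. A standalone check:

#include <assert.h>
#include <stdint.h>

static int protectDictEnd(uint32_t dictLimit, uint32_t matchIndex)
{
    return ((uint32_t)((dictLimit - 1) - matchIndex) >= 3);
}

int main(void)
{
    assert( protectDictEnd(100,  96));  /* match + 4 bytes fits below 100 */
    assert(!protectDictEnd(100,  97));  /* would straddle the boundary    */
    assert(!protectDictEnd(100,  99));  /* likewise                       */
    assert( protectDictEnd(100, 100));  /* at/after dictLimit: wraps, ok  */
    return 0;
}
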
@@ -460,83 +460,90 @@ int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index tabl
* @return : 0 if ok,
* 1 if buffer issue detected */
LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
- const BYTE** ip,
- BYTE** op,
- const BYTE** anchor,
+ const BYTE** _ip,
+ BYTE** _op,
+ const BYTE** _anchor,
int matchLength,
const BYTE* const match,
limitedOutput_directive limit,
BYTE* oend)
{
+#define ip (*_ip)
+#define op (*_op)
+#define anchor (*_anchor)
+
size_t length;
- BYTE* const token = (*op)++;
+ BYTE* const token = op++;
#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6)
static const BYTE* start = NULL;
static U32 totalCost = 0;
- U32 const pos = (start==NULL) ? 0 : (U32)(*anchor - start);
- U32 const ll = (U32)(*ip - *anchor);
+ U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start);
+ U32 const ll = (U32)(ip - anchor);
U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0;
U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0;
U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
- if (start==NULL) start = *anchor; /* only works for single segment */
+ if (start==NULL) start = anchor; /* only works for single segment */
/* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */
DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5u, cost:%4u + %5u",
pos,
- (U32)(*ip - *anchor), matchLength, (U32)(*ip-match),
+ (U32)(ip - anchor), matchLength, (U32)(ip-match),
cost, totalCost);
totalCost += cost;
#endif
/* Encode Literal length */
- length = (size_t)(*ip - *anchor);
+ length = (size_t)(ip - anchor);
LZ4_STATIC_ASSERT(notLimited == 0);
/* Check output limit */
- if (limit && ((*op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) {
+ if (limit && ((op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) {
DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)",
- (int)length, (int)(oend-*op));
+ (int)length, (int)(oend - op));
return 1;
}
if (length >= RUN_MASK) {
size_t len = length - RUN_MASK;
*token = (RUN_MASK << ML_BITS);
- for(; len >= 255 ; len -= 255) *(*op)++ = 255;
- *(*op)++ = (BYTE)len;
+ for(; len >= 255 ; len -= 255) *op++ = 255;
+ *op++ = (BYTE)len;
} else {
*token = (BYTE)(length << ML_BITS);
}
/* Copy Literals */
- LZ4_wildCopy8(*op, *anchor, (*op) + length);
- *op += length;
+ LZ4_wildCopy8(op, anchor, op + length);
+ op += length;
/* Encode Offset */
- assert( (*ip - match) <= LZ4_DISTANCE_MAX ); /* note : consider providing offset as a value, rather than as a pointer difference */
- LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2;
+ assert( (ip - match) <= LZ4_DISTANCE_MAX ); /* note : consider providing offset as a value, rather than as a pointer difference */
+ LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
/* Encode MatchLength */
assert(matchLength >= MINMATCH);
length = (size_t)matchLength - MINMATCH;
- if (limit && (*op + (length / 255) + (1 + LASTLITERALS) > oend)) {
+ if (limit && (op + (length / 255) + (1 + LASTLITERALS) > oend)) {
DEBUGLOG(6, "Not enough room to write match length");
return 1; /* Check output limit */
}
if (length >= ML_MASK) {
*token += ML_MASK;
length -= ML_MASK;
- for(; length >= 510 ; length -= 510) { *(*op)++ = 255; *(*op)++ = 255; }
- if (length >= 255) { length -= 255; *(*op)++ = 255; }
- *(*op)++ = (BYTE)length;
+ for(; length >= 510 ; length -= 510) { *op++ = 255; *op++ = 255; }
+ if (length >= 255) { length -= 255; *op++ = 255; }
+ *op++ = (BYTE)length;
} else {
*token += (BYTE)(length);
}
/* Prepare next loop */
- *ip += matchLength;
- *anchor = *ip;
+ ip += matchLength;
+ anchor = ip;
return 0;
}
+#undef ip
+#undef op
+#undef anchor
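
This ip/op/anchor macro wrapper is the substance of the lz4hc.c change: the body of LZ4HC_encodeSequence now reads and writes plain names while every access still goes through the caller's pointers, replacing the noisy *(*op)++ spellings. A minimal standalone demonstration of the pattern (illustrative names, not the library's):

#include <assert.h>

/* Inside the function, `p` reads like a local pointer, but every use
 * expands to *_p, so increments are visible to the caller afterwards. */
static int emit_byte(unsigned char** _p, unsigned char value)
{
#define p (*_p)
    *p++ = value;   /* expands to *(*_p)++ = value; */
    return 0;
#undef p
}

int main(void)
{
    unsigned char buf[4];
    unsigned char* cursor = buf;
    emit_byte(&cursor, 0xAB);
    assert(cursor == buf + 1 && buf[0] == 0xAB);
    return 0;
}
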
LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
LZ4HC_CCtx_internal* const ctx,