diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/lz4.c | 72 | ||||
-rw-r--r-- | lib/lz4hc.c | 57 |
2 files changed, 68 insertions, 61 deletions
@@ -143,7 +143,7 @@ # endif /* _MSC_VER */ #endif /* LZ4_FORCE_INLINE */ -/* LZ4_FORCE_O2_GCC_PPC64LE and LZ4_FORCE_O2_INLINE_GCC_PPC64LE +/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8, * together with a simple 8-byte copy loop as a fall-back path. * However, this optimization hurts the decompression speed by >30%, @@ -158,11 +158,11 @@ * of LZ4_wildCopy8 does not affect the compression speed. */ #if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__) -# define LZ4_FORCE_O2_GCC_PPC64LE __attribute__((optimize("O2"))) -# define LZ4_FORCE_O2_INLINE_GCC_PPC64LE __attribute__((optimize("O2"))) LZ4_FORCE_INLINE +# define LZ4_FORCE_O2 __attribute__((optimize("O2"))) +# undef LZ4_FORCE_INLINE +# define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline)) #else -# define LZ4_FORCE_O2_GCC_PPC64LE -# define LZ4_FORCE_O2_INLINE_GCC_PPC64LE static +# define LZ4_FORCE_O2 #endif #if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__) @@ -310,7 +310,7 @@ typedef enum { #define LZ4_memcpy(dst, src, size) memcpy(dst, src, size) #endif -static unsigned LZ4_isLittleEndian(void) +LZ4_FORCE_INLINE unsigned LZ4_isLittleEndian(void) { const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ return one.c[0]; @@ -320,12 +320,12 @@ static unsigned LZ4_isLittleEndian(void) #if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2) /* lie to the compiler about data alignment; use with caution */ -static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; } -static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; } -static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; } +LZ4_FORCE_INLINE U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; } +LZ4_FORCE_INLINE U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; } +LZ4_FORCE_INLINE reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; } -static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } -static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } +LZ4_FORCE_INLINE void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +LZ4_FORCE_INLINE void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } #elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1) @@ -333,36 +333,36 @@ static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } /* currently only defined for gcc and icc */ typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign; -static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -static U32 LZ4_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -static reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArch; } +LZ4_FORCE_INLINE U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } +LZ4_FORCE_INLINE U32 LZ4_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +LZ4_FORCE_INLINE reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArch; } -static void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } -static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } +LZ4_FORCE_INLINE void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } +LZ4_FORCE_INLINE void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } #else /* safe and portable access using memcpy() */ -static U16 LZ4_read16(const void* memPtr) +LZ4_FORCE_INLINE U16 LZ4_read16(const void* memPtr) { U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; } -static U32 LZ4_read32(const void* memPtr) +LZ4_FORCE_INLINE U32 LZ4_read32(const void* memPtr) { U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; } -static reg_t LZ4_read_ARCH(const void* memPtr) +LZ4_FORCE_INLINE reg_t LZ4_read_ARCH(const void* memPtr) { reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; } -static void LZ4_write16(void* memPtr, U16 value) +LZ4_FORCE_INLINE void LZ4_write16(void* memPtr, U16 value) { LZ4_memcpy(memPtr, &value, sizeof(value)); } -static void LZ4_write32(void* memPtr, U32 value) +LZ4_FORCE_INLINE void LZ4_write32(void* memPtr, U32 value) { LZ4_memcpy(memPtr, &value, sizeof(value)); } @@ -370,7 +370,7 @@ static void LZ4_write32(void* memPtr, U32 value) #endif /* LZ4_FORCE_MEMORY_ACCESS */ -static U16 LZ4_readLE16(const void* memPtr) +LZ4_FORCE_INLINE U16 LZ4_readLE16(const void* memPtr) { if (LZ4_isLittleEndian()) { return LZ4_read16(memPtr); @@ -380,7 +380,7 @@ static U16 LZ4_readLE16(const void* memPtr) } } -static void LZ4_writeLE16(void* memPtr, U16 value) +LZ4_FORCE_INLINE void LZ4_writeLE16(void* memPtr, U16 value) { if (LZ4_isLittleEndian()) { LZ4_write16(memPtr, value); @@ -392,7 +392,7 @@ static void LZ4_writeLE16(void* memPtr, U16 value) } /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */ -LZ4_FORCE_O2_INLINE_GCC_PPC64LE +LZ4_FORCE_INLINE void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd) { BYTE* d = (BYTE*)dstPtr; @@ -421,7 +421,7 @@ static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3}; #if LZ4_FAST_DEC_LOOP -LZ4_FORCE_O2_INLINE_GCC_PPC64LE void +LZ4_FORCE_INLINE void LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset) { if (offset < 8) { @@ -445,7 +445,7 @@ LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, con /* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd * this version copies two times 16 bytes (instead of one time 32 bytes) * because it must be compatible with offsets >= 16. */ -LZ4_FORCE_O2_INLINE_GCC_PPC64LE void +LZ4_FORCE_INLINE void LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd) { BYTE* d = (BYTE*)dstPtr; @@ -458,7 +458,7 @@ LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd) /* LZ4_memcpy_using_offset() presumes : * - dstEnd >= dstPtr + MINMATCH * - there is at least 8 bytes available to write after dstEnd */ -LZ4_FORCE_O2_INLINE_GCC_PPC64LE void +LZ4_FORCE_INLINE void LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset) { BYTE v[8]; @@ -2152,7 +2152,7 @@ LZ4_decompress_generic( /*===== Instantiate the API decoding functions. =====*/ -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, @@ -2160,7 +2160,7 @@ int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int (BYTE*)dest, NULL, 0); } -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity) { dstCapacity = MIN(targetOutputSize, dstCapacity); @@ -2169,7 +2169,7 @@ int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, noDict, (BYTE*)dst, NULL, 0); } -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 int LZ4_decompress_fast(const char* source, char* dest, int originalSize) { return LZ4_decompress_generic(source, dest, 0, originalSize, @@ -2179,7 +2179,7 @@ int LZ4_decompress_fast(const char* source, char* dest, int originalSize) /*===== Instantiate a few more decoding cases, used more than once. =====*/ -LZ4_FORCE_O2_GCC_PPC64LE /* Exported, an obsolete API function. */ +LZ4_FORCE_O2 /* Exported, an obsolete API function. */ int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, @@ -2195,7 +2195,7 @@ int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int origin return LZ4_decompress_fast(source, dest, originalSize); } -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize, size_t prefixSize) { @@ -2204,7 +2204,7 @@ static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, i (BYTE*)dest-prefixSize, NULL, 0); } -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const void* dictStart, size_t dictSize) @@ -2214,7 +2214,7 @@ int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, (BYTE*)dest, (const BYTE*)dictStart, dictSize); } -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize, const void* dictStart, size_t dictSize) { @@ -2303,7 +2303,7 @@ int LZ4_decoderRingBufferSize(int maxBlockSize) If it's not possible, save the relevant part of decoded data into a safe buffer, and indicate where it stands using LZ4_setStreamDecode() */ -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize) { LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; @@ -2343,7 +2343,7 @@ int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch return result; } -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize) { LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 29b6073..15bedec 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -159,7 +159,7 @@ int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match, #endif -static U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern) +LZ4_FORCE_INLINE U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern) { size_t const bitsToRotate = (rotate & (sizeof(pattern) - 1)) << 3; if (bitsToRotate == 0) return pattern; @@ -223,7 +223,7 @@ LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern) * 4 byte MINMATCH would overflow. * @returns true if the match index is okay. */ -static int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex) +LZ4_FORCE_INLINE int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex) { return ((U32)((dictLimit - 1) - matchIndex) >= 3); } @@ -460,83 +460,90 @@ int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index tabl * @return : 0 if ok, * 1 if buffer issue detected */ LZ4_FORCE_INLINE int LZ4HC_encodeSequence ( - const BYTE** ip, - BYTE** op, - const BYTE** anchor, + const BYTE** _ip, + BYTE** _op, + const BYTE** _anchor, int matchLength, const BYTE* const match, limitedOutput_directive limit, BYTE* oend) { +#define ip (*_ip) +#define op (*_op) +#define anchor (*_anchor) + size_t length; - BYTE* const token = (*op)++; + BYTE* const token = op++; #if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6) static const BYTE* start = NULL; static U32 totalCost = 0; - U32 const pos = (start==NULL) ? 0 : (U32)(*anchor - start); - U32 const ll = (U32)(*ip - *anchor); + U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start); + U32 const ll = (U32)(ip - anchor); U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0; U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0; U32 const cost = 1 + llAdd + ll + 2 + mlAdd; - if (start==NULL) start = *anchor; /* only works for single segment */ + if (start==NULL) start = anchor; /* only works for single segment */ /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */ DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5u, cost:%4u + %5u", pos, - (U32)(*ip - *anchor), matchLength, (U32)(*ip-match), + (U32)(ip - anchor), matchLength, (U32)(ip-match), cost, totalCost); totalCost += cost; #endif /* Encode Literal length */ - length = (size_t)(*ip - *anchor); + length = (size_t)(ip - anchor); LZ4_STATIC_ASSERT(notLimited == 0); /* Check output limit */ - if (limit && ((*op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) { + if (limit && ((op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) { DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)", - (int)length, (int)(oend-*op)); + (int)length, (int)(oend - op)); return 1; } if (length >= RUN_MASK) { size_t len = length - RUN_MASK; *token = (RUN_MASK << ML_BITS); - for(; len >= 255 ; len -= 255) *(*op)++ = 255; - *(*op)++ = (BYTE)len; + for(; len >= 255 ; len -= 255) *op++ = 255; + *op++ = (BYTE)len; } else { *token = (BYTE)(length << ML_BITS); } /* Copy Literals */ - LZ4_wildCopy8(*op, *anchor, (*op) + length); - *op += length; + LZ4_wildCopy8(op, anchor, op + length); + op += length; /* Encode Offset */ - assert( (*ip - match) <= LZ4_DISTANCE_MAX ); /* note : consider providing offset as a value, rather than as a pointer difference */ - LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2; + assert( (ip - match) <= LZ4_DISTANCE_MAX ); /* note : consider providing offset as a value, rather than as a pointer difference */ + LZ4_writeLE16(op, (U16)(ip - match)); op += 2; /* Encode MatchLength */ assert(matchLength >= MINMATCH); length = (size_t)matchLength - MINMATCH; - if (limit && (*op + (length / 255) + (1 + LASTLITERALS) > oend)) { + if (limit && (op + (length / 255) + (1 + LASTLITERALS) > oend)) { DEBUGLOG(6, "Not enough room to write match length"); return 1; /* Check output limit */ } if (length >= ML_MASK) { *token += ML_MASK; length -= ML_MASK; - for(; length >= 510 ; length -= 510) { *(*op)++ = 255; *(*op)++ = 255; } - if (length >= 255) { length -= 255; *(*op)++ = 255; } - *(*op)++ = (BYTE)length; + for(; length >= 510 ; length -= 510) { *op++ = 255; *op++ = 255; } + if (length >= 255) { length -= 255; *op++ = 255; } + *op++ = (BYTE)length; } else { *token += (BYTE)(length); } /* Prepare next loop */ - *ip += matchLength; - *anchor = *ip; + ip += matchLength; + anchor = ip; return 0; } +#undef ip +#undef op +#undef anchor LZ4_FORCE_INLINE int LZ4HC_compress_hashChain ( LZ4HC_CCtx_internal* const ctx, |