From 749bd91a06be385b6485bfc7fe94e8a86b3ed13a Mon Sep 17 00:00:00 2001 From: remittor Date: Wed, 7 Oct 2020 09:51:08 +0300 Subject: Replace define LZ4_FORCE_O2_INLINE_GCC_PPC64LE to LZ4_FORCE_INLINE There is no reason to separate these two definitions! --- lib/lz4.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/lib/lz4.c b/lib/lz4.c index 1d08e10..0f53857 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -143,7 +143,7 @@ # endif /* _MSC_VER */ #endif /* LZ4_FORCE_INLINE */ -/* LZ4_FORCE_O2_GCC_PPC64LE and LZ4_FORCE_O2_INLINE_GCC_PPC64LE +/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8, * together with a simple 8-byte copy loop as a fall-back path. * However, this optimization hurts the decompression speed by >30%, @@ -158,11 +158,11 @@ * of LZ4_wildCopy8 does not affect the compression speed. */ #if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__) -# define LZ4_FORCE_O2_GCC_PPC64LE __attribute__((optimize("O2"))) -# define LZ4_FORCE_O2_INLINE_GCC_PPC64LE __attribute__((optimize("O2"))) LZ4_FORCE_INLINE +# define LZ4_FORCE_O2 __attribute__((optimize("O2"))) +# undef LZ4_FORCE_INLINE +# define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline)) #else -# define LZ4_FORCE_O2_GCC_PPC64LE -# define LZ4_FORCE_O2_INLINE_GCC_PPC64LE LZ4_FORCE_INLINE +# define LZ4_FORCE_O2 #endif #if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__) @@ -381,7 +381,7 @@ static void LZ4_writeLE16(void* memPtr, U16 value) } /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */ -LZ4_FORCE_O2_INLINE_GCC_PPC64LE +LZ4_FORCE_INLINE void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd) { BYTE* d = (BYTE*)dstPtr; @@ -410,7 +410,7 @@ static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3}; #if LZ4_FAST_DEC_LOOP -LZ4_FORCE_O2_INLINE_GCC_PPC64LE void +LZ4_FORCE_INLINE void LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset) { if (offset < 8) { @@ -434,7 +434,7 @@ LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, con /* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd * this version copies two times 16 bytes (instead of one time 32 bytes) * because it must be compatible with offsets >= 16. */ -LZ4_FORCE_O2_INLINE_GCC_PPC64LE void +LZ4_FORCE_INLINE void LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd) { BYTE* d = (BYTE*)dstPtr; @@ -447,7 +447,7 @@ LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd) /* LZ4_memcpy_using_offset() presumes : * - dstEnd >= dstPtr + MINMATCH * - there is at least 8 bytes available to write after dstEnd */ -LZ4_FORCE_O2_INLINE_GCC_PPC64LE void +LZ4_FORCE_INLINE void LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset) { BYTE v[8]; @@ -2145,7 +2145,7 @@ LZ4_decompress_generic( /*===== Instantiate the API decoding functions. =====*/ -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, @@ -2153,7 +2153,7 @@ int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int (BYTE*)dest, NULL, 0); } -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity) { dstCapacity = MIN(targetOutputSize, dstCapacity); @@ -2162,7 +2162,7 @@ int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, noDict, (BYTE*)dst, NULL, 0); } -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 int LZ4_decompress_fast(const char* source, char* dest, int originalSize) { return LZ4_decompress_generic(source, dest, 0, originalSize, @@ -2172,7 +2172,7 @@ int LZ4_decompress_fast(const char* source, char* dest, int originalSize) /*===== Instantiate a few more decoding cases, used more than once. =====*/ -LZ4_FORCE_O2_GCC_PPC64LE /* Exported, an obsolete API function. */ +LZ4_FORCE_O2 /* Exported, an obsolete API function. */ int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, @@ -2188,7 +2188,7 @@ int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int origin return LZ4_decompress_fast(source, dest, originalSize); } -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize, size_t prefixSize) { @@ -2197,7 +2197,7 @@ static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, i (BYTE*)dest-prefixSize, NULL, 0); } -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const void* dictStart, size_t dictSize) @@ -2207,7 +2207,7 @@ int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, (BYTE*)dest, (const BYTE*)dictStart, dictSize); } -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize, const void* dictStart, size_t dictSize) { @@ -2296,7 +2296,7 @@ int LZ4_decoderRingBufferSize(int maxBlockSize) If it's not possible, save the relevant part of decoded data into a safe buffer, and indicate where it stands using LZ4_setStreamDecode() */ -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize) { LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; @@ -2336,7 +2336,7 @@ int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch return result; } -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize) { LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; -- cgit v0.12