From 0f2bf0c54e59d7a926074907556cff883a47f9c5 Mon Sep 17 00:00:00 2001
From: Yann Collet
Date: Sun, 16 Aug 2015 01:54:55 +0100
Subject: Improved performance on ARMv6

---
 lib/lz4.c    | 50 +++++++++++++++++++++++++++++++++++---------------
 lib/xxhash.c |  2 +-
 2 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/lib/lz4.c b/lib/lz4.c
index 21390c9..d808f25 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -53,6 +53,17 @@
 /**************************************
 *  CPU Feature Detection
 **************************************/
+/* LZ4_FORCE_DIRECT_MEMORY_ACCESS
+ * Unaligned memory access is automatically enabled for "common" CPU, such as x86/x64.
+ * For others CPU, the compiler will be more cautious, and insert extra code to ensure proper working with unaligned memory accesses.
+ * If you know your target CPU efficiently supports unaligned memory accesses, you can force this option manually.
+ * If your CPU efficiently supports unaligned memory accesses and the compiler did not automatically detected it, you will witness large performance improvement.
+ * You can also enable this switch from compilation command line / Makefile.
+ */
+#if !defined(LZ4_FORCE_DIRECT_MEMORY_ACCESS) && ( defined(__ARM_FEATURE_UNALIGNED) )
+# define LZ4_FORCE_DIRECT_MEMORY_ACCESS 1
+#endif
+
 /*
  * LZ4_FORCE_SW_BITCOUNT
  * Define this parameter if your target system or compiler does not support hardware bit count
@@ -141,6 +152,13 @@ static unsigned LZ4_isLittleEndian(void)
     return one.c[0];
 }
 
+#if defined(LZ4_FORCE_DIRECT_MEMORY_ACCESS)
+
+static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
+static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }
+static size_t LZ4_read_ARCH(const void* memPtr) { return *(const size_t*) memPtr; }
+
+#else
 
 static U16 LZ4_read16(const void* memPtr)
 {
@@ -149,6 +167,23 @@ static U16 LZ4_read16(const void* memPtr)
     return val16;
 }
 
+static U32 LZ4_read32(const void* memPtr)
+{
+    U32 val32;
+    memcpy(&val32, memPtr, 4);
+    return val32;
+}
+
+static size_t LZ4_read_ARCH(const void* memPtr)
+{
+    size_t val;
+    memcpy(&val, memPtr, sizeof(val));
+    return val;
+}
+
+#endif // LZ4_FORCE_DIRECT_MEMORY_ACCESS
+
+
 static U16 LZ4_readLE16(const void* memPtr)
 {
     if (LZ4_isLittleEndian())
@@ -176,21 +211,6 @@ static void LZ4_writeLE16(void* memPtr, U16 value)
     }
 }
 
-static U32 LZ4_read32(const void* memPtr)
-{
-    U32 val32;
-    memcpy(&val32, memPtr, 4);
-    return val32;
-}
-
-static size_t LZ4_read_ARCH(const void* memPtr)
-{
-    size_t val;
-    memcpy(&val, memPtr, sizeof(val));
-    return val;
-}
-
-
 /* customized variant of memcpy, which can overwrite up to 7 bytes beyond dstEnd */
 static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
 {
diff --git a/lib/xxhash.c b/lib/xxhash.c
index a18b978..a80f1d5 100644
--- a/lib/xxhash.c
+++ b/lib/xxhash.c
@@ -35,7 +35,7 @@ You can contact the author at :
 /**************************************
 *  Tuning parameters
 **************************************/
-/* XXH_FORCE_DIRECT_UNALIGNED_MEMORY_ACCESS
+/* XXH_FORCE_DIRECT_MEMORY_ACCESS
  * Unaligned memory access is automatically enabled for "common" CPU, such as x86/x64.
  * For others CPU, the compiler will be more cautious, and insert extra code to ensure proper working with unaligned memory accesses.
  * If you know your target CPU efficiently supports unaligned memory accesses, you can force this option manually.
-- 
cgit v0.12
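
Editor's note: the hunks above switch LZ4_read16/LZ4_read32/LZ4_read_ARCH between a direct pointer cast (when LZ4_FORCE_DIRECT_MEMORY_ACCESS is set, which the patch enables automatically when the compiler defines __ARM_FEATURE_UNALIGNED, as GCC and Clang do on ARMv6+ targets with unaligned access enabled) and the portable memcpy-based fallback. Below is a minimal standalone sketch of the same idea, not part of the patch: the read32 helper mirrors the patched code, while the demo main() and buffer are hypothetical additions for illustration.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Mirror of the patch's detection: force direct loads only when the
 * compiler itself reports unaligned-access support. */
#if !defined(LZ4_FORCE_DIRECT_MEMORY_ACCESS) && defined(__ARM_FEATURE_UNALIGNED)
#  define LZ4_FORCE_DIRECT_MEMORY_ACCESS 1
#endif

#if defined(LZ4_FORCE_DIRECT_MEMORY_ACCESS)
/* Direct cast: compiles to a single load, valid only where the target
 * tolerates unaligned addresses (the macro gate above ensures this). */
static uint32_t read32(const void* memPtr) { return *(const uint32_t*)memPtr; }
#else
/* Portable fallback: memcpy keeps the access well-defined for any
 * alignment; good compilers still reduce it to one load on x86/x64. */
static uint32_t read32(const void* memPtr)
{
    uint32_t val32;
    memcpy(&val32, memPtr, 4);
    return val32;
}
#endif

int main(void)
{
    unsigned char buf[8] = { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88 };
    /* Deliberately unaligned offset: exercises whichever path was compiled. */
    printf("read32(buf+1) = 0x%08x\n", (unsigned)read32(buf + 1));
    return 0;
}

On a little-endian target both paths print 0x55443322; the two variants differ only in the load sequence the compiler emits, which is where the ARMv6 speedup in this commit comes from.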