diff options
-rw-r--r-- | lib/xxhash.c | 53 |
1 files changed, 39 insertions, 14 deletions
diff --git a/lib/xxhash.c b/lib/xxhash.c index a80f1d5..511d994 100644 --- a/lib/xxhash.c +++ b/lib/xxhash.c @@ -35,15 +35,26 @@ You can contact the author at : /************************************** * Tuning parameters **************************************/ -/* XXH_FORCE_DIRECT_MEMORY_ACCESS - * Unaligned memory access is automatically enabled for "common" CPU, such as x86/x64. - * For others CPU, the compiler will be more cautious, and insert extra code to ensure proper working with unaligned memory accesses. - * If you know your target CPU efficiently supports unaligned memory accesses, you can force this option manually. - * If your CPU efficiently supports unaligned memory accesses and the compiler did not automatically detected it, you will witness large performance improvement. - * You can also enable this switch from compilation command line / Makefile. +/* XXH_FORCE_MEMORY_ACCESS + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets which generate assembly depending on alignment. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://stackoverflow.com/a/32095106/646947 for details. + * Prefer these methods in priority order (0 > 1 > 2) */ -#if !defined(XXH_FORCE_DIRECT_MEMORY_ACCESS) && ( defined(__ARM_FEATURE_UNALIGNED) ) -# define XXH_FORCE_DIRECT_MEMORY_ACCESS 1 +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define XXH_FORCE_MEMORY_ACCESS 2 +# elif defined(__INTEL_COMPILER) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) +# define XXH_FORCE_MEMORY_ACCESS 1 +# endif #endif /* XXH_ACCEPT_NULL_INPUT_POINTER : @@ -57,8 +68,8 @@ You can contact the author at : * By default, xxHash library provides endian-independant Hash values, based on little-endian convention. * Results are therefore identical for little-endian and big-endian CPU. * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. - * Should endian-independance be of no importance for your application, you may set the #define below to 1. - * It will improve speed for Big-endian CPU. + * Should endian-independance be of no importance for your application, you may set the #define below to 1, + * to improve speed for Big-endian CPU. * This option has no impact on Little_Endian CPU. */ #define XXH_FORCE_NATIVE_FORMAT 0 @@ -66,9 +77,9 @@ You can contact the author at : /* XXH_USELESS_ALIGN_BRANCH : * This is a minor performance trick, only useful with lots of very small keys. * It means : don't make a test between aligned/unaligned, because performance will be the same. - * It avoids one initial branch per hash. + * It saves one initial branch per hash. */ -#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) || defined(XXH_FORCE_DIRECT_MEMORY_ACCESS) +#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) # define XXH_USELESS_ALIGN_BRANCH 1 #endif @@ -125,13 +136,27 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp #endif -#if defined(XXH_FORCE_DIRECT_MEMORY_ACCESS) +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign; + +static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + #else +/* portable and safe solution. Generally efficient. + * see : http://stackoverflow.com/a/32095106/646947 + */ + static U32 XXH_read32(const void* memPtr) { U32 val; @@ -146,7 +171,7 @@ static U64 XXH_read64(const void* memPtr) return val; } -#endif // defined +#endif // XXH_FORCE_DIRECT_MEMORY_ACCESS /****************************************** |