path: root/lib/xxhash.c
author    Yann Collet <yann.collet.73@gmail.com>  2015-08-19 16:54:19 (GMT)
committer Yann Collet <yann.collet.73@gmail.com>  2015-08-19 16:54:19 (GMT)
commit    e5aee601ef4f04cfdfa7a5b768a8670d7a77743d (patch)
tree      131368cd9bb4ef3111cdb6f5fe64465a59bcc2fe /lib/xxhash.c
parent    e64345506845034454be3c1fdd19e9397e8a2b6d (diff)
updated xxhash
Diffstat (limited to 'lib/xxhash.c')
-rw-r--r--  lib/xxhash.c | 53
1 file changed, 39 insertions(+), 14 deletions(-)
diff --git a/lib/xxhash.c b/lib/xxhash.c
index a80f1d5..511d994 100644
--- a/lib/xxhash.c
+++ b/lib/xxhash.c
@@ -35,15 +35,26 @@ You can contact the author at :
/**************************************
* Tuning parameters
**************************************/
-/* XXH_FORCE_DIRECT_MEMORY_ACCESS
- * Unaligned memory access is automatically enabled for "common" CPU, such as x86/x64.
- * For others CPU, the compiler will be more cautious, and insert extra code to ensure proper working with unaligned memory accesses.
- * If you know your target CPU efficiently supports unaligned memory accesses, you can force this option manually.
- * If your CPU efficiently supports unaligned memory accesses and the compiler did not automatically detected it, you will witness large performance improvement.
- * You can also enable this switch from compilation command line / Makefile.
+/* XXH_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
+ * This method is safe if your compiler supports it, and *generally* as fast as or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violates the C standard.
+ * It can generate buggy code on targets where the generated assembly depends on data alignment.
+ * In some circumstances, however, it is the only known way to get the best performance (e.g., GCC + ARMv6).
+ * See http://stackoverflow.com/a/32095106/646947 for details.
+ * Prefer these methods in priority order (0 > 1 > 2).
*/
-#if !defined(XXH_FORCE_DIRECT_MEMORY_ACCESS) && ( defined(__ARM_FEATURE_UNALIGNED) )
-# define XXH_FORCE_DIRECT_MEMORY_ACCESS 1
+#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
+# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+# define XXH_FORCE_MEMORY_ACCESS 2
+# elif defined(__INTEL_COMPILER) || \
+ (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+# define XXH_FORCE_MEMORY_ACCESS 1
+# endif
#endif
/* XXH_ACCEPT_NULL_INPUT_POINTER :
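For context, and purely as an illustrative sketch not contained in this commit: the comment above notes that XXH_FORCE_MEMORY_ACCESS can be defined externally. Assuming a typical C toolchain, it could be set either on the compiler command line or ahead of the tuning block, for example:

    /* Hypothetical usage sketch (not from the patch):
     *   cc -O3 -DXXH_FORCE_MEMORY_ACCESS=2 -c lib/xxhash.c
     * or, equivalently, by adding a definition above the tuning block: */
    #ifndef XXH_FORCE_MEMORY_ACCESS
    #  define XXH_FORCE_MEMORY_ACCESS 1  /* 0: memcpy (default), 1: __packed union, 2: direct load */
    #endif

Leaving the macro undefined keeps the auto-detection logic shown in the hunk above; any value other than 1 or 2 falls back to the `memcpy()` path.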
@@ -57,8 +68,8 @@ You can contact the author at :
* By default, the xxHash library provides endian-independent hash values, based on a little-endian convention.
* Results are therefore identical for little-endian and big-endian CPUs.
* This comes at a performance cost for big-endian CPUs, since some swapping is required to emulate the little-endian format.
- * Should endian-independance be of no importance for your application, you may set the #define below to 1.
- * It will improve speed for Big-endian CPU.
+ * Should endian-independence be of no importance for your application, you may set the #define below to 1,
+ * to improve speed for big-endian CPUs.
* This option has no impact on little-endian CPUs.
*/
#define XXH_FORCE_NATIVE_FORMAT 0
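As a side note (illustrative only, not part of the commit): "little-endian convention" here means every 32/64-bit lane of the input is interpreted as if it were loaded on a little-endian machine, so a big-endian CPU pays for a byte swap on each read; that swap is exactly what XXH_FORCE_NATIVE_FORMAT=1 removes. A self-contained sketch of such a byte-order-independent read:

    /* Illustrative only: reads four bytes and always yields their
     * little-endian interpretation, regardless of host endianness. */
    #include <stdint.h>
    static uint32_t read32_le(const unsigned char* p)
    {
        return  (uint32_t)p[0]
             | ((uint32_t)p[1] << 8)
             | ((uint32_t)p[2] << 16)
             | ((uint32_t)p[3] << 24);
    }

On a little-endian host this idiom typically compiles down to a single load; on a big-endian host it costs the swap described above.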
@@ -66,9 +77,9 @@ You can contact the author at :
/* XXH_USELESS_ALIGN_BRANCH :
* This is a minor performance trick, only useful with lots of very small keys.
* It means : skip the aligned/unaligned test, because performance will be the same either way.
- * It avoids one initial branch per hash.
+ * It saves one initial branch per hash.
*/
-#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) || defined(XXH_FORCE_DIRECT_MEMORY_ACCESS)
+#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
# define XXH_USELESS_ALIGN_BRANCH 1
#endif
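To make "one initial branch per hash" concrete, here is a rough, hypothetical sketch of the kind of dispatch being skipped; the function names are placeholders, not the ones used in xxhash.c:

    #include <stddef.h>
    #include <stdint.h>

    /* Placeholder stubs standing in for the real aligned/unaligned code paths. */
    static uint32_t hash_aligned  (const void* p, size_t len, uint32_t seed) { (void)p; (void)len; return seed; }
    static uint32_t hash_unaligned(const void* p, size_t len, uint32_t seed) { (void)p; (void)len; return seed; }

    static uint32_t hash_dispatch(const void* input, size_t len, uint32_t seed)
    {
    #if !defined(XXH_USELESS_ALIGN_BRANCH)
        if (((uintptr_t)input & 3) == 0)            /* 4-byte aligned? take the aligned path */
            return hash_aligned(input, len, seed);
    #endif
        return hash_unaligned(input, len, seed);    /* otherwise, or always when the branch is useless */
    }

On x86/x64 the two paths perform the same, which is why the #define above removes the test altogether.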
@@ -125,13 +136,27 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp
#endif
-#if defined(XXH_FORCE_DIRECT_MEMORY_ACCESS)
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+/* Force direct memory access. Only works on CPUs which support unaligned memory accesses in hardware */
static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/* __packed attributes are safer, but compiler-specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign;
+
+static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
#else
+/* portable and safe solution. Generally efficient.
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+
static U32 XXH_read32(const void* memPtr)
{
U32 val;
@@ -146,7 +171,7 @@ static U64 XXH_read64(const void* memPtr)
return val;
}
-#endif // defined
+#endif // XXH_FORCE_MEMORY_ACCESS
/******************************************
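For readers skimming the truncated hunk above: the default (method 0) readers copy the bytes into a local variable with memcpy(), which modern compilers typically lower to a single load where that is legal. The diff elides the middle of XXH_read32(); reconstructed from the surrounding lines (so treat it as a sketch, not a quotation), the pair looks like:

    #include <string.h>                 /* memcpy */
    typedef unsigned int       U32;     /* stand-ins for the typedefs earlier in xxhash.c */
    typedef unsigned long long U64;

    static U32 XXH_read32(const void* memPtr)
    {
        U32 val;
        memcpy(&val, memPtr, sizeof(val));   /* alignment-agnostic, standard-conforming read */
        return val;
    }

    static U64 XXH_read64(const void* memPtr)
    {
        U64 val;
        memcpy(&val, memPtr, sizeof(val));
        return val;
    }

This is the variant selected when XXH_FORCE_MEMORY_ACCESS is left undefined (or set to 0), per the selection logic introduced at the top of this patch.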