From 0f2bf0c54e59d7a926074907556cff883a47f9c5 Mon Sep 17 00:00:00 2001
From: Yann Collet
Date: Sun, 16 Aug 2015 01:54:55 +0100
Subject: Improved performance on ARMv6

---
 lib/lz4.c    | 50 +++++++++++++++++++++++++++++++++++---------------
 lib/xxhash.c |  2 +-
 2 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/lib/lz4.c b/lib/lz4.c
index 21390c9..d808f25 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -53,6 +53,17 @@
 /**************************************
 *  CPU Feature Detection
 **************************************/
+/* LZ4_FORCE_DIRECT_MEMORY_ACCESS
+ * Unaligned memory access is automatically enabled for "common" CPU, such as x86/x64.
+ * For others CPU, the compiler will be more cautious, and insert extra code to ensure proper working with unaligned memory accesses.
+ * If you know your target CPU efficiently supports unaligned memory accesses, you can force this option manually.
+ * If your CPU efficiently supports unaligned memory accesses and the compiler did not automatically detected it, you will witness large performance improvement.
+ * You can also enable this switch from compilation command line / Makefile.
+ */
+#if !defined(LZ4_FORCE_DIRECT_MEMORY_ACCESS) && ( defined(__ARM_FEATURE_UNALIGNED) )
+# define LZ4_FORCE_DIRECT_MEMORY_ACCESS 1
+#endif
+
 /*
  * LZ4_FORCE_SW_BITCOUNT
  * Define this parameter if your target system or compiler does not support hardware bit count
@@ -141,6 +152,13 @@ static unsigned LZ4_isLittleEndian(void)
     return one.c[0];
 }
 
+#if defined(LZ4_FORCE_DIRECT_MEMORY_ACCESS)
+
+static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
+static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }
+static size_t LZ4_read_ARCH(const void* memPtr) { return *(const size_t*) memPtr; }
+
+#else
 
 static U16 LZ4_read16(const void* memPtr)
 {
@@ -149,6 +167,23 @@ static U16 LZ4_read16(const void* memPtr)
     return val16;
 }
 
+static U32 LZ4_read32(const void* memPtr)
+{
+    U32 val32;
+    memcpy(&val32, memPtr, 4);
+    return val32;
+}
+
+static size_t LZ4_read_ARCH(const void* memPtr)
+{
+    size_t val;
+    memcpy(&val, memPtr, sizeof(val));
+    return val;
+}
+
+#endif // LZ4_FORCE_DIRECT_MEMORY_ACCESS
+
+
 static U16 LZ4_readLE16(const void* memPtr)
 {
     if (LZ4_isLittleEndian())
@@ -176,21 +211,6 @@ static void LZ4_writeLE16(void* memPtr, U16 value)
     }
 }
 
-static U32 LZ4_read32(const void* memPtr)
-{
-    U32 val32;
-    memcpy(&val32, memPtr, 4);
-    return val32;
-}
-
-static size_t LZ4_read_ARCH(const void* memPtr)
-{
-    size_t val;
-    memcpy(&val, memPtr, sizeof(val));
-    return val;
-}
-
-
 /* customized variant of memcpy, which can overwrite up to 7 bytes beyond dstEnd */
 static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
 {
diff --git a/lib/xxhash.c b/lib/xxhash.c
index a18b978..a80f1d5 100644
--- a/lib/xxhash.c
+++ b/lib/xxhash.c
@@ -35,7 +35,7 @@ You can contact the author at :
 /**************************************
 *  Tuning parameters
 **************************************/
-/* XXH_FORCE_DIRECT_UNALIGNED_MEMORY_ACCESS
+/* XXH_FORCE_DIRECT_MEMORY_ACCESS
  * Unaligned memory access is automatically enabled for "common" CPU, such as x86/x64.
  * For others CPU, the compiler will be more cautious, and insert extra code to ensure proper working with unaligned memory accesses.
  * If you know your target CPU efficiently supports unaligned memory accesses, you can force this option manually.
-- 
cgit v0.12
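
Editor's note: the hunks above switch LZ4_read16/LZ4_read32/LZ4_read_ARCH between a direct pointer cast (when LZ4_FORCE_DIRECT_MEMORY_ACCESS is set, which the patch enables automatically when the compiler defines __ARM_FEATURE_UNALIGNED, as GCC and Clang do on ARMv6+ targets with unaligned access enabled) and the portable memcpy-based fallback. Below is a minimal standalone sketch of the same idea, not part of the patch: the read32 helper mirrors the patched code, while the demo main() and buffer are hypothetical additions for illustration.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Mirror of the patch's detection: force direct loads only when the
 * compiler itself reports unaligned-access support. */
#if !defined(LZ4_FORCE_DIRECT_MEMORY_ACCESS) && defined(__ARM_FEATURE_UNALIGNED)
#  define LZ4_FORCE_DIRECT_MEMORY_ACCESS 1
#endif

#if defined(LZ4_FORCE_DIRECT_MEMORY_ACCESS)
/* Direct cast: compiles to a single load, valid only where the target
 * tolerates unaligned addresses (the macro gate above ensures this). */
static uint32_t read32(const void* memPtr) { return *(const uint32_t*)memPtr; }
#else
/* Portable fallback: memcpy keeps the access well-defined for any
 * alignment; good compilers still reduce it to one load on x86/x64. */
static uint32_t read32(const void* memPtr)
{
    uint32_t val32;
    memcpy(&val32, memPtr, 4);
    return val32;
}
#endif

int main(void)
{
    unsigned char buf[8] = { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88 };
    /* Deliberately unaligned offset: exercises whichever path was compiled. */
    printf("read32(buf+1) = 0x%08x\n", (unsigned)read32(buf + 1));
    return 0;
}

On a little-endian target both paths print 0x55443322; the two variants differ only in the load sequence the compiler emits, which is where the ARMv6 speedup in this commit comes from.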