path: root/lib/lz4.c
author     Yann Collet <yann.collet.73@gmail.com>    2015-08-16 00:54:55 (GMT)
committer  Yann Collet <yann.collet.73@gmail.com>    2015-08-16 00:54:55 (GMT)
commit     0f2bf0c54e59d7a926074907556cff883a47f9c5 (patch)
tree       38c3ffe272c6121fc2f5b090a3790287a5045639 /lib/lz4.c
parent     fb4d3ef2c487e9dad7978ec4ee558c04cb8fec9d (diff)
Improved performance on ARMv6
Diffstat (limited to 'lib/lz4.c')
-rw-r--r--  lib/lz4.c  50
1 files changed, 35 insertions, 15 deletions
diff --git a/lib/lz4.c b/lib/lz4.c
index 21390c9..d808f25 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -53,6 +53,17 @@
/**************************************
* CPU Feature Detection
**************************************/
+/* LZ4_FORCE_DIRECT_MEMORY_ACCESS
+ * Unaligned memory access is automatically enabled for "common" CPUs, such as x86/x64.
+ * For other CPUs, the compiler will be more cautious and insert extra code to ensure correct handling of unaligned memory accesses.
+ * If you know your target CPU efficiently supports unaligned memory accesses, you can force this option manually.
+ * If your CPU efficiently supports unaligned memory accesses and the compiler did not detect it automatically, you will see a large performance improvement.
+ * You can also enable this switch from the compilation command line / Makefile.
+ */
+#if !defined(LZ4_FORCE_DIRECT_MEMORY_ACCESS) && ( defined(__ARM_FEATURE_UNALIGNED) )
+# define LZ4_FORCE_DIRECT_MEMORY_ACCESS 1
+#endif
+
/*
* LZ4_FORCE_SW_BITCOUNT
* Define this parameter if your target system or compiler does not support hardware bit count
@@ -141,6 +152,13 @@ static unsigned LZ4_isLittleEndian(void)
return one.c[0];
}
+#if defined(LZ4_FORCE_DIRECT_MEMORY_ACCESS)
+
+static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
+static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }
+static size_t LZ4_read_ARCH(const void* memPtr) { return *(const size_t*) memPtr; }
+
+#else
static U16 LZ4_read16(const void* memPtr)
{
@@ -149,6 +167,23 @@ static U16 LZ4_read16(const void* memPtr)
return val16;
}
+static U32 LZ4_read32(const void* memPtr)
+{
+ U32 val32;
+ memcpy(&val32, memPtr, 4);
+ return val32;
+}
+
+static size_t LZ4_read_ARCH(const void* memPtr)
+{
+ size_t val;
+ memcpy(&val, memPtr, sizeof(val));
+ return val;
+}
+
+#endif // LZ4_FORCE_DIRECT_MEMORY_ACCESS
+
+
static U16 LZ4_readLE16(const void* memPtr)
{
if (LZ4_isLittleEndian())
@@ -176,21 +211,6 @@ static void LZ4_writeLE16(void* memPtr, U16 value)
}
}
-static U32 LZ4_read32(const void* memPtr)
-{
- U32 val32;
- memcpy(&val32, memPtr, 4);
- return val32;
-}
-
-static size_t LZ4_read_ARCH(const void* memPtr)
-{
- size_t val;
- memcpy(&val, memPtr, sizeof(val));
- return val;
-}
-
-
/* customized variant of memcpy, which can overwrite up to 7 bytes beyond dstEnd */
static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
{
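
As the new comment notes, LZ4_FORCE_DIRECT_MEMORY_ACCESS can also be set from the build instead of editing the source. A minimal sketch, assuming a typical GCC/Clang invocation (the project's actual Makefile flags may differ):

    cc -O3 -DLZ4_FORCE_DIRECT_MEMORY_ACCESS=1 -c lib/lz4.c -o lz4.o

Only force it when the target CPU genuinely handles unaligned loads efficiently; otherwise the memcpy-based readers added in this patch remain the portable default.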