summary | refs | log | tree | commit | diff | stats
path: root/lib
diff options
context:
space:
mode:
author: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> 2021-11-22 19:23:41 (GMT)
committer: Arseny Kapoulkine <arseny.kapoulkine@gmail.com> 2021-11-22 19:23:41 (GMT)
commit: 22e232dadaa36341afce0d3f114a3846273f0d2f (patch)
tree: 95c00d8d9e569295d06d06ac042858c47164ffe9 /lib
parent: 8f61d8eb7c6979769a484cde8df61ff7c4c77765 (diff)
download: lz4-22e232dadaa36341afce0d3f114a3846273f0d2f.zip
lz4-22e232dadaa36341afce0d3f114a3846273f0d2f.tar.gz
lz4-22e232dadaa36341afce0d3f114a3846273f0d2f.tar.bz2
Enable fast decoding on Apple/AArch64 builds
This makes decoding significantly faster on M1; measured on compressed source code across 8 hardware threads, decompressing 294 MB to 1301 MB takes 513 ms of cumulative work (2.53 GB/s) before, and 406 ms (3.2 GB/s) after this change on M1 Pro. There's no way to check if the target architecture is M1 specifically, but the gains are likely to be similar on recent iterations of Apple processors, and the original performance issue was probably more specific to Qualcomm.
Diffstat (limited to 'lib')
-rw-r--r-- lib/lz4.c 8
1 files changed, 5 insertions, 3 deletions
diff --git a/lib/lz4.c b/lib/lz4.c
index 5499547..2867c60 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -421,10 +421,12 @@ static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
#ifndef LZ4_FAST_DEC_LOOP
# if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
# define LZ4_FAST_DEC_LOOP 1
+# elif defined(__aarch64__) && defined(__APPLE__)
+# define LZ4_FAST_DEC_LOOP 1
# elif defined(__aarch64__) && !defined(__clang__)
- /* On aarch64, we disable this optimization for clang because on certain
- * mobile chipsets, performance is reduced with clang. For information
- * refer to https://github.com/lz4/lz4/pull/707 */
+ /* On non-Apple aarch64, we disable this optimization for clang because
+ * on certain mobile chipsets, performance is reduced with clang. For
+ * more information refer to https://github.com/lz4/lz4/pull/707 */
# define LZ4_FAST_DEC_LOOP 1
# else
# define LZ4_FAST_DEC_LOOP 0