summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYann Collet <cyan@fb.com>2019-04-02 23:22:11 (GMT)
committerYann Collet <cyan@fb.com>2019-04-02 23:22:11 (GMT)
commit2589c4424ff56a9e6bb37b2be394e5e0c376e7a5 (patch)
tree24d6e8195830c487ab6aa6c1563f955d7a6538d8
parent7d9d00f4df14f02a59f9c605a08af52a97032262 (diff)
downloadlz4-2589c4424ff56a9e6bb37b2be394e5e0c376e7a5.zip
lz4-2589c4424ff56a9e6bb37b2be394e5e0c376e7a5.tar.gz
lz4-2589c4424ff56a9e6bb37b2be394e5e0c376e7a5.tar.bz2
created LZ4_FAST_DEC_LOOP build macro
-rw-r--r--lib/README.md19
-rw-r--r--lib/lz4.c21
2 files changed, 31 insertions, 9 deletions
diff --git a/lib/README.md b/lib/README.md
index a705de6..e9b221f 100644
--- a/lib/README.md
+++ b/lib/README.md
@@ -42,17 +42,28 @@ Should they be nonetheless needed, it's possible to force their publication
by using build macro `LZ4_PUBLISH_STATIC_FUNCTIONS`.
+#### Build macros
+
+The following build macro can be defined at compilation time :
+
+- `LZ4_FAST_DEC_LOOP` : this triggers the optimized decompression loop.
+ This loop works great on x86/x64 cpus, and is automatically enabled on these platforms.
+ It's possible to enable or disable it manually, by passing `LZ4_FAST_DEC_LOOP=1` or `0` to the preprocessor.
+ Typically with `gcc` : `-DLZ4_FAST_DEC_LOOP=1`,
+ and with `make` : `CPPFLAGS+=-DLZ4_FAST_DEC_LOOP=1 make lz4`.
+
+
#### Amalgamation
-lz4 code is able to be amalgamated into a single file.
-We can combine all source code in `lz4_all.c` by using following command,
+lz4 source code can be amalgamated into a single file.
+One can combine all source code into `lz4_all.c` by using the following command:
```
cat lz4.c > lz4_all.c
cat lz4hc.c >> lz4_all.c
cat lz4frame.c >> lz4_all.c
```
-and compile `lz4_all.c`.
-It's necessary to include all `*.h` files present in `/lib` together with `lz4_all.c`.
+(`cat` file order is important) then compile `lz4_all.c`.
+All `*.h` files present in `/lib` remain necessary to compile `lz4_all.c`.
#### Windows : using MinGW+MSYS to create DLL
diff --git a/lib/lz4.c b/lib/lz4.c
index de744c5..dafd972 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -202,6 +202,7 @@
typedef size_t reg_t; /* 32-bits in x32 mode */
#endif
+
/*-************************************
* Reading and writing into memory
**************************************/
@@ -235,7 +236,7 @@ static reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArc
static void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
-#else /* safe and portable access through memcpy() */
+#else /* safe and portable access using memcpy() */
static U16 LZ4_read16(const void* memPtr)
{
@@ -301,7 +302,15 @@ static const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
-#if defined(__i386__) || defined(__x86_64__)
+#ifndef LZ4_FAST_DEC_LOOP
+# if defined(__i386__) || defined(__x86_64__)
+# define LZ4_FAST_DEC_LOOP 1
+# else
+# define LZ4_FAST_DEC_LOOP 0
+# endif
+#endif
+
+#if LZ4_FAST_DEC_LOOP
LZ4_FORCE_O2_INLINE_GCC_PPC64LE
void LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset) {
if (offset < 8) {
@@ -367,6 +376,8 @@ void LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, con
}
}
#endif
+
+
/*-************************************
* Common Constants
**************************************/
@@ -1590,7 +1601,7 @@ LZ4_decompress_generic(
if ((endOnInput) && unlikely(srcSize==0)) return -1;
/* Currently the fast loop shows a regression on qualcomm arm chips. */
-#if defined(__i386__) || defined(__x86_64__)
+#if LZ4_FAST_DEC_LOOP
if ((oend - op) < FASTLOOP_SAFE_DISTANCE)
goto safe_decode;
@@ -1773,7 +1784,7 @@ LZ4_decompress_generic(
/* copy literals */
cpy = op+length;
-#if defined(__i386__) || defined(__x86_64__)
+#if LZ4_FAST_DEC_LOOP
safe_literal_copy:
#endif
LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
@@ -1823,7 +1834,7 @@ LZ4_decompress_generic(
}
length += MINMATCH;
-#if defined(__i386__) || defined(__x86_64__)
+#if LZ4_FAST_DEC_LOOP
safe_match_copy:
#endif
/* match starting within external dictionary */