summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYann Collet <yann.collet.73@gmail.com>2014-11-29 15:41:28 (GMT)
committerYann Collet <yann.collet.73@gmail.com>2014-11-29 15:41:28 (GMT)
commitdc43a1f6b5373e4e931f09b89ae7680be5afca69 (patch)
treedeb8b1c5d4844a78a89903112d8d11085acb44dc
parente3d1b4e020d9b34ebfa688270e2a1ca3fd4fecdf (diff)
downloadlz4-dc43a1f6b5373e4e931f09b89ae7680be5afca69.zip
lz4-dc43a1f6b5373e4e931f09b89ae7680be5afca69.tar.gz
lz4-dc43a1f6b5373e4e931f09b89ae7680be5afca69.tar.bz2
Fixed : decompression issue on 32-bits CPU without unaligned memory access
-rw-r--r--examples/Makefile2
-rw-r--r--lz4.c36
-rw-r--r--lz4.h29
-rw-r--r--programs/lz4cli.c15
4 files changed, 37 insertions, 45 deletions
diff --git a/examples/Makefile b/examples/Makefile
index 4474f59..df24ea9 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -32,7 +32,7 @@
CC := $(CC)
CFLAGS ?= -O3
-CFLAGS += -std=c99 -Wall -Wextra -Wundef -Wshadow -Wstrict-prototypes -Wno-missing-braces # Wno-missing-braces required due to GCC <4.8.3 bug
+CFLAGS += -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wstrict-prototypes -Wno-missing-braces # Wno-missing-braces required due to GCC <4.8.3 bug
FLAGS = -I.. $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
TESTFILE= Makefile
diff --git a/lz4.c b/lz4.c
index 2a6f038..f2a8120 100644
--- a/lz4.c
+++ b/lz4.c
@@ -44,10 +44,26 @@
/*
* CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS :
- * You can force the code to use unaligned memory access, should you know your CPU can handle it efficiently.
- * If it effectively results in better speed (up to 50% improvement can be expected)
+ * By default, the source code expects the compiler to correctly optimize
+ * 4-byte and 8-byte reads on architectures able to handle them efficiently.
+ * This is not always the case. In some circumstances (ARM notably),
+ * the compiler will issue cautious code even when target is able to correctly handle unaligned memory accesses.
+ *
+ * You can force the compiler to use unaligned memory access by uncommenting the line below.
+ * One of the below scenarios will happen :
+ * 1 - Your target CPU correctly handles unaligned access, and was not well optimized by compiler (good case).
+ * You will witness large performance improvements (+50% and up).
+ * Keep the line uncommented and send a word to upstream (https://groups.google.com/forum/#!forum/lz4c)
+ * The goal is to automatically detect such situations by adding your target CPU within an exception list.
+ * 2 - Your target CPU correctly handles unaligned access, and was already correctly optimized by compiler
+ * No change will be experienced.
+ * 3 - Your target CPU handles unaligned access inefficiently.
+ * You will experience a performance loss. Comment back the line.
+ * 4 - Your target CPU does not handle unaligned access.
+ * Program will crash.
+ * If it effectively results in better speed (case 1)
* please report your configuration to upstream (https://groups.google.com/forum/#!forum/lz4c)
- * so that an automatic detection macro can be added to mainline.
+ * so that an automatic detection macro can be added for future versions of the library.
*/
/* #define CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS 1 */
@@ -58,7 +74,7 @@
/*
* Automated efficient unaligned memory access detection
* Based on known hardware architectures
- * This list will be updated thanks to Open Source community feedbacks
+ * This list will be updated thanks to user feedback
*/
#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
|| defined(__ARM_FEATURE_UNALIGNED) \
@@ -71,7 +87,10 @@
# define LZ4_UNALIGNED_ACCESS 0
#endif
-/* Define this parameter if your target system or compiler does not support hardware bit count */
+/*
+ * LZ4_FORCE_SW_BITCOUNT
+ * Define this parameter if your target system or compiler does not support hardware bit count
+ */
#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for Windows CE does not support Hardware bit count */
# define LZ4_FORCE_SW_BITCOUNT
#endif
@@ -88,7 +107,7 @@
#ifdef _MSC_VER /* Visual Studio */
# define FORCE_INLINE static __forceinline
-# include <intrin.h> /* For Visual 2005 */
+# include <intrin.h>
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
#else
# ifdef __GNUC__
@@ -961,7 +980,6 @@ FORCE_INLINE int LZ4_decompress_generic(
}
LZ4_wildCopy(op, ip, cpy);
ip += length; op = cpy;
- //LZ4_WILDCOPY(op, ip, cpy); ip -= (op-cpy); op = cpy;
/* get offset */
match = cpy - LZ4_readLE16(ip); ip+=2;
@@ -1018,7 +1036,7 @@ FORCE_INLINE int LZ4_decompress_generic(
/* copy repeated sequence */
cpy = op + length;
- if (unlikely((op-match)<(int)STEPSIZE))
+ if (unlikely((op-match)<8))
{
const size_t dec64 = dec64table[op-match];
op[0] = match[0];
@@ -1036,7 +1054,7 @@ FORCE_INLINE int LZ4_decompress_generic(
if (op < oend-8)
{
LZ4_wildCopy(op, match, oend-8);
- match += oend-8 - op;
+ match += (oend-8) - op;
op = oend-8;
}
while (op<cpy) *op++ = *match++;
diff --git a/lz4.h b/lz4.h
index 8b03995..22bbcb5 100644
--- a/lz4.h
+++ b/lz4.h
@@ -169,7 +169,7 @@ int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedS
/***********************************************
- Experimental Streaming Compression Functions
+ Streaming Compression Functions
***********************************************/
#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
@@ -231,20 +231,17 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_streamPtr, char* safeBuffer, int dictSize);
/************************************************
- Experimental Streaming Decompression Functions
+ Streaming Decompression Functions
************************************************/
#define LZ4_STREAMDECODESIZE_U64 4
#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
+typedef struct { unsigned long long table[LZ4_STREAMDECODESIZE_U64]; } LZ4_streamDecode_t;
/*
* LZ4_streamDecode_t
* information structure to track an LZ4 stream.
- * important : init this structure content using LZ4_setStreamDecode or memset() before first use !
- */
-typedef struct { unsigned long long table[LZ4_STREAMDECODESIZE_U64]; } LZ4_streamDecode_t;
-
-/*
- * If you prefer dynamic allocation methods,
+ * init this structure content using LZ4_setStreamDecode or memset() before first use !
+ * If you prefer dynamic allocation methods :
* LZ4_createStreamDecode will allocate and initialize an LZ4_streamDecode_t structure
* LZ4_freeStreamDecode releases its memory.
*/
@@ -254,9 +251,7 @@ int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
/*
* LZ4_setStreamDecode
* Use this function to instruct where to find the dictionary.
- * This function can be used to specify a static dictionary,
- * or to instruct where to find some previously decoded data saved into a different memory space.
- * Setting a size of 0 is allowed (same effect as no dictionary, same effect as reset).
+ * Setting a size of 0 is allowed (same effect as reset).
* Return : 1 if OK, 0 if error
*/
int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
@@ -277,7 +272,7 @@ Advanced decoding functions :
*_usingDict() :
These decoding functions work the same as
a combination of LZ4_setDictDecode() followed by LZ4_decompress_x_continue()
- They don't use nor update an LZ4_streamDecode_t structure.
+ They are stand-alone, and neither use nor update an LZ4_streamDecode_t structure.
*/
int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize);
int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize);
@@ -294,18 +289,10 @@ They are only provided here for compatibility with older user programs.
- LZ4_uncompress is the same as LZ4_decompress_fast
- LZ4_uncompress_unknownOutputSize is the same as LZ4_decompress_safe
These function prototypes are now disabled; uncomment them if you really need them.
-It is highly recommended to stop using these functions and migrated to newer ones */
+It is highly recommended to stop using these functions and migrate to newer ones */
/* int LZ4_uncompress (const char* source, char* dest, int outputSize); */
/* int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); */
-/*
- * If you prefer dynamic allocation methods,
- * LZ4_createStreamDecode()
- * provides a pointer (void*) towards an initialized LZ4_streamDecode_t structure.
- * LZ4_free just frees it.
- */
-/* void* LZ4_createStreamDecode(void); */
-/*int LZ4_free (void* LZ4_stream); yes, it's the same one as for compression */
/* Obsolete streaming functions; use new streaming interface whenever possible */
void* LZ4_create (const char* inputBuffer);
diff --git a/programs/lz4cli.c b/programs/lz4cli.c
index 9a6e5bc..6e52ec6 100644
--- a/programs/lz4cli.c
+++ b/programs/lz4cli.c
@@ -48,10 +48,6 @@
# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant
#endif
-#ifdef __clang__
-# pragma clang diagnostic ignored "-Wunused-const-variable" // const variable one is really used !
-#endif
-
#define _FILE_OFFSET_BITS 64 // Large file support on 32-bits unix
#define _POSIX_SOURCE 1 // for fileno() within <stdio.h> on unix
@@ -128,15 +124,6 @@
//**************************************
-// Architecture Macros
-//**************************************
-static const int one = 1;
-#define CPU_LITTLE_ENDIAN (*(char*)(&one))
-#define CPU_BIG_ENDIAN (!CPU_LITTLE_ENDIAN)
-#define LITTLE_ENDIAN_32(i) (CPU_LITTLE_ENDIAN?(i):swap32(i))
-
-
-//**************************************
// Macros
//**************************************
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
@@ -462,7 +449,7 @@ int main(int argc, char** argv)
}
DISPLAYLEVEL(3, WELCOME_MESSAGE);
- DISPLAYLEVEL(4, "Blocks size : %i KB\n", blockSize>>10);
+ if (!decode) DISPLAYLEVEL(4, "Blocks size : %i KB\n", blockSize>>10);
// No input filename ==> use stdin
if(!input_filename) { input_filename=stdinmark; }