diff options
author | Yann Collet <yann.collet.73@gmail.com> | 2014-11-29 15:41:28 (GMT) |
---|---|---|
committer | Yann Collet <yann.collet.73@gmail.com> | 2014-11-29 15:41:28 (GMT) |
commit | dc43a1f6b5373e4e931f09b89ae7680be5afca69 (patch) | |
tree | deb8b1c5d4844a78a89903112d8d11085acb44dc | |
parent | e3d1b4e020d9b34ebfa688270e2a1ca3fd4fecdf (diff) | |
download | lz4-dc43a1f6b5373e4e931f09b89ae7680be5afca69.zip lz4-dc43a1f6b5373e4e931f09b89ae7680be5afca69.tar.gz lz4-dc43a1f6b5373e4e931f09b89ae7680be5afca69.tar.bz2 |
Fixed : decompression issue on 32-bits CPU without unaligned memory access
-rw-r--r-- | examples/Makefile | 2 | ||||
-rw-r--r-- | lz4.c | 36 | ||||
-rw-r--r-- | lz4.h | 29 | ||||
-rw-r--r-- | programs/lz4cli.c | 15 |
4 files changed, 37 insertions, 45 deletions
diff --git a/examples/Makefile b/examples/Makefile index 4474f59..df24ea9 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -32,7 +32,7 @@ CC := $(CC) CFLAGS ?= -O3 -CFLAGS += -std=c99 -Wall -Wextra -Wundef -Wshadow -Wstrict-prototypes -Wno-missing-braces # Wno-missing-braces required due to GCC <4.8.3 bug +CFLAGS += -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wstrict-prototypes -Wno-missing-braces # Wno-missing-braces required due to GCC <4.8.3 bug FLAGS = -I.. $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) TESTFILE= Makefile @@ -44,10 +44,26 @@ /* * CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS : - * You can force the code to use unaligned memory access, should you know your CPU can handle it efficiently. - * If it effectively results in better speed (up to 50% improvement can be expected) + * By default, the source code expects the compiler to correctly optimize + * 4-bytes and 8-bytes read on architectures able to handle it efficiently. + * This is not always the case. In some circumstances (ARM notably), + * the compiler will issue cautious code even when target is able to correctly handle unaligned memory accesses. + * + * You can force the compiler to use unaligned memory access by uncommenting the line below. + * One of the below scenarios will happen : + * 1 - Your target CPU correctly handle unaligned access, and was not well optimized by compiler (good case). + * You will witness large performance improvements (+50% and up). + * Keep the line uncommented and send a word to upstream (https://groups.google.com/forum/#!forum/lz4c) + * The goal is to automatically detect such situations by adding your target CPU within an exception list. + * 2 - Your target CPU correctly handle unaligned access, and was already correctly optimized by compiler + * No change will be experienced. + * 3 - Your target CPU inefficiently handle unaligned access. + * You will experience a performance loss. Comment back the line. + * 4 - Your target CPU does not handle unaligned access. + * Program will crash. + * If it effectively results in better speed (case 1) * please report your configuration to upstream (https://groups.google.com/forum/#!forum/lz4c) - * so that an automatic detection macro can be added to mainline. + * so that an automatic detection macro can be added for future versions of the library. */ /* #define CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS 1 */ @@ -58,7 +74,7 @@ /* * Automated efficient unaligned memory access detection * Based on known hardware architectures - * This list will be updated thanks to Open Source community feedbacks + * This list will be updated thanks to feedbacks */ #if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \ || defined(__ARM_FEATURE_UNALIGNED) \ @@ -71,7 +87,10 @@ # define LZ4_UNALIGNED_ACCESS 0 #endif -/* Define this parameter if your target system or compiler does not support hardware bit count */ +/* + * LZ4_FORCE_SW_BITCOUNT + * Define this parameter if your target system or compiler does not support hardware bit count + */ #if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for Windows CE does not support Hardware bit count */ # define LZ4_FORCE_SW_BITCOUNT #endif @@ -88,7 +107,7 @@ #ifdef _MSC_VER /* Visual Studio */ # define FORCE_INLINE static __forceinline -# include <intrin.h> /* For Visual 2005 */ +# include <intrin.h> # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ #else # ifdef __GNUC__ @@ -961,7 +980,6 @@ FORCE_INLINE int LZ4_decompress_generic( } LZ4_wildCopy(op, ip, cpy); ip += length; op = cpy; - //LZ4_WILDCOPY(op, ip, cpy); ip -= (op-cpy); op = cpy; /* get offset */ match = cpy - LZ4_readLE16(ip); ip+=2; @@ -1018,7 +1036,7 @@ FORCE_INLINE int LZ4_decompress_generic( /* copy repeated sequence */ cpy = op + length; - if (unlikely((op-match)<(int)STEPSIZE)) + if (unlikely((op-match)<8)) { const size_t dec64 = dec64table[op-match]; op[0] = match[0]; @@ -1036,7 +1054,7 @@ FORCE_INLINE int LZ4_decompress_generic( if (op < oend-8) { LZ4_wildCopy(op, match, oend-8); - match += oend-8 - op; + match += (oend-8) - op; op = oend-8; } while (op<cpy) *op++ = *match++; @@ -169,7 +169,7 @@ int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedS /*********************************************** - Experimental Streaming Compression Functions + Streaming Compression Functions ***********************************************/ #define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4) @@ -231,20 +231,17 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_streamPtr, char* safeBuffer, int dictSize); /************************************************ - Experimental Streaming Decompression Functions + Streaming Decompression Functions ************************************************/ #define LZ4_STREAMDECODESIZE_U64 4 #define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long)) +typedef struct { unsigned long long table[LZ4_STREAMDECODESIZE_U64]; } LZ4_streamDecode_t; /* * LZ4_streamDecode_t * information structure to track an LZ4 stream. - * important : init this structure content using LZ4_setStreamDecode or memset() before first use ! - */ -typedef struct { unsigned long long table[LZ4_STREAMDECODESIZE_U64]; } LZ4_streamDecode_t; - -/* - * If you prefer dynamic allocation methods, + * init this structure content using LZ4_setStreamDecode or memset() before first use ! + * If you prefer dynamic allocation methods : * LZ4_createStreamDecode will allocate and initialize an LZ4_streamDecode_t structure * LZ4_freeStreamDecode releases its memory. */ @@ -254,9 +251,7 @@ int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream); /* * LZ4_setStreamDecode * Use this function to instruct where to find the dictionary. - * This function can be used to specify a static dictionary, - * or to instruct where to find some previously decoded data saved into a different memory space. - * Setting a size of 0 is allowed (same effect as no dictionary, same effect as reset). + * Setting a size of 0 is allowed (same effect as reset). * Return : 1 if OK, 0 if error */ int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize); @@ -277,7 +272,7 @@ Advanced decoding functions : *_usingDict() : These decoding functions work the same as a combination of LZ4_setDictDecode() followed by LZ4_decompress_x_continue() - They don't use nor update an LZ4_streamDecode_t structure. + They are stand-alone and don't use nor update an LZ4_streamDecode_t structure. */ int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize); int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize); @@ -294,18 +289,10 @@ They are only provided here for compatibility with older user programs. - LZ4_uncompress is the same as LZ4_decompress_fast - LZ4_uncompress_unknownOutputSize is the same as LZ4_decompress_safe These function prototypes are now disabled; uncomment them if you really need them. -It is highly recommended to stop using these functions and migrated to newer ones */ +It is highly recommended to stop using these functions and migrate to newer ones */ /* int LZ4_uncompress (const char* source, char* dest, int outputSize); */ /* int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); */ -/* - * If you prefer dynamic allocation methods, - * LZ4_createStreamDecode() - * provides a pointer (void*) towards an initialized LZ4_streamDecode_t structure. - * LZ4_free just frees it. - */ -/* void* LZ4_createStreamDecode(void); */ -/*int LZ4_free (void* LZ4_stream); yes, it's the same one as for compression */ /* Obsolete streaming functions; use new streaming interface whenever possible */ void* LZ4_create (const char* inputBuffer); diff --git a/programs/lz4cli.c b/programs/lz4cli.c index 9a6e5bc..6e52ec6 100644 --- a/programs/lz4cli.c +++ b/programs/lz4cli.c @@ -48,10 +48,6 @@ # pragma warning(disable : 4127) // disable: C4127: conditional expression is constant #endif -#ifdef __clang__ -# pragma clang diagnostic ignored "-Wunused-const-variable" // const variable one is really used ! -#endif - #define _FILE_OFFSET_BITS 64 // Large file support on 32-bits unix #define _POSIX_SOURCE 1 // for fileno() within <stdio.h> on unix @@ -128,15 +124,6 @@ //************************************** -// Architecture Macros -//************************************** -static const int one = 1; -#define CPU_LITTLE_ENDIAN (*(char*)(&one)) -#define CPU_BIG_ENDIAN (!CPU_LITTLE_ENDIAN) -#define LITTLE_ENDIAN_32(i) (CPU_LITTLE_ENDIAN?(i):swap32(i)) - - -//************************************** // Macros //************************************** #define DISPLAY(...) fprintf(stderr, __VA_ARGS__) @@ -462,7 +449,7 @@ int main(int argc, char** argv) } DISPLAYLEVEL(3, WELCOME_MESSAGE); - DISPLAYLEVEL(4, "Blocks size : %i KB\n", blockSize>>10); + if (!decode) DISPLAYLEVEL(4, "Blocks size : %i KB\n", blockSize>>10); // No input filename ==> use stdin if(!input_filename) { input_filename=stdinmark; } |