From 002a93473db38e83fd309aead9567da4aba6834f Mon Sep 17 00:00:00 2001 From: "yann.collet.73@gmail.com" Date: Mon, 1 Jul 2013 07:50:40 +0000 Subject: Corrected issue 70, 'pack' instruction on IBM AIX Added : fullbench : can select compression tests or decompression tests Removed extern inline, for compatibility with GNU89, as reported by Maciej Adamczyk lz4.c : made forceinline more explicit Decompression : corrected corner case behaviors (inputSize == 0 and outputSize == 0), thanks Adrien for detailed suggestions Makefile : Removed -march=native parameter, due to incompatibility with some GCC versions git-svn-id: https://lz4.googlecode.com/svn/trunk@98 650e7d94-2a16-8b24-b05c-7c0b3f6821cd --- Makefile | 9 ++++++--- fullbench.c | 40 ++++++++++++++++++++++++++++------------ lz4.c | 44 ++++++++++++++++++++++++++++---------------- lz4.h | 16 ++++++++-------- lz4_encoder.h | 2 +- lz4c.c | 4 ++-- lz4hc.c | 14 +++++++++----- lz4hc.h | 3 +-- lz4hc_encoder.h | 2 +- 9 files changed, 84 insertions(+), 50 deletions(-) diff --git a/Makefile b/Makefile index 53ea0f4..aafa94f 100644 --- a/Makefile +++ b/Makefile @@ -10,13 +10,16 @@ endif default: lz4c -all: lz4c lz4c32 fuzzer fullbench +all: lz4c lz4cs lz4c32 fuzzer fullbench lz4c: lz4.c lz4hc.c bench.c xxhash.c lz4c.c $(CC) -O3 $(CFLAGS) $^ -o $@$(EXT) +lz4cs: lz4.c lz4hc.c bench.c xxhash.c lz4c.c + $(CC) -Os $(CFLAGS) $^ -o $@$(EXT) + lz4c32: lz4.c lz4hc.c bench.c xxhash.c lz4c.c - $(CC) -m32 -Os -march=native $(CFLAGS) $^ -o $@$(EXT) + $(CC) -m32 -O3 $(CFLAGS) $^ -o $@$(EXT) fuzzer : lz4.c lz4hc.c fuzzer.c $(CC) -O3 $(CFLAGS) $^ -o $@$(EXT) @@ -25,4 +28,4 @@ fullbench : lz4.c lz4hc.c xxhash.c fullbench.c $(CC) -O3 $(CFLAGS) $^ -o $@$(EXT) clean: - rm -f core *.o lz4c$(EXT) lz4c32$(EXT) fuzzer$(EXT) fullbench$(EXT) + rm -f core *.o lz4c$(EXT) lz4cs$(EXT) lz4c32$(EXT) fuzzer$(EXT) fullbench$(EXT) diff --git a/fullbench.c b/fullbench.c index 54b46f6..e64664d 100644 --- a/fullbench.c +++ b/fullbench.c @@ -135,6 +135,8 @@ struct 
chunkParameters static int chunkSize = DEFAULT_CHUNKSIZE; static int nbIterations = NBLOOPS; static int BMK_pause = 0; +static int compressionTest = 1; +static int decompressionTest = 1; void BMK_SetBlocksize(int bsize) { @@ -279,7 +281,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) struct chunkParameters* chunkP; U32 crcc, crcd=0; # define NB_COMPRESSION_ALGORITHMS 4 - static char* compressionNames[] = { "LZ4_compress", "LZ4_compressHC", "LZ4_compressHC_limitedOutput", "LZ4_compress_limitedOutput" }; + static char* compressionNames[] = { "LZ4_compress", "LZ4_compress_limitedOutput", "LZ4_compressHC", "LZ4_compressHC_limitedOutput" }; double totalCTime[NB_COMPRESSION_ALGORITHMS] = {0}; double totalCSize[NB_COMPRESSION_ALGORITHMS] = {0}; # define NB_DECOMPRESSION_ALGORITHMS 5 @@ -371,7 +373,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) DISPLAY(" %s : \n", infilename); // Compression Algorithms - for (cAlgNb=0; cAlgNb < NB_COMPRESSION_ALGORITHMS; cAlgNb++) + for (cAlgNb=0; (cAlgNb < NB_COMPRESSION_ALGORITHMS) && (compressionTest); cAlgNb++) { char* cName = compressionNames[cAlgNb]; int (*compressionFunction)(const char*, char*, int); @@ -380,9 +382,9 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) switch(cAlgNb) { case 0: compressionFunction = LZ4_compress; break; - case 1: compressionFunction = LZ4_compressHC; break; - case 2: compressionFunction = local_LZ4_compressHC_limitedOutput; break; - case 3: compressionFunction = local_LZ4_compress_limitedOutput; break; + case 1: compressionFunction = local_LZ4_compress_limitedOutput; break; + case 2: compressionFunction = LZ4_compressHC; break; + case 3: compressionFunction = local_LZ4_compressHC_limitedOutput; break; default : DISPLAY("ERROR ! Bad algorithm Id !! 
\n"); return 1; } @@ -425,10 +427,16 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) totalCSize[cAlgNb] += cSize; } - { size_t i; for (i=0; i%10llu (%5.2f%%), %6.1f MB/s\n", cName, (long long unsigned int)totals, (long long unsigned int)totalCSize[AlgNb], (double)totalCSize[AlgNb]/(double)totals*100., (double)totals/totalCTime[AlgNb]/1000.); } - for (AlgNb = 0; AlgNb < NB_DECOMPRESSION_ALGORITHMS; AlgNb ++) + for (AlgNb = 0; (AlgNb < NB_DECOMPRESSION_ALGORITHMS) && (decompressionTest); AlgNb ++) { char* dName = decompressionNames[AlgNb]; DISPLAY("%-21.21s :%10llu -> %6.1f MB/s\n", dName, (long long unsigned int)totals, (double)totals/totalDTime[AlgNb]/1000.); @@ -516,6 +524,8 @@ int usage(char* exename) DISPLAY( "Usage :\n"); DISPLAY( " %s [arg] file1 file2 ... fileX\n", exename); DISPLAY( "Arguments :\n"); + DISPLAY( " -c : compression tests only\n"); + DISPLAY( " -d : decompression tests only\n"); DISPLAY( " -H : Help (this text + advanced options)\n"); return 0; } @@ -563,6 +573,12 @@ int main(int argc, char** argv) switch(argument[0]) { + // Select compression algorithm only + case 'c': decompressionTest = 0; break; + + // Select decompression algorithm only + case 'd': compressionTest = 0; break; + // Display help on usage case 'H': usage(exename); usage_advanced(); return 0; @@ -588,7 +604,7 @@ int main(int argc, char** argv) _exit_blockProperties: break; - // Modify Nb Iterations (benchmark only) + // Modify Nb Iterations case 'i': if ((argument[1] >='1') && (argument[1] <='9')) { @@ -598,7 +614,7 @@ _exit_blockProperties: } break; - // Pause at the end (benchmark only) (hidden option) + // Pause at the end (hidden option) case 'p': BMK_SetPause(); break; // Unrecognised command diff --git a/lz4.c b/lz4.c index 91819ad..fa7f3ba 100644 --- a/lz4.c +++ b/lz4.c @@ -116,15 +116,22 @@ Note : this source file requires "lz4_encoder.h" #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) #ifdef _MSC_VER // Visual Studio -# include <intrin.h> // For Visual 2005 -# if 
LZ4_ARCH64 // 64-bit +# define forceinline static __forceinline +# include <intrin.h> // For Visual 2005 +# if LZ4_ARCH64 // 64-bits # pragma intrinsic(_BitScanForward64) // For Visual 2005 # pragma intrinsic(_BitScanReverse64) // For Visual 2005 -# else +# else // 32-bits # pragma intrinsic(_BitScanForward) // For Visual 2005 # pragma intrinsic(_BitScanReverse) // For Visual 2005 # endif # pragma warning(disable : 4127) // disable: C4127: conditional expression is constant +#else +# ifdef __GNUC__ +# define forceinline static inline __attribute__((always_inline)) +# else +# define forceinline static inline +# endif #endif #ifdef _MSC_VER @@ -176,7 +183,11 @@ Note : this source file requires "lz4_encoder.h" #endif #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) -# pragma pack(push, 1) +# ifdef __IBMC__ +# pragma pack(1) +# else +# pragma pack(push, 1) +# endif #endif typedef struct _U16_S { U16 v; } _PACKED U16_S; @@ -260,7 +271,7 @@ typedef struct _U64_S { U64 v; } _PACKED U64_S; //**************************** #if LZ4_ARCH64 -static inline int LZ4_NbCommonBytes (register U64 val) +forceinline int LZ4_NbCommonBytes (register U64 val) { #if defined(LZ4_BIG_ENDIAN) #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) @@ -292,7 +303,7 @@ static inline int LZ4_NbCommonBytes (register U64 val) #else -static inline int LZ4_NbCommonBytes (register U32 val) +forceinline int LZ4_NbCommonBytes (register U32 val) { #if defined(LZ4_BIG_ENDIAN) # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) @@ -532,7 +543,7 @@ typedef enum { full = 0, partial = 1 } exit_directive; // It shall be instanciated several times, using different sets of directives // Note that it is essential this generic function is really inlined, // in order to remove useless branches during compilation optimisation. 
-static inline int LZ4_decompress_generic( +forceinline int LZ4_decompress_generic( const char* source, char* dest, int inputSize, // @@ -561,8 +572,9 @@ static inline int LZ4_decompress_generic( // Special case - if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; // targetOutputSize too large, better decode everything - if unlikely(outputSize==0) goto _output_error; // Empty output buffer + if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; // targetOutputSize too high => decode everything + if ((endOnInput) && unlikely(outputSize==0)) return ((inputSize==1) && (*ip==0)) ? 0 : -1; // Empty output buffer + if ((!endOnInput) && unlikely(outputSize==0)) return (*ip==0?1:-1); // Main Loop @@ -573,10 +585,10 @@ static inline int LZ4_decompress_generic( // get runlength token = *ip++; - if ((length=(token>>ML_BITS)) == RUN_MASK) + if ((length=(token>>ML_BITS)) == RUN_MASK) { unsigned s=255; - while (((endOnInput)?ip oend))) goto _output_error; // Error : not enough place for another match (min 4) + 5 literals + if ((!endOnInput) && (cpy != oend)) goto _output_error; // Error : block decoding must stop exactly there + if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error; // Error : input must be consumed } memcpy(op, ip, length); ip += length; @@ -612,13 +624,13 @@ static inline int LZ4_decompress_generic( // get matchlength if ((length=(token&ML_MASK)) == ML_MASK) { - while (endOnInput ? 
ip= 255) { length-=255; *op++ = 255; } *op++ = (BYTE)length; } - else *token += (BYTE)length; + else *token += (BYTE)(length); // Test end of chunk if (ip > mflimit) { anchor = ip; break; } diff --git a/lz4c.c b/lz4c.c index 0ac65b3..8da1e5f 100644 --- a/lz4c.c +++ b/lz4c.c @@ -390,7 +390,7 @@ int compress_file_blockDependency(char* input_filename, char* output_filename, i *(out_buff+4) |= (blockIndependence & _1BIT) << 5; *(out_buff+4) |= (blockChecksum & _1BIT) << 4; *(out_buff+4) |= (streamChecksum & _1BIT) << 2; - *(out_buff+5) = (blockSizeId & _3BITS) << 4; + *(out_buff+5) = (char)((blockSizeId & _3BITS) << 4); checkbits = XXH32((out_buff+4), 2, LZ4S_CHECKSUM_SEED); checkbits = LZ4S_GetCheckBits_FromXXH(checkbits); *(out_buff+6) = (unsigned char) checkbits; @@ -531,7 +531,7 @@ int compress_file(char* input_filename, char* output_filename, int compressionle *(out_buff+4) |= (blockIndependence & _1BIT) << 5; *(out_buff+4) |= (blockChecksum & _1BIT) << 4; *(out_buff+4) |= (streamChecksum & _1BIT) << 2; - *(out_buff+5) = (blockSizeId & _3BITS) <<4; + *(out_buff+5) = (char)((blockSizeId & _3BITS) << 4); checkbits = XXH32((out_buff+4), 2, LZ4S_CHECKSUM_SEED); checkbits = LZ4S_GetCheckBits_FromXXH(checkbits); *(out_buff+6) = (unsigned char) checkbits; diff --git a/lz4hc.c b/lz4hc.c index 862f48d..729bfd3 100644 --- a/lz4hc.c +++ b/lz4hc.c @@ -102,10 +102,10 @@ Note : this source file requires "lz4hc_encoder.h" #ifdef _MSC_VER # define forceinline __forceinline # include <intrin.h> // For Visual 2005 -# if LZ4_ARCH64 // 64-bit +# if LZ4_ARCH64 // 64-bits # pragma intrinsic(_BitScanForward64) // For Visual 2005 # pragma intrinsic(_BitScanReverse64) // For Visual 2005 -# else +# else // 32-bits # pragma intrinsic(_BitScanForward) // For Visual 2005 # pragma intrinsic(_BitScanReverse) // For Visual 2005 # endif @@ -158,7 +158,11 @@ Note : this source file requires "lz4hc_encoder.h" #endif #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) -# pragma pack(push, 1) +# 
ifdef __IBMC__ +# pragma pack(1) +# else +# pragma pack(push, 1) +# endif #endif typedef struct _U16_S { U16 v; } _PACKED U16_S; @@ -345,7 +349,7 @@ static inline int LZ4_InitHC (LZ4HC_Data_Structure* hc4, const BYTE* base) } -extern inline void* LZ4_createHC (const char* slidingInputBuffer) +void* LZ4_createHC (const char* slidingInputBuffer) { void* hc4 = ALLOCATOR(sizeof(LZ4HC_Data_Structure)); LZ4_InitHC ((LZ4HC_Data_Structure*)hc4, (const BYTE*)slidingInputBuffer); @@ -353,7 +357,7 @@ extern inline void* LZ4_createHC (const char* slidingInputBuffer) } -extern inline int LZ4_freeHC (void* LZ4HC_Data) +int LZ4_freeHC (void* LZ4HC_Data) { FREEMEM(LZ4HC_Data); return (0); diff --git a/lz4hc.h b/lz4hc.h index 710c5e2..7db2160 100644 --- a/lz4hc.h +++ b/lz4hc.h @@ -68,8 +68,7 @@ Decompression functions are provided within LZ4 source code (see "lz4.h") (BSD l */ -/* Advanced Functions -*/ +/* Advanced Functions */ void* LZ4_createHC (const char* slidingInputBuffer); int LZ4_compressHC_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize); diff --git a/lz4hc_encoder.h b/lz4hc_encoder.h index edee93c..b59bef3 100644 --- a/lz4hc_encoder.h +++ b/lz4hc_encoder.h @@ -97,7 +97,7 @@ forceinline static int ENCODE_SEQUENCE_NAME ( if (*op + (1 + LASTLITERALS) + (length>>8) > oend) return 1; // Check output limit #endif if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; } - else *token += (BYTE)length; + else *token += (BYTE)(length); // Prepare next loop *ip += matchLength; -- cgit v0.12