From 13e966d9686de41c428e4e83e87e9255016cd9a6 Mon Sep 17 00:00:00 2001 From: "yann.collet.73@gmail.com" Date: Sat, 27 Jul 2013 11:19:31 +0000 Subject: lz4c : made display and arguments more compatible with gzip, for easier integration with tar (patch by Yaakov Selkowitz) Correction : large files support on 32-bits unix (reported by Karthik Rajeswaran) lz4c : reduce the amount of displayed information in default mode; introduce a verbose mode lz4c : changed help message Updated xxHash to r31 Made bench.c compatible with tcc Corrected : a few minor warnings found by CppCheck, as suggested by Brian White lz4.c : Pushed BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE farther in the code, since it is reported as providing little benefit Corrected : minor 64K input condition, detected by Mat Hostetter git-svn-id: https://lz4.googlecode.com/svn/trunk@99 650e7d94-2a16-8b24-b05c-7c0b3f6821cd --- bench.c | 30 ++++--- bench.h | 6 +- fullbench.c | 28 ++++--- fuzzer.c | 2 +- lz4.c | 124 ++++++++++++++-------------- lz4_encoder.h | 4 +- lz4c.c | 242 ++++++++++++++++++++++++++++++++++--------------------- lz4hc.c | 32 ++++---- lz4hc_encoder.h | 2 +- xxhash.c | 244 ++++++++++++++++++++++++++++---------------------------- 10 files changed, 392 insertions(+), 322 deletions(-) diff --git a/bench.c b/bench.c index f605249..07fd091 100644 --- a/bench.c +++ b/bench.c @@ -30,24 +30,18 @@ #define _CRT_SECURE_NO_DEPRECATE // VS2005 // Unix Large Files support (>4GB) +#define _FILE_OFFSET_BITS 64 #if (defined(__sun__) && (!defined(__LP64__))) // Sun Solaris 32-bits requires specific definitions # define _LARGEFILE_SOURCE -# define _FILE_OFFSET_BITS 64 #elif ! defined(__LP64__) // No point defining Large file for 64 bit # define _LARGEFILE64_SOURCE #endif // S_ISREG & gettimeofday() are not supported by MSVC -#if defined(_MSC_VER) -# define S_ISREG(x) (((x) & S_IFMT) == S_IFREG) +#if defined(_MSC_VER) || defined(_WIN32) # define BMK_LEGACY_TIMER 1 #endif -// GCC does not support _rotl outside of Windows -#if !defined(_WIN32) -# define _rotl(x,r) ((x << r) | (x >> (32 - r))) -#endif - //************************************** // Includes @@ -74,6 +68,19 @@ //************************************** +// Compiler specifics +//************************************** +#if !defined(S_ISREG) +# define S_ISREG(x) (((x) & S_IFMT) == S_IFREG) +#endif + +// GCC does not support _rotl outside of Windows +#if !defined(_WIN32) +# define _rotl(x,r) ((x << r) | (x >> (32 - r))) +#endif + + +//************************************** // Basic Types //************************************** #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 @@ -153,6 +160,7 @@ void BMK_SetPause() BMK_pause = 1; } + //********************************************************* // Private functions //********************************************************* @@ -300,11 +308,12 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel) compressed_buff = (char*)malloc((size_t )compressed_buff_size); - if(!orig_buff || !compressed_buff) + if (!orig_buff || !compressed_buff) { DISPLAY("\nError: not enough memory!\n"); free(orig_buff); free(compressed_buff); + free(chunkP); fclose(fileIn); return 12; } @@ -330,11 +339,12 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel) readSize = fread(orig_buff, 1, benchedSize, fileIn); fclose(fileIn); - if(readSize != benchedSize) + if (readSize != benchedSize) { DISPLAY("\nError: problem reading file '%s' !! \n", infilename); free(orig_buff); free(compressed_buff); + free(chunkP); return 13; } diff --git a/bench.h b/bench.h index 9d5e4f5..1e26685 100644 --- a/bench.h +++ b/bench.h @@ -16,9 +16,9 @@ with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - You can contact the author at : - - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html - - LZ4 source repository : http://code.google.com/p/lz4/ + You can contact the author at : + - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + - LZ4 source repository : http://code.google.com/p/lz4/ */ #pragma once diff --git a/fullbench.c b/fullbench.c index e64664d..66aa0ed 100644 --- a/fullbench.c +++ b/fullbench.c @@ -38,16 +38,10 @@ #endif // S_ISREG & gettimeofday() are not supported by MSVC -#if defined(_MSC_VER) -# define S_ISREG(x) (((x) & S_IFMT) == S_IFREG) +#if defined(_MSC_VER) || defined(_WIN32) # define BMK_LEGACY_TIMER 1 #endif -// GCC does not support _rotl outside of Windows -#if !defined(_WIN32) -# define _rotl(x,r) ((x << r) | (x >> (32 - r))) -#endif - //************************************** // Includes @@ -74,6 +68,20 @@ //************************************** +// Compiler Options +//************************************** +// S_ISREG & gettimeofday() are not supported by MSVC +#if !defined(S_ISREG) +# define S_ISREG(x) (((x) & S_IFMT) == S_IFREG) +#endif + +// GCC does not support _rotl outside of Windows +#if !defined(_WIN32) +# define _rotl(x,r) ((x << r) | (x >> (32 - r))) +#endif + + +//************************************** // Basic Types //************************************** #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 @@ -326,6 +334,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) DISPLAY("\nError: not enough memory!\n"); free(orig_buff); free(compressed_buff); + free(chunkP); fclose(fileIn); return 12; } @@ -356,6 +365,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) DISPLAY("\nError: problem reading file '%s' !! \n", infilename); free(orig_buff); free(compressed_buff); + free(chunkP); return 13; } @@ -385,7 +395,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) case 1: compressionFunction = local_LZ4_compress_limitedOutput; break; case 2: compressionFunction = LZ4_compressHC; break; case 3: compressionFunction = local_LZ4_compressHC_limitedOutput; break; - default : DISPLAY("ERROR ! Bad algorithm Id !! \n"); return 1; + default : DISPLAY("ERROR ! Bad algorithm Id !! \n"); free(chunkP); return 1; } for (loopNb = 1; loopNb <= nbIterations; loopNb++) @@ -449,7 +459,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) case 2: decompressionFunction = LZ4_decompress_safe; break; case 3: decompressionFunction = LZ4_decompress_safe_withPrefix64k; break; case 4: decompressionFunction = local_LZ4_decompress_safe_partial; break; - default : DISPLAY("ERROR ! Bad algorithm Id !! \n"); return 1; + default : DISPLAY("ERROR ! Bad algorithm Id !! \n"); free(chunkP); return 1; } for (loopNb = 1; loopNb <= nbIterations; loopNb++) diff --git a/fuzzer.c b/fuzzer.c index 44ca885..a10b222 100644 --- a/fuzzer.c +++ b/fuzzer.c @@ -132,7 +132,7 @@ int main() { unsigned int seed, randState, cur_seq=PRIME3, seeds[NUM_SEQ], timestamp=FUZ_GetMilliStart(); int i, j, k, ret, len, lenHC, attemptNb; char userInput[30] = {0}; -# define FUZ_CHECKTEST(cond, message) testNb++; if (cond) { printf("Test %i : %s : seed %u, cycle %u \n", testNb, message, seed, attemptNb); goto _output_error; } +# define FUZ_CHECKTEST(cond, message) testNb++; if (cond) { printf("Test %i : %s : seed %u, cycle %i \n", testNb, message, seed, attemptNb); goto _output_error; } printf("starting LZ4 fuzzer\n"); printf("Select an Initialisation number (default : random) : "); diff --git a/lz4.c b/lz4.c index fa7f3ba..779d68a 100644 --- a/lz4.c +++ b/lz4.c @@ -52,13 +52,6 @@ Note : this source file requires "lz4_encoder.h" // Note : explicit functions *_stack* and *_heap* are unaffected by this setting #define HEAPMODE 0 -// BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE : -// This will provide a small boost to performance for big endian cpu, but the resulting compressed stream will be incompatible with little-endian CPU. -// You can set this option to 1 in situations where data will remain within closed environment -// This option is useless on Little_Endian CPU (such as x86) -//#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 - - //************************************** // CPU Feature Detection @@ -92,7 +85,7 @@ Note : this source file requires "lz4_encoder.h" #endif // Unaligned memory access is automatically enabled for "common" CPU, such as x86. -// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected +// For others CPU, such as ARM, the compiler may be more cautious, inserting unnecessary extra code to ensure aligned access property // If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance #if defined(__ARM_FEATURE_UNALIGNED) # define LZ4_FORCE_UNALIGNED_ACCESS 1 @@ -103,25 +96,29 @@ Note : this source file requires "lz4_encoder.h" # define LZ4_FORCE_SW_BITCOUNT #endif +// BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE : +// This option may provide a small boost to performance for some big endian cpu, although probably modest. +// You may set this option to 1 if data will remain within closed environment. +// This option is useless on Little_Endian CPU (such as x86) +//#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 + //************************************** // Compiler Options //************************************** -#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) // C99 /* "restrict" is a known keyword */ #else # define restrict // Disable restrict #endif -#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) - #ifdef _MSC_VER // Visual Studio # define forceinline static __forceinline -# include // For Visual 2005 -# if LZ4_ARCH64 // 64-bits +# include // For Visual 2005 +# if LZ4_ARCH64 // 64-bits # pragma intrinsic(_BitScanForward64) // For Visual 2005 # pragma intrinsic(_BitScanReverse64) // For Visual 2005 -# else // 32-bits +# else // 32-bits # pragma intrinsic(_BitScanForward) // For Visual 2005 # pragma intrinsic(_BitScanReverse) // For Visual 2005 # endif @@ -140,6 +137,8 @@ Note : this source file requires "lz4_encoder.h" # define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) #endif +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) # define expect(expr,value) (__builtin_expect ((expr),(value)) ) #else @@ -163,7 +162,7 @@ Note : this source file requires "lz4_encoder.h" //************************************** #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 # include - typedef uint8_t BYTE; + typedef uint8_t BYTE; typedef uint16_t U16; typedef uint32_t U32; typedef int32_t S32; @@ -190,17 +189,19 @@ Note : this source file requires "lz4_encoder.h" # endif #endif -typedef struct _U16_S { U16 v; } _PACKED U16_S; -typedef struct _U32_S { U32 v; } _PACKED U32_S; -typedef struct _U64_S { U64 v; } _PACKED U64_S; +typedef struct { U16 v; } _PACKED U16_S; +typedef struct { U32 v; } _PACKED U32_S; +typedef struct { U64 v; } _PACKED U64_S; +typedef struct {size_t v;} _PACKED size_t_S; #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) # pragma pack(pop) #endif -#define A64(x) (((U64_S *)(x))->v) -#define A32(x) (((U32_S *)(x))->v) -#define A16(x) (((U16_S *)(x))->v) +#define A16(x) (((U16_S *)(x))->v) +#define A32(x) (((U32_S *)(x))->v) +#define A64(x) (((U64_S *)(x))->v) +#define AARCH(x) (((size_t_S *)(x))->v) //************************************** @@ -230,22 +231,15 @@ typedef struct _U64_S { U64 v; } _PACKED U64_S; //************************************** // Architecture-specific macros //************************************** +#define STEPSIZE sizeof(size_t) +#define LZ4_COPYSTEP(s,d) { AARCH(d) = AARCH(s); d+=STEPSIZE; s+=STEPSIZE; } +#define LZ4_COPY8(s,d) { LZ4_COPYSTEP(s,d); if (STEPSIZE<8) LZ4_COPYSTEP(s,d); } +#define LZ4_SECURECOPY(s,d,e) { if ((STEPSIZE==8)&&(d>3); - #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) return (__builtin_clzll(val) >> 3); - #else +# else int r; if (!(val>>32)) { r=4; } else { r=0; val>>=32; } if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } r += (!val); return r; - #endif -#else - #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) +# endif +# else +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) unsigned long r = 0; _BitScanForward64( &r, val ); return (int)(r>>3); - #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) return (__builtin_ctzll(val) >> 3); - #else +# else static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58]; - #endif -#endif + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif +# endif } #else forceinline int LZ4_NbCommonBytes (register U32 val) { -#if defined(LZ4_BIG_ENDIAN) -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) +# if defined(LZ4_BIG_ENDIAN) +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) unsigned long r = 0; _BitScanReverse( &r, val ); return (int)(r>>3); -# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) return (__builtin_clz(val) >> 3); -# else +# else int r; if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } r += (!val); return r; -# endif -#else -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) +# endif +# else +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) unsigned long r; _BitScanForward( &r, val ); return (int)(r>>3); -# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) return (__builtin_ctz(val) >> 3); -# else +# else static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif -#endif +# endif +# endif } #endif @@ -535,8 +529,8 @@ int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, in //**************************** typedef enum { noPrefix = 0, withPrefix = 1 } prefix64k_directive; -typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } end_directive; -typedef enum { full = 0, partial = 1 } exit_directive; +typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; +typedef enum { full = 0, partial = 1 } earlyEnd_directive; // This generic decompression function cover all use cases. @@ -571,7 +565,7 @@ forceinline int LZ4_decompress_generic( #endif - // Special case + // Special cases if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; // targetOutputSize too high => decode everything if ((endOnInput) && unlikely(outputSize==0)) return ((inputSize==1) && (*ip==0)) ? 0 : -1; // Empty output buffer if ((!endOnInput) && unlikely(outputSize==0)) return (*ip==0?1:-1); @@ -602,12 +596,12 @@ forceinline int LZ4_decompress_generic( { if (partialDecoding) { - if (cpy > oend) goto _output_error; // Error : write attempt beyond end of output buffer - if ((endOnInput) && (ip+length > iend)) goto _output_error; // Error : read attempt beyond end of input buffer + if (cpy > oend) goto _output_error; // Error : write attempt beyond end of output buffer + if ((endOnInput) && (ip+length > iend)) goto _output_error; // Error : read attempt beyond end of input buffer } else { - if ((!endOnInput) && (cpy != oend)) goto _output_error; // Error : block decoding must stop exactly there + if ((!endOnInput) && (cpy != oend)) goto _output_error; // Error : block decoding must stop exactly there if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error; // Error : input must be consumed } memcpy(op, ip, length); @@ -634,7 +628,7 @@ forceinline int LZ4_decompress_generic( } // copy repeated sequence - if unlikely((op-ref)LZ4_64KLIMIT) return 0; // Size too large (not within 64K limit) + if (inputSize>=LZ4_64KLIMIT) return 0; // Size too large (not within 64K limit) #endif #ifdef USE_HEAPMEMORY memset((void*)HashTable, 0, HASHTABLESIZE); @@ -173,7 +173,7 @@ _next_match: anchor = ip; while likely(ip= 403 # define swap32 __builtin_bswap32 #else - static inline unsigned int swap32(unsigned int x) { - return ((x << 24) & 0xff000000 ) | - ((x << 8) & 0x00ff0000 ) | - ((x >> 8) & 0x0000ff00 ) | - ((x >> 24) & 0x000000ff ); + static inline unsigned int swap32(unsigned int x) + { + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); } #endif @@ -86,6 +90,9 @@ #define EXTENSION ".lz4" #define WELCOME_MESSAGE "*** %s %s, by %s (%s) ***\n", COMPRESSOR_NAME, COMPRESSOR_VERSION, AUTHOR, COMPILED +#define UNLZ4 "unlz4" +#define LZ4CAT "lz4cat" + #define KB *(1U<<10) #define MB *(1U<<20) #define GB *(1U<<30) @@ -108,7 +115,7 @@ #define LZ4S_BLOCKSIZEID_DEFAULT 7 #define LZ4S_CHECKSUM_SEED 0 #define LZ4S_EOS 0 -#define LZ4S_MAXHEADERSIZE (4+2+8+4+1) +#define LZ4S_MAXHEADERSIZE (MAGICNUMBER_SIZE+2+8+4+1) //************************************** @@ -129,7 +136,7 @@ static const int one = 1; //************************************** // Special input/output //************************************** -#define NULL_INPUT "null" +#define NULL_OUTPUT "null" char stdinmark[] = "stdin"; char stdoutmark[] = "stdout"; #ifdef _WIN32 @@ -142,12 +149,16 @@ char nulmark[] = "/dev/null"; //************************************** // Local Parameters //************************************** +static char* programName; +static int silence = 0; +static int verbose = 0; static int overwrite = 0; static int blockSizeId = LZ4S_BLOCKSIZEID_DEFAULT; static int blockChecksum = 0; static int streamChecksum = 1; static int blockIndependence = 1; + //************************************** // Exceptions //************************************** @@ -167,22 +178,28 @@ static int blockIndependence = 1; //**************************** // Functions //**************************** -int usage(char* exename) +int usage() { DISPLAY( "Usage :\n"); - DISPLAY( " %s [arg] input [output]\n", exename); + DISPLAY( " %s [arg] [input] [output]\n", programName); + DISPLAY( "\n"); + DISPLAY( "input : a filename, or \n"); + DISPLAY( " '%s' or '-' for pipe mode (default if empty)\n", stdinmark); DISPLAY( "Arguments :\n"); DISPLAY( " -c0/-c : Fast compression (default) \n"); DISPLAY( " -c1/-hc: High compression \n"); DISPLAY( " -d : decompression \n"); DISPLAY( " -y : overwrite without prompting \n"); - DISPLAY( " -H : Help (this text + advanced options)\n"); + DISPLAY( " -h/-H : Help (this text + advanced options)\n"); return 0; } int usage_advanced() { - DISPLAY( "\nAdvanced options :\n"); + usage(); + DISPLAY( "\n"); + DISPLAY( "Advanced options :\n"); + DISPLAY( " -v : be verbose \n"); DISPLAY( " -t : test compressed file \n"); DISPLAY( " -B# : Block size [4-7](default : 7)\n"); DISPLAY( " -BD : Block dependency (improve compression ratio)\n"); @@ -190,18 +207,29 @@ int usage_advanced() DISPLAY( " -Sx : disable stream checksum (default:enabled)\n"); DISPLAY( " -b# : benchmark files, using # [0-1] compression level\n"); DISPLAY( " -i# : iteration loops [1-9](default : 3), benchmark mode only\n"); - DISPLAY( "input : can be 'stdin' (pipe) or a filename\n"); - DISPLAY( "output : can be 'stdout'(pipe) or a filename or 'null'\n"); - DISPLAY( " example 1 : lz4c -hc stdin compressedfile.lz4\n"); - DISPLAY( " example 2 : lz4c -hcyB4D filename \n"); + DISPLAY( "\n"); + DISPLAY( "output : a filename, or \n"); + DISPLAY( " '%s', or '-' for pipe mode\n", stdoutmark); + DISPLAY( " or '%s'\n", NULL_OUTPUT); + DISPLAY( " default if empty : stdout if input is stdin\n"); + DISPLAY( " input.lz4 if compression selected\n"); + DISPLAY( " input without '.lz4' if decompression\n"); + DISPLAY( "\n"); + DISPLAY( "Examples :\n"); + DISPLAY( "1 : compress file 'filename', using default output name 'filename.lz4'\n"); + DISPLAY( " %s filename\n", programName); + DISPLAY( "2 : compress 'filename' in high compression mode, overwrite output if exists\n"); + DISPLAY( " %s -hcy filename \n", programName); + DISPLAY( "3 : compress data stream from 'generator', send result to 'consumer'\n"); + DISPLAY( " generator | %s | consumer \n", programName); return 0; } -int badusage(char* exename) +int badusage() { DISPLAY("Wrong parameters\n"); - usage(exename); - return 0; + usage(); + exit(1); } @@ -215,7 +243,7 @@ int get_fileHandle(char* input_filename, char* output_filename, FILE** pfinput, if (!strcmp (input_filename, stdinmark)) { - DISPLAY( "Using stdin for input\n"); + if (verbose) DISPLAY( "Using stdin for input\n"); *pfinput = stdin; #ifdef _WIN32 // Need to set stdin/stdout to binary mode specifically for windows _setmode( _fileno( stdin ), _O_BINARY ); @@ -228,7 +256,7 @@ int get_fileHandle(char* input_filename, char* output_filename, FILE** pfinput, if (!strcmp (output_filename, stdoutmark)) { - DISPLAY( "Using stdout for output\n"); + if (verbose) DISPLAY( "Using stdout for output\n"); *pfoutput = stdout; #ifdef _WIN32 // Need to set stdin/stdout to binary mode specifically for windows _setmode( _fileno( stdout ), _O_BINARY ); @@ -243,12 +271,12 @@ int get_fileHandle(char* input_filename, char* output_filename, FILE** pfinput, { char ch; fclose(*pfoutput); - DISPLAY( "Warning : %s already exists\n", output_filename); if (!overwrite) { + DISPLAY( "Warning : %s already exists\n", output_filename); DISPLAY( "Overwrite ? (Y/N) : "); ch = (char)getchar(); - if (ch!='Y') EXM_THROW(11, "Operation aborted : %s already exists", output_filename); + if ((ch!='Y') && (ch!='y')) EXM_THROW(11, "Operation aborted : %s already exists", output_filename); } } *pfoutput = fopen( output_filename, "wb" ); @@ -319,11 +347,11 @@ int legacy_compress_file(char* input_filename, char* output_filename, int compre // Status end = clock(); - DISPLAY( "Compressed %llu bytes into %llu bytes ==> %.2f%%\n", + if (!silence) DISPLAY( "Compressed %llu bytes into %llu bytes ==> %.2f%%\n", (unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100); { double seconds = (double)(end - start)/CLOCKS_PER_SEC; - DISPLAY( "Done in %.2f s ==> %.2f MB/s\n", seconds, (double)filesize / seconds / 1024 / 1024); + if (verbose) DISPLAY( "Done in %.2f s ==> %.2f MB/s\n", seconds, (double)filesize / seconds / 1024 / 1024); } // Close & Free @@ -350,7 +378,6 @@ int compress_file_blockDependency(char* input_filename, char* output_filename, i char* out_buff; FILE* finput; FILE* foutput; - int errorcode; int displayLevel = (compressionlevel>0); clock_t start, end; unsigned int blockSize, inputBufferSize; @@ -370,8 +397,7 @@ int compress_file_blockDependency(char* input_filename, char* output_filename, i translateFunction = LZ4_slideInputBufferHC; freeFunction = LZ4_freeHC; } - errorcode = get_fileHandle(input_filename, output_filename, &finput, &foutput); - if (errorcode) return errorcode; + get_fileHandle(input_filename, output_filename, &finput, &foutput); blockSize = LZ4S_GetBlockSize_FromBlockId (blockSizeId); // Allocate Memory @@ -407,7 +433,7 @@ int compress_file_blockDependency(char* input_filename, char* output_filename, i // Read Block if ((in_start+blockSize) > in_end) in_start = translateFunction(ctx); inSize = (unsigned int) fread(in_start, (size_t)1, (size_t)blockSize, finput); - if( inSize<=0 ) break; // No more input : end of compression + if( inSize==0 ) break; // No more input : end of compression filesize += inSize; if (displayLevel) DISPLAY("Read : %i MB \r", (int)(filesize>>20)); if (streamChecksum) XXH32_update(streamChecksumState, in_start, inSize); @@ -469,11 +495,11 @@ int compress_file_blockDependency(char* input_filename, char* output_filename, i // Status end = clock(); - DISPLAY( "Compressed %llu bytes into %llu bytes ==> %.2f%%\n", + if (!silence) DISPLAY( "Compressed %llu bytes into %llu bytes ==> %.2f%%\n", (unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100); { double seconds = (double)(end - start)/CLOCKS_PER_SEC; - DISPLAY( "Done in %.2f s ==> %.2f MB/s\n", seconds, (double)filesize / seconds / 1024 / 1024); + if (verbose) DISPLAY( "Done in %.2f s ==> %.2f MB/s\n", seconds, (double)filesize / seconds / 1024 / 1024); } // Close & Free @@ -495,13 +521,13 @@ int compress_file(char* input_filename, char* output_filename, int compressionle unsigned int checkbits; char* in_buff; char* out_buff; + char* headerBuffer; FILE* finput; FILE* foutput; - int errorcode; - int displayLevel = (compressionlevel>0); + int displayLevel = ((compressionlevel>0) && (!silence)) || (verbose); clock_t start, end; int blockSize; - size_t sizeCheck, header_size; + size_t sizeCheck, header_size, readSize; void* streamChecksumState=NULL; // Branch out @@ -515,45 +541,48 @@ int compress_file(char* input_filename, char* output_filename, int compressionle case 1 : compressionFunction = LZ4_compressHC_limitedOutput; break; default: compressionFunction = LZ4_compress_limitedOutput; } - errorcode = get_fileHandle(input_filename, output_filename, &finput, &foutput); - if (errorcode) return errorcode; + get_fileHandle(input_filename, output_filename, &finput, &foutput); blockSize = LZ4S_GetBlockSize_FromBlockId (blockSizeId); // Allocate Memory in_buff = (char*)malloc(blockSize); out_buff = (char*)malloc(blockSize+CACHELINE); - if (!in_buff || !out_buff) EXM_THROW(31, "Allocation error : not enough memory"); + headerBuffer = (char*)malloc(LZ4S_MAXHEADERSIZE); + if (!in_buff || !out_buff || !(headerBuffer)) EXM_THROW(31, "Allocation error : not enough memory"); if (streamChecksum) streamChecksumState = XXH32_init(LZ4S_CHECKSUM_SEED); // Write Archive Header - *(unsigned int*)out_buff = LITTLE_ENDIAN_32(LZ4S_MAGICNUMBER); // Magic Number, in Little Endian convention - *(out_buff+4) = (1 & _2BITS) << 6 ; // Version('01') - *(out_buff+4) |= (blockIndependence & _1BIT) << 5; - *(out_buff+4) |= (blockChecksum & _1BIT) << 4; - *(out_buff+4) |= (streamChecksum & _1BIT) << 2; - *(out_buff+5) = (char)((blockSizeId & _3BITS) << 4); - checkbits = XXH32((out_buff+4), 2, LZ4S_CHECKSUM_SEED); + *(unsigned int*)headerBuffer = LITTLE_ENDIAN_32(LZ4S_MAGICNUMBER); // Magic Number, in Little Endian convention + *(headerBuffer+4) = (1 & _2BITS) << 6 ; // Version('01') + *(headerBuffer+4) |= (blockIndependence & _1BIT) << 5; + *(headerBuffer+4) |= (blockChecksum & _1BIT) << 4; + *(headerBuffer+4) |= (streamChecksum & _1BIT) << 2; + *(headerBuffer+5) = (char)((blockSizeId & _3BITS) << 4); + checkbits = XXH32((headerBuffer+4), 2, LZ4S_CHECKSUM_SEED); checkbits = LZ4S_GetCheckBits_FromXXH(checkbits); - *(out_buff+6) = (unsigned char) checkbits; + *(headerBuffer+6) = (unsigned char) checkbits; header_size = 7; - sizeCheck = fwrite(out_buff, 1, header_size, foutput); + + // Write header + sizeCheck = fwrite(headerBuffer, 1, header_size, foutput); if (sizeCheck!=header_size) EXM_THROW(32, "Write error : cannot write header"); compressedfilesize += header_size; + // read first block + readSize = fread(in_buff, (size_t)1, (size_t)blockSize, finput); + // Main Loop - while (1) + while (readSize>0) { unsigned int outSize; - // Read Block - unsigned int inSize = (unsigned int) fread(in_buff, (size_t)1, (size_t)blockSize, finput); - if( inSize<=0 ) break; // No more input : end of compression - filesize += inSize; + + filesize += readSize; if (displayLevel) DISPLAY("Read : %i MB \r", (int)(filesize>>20)); - if (streamChecksum) XXH32_update(streamChecksumState, in_buff, inSize); + if (streamChecksum) XXH32_update(streamChecksumState, in_buff, (int)readSize); // Compress Block - outSize = compressionFunction(in_buff, out_buff+4, inSize, inSize-1); - if (outSize > 0) compressedfilesize += outSize+4; else compressedfilesize += inSize+4; + outSize = compressionFunction(in_buff, out_buff+4, (int)readSize, (int)readSize-1); + if (outSize > 0) compressedfilesize += outSize+4; else compressedfilesize += readSize+4; if (blockChecksum) compressedfilesize+=4; if (displayLevel) DISPLAY("Read : %i MB ==> %.2f%%\r", (int)(filesize>>20), (double)compressedfilesize/filesize*100); @@ -571,24 +600,26 @@ int compress_file(char* input_filename, char* output_filename, int compressionle sizeToWrite = 4 + outSize + (4*blockChecksum); sizeCheck = fwrite(out_buff, 1, sizeToWrite, foutput); if (sizeCheck!=(size_t)(sizeToWrite)) EXM_THROW(33, "Write error : cannot write compressed block"); - } - else // Copy Original + else // Copy Original Uncompressed { unsigned int checksum; - * (unsigned int*) out_buff = LITTLE_ENDIAN_32(inSize|0x80000000); // Add Uncompressed flag + * (unsigned int*) out_buff = LITTLE_ENDIAN_32(((unsigned long)readSize)|0x80000000); // Add Uncompressed flag sizeCheck = fwrite(out_buff, 1, 4, foutput); if (sizeCheck!=(size_t)(4)) EXM_THROW(34, "Write error : cannot write block header"); - sizeCheck = fwrite(in_buff, 1, inSize, foutput); - if (sizeCheck!=(size_t)(inSize)) EXM_THROW(35, "Write error : cannot write block"); + sizeCheck = fwrite(in_buff, 1, readSize, foutput); + if (sizeCheck!=readSize) EXM_THROW(35, "Write error : cannot write block"); if (blockChecksum) { - checksum = XXH32(in_buff, inSize, LZ4S_CHECKSUM_SEED); + checksum = XXH32(in_buff, (int)readSize, LZ4S_CHECKSUM_SEED); * (unsigned int*) out_buff = LITTLE_ENDIAN_32(checksum); sizeCheck = fwrite(out_buff, 1, 4, foutput); if (sizeCheck!=(size_t)(4)) EXM_THROW(36, "Write error : cannot write block checksum"); } } + + // Read next block + readSize = fread(in_buff, (size_t)1, (size_t)blockSize, finput); } // End of Stream mark @@ -605,21 +636,22 @@ int compress_file(char* input_filename, char* output_filename, int compressionle compressedfilesize += 4; } - // Status - end = clock(); - DISPLAY( "Compressed %llu bytes into %llu bytes ==> %.2f%%\n", - (unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100); - { - double seconds = (double)(end - start)/CLOCKS_PER_SEC; - DISPLAY( "Done in %.2f s ==> %.2f MB/s\n", seconds, (double)filesize / seconds / 1024 / 1024); - } - // Close & Free free(in_buff); free(out_buff); + free(headerBuffer); fclose(finput); fclose(foutput); + // Final Status + end = clock(); + if (!silence) DISPLAY( "Compressed %llu bytes into %llu bytes ==> %.2f%%\n", + (unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100); + { + double seconds = (double)(end - start)/CLOCKS_PER_SEC; + if (verbose) DISPLAY( "Done in %.2f s ==> %.2f MB/s\n", seconds, (double)filesize / seconds / 1024 / 1024); + } + return 0; } @@ -788,6 +820,7 @@ unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) else { memcpy(out_start, in_buff, blockSize); + decodedBytes = blockSize; } } } @@ -852,7 +885,7 @@ unsigned long long selectDecoder( FILE* finput, FILE* foutput) case LZ4S_MAGICNUMBER: return decodeLZ4S(finput, foutput); case LEGACY_MAGICNUMBER: - DISPLAY("Detected : Legacy format \n"); + if (verbose) DISPLAY("Detected : Legacy format \n"); return decodeLegacyStream(finput, foutput); case LZ4S_SKIPPABLE0: nbReadBytes = fread(&size, 1, 4, finput); @@ -890,10 +923,10 @@ int decodeFile(char* input_filename, char* output_filename) // Final Status end = clock(); - DISPLAY( "Successfully decoded %llu bytes \n", filesize); + if (!silence) DISPLAY( "Successfully decoded %llu bytes \n", filesize); { double seconds = (double)(end - start)/CLOCKS_PER_SEC; - DISPLAY( "Done in %.2f s ==> %.2f MB/s\n", seconds, (double)filesize / seconds / 1024 / 1024); + if (verbose) DISPLAY( "Done in %.2f s ==> %.2f MB/s\n", seconds, (double)filesize / seconds / 1024 / 1024); } // Close @@ -913,16 +946,15 @@ int main(int argc, char** argv) bench=0, filenamesStart=2, legacy_format=0; - char* exename=argv[0]; char* input_filename=0; char* output_filename=0; - char nullinput[] = NULL_INPUT; + char nullOutput[] = NULL_OUTPUT; char extension[] = EXTENSION; - // Welcome message - DISPLAY( WELCOME_MESSAGE); - - if (argc<2) { badusage(exename); return 1; } + // Select behavior + programName = argv[0]; + if (strstr(programName, UNLZ4)) { decode=1; silence=1; } + else if (strstr(programName, LZ4CAT)) { decode=1; silence=1; output_filename=stdoutmark; } for(i=1; i='0') && (argument[1] <='1')) { cLevel=argument[1] - '0'; argument++; } break; - case 'h': if (argument[1]=='c') { cLevel=1; argument++; } break; + case 'h': if (argument[1]=='c') { cLevel=1; argument++; } else { usage_advanced(); return 0; } break; // Use Legacy format (hidden option) case 'l': legacy_format=1; break; @@ -980,7 +1018,7 @@ _exit_blockProperties: break; // Modify Stream properties - case 'S': if (argument[1]=='x') { streamChecksum=0; argument++; break; } else { badusage(exename); return 1; } + case 'S': if (argument[1]=='x') { streamChecksum=0; argument++; break; } else { badusage(); } // Bench case 'b': bench=1; @@ -1003,8 +1041,11 @@ _exit_blockProperties: // Overwrite case 'y': overwrite=1; break; + // Verbose mode + case 'v': verbose=1; break; + // Unrecognised command - default : badusage(exename); return 1; + default : badusage(); } } continue; @@ -1017,28 +1058,33 @@ _exit_blockProperties: if (!output_filename) { output_filename=argument; - if (!strcmp (output_filename, nullinput)) output_filename = nulmark; + if (!strcmp (output_filename, nullOutput)) output_filename = nulmark; continue; } } - // No input filename ==> Error - if(!input_filename) { badusage(exename); return 1; } + if (verbose) DISPLAY( WELCOME_MESSAGE); + + // No input filename ==> use stdin + if(!input_filename) { input_filename=stdinmark; } + // Check if benchmark was required if (bench) return BMK_benchFile(argv+filenamesStart, argc-filenamesStart, cLevel); - // No output filename ==> build one automatically (when possible) + // No output filename ==> select one automatically (when possible) if (!output_filename) - { - if (!decode) // compression + { + if (input_filename == stdinmark) { output_filename=stdoutmark; silence=1; } + else if (!decode) // compression { int i=0, l=0; while (input_filename[l]!=0) l++; output_filename = (char*)calloc(1,l+5); for (i=0;i4) while ((outl >= inl-4) && (input_filename[outl] == extension[outl-inl+4])) output_filename[outl--]=0; - if (outl != inl-5) output_filename = NULL; + if (outl != inl-5) { DISPLAY("Cannot automatically decide an output filename\n"); badusage(); } } - if (!output_filename) { badusage(exename); return 1; } } + if ((decode ? input_filename==stdinmark : output_filename==stdoutmark) + && !overwrite +#ifdef _WIN32 + && _isatty (_fileno ((decode ? stdin : stdout)))) +#else + && isatty ( fileno ((decode ? stdin : stdout)))) +#endif + badusage(); + + if ((input_filename == stdinmark) && (output_filename == stdoutmark)) silence=1; + + if (verbose) silence=0; + if (decode) return decodeFile(input_filename, output_filename); // compression is default action if (legacy_format) { - DISPLAY("! Generating compressed LZ4 using Legacy format (deprecated !) ! \n"); + if (!silence) DISPLAY("! Generating compressed LZ4 using Legacy format (deprecated !) ! \n"); return legacy_compress_file(input_filename, output_filename, cLevel); } else { - return compress_file(input_filename, output_filename, cLevel); + return compress_file(input_filename, output_filename, cLevel); } } diff --git a/lz4hc.c b/lz4hc.c index 729bfd3..8f2b422 100644 --- a/lz4hc.c +++ b/lz4hc.c @@ -99,13 +99,13 @@ Note : this source file requires "lz4hc_encoder.h" # define restrict // Disable restrict #endif -#ifdef _MSC_VER -# define forceinline __forceinline -# include // For Visual 2005 -# if LZ4_ARCH64 // 64-bits +#ifdef _MSC_VER // Visual Studio +# define forceinline static __forceinline +# include // For Visual 2005 +# if LZ4_ARCH64 // 64-bits # pragma intrinsic(_BitScanForward64) // For Visual 2005 # pragma intrinsic(_BitScanReverse64) // For Visual 2005 -# else // 32-bits +# else // 32-bits # pragma intrinsic(_BitScanForward) // For Visual 2005 # pragma intrinsic(_BitScanReverse) // For Visual 2005 # endif @@ -113,16 +113,16 @@ Note : this source file requires "lz4hc_encoder.h" # pragma warning(disable : 4701) // disable: C4701: potentially uninitialized local variable used #else # ifdef __GNUC__ -# define forceinline inline __attribute__((always_inline)) +# define forceinline static inline __attribute__((always_inline)) # else -# define forceinline inline +# define forceinline static inline # endif #endif #ifdef _MSC_VER // Visual Studio -#define lz4_bswap16(x) _byteswap_ushort(x) +# define lz4_bswap16(x) _byteswap_ushort(x) #else -#define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) +# define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) #endif @@ -273,7 +273,7 @@ typedef struct //************************************** #if LZ4_ARCH64 -inline static int LZ4_NbCommonBytes (register U64 val) +forceinline int LZ4_NbCommonBytes (register U64 val) { #if defined(LZ4_BIG_ENDIAN) # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) @@ -305,7 +305,7 @@ inline static int LZ4_NbCommonBytes (register U64 val) #else -inline static int LZ4_NbCommonBytes (register U32 val) +forceinline int LZ4_NbCommonBytes (register U32 val) { #if defined(LZ4_BIG_ENDIAN) # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) @@ -337,7 +337,7 @@ inline static int LZ4_NbCommonBytes (register U32 val) #endif -static inline int LZ4_InitHC (LZ4HC_Data_Structure* hc4, const BYTE* base) +forceinline int LZ4_InitHC (LZ4HC_Data_Structure* hc4, const BYTE* base) { MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable)); MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); @@ -365,7 +365,7 @@ int LZ4_freeHC (void* LZ4HC_Data) // Update chains up to ip (excluded) -static forceinline void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip) +forceinline void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip) { U16* chainTable = hc4->chainTable; HTYPE* HashTable = hc4->hashTable; @@ -403,7 +403,7 @@ char* LZ4_slideInputBufferHC(void* LZ4HC_Data) } -static forceinline size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit) +forceinline size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit) { const BYTE* p1t = p1; @@ -421,7 +421,7 @@ static forceinline size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, co } -static forceinline int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* const matchlimit, const BYTE** matchpos) +forceinline int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* const matchlimit, const BYTE** matchpos) { U16* const chainTable = hc4->chainTable; HTYPE* const HashTable = hc4->hashTable; @@ -489,7 +489,7 @@ static forceinline int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, } -static forceinline int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos) +forceinline int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos) { U16* const chainTable = hc4->chainTable; HTYPE* const HashTable = hc4->hashTable; diff --git a/lz4hc_encoder.h b/lz4hc_encoder.h index b59bef3..49a0f23 100644 --- a/lz4hc_encoder.h +++ b/lz4hc_encoder.h @@ -62,7 +62,7 @@ // Function code //**************************** -forceinline static int ENCODE_SEQUENCE_NAME ( +forceinline int ENCODE_SEQUENCE_NAME ( const BYTE** ip, BYTE** op, const BYTE** anchor, diff --git a/xxhash.c b/xxhash.c index 6dacdcb..914421f 100644 --- a/xxhash.c +++ b/xxhash.c @@ -31,7 +31,6 @@ You can contact the author at : */ - //************************************** // Tuning parameters //************************************** @@ -44,8 +43,8 @@ You can contact the author at : #endif // XXH_ACCEPT_NULL_INPUT_POINTER : -// If the input pointer is a null pointer, xxHash default behavior is to crash, since it is a bad input. -// If this option is enabled, xxHash output for null input pointers will be the same as a null-length input. +// If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. +// When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. // This option has a very small performance cost (only measurable on small inputs). // By default, this option is disabled. To enable it, uncomment below define : //#define XXH_ACCEPT_NULL_INPUT_POINTER 1 @@ -54,17 +53,28 @@ You can contact the author at : // By default, xxHash library provides endian-independant Hash values, based on little-endian convention. // Results are therefore identical for little-endian and big-endian CPU. // This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. -// Should endian-independance be of no importance for your application, you may uncomment the #define below. +// Should endian-independance be of no importance for your application, you may set the #define below to 1. // It will improve speed for Big-endian CPU. // This option has no impact on Little_Endian CPU. -//#define XXH_FORCE_NATIVE_FORMAT 1 +#define XXH_FORCE_NATIVE_FORMAT 0 //************************************** -// Compiler Options +// Compiler Specific Options //************************************** -#if defined(_MSC_VER) && !defined(__cplusplus) // Visual Studio -# define inline __inline // Visual C is not C99, but supports some kind of inline +// Disable some Visual warning messages +#ifdef _MSC_VER // Visual Studio +# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant +#endif + +#ifdef _MSC_VER // Visual Studio +# define forceinline static __forceinline +#else +# ifdef __GNUC__ +# define forceinline static inline __attribute__((always_inline)) +# else +# define forceinline static inline +# endif #endif @@ -75,39 +85,11 @@ You can contact the author at : // Modify the local functions below should you wish to use some other memory related routines // for malloc(), free() #include -static inline void* XXH_malloc(size_t s) { return malloc(s); } -static inline void XXH_free (void* p) { free(p); } +forceinline void* XXH_malloc(size_t s) { return malloc(s); } +forceinline void XXH_free (void* p) { free(p); } // for memcpy() #include -static inline void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } - - -//************************************** -// CPU Feature Detection -//************************************** -// Little Endian or Big Endian ? -// You can overwrite the #define below if you know your architecture endianess -#if defined(XXH_FORCE_NATIVE_FORMAT) && (XXH_FORCE_NATIVE_FORMAT==1) -// Force native format. The result will be endian dependant. -# define XXH_BIG_ENDIAN 0 -#elif defined (__GLIBC__) -# include -# if (__BYTE_ORDER == __BIG_ENDIAN) -# define XXH_BIG_ENDIAN 1 -# endif -#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) -# define XXH_BIG_ENDIAN 1 -#elif defined(__sparc) || defined(__sparc__) \ - || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \ - || defined(__hpux) || defined(__hppa) \ - || defined(_MIPSEB) || defined(__s390__) -# define XXH_BIG_ENDIAN 1 -#endif - -#if !defined(XXH_BIG_ENDIAN) -// Little Endian assumed. PDP Endian and other very rare endian format are unsupported. -# define XXH_BIG_ENDIAN 0 -#endif +forceinline void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } //************************************** @@ -135,7 +117,11 @@ static inline void* XXH_memcpy(void* dest, const void* src, size_t size) { retur #endif #if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) -# pragma pack(push, 1) +# ifdef __IBMC__ +# pragma pack(1) +# else +# pragma pack(push, 1) +# endif #endif typedef struct _U32_S { U32 v; } _PACKED U32_S; @@ -183,89 +169,53 @@ static inline U32 XXH_swap32 (U32 x) { //************************************** -// Macros +// Architecture Macros //************************************** -#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(!!(c)) }; } // use only *after* variable declarations -#define XXH_LE32(p) (XXH_BIG_ENDIAN ? XXH_swap32(A32(p)) : A32(p)) -#define XXH_alignedLE32(p) (XXH_BIG_ENDIAN ? XXH_swap32(*(U32*)(p)) : *(U32*)(p)) +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; +#ifndef XXH_CPU_LITTLE_ENDIAN // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch + static const int one = 1; +# define XXH_CPU_LITTLE_ENDIAN (*(char*)(&one)) +#endif +//************************************** +// Macros +//************************************** +#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(!!(c)) }; } // use only *after* variable declarations + //**************************** -// Simple Hash Functions +// Memory reads //**************************** +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; -#if !defined(XXH_USE_UNALIGNED_ACCESS) -// Specific version, for aligned 32-bits input. Useless for CPU supporting unaligned access. -static U32 XXH32_alignedInput(const void* input, int len, U32 seed) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; - U32 h32; - - if (len>=16) - { - const BYTE* const limit = bEnd - 16; - U32 v1 = seed + PRIME32_1 + PRIME32_2; - U32 v2 = seed + PRIME32_2; - U32 v3 = seed + 0; - U32 v4 = seed - PRIME32_1; - do - { - v1 += XXH_alignedLE32(p) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; - v2 += XXH_alignedLE32(p) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; - v3 += XXH_alignedLE32(p) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; - v4 += XXH_alignedLE32(p) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; - } while (p<=limit); - h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); - } - else { h32 = seed + PRIME32_5; } - h32 += (U32) len; - while (p<=bEnd-4) - { - h32 += XXH_alignedLE32(p) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; - p+=4; - } - while (p> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - return h32; +forceinline U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr)); + else + return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr); } -#endif -U32 XXH32(const void* input, int len, U32 seed) -{ -#if 0 - // Simple version, good for code maintenance, but unfortunately slow for small inputs - void* state = XXH32_init(seed); - XXH32_update(state, input, len); - return XXH32_digest(state); -#else +forceinline U32 XXH_readLE32(const U32* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); } + +//**************************** +// Simple Hash Functions +//**************************** +forceinline U32 XXH32_endian_align(const void* input, int len, U32 seed, XXH_endianess endian, XXH_alignment align) +{ const BYTE* p = (const BYTE*)input; const BYTE* const bEnd = p + len; U32 h32; #ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (p==NULL) { len=0; p=(const BYTE*)16; } -#endif - -#if !defined(XXH_USE_UNALIGNED_ACCESS) - if ((((U32)p) & 3) == 0) return XXH32_alignedInput(input, len, seed); // Input is aligned, let's leverage the speed advantage + if (p==NULL) { len=0; p=(const BYTE*)(size_t)16; } #endif if (len>=16) { - const BYTE* const limit = bEnd - 16; + const BYTE* const limit = bEnd - 32; U32 v1 = seed + PRIME32_1 + PRIME32_2; U32 v2 = seed + PRIME32_2; U32 v3 = seed + 0; @@ -273,10 +223,10 @@ U32 XXH32(const void* input, int len, U32 seed) do { - v1 += XXH_LE32(p) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; - v2 += XXH_LE32(p) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; - v3 += XXH_LE32(p) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; - v4 += XXH_LE32(p) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; + v1 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; + v2 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; + v3 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; + v4 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; } while (p<=limit); h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); @@ -290,8 +240,8 @@ U32 XXH32(const void* input, int len, U32 seed) while (p<=bEnd-4) { - h32 += XXH_LE32(p) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + h32 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; p+=4; } @@ -309,7 +259,33 @@ U32 XXH32(const void* input, int len, U32 seed) h32 ^= h32 >> 16; return h32; +} + +U32 XXH32(const void* input, int len, U32 seed) +{ +#if 0 + // Simple version, good for code maintenance, but unfortunately slow for small inputs + void* state = XXH32_init(seed); + XXH32_update(state, input, len); + return XXH32_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + +# if !defined(XXH_USE_UNALIGNED_ACCESS) + if ((((size_t)input) & 3)) // Input is aligned, let's leverage the speed advantage + { + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } +# endif + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); #endif } @@ -360,7 +336,7 @@ void* XXH32_init (U32 seed) } -XXH_errorcode XXH32_update (void* state_in, const void* input, int len) +forceinline XXH_errorcode XXH32_update_endian (void* state_in, const void* input, int len, XXH_endianess endian) { struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; const BYTE* p = (const BYTE*)input; @@ -384,10 +360,10 @@ XXH_errorcode XXH32_update (void* state_in, const void* input, int len) XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize); { const U32* p32 = (const U32*)state->memory; - state->v1 += XXH_LE32(p32) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++; - state->v2 += XXH_LE32(p32) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++; - state->v3 += XXH_LE32(p32) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++; - state->v4 += XXH_LE32(p32) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++; + state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++; + state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++; + state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++; + state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++; } p += 16-state->memsize; state->memsize = 0; @@ -403,10 +379,10 @@ XXH_errorcode XXH32_update (void* state_in, const void* input, int len) do { - v1 += XXH_LE32(p) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; - v2 += XXH_LE32(p) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; - v3 += XXH_LE32(p) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; - v4 += XXH_LE32(p) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; + v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; + v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; + v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; + v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; } while (p<=limit); state->v1 = v1; @@ -424,11 +400,22 @@ XXH_errorcode XXH32_update (void* state_in, const void* input, int len) return XXH_OK; } +XXH_errorcode XXH32_update (void* state_in, const void* input, int len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH32_update_endian(state_in, input, len, XXH_bigEndian); +} -U32 XXH32_intermediateDigest (void* state_in) + + +forceinline U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess endian) { struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; - BYTE * p = (BYTE*)state->memory; + const BYTE * p = (const BYTE*)state->memory; BYTE* bEnd = (BYTE*)state->memory + state->memsize; U32 h32; @@ -445,8 +432,8 @@ U32 XXH32_intermediateDigest (void* state_in) while (p<=bEnd-4) { - h32 += XXH_LE32(p) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4; + h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4; p+=4; } @@ -467,6 +454,17 @@ U32 XXH32_intermediateDigest (void* state_in) } +U32 XXH32_intermediateDigest (void* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_intermediateDigest_endian(state_in, XXH_littleEndian); + else + return XXH32_intermediateDigest_endian(state_in, XXH_bigEndian); +} + + U32 XXH32_digest (void* state_in) { U32 h32 = XXH32_intermediateDigest(state_in); -- cgit v0.12