diff options
author | Yann Collet <yann.collet.73@gmail.com> | 2014-11-30 12:54:04 (GMT) |
---|---|---|
committer | Yann Collet <yann.collet.73@gmail.com> | 2014-11-30 12:54:04 (GMT) |
commit | dee47d11be17aec91a16a6381ea321b4d79f4200 (patch) | |
tree | e478b0dde6dc6f5d301078b6051c0c87e05a4046 | |
parent | 73554386ed8c9b08940411c0e62bbdca3216f8cd (diff) | |
parent | d0a6cccf75e147301b430a43bc9e591fdd80fce9 (diff) | |
download | lz4-dee47d11be17aec91a16a6381ea321b4d79f4200.zip lz4-dee47d11be17aec91a16a6381ea321b4d79f4200.tar.gz lz4-dee47d11be17aec91a16a6381ea321b4d79f4200.tar.bz2 |
Merge pull request #36 from Cyan4973/AlignEndian
Align endian
-rw-r--r-- | Makefile | 9 | ||||
-rw-r--r-- | NEWS | 10 | ||||
-rw-r--r-- | examples/Makefile | 2 | ||||
-rw-r--r-- | lz4.c | 571 | ||||
-rw-r--r-- | lz4.h | 41 | ||||
-rw-r--r-- | lz4hc.c | 321 | ||||
-rw-r--r-- | lz4hc.h | 6 | ||||
-rw-r--r-- | programs/Makefile | 16 | ||||
-rw-r--r-- | programs/frametest.c | 2 | ||||
-rw-r--r-- | programs/fullbench.c | 20 | ||||
-rw-r--r-- | programs/lz4cli.c | 17 | ||||
-rw-r--r-- | programs/lz4io.c | 2 | ||||
-rw-r--r-- | xxhash.c | 72 |
13 files changed, 454 insertions, 635 deletions
@@ -31,7 +31,7 @@ # ################################################################ # Version numbers -VERSION=124 +VERSION=125 export RELEASE=r$(VERSION) LIBVER_MAJOR=`sed -n '/define LZ4_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < lz4.h` LIBVER_MINOR=`sed -n '/define LZ4_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < lz4.h` @@ -50,13 +50,6 @@ PRGDIR = programs DISTRIBNAME=lz4-$(RELEASE).tar.gz -# Define *.exe as extension for Windows systems -ifneq (,$(filter Windows%,$(OS))) -EXT =.exe -else -EXT = -endif - # OS X linker doesn't support -soname, and use different extension # see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html ifeq ($(shell uname), Darwin) @@ -1,6 +1,12 @@ +r125: +Changed : endian and alignment code +Fixed : some alignment warnings under clang + r124: -Fix : LZ4F_compressBound() using NULL preferencesPtr -Updated : xxHash, to r37 +New : LZ4 HC streaming mode +Fixed : LZ4F_compressBound() using null preferencesPtr +Updated : xxHash to r38 +Updated library number, to 1.4.0 r123: Added : experimental lz4frame API, thanks to Takayuki Matsuoka and Christopher Jackson for testings diff --git a/examples/Makefile b/examples/Makefile index 4474f59..df24ea9 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -32,7 +32,7 @@ CC := $(CC) CFLAGS ?= -O3 -CFLAGS += -std=c99 -Wall -Wextra -Wundef -Wshadow -Wstrict-prototypes -Wno-missing-braces # Wno-missing-braces required due to GCC <4.8.3 bug +CFLAGS += -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wstrict-prototypes -Wno-missing-braces # Wno-missing-braces required due to GCC <4.8.3 bug FLAGS = -I.. 
$(CPPFLAGS) $(CFLAGS) $(LDFLAGS) TESTFILE= Makefile @@ -31,82 +31,73 @@ - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ + /************************************** Tuning parameters **************************************/ /* * HEAPMODE : * Select how default compression functions will allocate memory for their hash table, - * in memory stack (0:default, fastest), or in memory heap (1:requires memory allocation (malloc)). + * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()). */ #define HEAPMODE 0 +/* + * CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS : + * By default, the source code expects the compiler to correctly optimize + * 4-bytes and 8-bytes read on architectures able to handle it efficiently. + * This is not always the case. In some circumstances (ARM notably), + * the compiler will issue cautious code even when target is able to correctly handle unaligned memory accesses. + * + * You can force the compiler to use unaligned memory access by uncommenting the line below. + * One of the below scenarios will happen : + * 1 - Your target CPU correctly handles unaligned access, and was not well optimized by compiler (good case). + * You will witness large performance improvements (+50% and up). + * Keep the line uncommented and send a word to upstream (https://groups.google.com/forum/#!forum/lz4c) + * The goal is to automatically detect such situations by adding your target CPU within an exception list. + * 2 - Your target CPU correctly handles unaligned access, and was already optimized by compiler + * No change will be experienced. + * 3 - Your target CPU inefficiently handles unaligned access. + * You will experience a performance loss. Comment back the line. + * 4 - Your target CPU does not handle unaligned access. + * Program will crash. 
+ * If uncommenting results in better performance (case 1) + * please report your configuration to upstream (https://groups.google.com/forum/#!forum/lz4c) + * An automatic detection macro will be added to match your case within future versions of the library. + */ +/* #define CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS 1 */ + /************************************** CPU Feature Detection **************************************/ -/* 32 or 64 bits ? */ -#if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \ - || defined(__64BIT__) || defined(__mips64) \ - || defined(__powerpc64__) || defined(__powerpc64le__) \ - || defined(__ppc64__) || defined(__ppc64le__) \ - || defined(__PPC64__) || defined(__PPC64LE__) \ - || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) \ - || defined(__s390x__) ) /* Detects 64 bits mode */ -# define LZ4_ARCH64 1 -#else -# define LZ4_ARCH64 0 -#endif -#define LZ4_32BITS (sizeof(void*)==4) -#define LZ4_64BITS (sizeof(void*)==8) - /* - * Little Endian or Big Endian ? 
- * Overwrite the #define below if you know your architecture endianess + * Automated efficient unaligned memory access detection + * Based on known hardware architectures + * This list will be updated thanks to feedbacks */ -#include <stdlib.h> /* Apparently required to detect endianess */ -#if defined (__GLIBC__) -# include <endian.h> -# if (__BYTE_ORDER == __BIG_ENDIAN) -# define LZ4_BIG_ENDIAN 1 -# endif -#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) -# define LZ4_BIG_ENDIAN 1 -#elif defined(__sparc) || defined(__sparc__) \ - || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \ - || defined(__hpux) || defined(__hppa) \ - || defined(_MIPSEB) || defined(__s390__) -# define LZ4_BIG_ENDIAN 1 +#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \ + || defined(__ARM_FEATURE_UNALIGNED) \ + || defined(__i386__) || defined(__x86_64__) \ + || defined(_M_IX86) || defined(_M_X64) \ + || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \ + || (defined(_M_ARM) && (_M_ARM >= 7)) +# define LZ4_UNALIGNED_ACCESS 1 #else -/* Little Endian assumed. PDP Endian and other very rare endian format are unsupported. */ +# define LZ4_UNALIGNED_ACCESS 0 #endif /* - * Unaligned memory access is automatically enabled for "common" CPU, such as x86. 
- * For others CPU, such as ARM, the compiler may be more cautious, inserting unnecessary extra code to ensure aligned access property - * If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance + * LZ4_FORCE_SW_BITCOUNT + * Define this parameter if your target system or compiler does not support hardware bit count */ -#if defined(__ARM_FEATURE_UNALIGNED) -# define LZ4_FORCE_UNALIGNED_ACCESS 1 -#endif - -/* Define this parameter if your target system or compiler does not support hardware bit count */ #if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for Windows CE does not support Hardware bit count */ # define LZ4_FORCE_SW_BITCOUNT #endif -/* - * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE : - * This option may provide a small boost to performance for some big endian cpu, although probably modest. - * You may set this option to 1 if data will remain within closed environment. - * This option is useless on Little_Endian CPU (such as x86) - */ - -/* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */ - /************************************** - Compiler Options + Compiler Options **************************************/ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ /* "restrict" is a known keyword */ @@ -116,14 +107,7 @@ #ifdef _MSC_VER /* Visual Studio */ # define FORCE_INLINE static __forceinline -# include <intrin.h> /* For Visual 2005 */ -# if LZ4_ARCH64 /* 64-bits */ -# pragma intrinsic(_BitScanForward64) /* For Visual 2005 */ -# pragma intrinsic(_BitScanReverse64) /* For Visual 2005 */ -# else /* 32-bits */ -# pragma intrinsic(_BitScanForward) /* For Visual 2005 */ -# pragma intrinsic(_BitScanReverse) /* For Visual 2005 */ -# endif +# include <intrin.h> # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ #else # ifdef __GNUC__ @@ -133,12 +117,6 @@ # endif #endif -#ifdef _MSC_VER /* Visual Studio */ -# define lz4_bswap16(x) 
_byteswap_ushort(x) -#else -# define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) -#endif - #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) @@ -185,46 +163,130 @@ typedef unsigned long long U64; #endif -#if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS) -# define _PACKED __attribute__ ((packed)) -#else -# define _PACKED -#endif -#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) -# if defined(__IBMC__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) -# pragma pack(1) -# else -# pragma pack(push, 1) -# endif -#endif +/************************************** + Reading and writing into memory +**************************************/ +#define STEPSIZE sizeof(size_t) -typedef struct { U16 v; } _PACKED U16_S; -typedef struct { U32 v; } _PACKED U32_S; -typedef struct { U64 v; } _PACKED U64_S; -typedef struct {size_t v;} _PACKED size_t_S; +static unsigned LZ4_64bits(void) { return sizeof(void*)==8; } + +static unsigned LZ4_isLittleEndian(void) +{ + const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + + +static U16 LZ4_readLE16(const void* memPtr) +{ + if ((LZ4_UNALIGNED_ACCESS) && (LZ4_isLittleEndian())) + return *(U16*)memPtr; + else + { + const BYTE* p = memPtr; + return (U16)((U16)p[0] + (p[1]<<8)); + } +} + +static void LZ4_writeLE16(void* memPtr, U16 value) +{ + if ((LZ4_UNALIGNED_ACCESS) && (LZ4_isLittleEndian())) + { + *(U16*)memPtr = value; + return; + } + else + { + BYTE* p = memPtr; + p[0] = (BYTE) value; + p[1] = (BYTE)(value>>8); + } +} -#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) -# if defined(__SUNPRO_C) || defined(__SUNPRO_CC) -# pragma pack(0) -# else -# pragma pack(pop) -# endif -#endif -#define A16(x) (((U16_S *)(x))->v) -#define A32(x) (((U32_S *)(x))->v) -#define A64(x) (((U64_S *)(x))->v) -#define AARCH(x) (((size_t_S *)(x))->v) 
+static U16 LZ4_read16(const void* memPtr) +{ + if (LZ4_UNALIGNED_ACCESS) + return *(U16*)memPtr; + else + { + U16 val16; + memcpy(&val16, memPtr, 2); + return val16; + } +} + +static U32 LZ4_read32(const void* memPtr) +{ + if (LZ4_UNALIGNED_ACCESS) + return *(U32*)memPtr; + else + { + U32 val32; + memcpy(&val32, memPtr, 4); + return val32; + } +} + +static U64 LZ4_read64(const void* memPtr) +{ + if (LZ4_UNALIGNED_ACCESS) + return *(U64*)memPtr; + else + { + U64 val64; + memcpy(&val64, memPtr, 8); + return val64; + } +} + +static size_t LZ4_read_ARCH(const void* p) +{ + if (LZ4_64bits()) + return (size_t)LZ4_read64(p); + else + return (size_t)LZ4_read32(p); +} + + +static void LZ4_copy4(void* dstPtr, const void* srcPtr) +{ + if (LZ4_UNALIGNED_ACCESS) + { + *(U32*)dstPtr = *(U32*)srcPtr; + return; + } + memcpy(dstPtr, srcPtr, 4); +} + +static void LZ4_copy8(void* dstPtr, const void* srcPtr) +{ + if (LZ4_UNALIGNED_ACCESS) + { + if (LZ4_64bits()) + *(U64*)dstPtr = *(U64*)srcPtr; + else + ((U32*)dstPtr)[0] = ((U32*)srcPtr)[0], + ((U32*)dstPtr)[1] = ((U32*)srcPtr)[1]; + return; + } + memcpy(dstPtr, srcPtr, 8); +} + +/* customized version of memcpy, which may overwrite up to 7 bytes beyond dstEnd */ +static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd) +{ + BYTE* d = dstPtr; + const BYTE* s = srcPtr; + BYTE* e = dstEnd; + do { LZ4_copy8(d,s); d+=8; s+=8; } while (d<e); +} /************************************** - Constants + Common Constants **************************************/ -#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2) -#define HASHTABLESIZE (1 << LZ4_MEMORY_USAGE) -#define HASH_SIZE_U32 (1 << LZ4_HASHLOG) - #define MINMATCH 4 #define COPYLENGTH 8 @@ -232,13 +294,10 @@ typedef struct {size_t v;} _PACKED size_t_S; #define MFLIMIT (COPYLENGTH+MINMATCH) static const int LZ4_minLength = (MFLIMIT+1); -#define KB *(1U<<10) -#define MB *(1U<<20) +#define KB *(1 <<10) +#define MB *(1 <<20) #define GB *(1U<<30) -#define LZ4_64KLIMIT ((64 KB) + (MFLIMIT-1)) 
-#define SKIPSTRENGTH 6 /* Increasing this value will make the compression run slower on incompressible data */ - #define MAXD_LOG 16 #define MAX_DISTANCE ((1 << MAXD_LOG) - 1) @@ -249,130 +308,150 @@ static const int LZ4_minLength = (MFLIMIT+1); /************************************** - Structures and local types + Common Utils **************************************/ -typedef struct { - U32 hashTable[HASH_SIZE_U32]; - U32 currentOffset; - U32 initCheck; - const BYTE* dictionary; - const BYTE* bufferStart; - U32 dictSize; -} LZ4_stream_t_internal; +#define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ -typedef enum { notLimited = 0, limitedOutput = 1 } limitedOutput_directive; -typedef enum { byPtr, byU32, byU16 } tableType_t; -typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive; -typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; +/******************************** + Common functions +********************************/ +static unsigned LZ4_NbCommonBytes (register size_t val) +{ + if (LZ4_isLittleEndian()) + { + if (LZ4_64bits()) + { +# if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanForward64( &r, (U64)val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } + else /* 32 bits */ + { +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r; + _BitScanForward( &r, (U32)val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && 
!defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } + else /* Big Endian CPU */ + { + if (LZ4_64bits()) + { +# if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanReverse64( &r, val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + if (!(val>>32)) { r=4; } else { r=0; val>>=32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } + else /* 32 bits */ + { +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanReverse( &r, (unsigned long)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_clz(val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } + } +} -typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; -typedef enum { full = 0, partial = 1 } earlyEnd_directive; +static unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) +{ + const BYTE* const pStart = pIn; + + while (likely(pIn<pInLimit-(STEPSIZE-1))) + { + size_t diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); + if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; } + pIn += LZ4_NbCommonBytes(diff); + return (unsigned)(pIn - pStart); + } + if (LZ4_64bits()) if ((pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; } + if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; } + if ((pIn<pInLimit) && (*pMatch == 
*pIn)) pIn++; + + return (unsigned)(pIn - pStart); +} +#ifndef LZ4_COMMONDEFS_ONLY /************************************** - Architecture-specific macros + Local Common Constants **************************************/ -#define STEPSIZE sizeof(size_t) -#define LZ4_COPYSTEP(d,s) { AARCH(d) = AARCH(s); d+=STEPSIZE; s+=STEPSIZE; } -#define LZ4_COPY8(d,s) { LZ4_COPYSTEP(d,s); if (STEPSIZE<8) LZ4_COPYSTEP(d,s); } - -#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE)) -# define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; } -# define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; } -#else /* Little Endian */ -# define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); } -# define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; } -#endif +#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2) +#define HASHTABLESIZE (1 << LZ4_MEMORY_USAGE) +#define HASH_SIZE_U32 (1 << LZ4_HASHLOG) + +#define LZ4_64KLIMIT ((64 KB) + (MFLIMIT-1)) +#define SKIPSTRENGTH 6 /* Increasing this value will make the compression run slower on incompressible data */ + +#define MAXD_LOG 16 +#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) /************************************** - Macros + Local Utils **************************************/ -#define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ -#if LZ4_ARCH64 || !defined(__GNUC__) -# define LZ4_WILDCOPY(d,s,e) { do { LZ4_COPY8(d,s) } while (d<e); } /* at the end, d>=e; */ -#else -# define LZ4_WILDCOPY(d,s,e) { if (likely(e-d <= 8)) LZ4_COPY8(d,s) else do { LZ4_COPY8(d,s) } while (d<e); } -#endif +int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; } +int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } -/**************************** - Private local functions -****************************/ -#if LZ4_ARCH64 +/************************************** + Local Structures and types 
+**************************************/ +typedef struct { + U32 hashTable[HASH_SIZE_U32]; + U32 currentOffset; + U32 initCheck; + const BYTE* dictionary; + const BYTE* bufferStart; + U32 dictSize; +} LZ4_stream_t_internal; -static int LZ4_NbCommonBytes (register U64 val) -{ -# if defined(LZ4_BIG_ENDIAN) -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse64( &r, val ); - return (int)(r>>3); -# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clzll(val) >> 3); -# else - int r; - if (!(val>>32)) { r=4; } else { r=0; val>>=32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; -# endif -# else -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanForward64( &r, val ); - return (int)(r>>3); -# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctzll(val) >> 3); -# else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; -# endif -# endif -} +typedef enum { notLimited = 0, limitedOutput = 1 } limitedOutput_directive; +typedef enum { byPtr, byU32, byU16 } tableType_t; -#else +typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive; +typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; -static int LZ4_NbCommonBytes (register U32 val) -{ -# if defined(LZ4_BIG_ENDIAN) -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse( &r, val ); - return (int)(r>>3); -# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clz(val) >> 3); -# else - int r; - if (!(val>>16)) { 
r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; -# endif -# else -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r; - _BitScanForward( &r, val ); - return (int)(r>>3); -# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctz(val) >> 3); -# else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif -# endif -} +typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; +typedef enum { full = 0, partial = 1 } earlyEnd_directive; -#endif /******************************** Compression functions ********************************/ -int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; } -int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } -static int LZ4_hashSequence(U32 sequence, tableType_t tableType) +static U32 LZ4_hashSequence(U32 sequence, tableType_t tableType) { if (tableType == byU16) return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); @@ -380,15 +459,15 @@ static int LZ4_hashSequence(U32 sequence, tableType_t tableType) return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); } -static int LZ4_hashPosition(const BYTE* p, tableType_t tableType) { return LZ4_hashSequence(A32(p), tableType); } +static U32 LZ4_hashPosition(const BYTE* p, tableType_t tableType) { return LZ4_hashSequence(LZ4_read32(p), tableType); } static void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase) { switch (tableType) { - case byPtr: { const BYTE** hashTable = (const BYTE**) tableBase; hashTable[h] = p; break; } - case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); break; } - case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); break; } + case 
byPtr: { const BYTE** hashTable = (const BYTE**) tableBase; hashTable[h] = p; return; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; } + case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; } } } @@ -411,32 +490,12 @@ static const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t t return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); } -static unsigned LZ4_count(const BYTE* pIn, const BYTE* pRef, const BYTE* pInLimit) -{ - const BYTE* const pStart = pIn; - - while (likely(pIn<pInLimit-(STEPSIZE-1))) - { - size_t diff = AARCH(pRef) ^ AARCH(pIn); - if (!diff) { pIn+=STEPSIZE; pRef+=STEPSIZE; continue; } - pIn += LZ4_NbCommonBytes(diff); - return (unsigned)(pIn - pStart); - } - if (LZ4_64BITS) if ((pIn<(pInLimit-3)) && (A32(pRef) == A32(pIn))) { pIn+=4; pRef+=4; } - if ((pIn<(pInLimit-1)) && (A16(pRef) == A16(pIn))) { pIn+=2; pRef+=2; } - if ((pIn<pInLimit) && (*pRef == *pIn)) pIn++; - - return (unsigned)(pIn - pStart); -} - - static int LZ4_compress_generic( void* ctx, const char* source, char* dest, int inputSize, int maxOutputSize, - limitedOutput_directive outputLimited, tableType_t tableType, dict_directive dict, @@ -526,7 +585,7 @@ static int LZ4_compress_generic( } while ( ((dictIssue==dictSmall) ? (ref < lowRefLimit) : 0) || ((tableType==byU16) ? 
0 : (ref + MAX_DISTANCE < ip)) - || (A32(ref+refDelta) != A32(ip)) ); + || (LZ4_read32(ref+refDelta) != LZ4_read32(ip)) ); } /* Catch up */ @@ -548,12 +607,13 @@ static int LZ4_compress_generic( else *token = (BYTE)(litLength<<ML_BITS); /* Copy Literals */ - { BYTE* end = op+litLength; LZ4_WILDCOPY(op,anchor,end); op=end; } + LZ4_wildCopy(op, anchor, op+litLength); + op+=litLength; } _next_match: /* Encode Offset */ - LZ4_WRITE_LITTLEENDIAN_16(op, (U16)(ip-ref)); + LZ4_writeLE16(op, (U16)(ip-ref)); op+=2; /* Encode MatchLength */ { @@ -619,7 +679,7 @@ _next_match: LZ4_putPosition(ip, ctx, tableType, base); if ( ((dictIssue==dictSmall) ? (ref>=lowRefLimit) : 1) && (ref+MAX_DISTANCE>=ip) - && (A32(ref+refDelta)==A32(ip)) ) + && (LZ4_read32(ref+refDelta)==LZ4_read32(ip)) ) { token=op++; *token=0; goto _next_match; } /* Prepare next loop */ @@ -646,16 +706,16 @@ _last_literals: int LZ4_compress(const char* source, char* dest, int inputSize) { #if (HEAPMODE) - void* ctx = ALLOCATOR(LZ4_STREAMSIZE_U32, 4); /* Aligned on 4-bytes boundaries */ + void* ctx = ALLOCATOR(LZ4_STREAMSIZE_U64, 8); /* Aligned on 8-bytes boundaries */ #else - U32 ctx[LZ4_STREAMSIZE_U32] = {0}; /* Ensure data is aligned on 4-bytes boundaries */ + U64 ctx[LZ4_STREAMSIZE_U64] = {0}; /* Ensure data is aligned on 8-bytes boundaries */ #endif int result; if (inputSize < (int)LZ4_64KLIMIT) result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, byU16, noDict, noDictIssue); else - result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, LZ4_64BITS ? byU32 : byPtr, noDict, noDictIssue); + result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, LZ4_64bits() ? 
byU32 : byPtr, noDict, noDictIssue); #if (HEAPMODE) FREEMEM(ctx); @@ -666,16 +726,16 @@ int LZ4_compress(const char* source, char* dest, int inputSize) int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) { #if (HEAPMODE) - void* ctx = ALLOCATOR(LZ4_STREAMSIZE_U32, 4); /* Aligned on 4-bytes boundaries */ + void* ctx = ALLOCATOR(LZ4_STREAMSIZE_U64, 8); /* Aligned on 8-bytes boundaries */ #else - U32 ctx[LZ4_STREAMSIZE_U32] = {0}; /* Ensure data is aligned on 4-bytes boundaries */ + U64 ctx[LZ4_STREAMSIZE_U64] = {0}; /* Ensure data is aligned on 8-bytes boundaries */ #endif int result; if (inputSize < (int)LZ4_64KLIMIT) result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue); else - result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64BITS ? byU32 : byPtr, noDict, noDictIssue); + result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? 
byU32 : byPtr, noDict, noDictIssue); #if (HEAPMODE) FREEMEM(ctx); @@ -700,7 +760,7 @@ void LZ4_resetStream (LZ4_stream_t* LZ4_stream) LZ4_stream_t* LZ4_createStream(void) { - LZ4_stream_t* lz4s = (LZ4_stream_t*)ALLOCATOR(4, LZ4_STREAMSIZE_U32); + LZ4_stream_t* lz4s = (LZ4_stream_t*)ALLOCATOR(8, LZ4_STREAMSIZE_U64); LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal)); /* A compilation error here means LZ4_STREAMSIZE is not large enough */ LZ4_resetStream(lz4s); return lz4s; @@ -956,14 +1016,16 @@ FORCE_INLINE int LZ4_decompress_generic( op += length; break; /* Necessarily EOF, due to parsing restrictions */ } - LZ4_WILDCOPY(op, ip, cpy); ip -= (op-cpy); op = cpy; + LZ4_wildCopy(op, ip, cpy); + ip += length; op = cpy; /* get offset */ - LZ4_READ_LITTLEENDIAN_16(match,cpy,ip); ip+=2; + match = cpy - LZ4_readLE16(ip); ip+=2; if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error; /* Error : offset outside destination buffer */ /* get matchlength */ - if ((length=(token&ML_MASK)) == ML_MASK) + length = token & ML_MASK; + if (length == ML_MASK) { unsigned s; do @@ -1012,7 +1074,7 @@ FORCE_INLINE int LZ4_decompress_generic( /* copy repeated sequence */ cpy = op + length; - if (unlikely((op-match)<(int)STEPSIZE)) + if (unlikely((op-match)<8)) { const size_t dec64 = dec64table[op-match]; op[0] = match[0]; @@ -1020,17 +1082,23 @@ FORCE_INLINE int LZ4_decompress_generic( op[2] = match[2]; op[3] = match[3]; match += dec32table[op-match]; - A32(op+4) = A32(match); + LZ4_copy4(op+4, match); op += 8; match -= dec64; - } else { LZ4_COPY8(op,match); } + } else { LZ4_copy8(op, match); op+=8; match+=8; } if (unlikely(cpy>oend-12)) { - if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last 5 bytes must be literals */ - if (op<oend-COPYLENGTH) LZ4_WILDCOPY(op, match, (oend-COPYLENGTH)); - while(op<cpy) *op++=*match++; + if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last LASTLITERALS bytes must be literals */ + if (op < oend-8) + { + 
LZ4_wildCopy(op, match, oend-8); + match += (oend-8) - op; + op = oend-8; + } + while (op<cpy) *op++ = *match++; } - else LZ4_WILDCOPY(op, match, cpy); + else + LZ4_wildCopy(op, match, cpy); op=cpy; /* correction */ } @@ -1079,7 +1147,7 @@ typedef struct */ LZ4_streamDecode_t* LZ4_createStreamDecode(void) { - LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOCATOR(sizeof(U32), LZ4_STREAMDECODESIZE_U32); + LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOCATOR(sizeof(U64), LZ4_STREAMDECODESIZE_U64); return lz4s; } @@ -1241,7 +1309,7 @@ int LZ4_resetStreamState(void* state, const char* inputBuffer) void* LZ4_create (const char* inputBuffer) { - void* lz4ds = ALLOCATOR(4, LZ4_STREAMSIZE_U32); + void* lz4ds = ALLOCATOR(8, LZ4_STREAMSIZE_U64); LZ4_init ((LZ4_stream_t_internal*)lz4ds, (const BYTE*)inputBuffer); return lz4ds; } @@ -1267,7 +1335,7 @@ int LZ4_compress_withState (void* state, const char* source, char* dest, int inp if (inputSize < (int)LZ4_64KLIMIT) return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, byU16, noDict, noDictIssue); else - return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, LZ4_64BITS ? byU32 : byPtr, noDict, noDictIssue); + return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue); } int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize) @@ -1278,7 +1346,7 @@ int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* if (inputSize < (int)LZ4_64KLIMIT) return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue); else - return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64BITS ? byU32 : byPtr, noDict, noDictIssue); + return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? 
byU32 : byPtr, noDict, noDictIssue); } /* Obsolete streaming decompression functions */ @@ -1292,3 +1360,6 @@ int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int origin { return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB); } + +#endif /* LZ4_COMMONDEFS_ONLY */ + @@ -48,7 +48,7 @@ extern "C" { **************************************/ #define LZ4_VERSION_MAJOR 1 /* for major interface/format changes */ #define LZ4_VERSION_MINOR 4 /* for minor interface/format changes */ -#define LZ4_VERSION_RELEASE 0 /* for tweaks, bug-fixes, or development */ +#define LZ4_VERSION_RELEASE 1 /* for tweaks, bug-fixes, or development */ #define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE) int LZ4_versionNumber (void); @@ -169,17 +169,17 @@ int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedS /*********************************************** - Experimental Streaming Compression Functions + Streaming Compression Functions ***********************************************/ -#define LZ4_STREAMSIZE_U32 ((1 << (LZ4_MEMORY_USAGE-2)) + 8) -#define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U32 * sizeof(unsigned int)) +#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4) +#define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U64 * sizeof(long long)) /* * LZ4_stream_t * information structure to track an LZ4 stream. * important : init this structure content before first use ! 
*/ -typedef struct { unsigned int table[LZ4_STREAMSIZE_U32]; } LZ4_stream_t; +typedef struct { long long table[LZ4_STREAMSIZE_U64]; } LZ4_stream_t; /* * LZ4_resetStream @@ -231,20 +231,17 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_streamPtr, char* safeBuffer, int dictSize); /************************************************ - Experimental Streaming Decompression Functions + Streaming Decompression Functions ************************************************/ -#define LZ4_STREAMDECODESIZE_U32 8 -#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U32 * sizeof(unsigned int)) +#define LZ4_STREAMDECODESIZE_U64 4 +#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long)) +typedef struct { unsigned long long table[LZ4_STREAMDECODESIZE_U64]; } LZ4_streamDecode_t; /* * LZ4_streamDecode_t * information structure to track an LZ4 stream. - * important : init this structure content using LZ4_setStreamDecode or memset() before first use ! - */ -typedef struct { unsigned int table[LZ4_STREAMDECODESIZE_U32]; } LZ4_streamDecode_t; - -/* - * If you prefer dynamic allocation methods, + * init this structure content using LZ4_setStreamDecode or memset() before first use ! + * If you prefer dynamic allocation methods : * LZ4_createStreamDecode will allocate and initialize an LZ4_streamDecode_t structure * LZ4_freeStreamDecode releases its memory. */ @@ -254,9 +251,7 @@ int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream); /* * LZ4_setStreamDecode * Use this function to instruct where to find the dictionary. - * This function can be used to specify a static dictionary, - * or to instruct where to find some previously decoded data saved into a different memory space. - * Setting a size of 0 is allowed (same effect as no dictionary, same effect as reset). + * Setting a size of 0 is allowed (same effect as reset). 
* Return : 1 if OK, 0 if error */ int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize); @@ -277,7 +272,7 @@ Advanced decoding functions : *_usingDict() : These decoding functions work the same as a combination of LZ4_setDictDecode() followed by LZ4_decompress_x_continue() - They don't use nor update an LZ4_streamDecode_t structure. + They are stand-alone and don't use nor update an LZ4_streamDecode_t structure. */ int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize); int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize); @@ -294,18 +289,10 @@ They are only provided here for compatibility with older user programs. - LZ4_uncompress is the same as LZ4_decompress_fast - LZ4_uncompress_unknownOutputSize is the same as LZ4_decompress_safe These function prototypes are now disabled; uncomment them if you really need them. -It is highly recommended to stop using these functions and migrated to newer ones */ +It is highly recommended to stop using these functions and migrate to newer ones */ /* int LZ4_uncompress (const char* source, char* dest, int outputSize); */ /* int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); */ -/* - * If you prefer dynamic allocation methods, - * LZ4_createStreamDecode() - * provides a pointer (void*) towards an initialized LZ4_streamDecode_t structure. - * LZ4_free just frees it. 
- */ -/* void* LZ4_createStreamDecode(void); */ -/*int LZ4_free (void* LZ4_stream); yes, it's the same one as for compression */ /* Obsolete streaming functions; use new streaming interface whenever possible */ void* LZ4_create (const char* inputBuffer); @@ -34,223 +34,66 @@ You can contact the author at : /************************************** -Tuning Parameter + Tuning Parameter **************************************/ #define LZ4HC_DEFAULT_COMPRESSIONLEVEL 8 /************************************** -Memory routines + Includes **************************************/ -#include <stdlib.h> /* calloc, free */ -#define ALLOCATOR(s) calloc(1,s) -#define FREEMEM free -#include <string.h> /* memset, memcpy */ -#define MEM_INIT memset - - -/************************************** -CPU Feature Detection -**************************************/ -/* 32 or 64 bits ? */ -#if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \ - || defined(__64BIT__) || defined(__mips64) \ - || defined(__powerpc64__) || defined(__powerpc64le__) \ - || defined(__ppc64__) || defined(__ppc64le__) \ - || defined(__PPC64__) || defined(__PPC64LE__) \ - || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) \ - || defined(__s390x__) ) /* Detects 64 bits mode */ -# define LZ4_ARCH64 1 -#else -# define LZ4_ARCH64 0 -#endif - -/* -* Little Endian or Big Endian ? 
-* Overwrite the #define below if you know your architecture endianess -*/ -#include <stdlib.h> /* Apparently required to detect endianess */ -#if defined (__GLIBC__) -# include <endian.h> -# if (__BYTE_ORDER == __BIG_ENDIAN) -# define LZ4_BIG_ENDIAN 1 -# endif -#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) -# define LZ4_BIG_ENDIAN 1 -#elif defined(__sparc) || defined(__sparc__) \ - || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \ - || defined(__hpux) || defined(__hppa) \ - || defined(_MIPSEB) || defined(__s390__) -# define LZ4_BIG_ENDIAN 1 -#else -/* Little Endian assumed. PDP Endian and other very rare endian format are unsupported. */ -#endif - -/* -* Unaligned memory access is automatically enabled for "common" CPU, such as x86. -* For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected -* If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance -*/ -#if defined(__ARM_FEATURE_UNALIGNED) -# define LZ4_FORCE_UNALIGNED_ACCESS 1 -#endif - -/* Define this parameter if your target system or compiler does not support hardware bit count */ -#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for Windows CE does not support Hardware bit count */ -# define LZ4_FORCE_SW_BITCOUNT -#endif +#include "lz4.h" +#include "lz4hc.h" /************************************** -Compiler Options + Local Compiler Options **************************************/ -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ -/* "restrict" is a known keyword */ -#else -# define restrict /* Disable restrict */ +#if defined(__GNUC__) +# pragma GCC diagnostic ignored "-Wunused-function" #endif -#ifdef _MSC_VER /* Visual Studio */ -# define FORCE_INLINE static __forceinline -# include <intrin.h> /* For Visual 2005 
*/ -# if LZ4_ARCH64 /* 64-bits */ -# pragma intrinsic(_BitScanForward64) /* For Visual 2005 */ -# pragma intrinsic(_BitScanReverse64) /* For Visual 2005 */ -# else /* 32-bits */ -# pragma intrinsic(_BitScanForward) /* For Visual 2005 */ -# pragma intrinsic(_BitScanReverse) /* For Visual 2005 */ -# endif -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# pragma warning(disable : 4701) /* disable: C4701: potentially uninitialized local variable used */ -#else -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif +#if defined (__clang__) +# pragma clang diagnostic ignored "-Wunused-function" #endif -#ifdef _MSC_VER /* Visual Studio */ -# define lz4_bswap16(x) _byteswap_ushort(x) -#else -# define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) +#if defined(_MSC_VER) /* Visual Studio */ +# pragma warning(disable : 4201) /* disable: C4201: unnamed struct/union*/ #endif /************************************** -Includes -**************************************/ -#include "lz4hc.h" -#include "lz4.h" - - -/************************************** -Basic Types + Common LZ4 definition **************************************/ -#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ -# include <stdint.h> -typedef uint8_t BYTE; -typedef uint16_t U16; -typedef uint32_t U32; -typedef int32_t S32; -typedef uint64_t U64; -#else -typedef unsigned char BYTE; -typedef unsigned short U16; -typedef unsigned int U32; -typedef signed int S32; -typedef unsigned long long U64; -#endif - -#if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS) -# define _PACKED __attribute__ ((packed)) -#else -# define _PACKED -#endif - -#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) -# ifdef __IBMC__ -# pragma pack(1) -# else -# pragma pack(push, 1) -# endif -#endif - -typedef struct _U16_S { U16 v; } _PACKED 
U16_S; -typedef struct _U32_S { U32 v; } _PACKED U32_S; -typedef struct _U64_S { U64 v; } _PACKED U64_S; - -#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) -# pragma pack(pop) -#endif - -#define A64(x) (((U64_S *)(x))->v) -#define A32(x) (((U32_S *)(x))->v) -#define A16(x) (((U16_S *)(x))->v) +#define LZ4_COMMONDEFS_ONLY +#include "lz4.c" /************************************** -Constants + Local Constants **************************************/ -#define MINMATCH 4 - #define DICTIONARY_LOGSIZE 16 #define MAXD (1<<DICTIONARY_LOGSIZE) #define MAXD_MASK ((U32)(MAXD - 1)) -#define MAX_DISTANCE (MAXD - 1) #define HASH_LOG (DICTIONARY_LOGSIZE-1) #define HASHTABLESIZE (1 << HASH_LOG) #define HASH_MASK (HASHTABLESIZE - 1) -#define ML_BITS 4 -#define ML_MASK (size_t)((1U<<ML_BITS)-1) -#define RUN_BITS (8-ML_BITS) -#define RUN_MASK ((1U<<RUN_BITS)-1) - -#define COPYLENGTH 8 -#define LASTLITERALS 5 -#define MFLIMIT (COPYLENGTH+MINMATCH) -#define MINLENGTH (MFLIMIT+1) #define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) -#define KB *(1<<10) -#define MB *(1<<20) -#define GB *(1U<<30) - - -/************************************** -Architecture-specific macros -**************************************/ -#if LZ4_ARCH64 /* 64-bit */ -# define STEPSIZE 8 -# define LZ4_COPYSTEP(s,d) A64(d) = A64(s); d+=8; s+=8; -# define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d) -# define AARCH A64 -#else /* 32-bit */ -# define STEPSIZE 4 -# define LZ4_COPYSTEP(s,d) A32(d) = A32(s); d+=4; s+=4; -# define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d); -# define AARCH A32 -#endif - -#if defined(LZ4_BIG_ENDIAN) -# define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; } -# define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; } -#else /* Little Endian */ -# define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); } -# define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; } -#endif - 
/************************************** - Local Types + Local Types **************************************/ typedef struct { - U32 hashTable[HASHTABLESIZE]; + union { + U64 alignedOn8Bytes; /* force 8-bytes alignment on 32-bits systems */ + U32 hashTable[HASHTABLESIZE]; + }; U16 chainTable[MAXD]; - const BYTE* end; /* next block here to keep current prefix as prefix */ + const BYTE* end; /* next block here to continue on current prefix */ const BYTE* base; /* All index relative to this position */ const BYTE* dictBase; /* alternate base for extDict */ U32 dictLimit; /* below that point, need extDict */ @@ -262,86 +105,19 @@ typedef struct /************************************** - Macros + Local Macros **************************************/ -#define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(!!(c)) }; } /* Visual : use only *after* variable declarations */ -#define LZ4_WILDCOPY(s,d,e) do { LZ4_COPYPACKET(s,d) } while (d<e); -#define LZ4_BLINDCOPY(s,d,l) { BYTE* e=d+l; LZ4_WILDCOPY(s,d,e); d=e; } #define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG)) #define DELTANEXT(p) chainTable[(size_t)(p) & MAXD_MASK] #define GETNEXT(p) ((p) - (size_t)DELTANEXT(p)) -static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(A32(ptr)); } - -/************************************** -Private functions -**************************************/ -#if LZ4_ARCH64 - -FORCE_INLINE int LZ4_NbCommonBytes (register U64 val) -{ -#if defined(LZ4_BIG_ENDIAN) -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse64( &r, val ); - return (int)(r>>3); -# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clzll(val) >> 3); -# else - int r; - if (!(val>>32)) { r=4; } else { r=0; val>>=32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; -# endif -#else -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - 
unsigned long r = 0; - _BitScanForward64( &r, val ); - return (int)(r>>3); -# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctzll(val) >> 3); -# else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58]; -# endif -#endif -} - -#else - -FORCE_INLINE int LZ4_NbCommonBytes (register U32 val) -{ -#if defined(LZ4_BIG_ENDIAN) -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r; - _BitScanReverse( &r, val ); - return (int)(r>>3); -# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clz(val) >> 3); -# else - int r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; -# endif -#else -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r; - _BitScanForward( &r, val ); - return (int)(r>>3); -# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctz(val) >> 3); -# else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif -#endif -} +static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); } -#endif +/************************************** + HC Compression +**************************************/ static void LZ4HC_init (LZ4HC_Data_Structure* hc4, const BYTE* base) { MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable)); @@ -394,24 +170,6 @@ static void LZ4HC_setExternalDict(LZ4HC_Data_Structure* ctxPtr, const BYTE* newB } 
-static size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const p1Limit) -{ - const BYTE* const p1Start = p1; - - while (p1 <= p1Limit - STEPSIZE) - { - size_t diff = AARCH(p2) ^ AARCH(p1); - if (!diff) { p1+=STEPSIZE; p2+=STEPSIZE; continue; } - p1 += LZ4_NbCommonBytes(diff); - return (p1 - p1Start); - } - if (LZ4_ARCH64) if ((p1<(p1Limit-3)) && (A32(p2) == A32(p1))) { p1+=4; p2+=4; } - if ((p1<(p1Limit-1)) && (A16(p2) == A16(p1))) { p1+=2; p2+=2; } - if ((p1<p1Limit) && (*p2 == *p1)) p1++; - return (p1 - p1Start); -} - - FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, // Index table will be updated const BYTE* ip, const BYTE* const iLimit, const BYTE** matchpos, @@ -439,23 +197,23 @@ FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, // I { match = base + matchIndex; if (*(match+ml) == *(ip+ml) - && (A32(match) == A32(ip))) + && (LZ4_read32(match) == LZ4_read32(ip))) { - size_t mlt = LZ4HC_CommonLength(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + size_t mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; if (mlt > ml) { ml = mlt; *matchpos = match; } } } else { match = dictBase + matchIndex; - if (A32(match) == A32(ip)) + if (LZ4_read32(match) == LZ4_read32(ip)) { size_t mlt; const BYTE* vLimit = ip + (dictLimit - matchIndex); if (vLimit > iLimit) vLimit = iLimit; - mlt = LZ4HC_CommonLength(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; + mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; if ((ip+mlt == vLimit) && (vLimit < iLimit)) - mlt += LZ4HC_CommonLength(ip+mlt, base+dictLimit, iLimit); + mlt += LZ4_count(ip+mlt, base+dictLimit, iLimit); if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; } // virtual matchpos } } @@ -499,11 +257,11 @@ FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( { match = base + matchIndex; if (*(iLowLimit + longest) == *(match - delta + longest)) - if (A32(match) == A32(ip)) + if (LZ4_read32(match) == LZ4_read32(ip)) { const 
BYTE* startt = ip; const BYTE* tmpMatch = match; - const BYTE* const matchEnd = ip + MINMATCH + LZ4HC_CommonLength(ip+MINMATCH, match+MINMATCH, iHighLimit); + const BYTE* const matchEnd = ip + MINMATCH + LZ4_count(ip+MINMATCH, match+MINMATCH, iHighLimit); while ((startt>iLowLimit) && (tmpMatch > iLowLimit) && (startt[-1] == tmpMatch[-1])) {startt--; tmpMatch--;} @@ -518,15 +276,15 @@ FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( else { match = dictBase + matchIndex; - if (A32(match) == A32(ip)) + if (LZ4_read32(match) == LZ4_read32(ip)) { size_t mlt; int back=0; const BYTE* vLimit = ip + (dictLimit - matchIndex); if (vLimit > iHighLimit) vLimit = iHighLimit; - mlt = LZ4HC_CommonLength(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; + mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; if ((ip+mlt == vLimit) && (vLimit < iHighLimit)) - mlt += LZ4HC_CommonLength(ip+mlt, base+dictLimit, iHighLimit); + mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit); while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == match[back-1])) back--; mlt -= back; if ((int)mlt > longest) { longest = (int)mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; } @@ -565,10 +323,11 @@ FORCE_INLINE int LZ4HC_encodeSequence ( else *token = (BYTE)(length<<ML_BITS); /* Copy Literals */ - LZ4_BLINDCOPY(*anchor, *op, length); + LZ4_wildCopy(*op, *anchor, (*op) + length); + *op += length; /* Encode Offset */ - LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-match)); + LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2; /* Encode MatchLength */ length = (int)(matchLength-MINMATCH); @@ -955,7 +714,7 @@ int LZ4_resetStreamStateHC(void* state, const char* inputBuffer) void* LZ4_createHC (const char* inputBuffer) { - void* hc4 = ALLOCATOR(sizeof(LZ4HC_Data_Structure)); + void* hc4 = ALLOCATOR(1, sizeof(LZ4HC_Data_Structure)); LZ4HC_init ((LZ4HC_Data_Structure*)hc4, (const BYTE*)inputBuffer); return hc4; } @@ -105,9 +105,9 @@ They just use the externally allocated 
memory for state instead of allocating th /************************************** Experimental Streaming Functions **************************************/ -#define LZ4_STREAMHCSIZE_U32 65548 -#define LZ4_STREAMHCSIZE (LZ4_STREAMHCSIZE_U32 * sizeof(unsigned int)) -typedef struct { unsigned int table[LZ4_STREAMHCSIZE_U32]; } LZ4_streamHC_t; +#define LZ4_STREAMHCSIZE_U64 32774 +#define LZ4_STREAMHCSIZE (LZ4_STREAMHCSIZE_U64 * sizeof(unsigned long long)) +typedef struct { unsigned long long table[LZ4_STREAMHCSIZE_U64]; } LZ4_streamHC_t; /* This structure allows static allocation of LZ4 HC streaming state. diff --git a/programs/Makefile b/programs/Makefile index 1d7e17f..8a3ed95 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -30,7 +30,7 @@ # fullbench32: Same as fullbench, but forced to compile in 32-bits mode # ########################################################################## -RELEASE=r124 +RELEASE=r125 DESTDIR?= PREFIX ?= /usr @@ -113,9 +113,9 @@ ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU)) install: lz4 lz4c @echo Installing binaries @install -d -m 755 $(DESTDIR)$(BINDIR)/ $(DESTDIR)$(MANDIR)/ - @install -m 755 lz4 $(DESTDIR)$(BINDIR)/lz4 - @ln -sf lz4 $(DESTDIR)$(BINDIR)/lz4cat - @install -m 755 lz4c $(DESTDIR)$(BINDIR)/lz4c + @install -m 755 lz4$(EXT) $(DESTDIR)$(BINDIR)/lz4$(EXT) + @ln -sf lz4$(EXT) $(DESTDIR)$(BINDIR)/lz4cat + @install -m 755 lz4c$(EXT) $(DESTDIR)$(BINDIR)/lz4c$(EXT) @echo Installing man pages @install -m 644 lz4.1 $(DESTDIR)$(MANDIR)/lz4.1 @install -m 644 lz4c.1 $(DESTDIR)$(MANDIR)/lz4c.1 @@ -124,12 +124,12 @@ install: lz4 lz4c uninstall: rm -f $(DESTDIR)$(BINDIR)/lz4cat - [ -x $(DESTDIR)$(BINDIR)/lz4 ] && rm -f $(DESTDIR)$(BINDIR)/lz4 - [ -x $(DESTDIR)$(BINDIR)/lz4c ] && rm -f $(DESTDIR)$(BINDIR)/lz4c + [ -x $(DESTDIR)$(BINDIR)/lz4$(EXT) ] && rm -f $(DESTDIR)$(BINDIR)/lz4$(EXT) + [ -x $(DESTDIR)$(BINDIR)/lz4c$(EXT) ] && rm -f $(DESTDIR)$(BINDIR)/lz4c$(EXT) [ -f $(DESTDIR)$(MANDIR)/lz4.1 ] && rm -f 
$(DESTDIR)$(MANDIR)/lz4.1 [ -f $(DESTDIR)$(MANDIR)/lz4c.1 ] && rm -f $(DESTDIR)$(MANDIR)/lz4c.1 [ -f $(DESTDIR)$(MANDIR)/lz4cat.1 ] && rm -f $(DESTDIR)$(MANDIR)/lz4cat.1 - @echo lz4 successfully uninstalled + @echo lz4 programs successfully uninstalled test: test-lz4 test-lz4c test-frametest test-fullbench test-fuzzer test-mem @@ -193,7 +193,7 @@ test-mem: lz4 datagen fuzzer frametest ./datagen -g16MB > tmp valgrind --leak-check=yes ./lz4 -9 -B5D -f tmp /dev/null ./datagen -g256MB > tmp - valgrind --leak-check=yes ./lz4 -B4D -f tmp /dev/null + valgrind --leak-check=yes ./lz4 -B4D -f -vq tmp /dev/null rm tmp valgrind --leak-check=yes ./fuzzer -i50 -t0 valgrind --leak-check=yes ./frametest -i100 diff --git a/programs/frametest.c b/programs/frametest.c index c7e6f45..2a0c603 100644 --- a/programs/frametest.c +++ b/programs/frametest.c @@ -663,7 +663,7 @@ int main(int argc, char** argv) if (nbTests<=0) nbTests=1; - //if (testNb==0) result = basicTests(seed, ((double)proba) / 100); + if (testNb==0) result = basicTests(seed, ((double)proba) / 100); if (result) return 1; return fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100); } diff --git a/programs/fullbench.c b/programs/fullbench.c index cdf1d1d..647a458 100644 --- a/programs/fullbench.c +++ b/programs/fullbench.c @@ -454,7 +454,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) // Alloc chunkP = (struct chunkParameters*) malloc(((benchedSize / (size_t)chunkSize)+1) * sizeof(struct chunkParameters)); orig_buff = (char*) malloc((size_t)benchedSize); - nbChunks = (int) ((int)benchedSize / chunkSize) + 1; + nbChunks = (int) (((int)benchedSize + (chunkSize-1))/ chunkSize); maxCompressedChunkSize = LZ4_compressBound(chunkSize); compressedBuffSize = nbChunks * maxCompressedChunkSize; compressed_buff = (char*)malloc((size_t)compressedBuffSize); @@ -511,7 +511,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) size_t remaining = benchedSize; char* in = orig_buff; char* out = compressed_buff; - 
nbChunks = (int) ((int)benchedSize / chunkSize) + 1; + nbChunks = (int) (((int)benchedSize + (chunkSize-1))/ chunkSize); for (i=0; i<nbChunks; i++) { chunkP[i].id = i; @@ -593,6 +593,22 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) } // Prepare layout for decompression + // Init data chunks + { + int i; + size_t remaining = benchedSize; + char* in = orig_buff; + char* out = compressed_buff; + nbChunks = (int) (((int)benchedSize + (chunkSize-1))/ chunkSize); + for (i=0; i<nbChunks; i++) + { + chunkP[i].id = i; + chunkP[i].origBuffer = in; in += chunkSize; + if ((int)remaining > chunkSize) { chunkP[i].origSize = chunkSize; remaining -= chunkSize; } else { chunkP[i].origSize = (int)remaining; remaining = 0; } + chunkP[i].compressedBuffer = out; out += maxCompressedChunkSize; + chunkP[i].compressedSize = 0; + } + } for (chunkNb=0; chunkNb<nbChunks; chunkNb++) { chunkP[chunkNb].compressedSize = LZ4_compress(chunkP[chunkNb].origBuffer, chunkP[chunkNb].compressedBuffer, chunkP[chunkNb].origSize); diff --git a/programs/lz4cli.c b/programs/lz4cli.c index 2d612e7..6e52ec6 100644 --- a/programs/lz4cli.c +++ b/programs/lz4cli.c @@ -48,10 +48,6 @@ # pragma warning(disable : 4127) // disable: C4127: conditional expression is constant #endif -#ifdef __clang__ -# pragma clang diagnostic ignored "-Wunused-const-variable" // const variable one is really used ! 
-#endif - #define _FILE_OFFSET_BITS 64 // Large file support on 32-bits unix #define _POSIX_SOURCE 1 // for fileno() within <stdio.h> on unix @@ -113,7 +109,7 @@ //**************************** #define COMPRESSOR_NAME "LZ4 Compression CLI" #ifndef LZ4_VERSION -# define LZ4_VERSION "r122" +# define LZ4_VERSION "r125" #endif #define AUTHOR "Yann Collet" #define WELCOME_MESSAGE "*** %s %i-bits %s, by %s (%s) ***\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), LZ4_VERSION, AUTHOR, __DATE__ @@ -128,15 +124,6 @@ //************************************** -// Architecture Macros -//************************************** -static const int one = 1; -#define CPU_LITTLE_ENDIAN (*(char*)(&one)) -#define CPU_BIG_ENDIAN (!CPU_LITTLE_ENDIAN) -#define LITTLE_ENDIAN_32(i) (CPU_LITTLE_ENDIAN?(i):swap32(i)) - - -//************************************** // Macros //************************************** #define DISPLAY(...) fprintf(stderr, __VA_ARGS__) @@ -462,7 +449,7 @@ int main(int argc, char** argv) } DISPLAYLEVEL(3, WELCOME_MESSAGE); - DISPLAYLEVEL(4, "Blocks size : %i KB\n", blockSize>>10); + if (!decode) DISPLAYLEVEL(4, "Blocks size : %i KB\n", blockSize>>10); // No input filename ==> use stdin if(!input_filename) { input_filename=stdinmark; } diff --git a/programs/lz4io.c b/programs/lz4io.c index 3a84866..afaa59f 100644 --- a/programs/lz4io.c +++ b/programs/lz4io.c @@ -357,7 +357,7 @@ int LZ4IO_compressFilename_Legacy(char* input_filename, char* output_filename, i static void* LZ4IO_LZ4_createStream (const char* inputBuffer) { (void)inputBuffer; - return calloc(4, LZ4_STREAMSIZE_U32); + return calloc(8, LZ4_STREAMSIZE_U64); } static int LZ4IO_LZ4_compress_limitedOutput_continue (void* ctx, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel) @@ -84,11 +84,11 @@ You can contact the author at : // Modify the local functions below should you wish to use some other memory routines // for malloc(), free() #include <stdlib.h> -FORCE_INLINE void* 
XXH_malloc(size_t s) { return malloc(s); } -FORCE_INLINE void XXH_free (void* p) { free(p); } +static void* XXH_malloc(size_t s) { return malloc(s); } +static void XXH_free (void* p) { free(p); } // for memcpy() #include <string.h> -FORCE_INLINE void* XXH_memcpy(void* dest, const void* src, size_t size) +static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } @@ -221,28 +221,28 @@ static const int one = 1; //**************************** typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; -FORCE_INLINE U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align) +FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) { if (align==XXH_unaligned) return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr)); else - return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr); + return endian==XXH_littleEndian ? *(U32*)ptr : XXH_swap32(*(U32*)ptr); } -FORCE_INLINE U32 XXH_readLE32(const U32* ptr, XXH_endianess endian) +FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); } -FORCE_INLINE U64 XXH_readLE64_align(const U64* ptr, XXH_endianess endian, XXH_alignment align) +FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) { if (align==XXH_unaligned) return endian==XXH_littleEndian ? A64(ptr) : XXH_swap64(A64(ptr)); else - return endian==XXH_littleEndian ? *ptr : XXH_swap64(*ptr); + return endian==XXH_littleEndian ? 
*(U64*)ptr : XXH_swap64(*(U64*)ptr); } -FORCE_INLINE U64 XXH_readLE64(const U64* ptr, XXH_endianess endian) +FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) { return XXH_readLE64_align(ptr, endian, XXH_unaligned); } @@ -256,7 +256,7 @@ FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH const BYTE* p = (const BYTE*)input; const BYTE* bEnd = p + len; U32 h32; -#define XXH_get32bits(p) XXH_readLE32_align((const U32*)p, endian, align) +#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) #ifdef XXH_ACCEPT_NULL_INPUT_POINTER if (p==NULL) @@ -361,7 +361,7 @@ FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH const BYTE* p = (const BYTE*)input; const BYTE* bEnd = p + len; U64 h64; -#define XXH_get64bits(p) XXH_readLE64_align((const U64*)p, endian, align) +#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) #ifdef XXH_ACCEPT_NULL_INPUT_POINTER if (p==NULL) @@ -509,8 +509,8 @@ typedef struct U32 v2; U32 v3; U32 v4; + U32 mem32[4]; /* defined as U32 for alignment */ U32 memsize; - char memory[16]; } XXH_istate32_t; typedef struct @@ -521,8 +521,8 @@ typedef struct U64 v2; U64 v3; U64 v4; + U64 mem64[4]; /* defined as U64 for alignment */ U32 memsize; - char memory[32]; } XXH_istate64_t; @@ -592,16 +592,16 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const v if (state->memsize + len < 16) // fill in tmp buffer { - XXH_memcpy(state->memory + state->memsize, input, len); + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); state->memsize += (U32)len; return XXH_OK; } if (state->memsize) // some data left from previous update { - XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize); + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); { - const U32* p32 = (const U32*)state->memory; + const U32* p32 = state->mem32; state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; state->v1 = 
XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; @@ -633,19 +633,19 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const v do { - v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; + v1 += XXH_readLE32(p, endian) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; - v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; + v2 += XXH_readLE32(p, endian) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; - v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; + v3 += XXH_readLE32(p, endian) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; - v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; + v4 += XXH_readLE32(p, endian) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; @@ -660,7 +660,7 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const v if (p < bEnd) { - XXH_memcpy(state->memory, p, bEnd-p); + XXH_memcpy(state->mem32, p, bEnd-p); state->memsize = (int)(bEnd-p); } @@ -682,8 +682,8 @@ XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t l FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianess endian) { XXH_istate32_t* state = (XXH_istate32_t*) state_in; - const BYTE * p = (const BYTE*)state->memory; - BYTE* bEnd = (BYTE*)state->memory + state->memsize; + const BYTE * p = (const BYTE*)state->mem32; + BYTE* bEnd = (BYTE*)(state->mem32) + state->memsize; U32 h32; if (state->total_len >= 16) @@ -699,7 +699,7 @@ FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endiane while (p+4<=bEnd) { - h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3; + h32 += XXH_readLE32(p, endian) * PRIME32_3; h32 = XXH_rotl32(h32, 17) * PRIME32_4; p+=4; } @@ -746,16 +746,16 @@ FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const v if (state->memsize + len < 32) // fill in tmp buffer { - XXH_memcpy(state->memory + state->memsize, input, len); + XXH_memcpy(((BYTE*)state->mem64) + 
state->memsize, input, len); state->memsize += (U32)len; return XXH_OK; } if (state->memsize) // some data left from previous update { - XXH_memcpy(state->memory + state->memsize, input, 32-state->memsize); + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); { - const U64* p64 = (const U64*)state->memory; + const U64* p64 = state->mem64; state->v1 += XXH_readLE64(p64, endian) * PRIME64_2; state->v1 = XXH_rotl64(state->v1, 31); state->v1 *= PRIME64_1; @@ -787,19 +787,19 @@ FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const v do { - v1 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; + v1 += XXH_readLE64(p, endian) * PRIME64_2; v1 = XXH_rotl64(v1, 31); v1 *= PRIME64_1; p+=8; - v2 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; + v2 += XXH_readLE64(p, endian) * PRIME64_2; v2 = XXH_rotl64(v2, 31); v2 *= PRIME64_1; p+=8; - v3 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; + v3 += XXH_readLE64(p, endian) * PRIME64_2; v3 = XXH_rotl64(v3, 31); v3 *= PRIME64_1; p+=8; - v4 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; + v4 += XXH_readLE64(p, endian) * PRIME64_2; v4 = XXH_rotl64(v4, 31); v4 *= PRIME64_1; p+=8; @@ -814,7 +814,7 @@ FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const v if (p < bEnd) { - XXH_memcpy(state->memory, p, bEnd-p); + XXH_memcpy(state->mem64, p, bEnd-p); state->memsize = (int)(bEnd-p); } @@ -836,8 +836,8 @@ XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t l FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian) { XXH_istate64_t * state = (XXH_istate64_t *) state_in; - const BYTE * p = (const BYTE*)state->memory; - BYTE* bEnd = (BYTE*)state->memory + state->memsize; + const BYTE * p = (const BYTE*)state->mem64; + BYTE* bEnd = (BYTE*)state->mem64 + state->memsize; U64 h64; if (state->total_len >= 32) @@ -882,7 +882,7 @@ FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, 
XXH_endiane while (p+8<=bEnd) { - U64 k1 = XXH_readLE64((const U64*)p, endian); + U64 k1 = XXH_readLE64(p, endian); k1 *= PRIME64_2; k1 = XXH_rotl64(k1,31); k1 *= PRIME64_1; @@ -893,7 +893,7 @@ FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endiane if (p+4<=bEnd) { - h64 ^= (U64)(XXH_readLE32((const U32*)p, endian)) * PRIME64_1; + h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; p+=4; } |