summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author yann.collet.73@gmail.com <yann.collet.73@gmail.com@650e7d94-2a16-8b24-b05c-7c0b3f6821cd> 2012-03-09 21:46:59 (GMT)
committer yann.collet.73@gmail.com <yann.collet.73@gmail.com@650e7d94-2a16-8b24-b05c-7c0b3f6821cd> 2012-03-09 21:46:59 (GMT)
commit ad59ba1cfad62af37c44ded985fe1e2a0dffae05 (patch)
tree f9001c58f8af4b5adcb53155443faa42e68a71fa
parent 89767cc28059fff5e782c4b8ddd5b0b03ddedb90 (diff)
download: lz4-ad59ba1cfad62af37c44ded985fe1e2a0dffae05.zip
lz4-ad59ba1cfad62af37c44ded985fe1e2a0dffae05.tar.gz
lz4-ad59ba1cfad62af37c44ded985fe1e2a0dffae05.tar.bz2
minor code refactoring, mostly around __builtin_expect
git-svn-id: https://lz4.googlecode.com/svn/trunk@59 650e7d94-2a16-8b24-b05c-7c0b3f6821cd
-rw-r--r-- Makefile 16
-rw-r--r-- bench.c 49
-rw-r--r-- bench.h 3
-rw-r--r-- lz4.c 43
-rw-r--r-- lz4demo.c 7
5 files changed, 80 insertions, 38 deletions
diff --git a/Makefile b/Makefile
index 076fd55..76f3163 100644
--- a/Makefile
+++ b/Makefile
@@ -1,10 +1,20 @@
+OS := $(shell uname)
+
+ifeq ($(OS),Linux)
+ OUTPUT32 = lz4demo32
+ OUTPUT64 = lz4demo64
+else
+ OUTPUT32 = LZ4Demo32.exe
+ OUTPUT64 = LZ4Demo64.exe
+endif
+
all: lz4demo64 lz4demo32
lz4demo64: lz4.c lz4.h bench.c lz4demo.c
- gcc -O3 -I. -std=c99 -Wall -W -Wundef -Wno-implicit-function-declaration lz4.c bench.c lz4demo.c -o lz4demo64.exe
+ gcc -O3 -I. -std=c99 -Wall -W -Wundef -Wno-implicit-function-declaration lz4.c bench.c lz4demo.c -o $(OUTPUT64)
lz4demo32: lz4.c lz4.h bench.c lz4demo.c
- gcc -m32 -O3 -I. -std=c99 -Wall -W -Wundef -Wno-implicit-function-declaration lz4.c bench.c lz4demo.c -o lz4demo32.exe
+ gcc -m32 -O3 -I. -std=c99 -Wall -W -Wundef -Wno-implicit-function-declaration lz4.c bench.c lz4demo.c -o $(OUTPUT32)
clean:
- rm -f core *.o lz4demo32.exe lz4demo64.exe
+ rm -f core *.o $(OUTPUT32) $(OUTPUT64)
diff --git a/bench.c b/bench.c
index 6fe0d94..5139e8a 100644
--- a/bench.c
+++ b/bench.c
@@ -47,7 +47,10 @@
#include <sys/timeb.h> // timeb
#include <sys/types.h> // stat64
#include <sys/stat.h> // stat64
+
#include "lz4.h"
+#define DEFAULTCOMPRESSOR LZ4_compress
+
//**************************************
@@ -107,16 +110,24 @@ struct compressionParameters
//**************************************
-// Private Parameters
+// Benchmark Parameters
//**************************************
static int chunkSize = DEFAULT_CHUNKSIZE;
+static int nbIterations = NBLOOPS;
void BMK_SetBlocksize(int bsize)
{
chunkSize = bsize;
- DISPLAY("Using Block Size of %i KB... ", chunkSize>>10);
+ DISPLAY("-Using Block Size of %i KB-", chunkSize>>10);
+}
+
+void BMK_SetNbIterations(int nbLoops)
+{
+ nbIterations = nbLoops;
+ DISPLAY("- %i iterations-", nbIterations);
}
+
//*********************************************************
// Private functions
//*********************************************************
@@ -231,7 +242,7 @@ static U64 BMK_GetFileSize(char* infilename)
// Public function
//*********************************************************
-int BMK_benchFile(char** fileNamesTable, int nbFiles)
+int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel)
{
int fileIdx=0;
FILE* fileIn;
@@ -244,7 +255,7 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
char* in_buff;
char* out_buff; int out_buff_size;
struct chunkParameters* chunkP;
- U32 crcc, crcd;
+ U32 crcc, crcd=0;
struct compressionParameters compP;
U64 totals = 0;
@@ -254,7 +265,13 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
// Init
- compP.compressionFunction = LZ4_compress;
+ switch (cLevel)
+ {
+#ifdef COMPRESSOR0
+ case 0 : compP.compressionFunction = COMPRESSOR0; break;
+#endif
+ default : compP.compressionFunction = DEFAULTCOMPRESSOR;
+ }
compP.decompressionFunction = LZ4_uncompress;
// Loop for each file
@@ -313,7 +330,7 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
}
// Fill input buffer
- DISPLAY("Loading %s... \r", infilename);
+ DISPLAY("Loading %s... \r", infilename);
readSize = fread(in_buff, 1, benchedsize, fileIn);
fclose(fileIn);
@@ -332,11 +349,13 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
// Bench
{
int loopNb, nb_loops, chunkNb;
- size_t cSize;
+ size_t cSize=0;
int milliTime;
double fastestC = 100000000., fastestD = 100000000.;
+ double ratio=0.;
- for (loopNb = 1; loopNb <= NBLOOPS; loopNb++)
+ DISPLAY("\r%79s\r", "");
+ for (loopNb = 1; loopNb <= nbIterations; loopNb++)
{
// Compression
DISPLAY("%1i-%-14.14s : %9i ->\r", loopNb, infilename, (int)benchedsize);
@@ -356,8 +375,9 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
if ((double)milliTime < fastestC*nb_loops) fastestC = (double)milliTime/nb_loops;
cSize=0; for (chunkNb=0; chunkNb<nbChunks; chunkNb++) cSize += chunkP[chunkNb].outputSize;
+ ratio = (double)cSize/(double)benchedsize*100.;
- DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%), %6.1f MB/s\r", loopNb, infilename, (int)benchedsize, (int)cSize, (double)cSize/(double)benchedsize*100., (double)benchedsize / fastestC / 1000.);
+ DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%), %6.1f MB/s\r", loopNb, infilename, (int)benchedsize, (int)cSize, ratio, (double)benchedsize / fastestC / 1000.);
// Decompression
{ size_t i; for (i=0; i<benchedsize; i++) in_buff[i]=0; } // zeroing area, for CRC checking
@@ -370,20 +390,25 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
{
for (chunkNb=0; chunkNb<nbChunks; chunkNb++)
chunkP[chunkNb].outputSize = compP.decompressionFunction(chunkP[chunkNb].outputBuffer, chunkP[chunkNb].inputBuffer, chunkP[chunkNb].inputSize);
- //LZ4_uncompress_unknownOutputSize(chunkP[chunkNb].outputBuffer, chunkP[chunkNb].inputBuffer, chunkP[chunkNb].outputSize, chunkP[chunkNb].inputSize); // For testing
nb_loops++;
}
milliTime = BMK_GetMilliSpan(milliTime);
if ((double)milliTime < fastestD*nb_loops) fastestD = (double)milliTime/nb_loops;
- DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%), %6.1f MB/s , %6.1f MB/s\r", loopNb, infilename, (int)benchedsize, (int)cSize, (double)cSize/(double)benchedsize*100., (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.);
+ DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%), %6.1f MB/s , %6.1f MB/s\r", loopNb, infilename, (int)benchedsize, (int)cSize, ratio, (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.);
// CRC Checking
crcd = BMK_checksum_MMH3A(in_buff, benchedsize);
if (crcc!=crcd) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", infilename, (unsigned)crcc, (unsigned)crcd); break; }
}
- DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%), %6.1f MB/s , %6.1f MB/s\n", infilename, (int)benchedsize, (int)cSize, (double)cSize/(double)benchedsize*100., (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.);
+ if (crcc==crcd)
+ {
+ if (ratio<100.)
+ DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%), %6.1f MB/s , %6.1f MB/s\n", infilename, (int)benchedsize, (int)cSize, ratio, (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.);
+ else
+ DISPLAY("%-16.16s : %9i -> %9i (%5.1f%%), %6.1f MB/s , %6.1f MB/s \n", infilename, (int)benchedsize, (int)cSize, ratio, (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.);
+ }
totals += benchedsize;
totalz += cSize;
totalc += fastestC;
diff --git a/bench.h b/bench.h
index 547a1bc..f97eb51 100644
--- a/bench.h
+++ b/bench.h
@@ -27,10 +27,11 @@ extern "C" {
#endif
-int BMK_benchFile(char** fileNamesTable, int nbFiles) ;
+int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel);
// Parameters
void BMK_SetBlocksize(int bsize);
+void BMK_SetNbIterations(int nbLoops);
diff --git a/lz4.c b/lz4.c
index 28a892e..df62be6 100644
--- a/lz4.c
+++ b/lz4.c
@@ -98,6 +98,8 @@
#define restrict // Disable restrict
#endif
+#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
#ifdef _MSC_VER // Visual Studio
#define inline __forceinline // Visual is not C99, but supports some kind of inline
#include <intrin.h> // _BitScanForward
@@ -109,6 +111,15 @@
#define bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
#endif
+#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
+# define expect(expr,value) (__builtin_expect ((expr),(value)) )
+#else
+# define expect(expr,value) (expr)
+#endif
+
+#define likely(expr) expect((expr) != 0, 1)
+#define unlikely(expr) expect((expr) != 0, 0)
+
//**************************************
// Includes
@@ -210,14 +221,6 @@ typedef struct _U64_S { U64 v; } U64_S;
#define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; }
#endif
-#if __GNUC__ >= 3
-# define expect(expr,value) __builtin_expect ((expr),(value))
-#else
-# define expect(expr,value) (expr)
-#endif
-
-#define expect_true(expr) expect ((expr) != 0, 1)
-#define expect_false(expr) expect ((expr) != 0, 0)
//**************************************
// Local structures
@@ -249,7 +252,7 @@ inline static int LZ4_NbCommonBytes (register U64 val)
unsigned long r = 0;
_BitScanReverse64( &r, val );
return (int)(r>>3);
- #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
return (__builtin_clzll(val) >> 3);
#else
int r;
@@ -263,7 +266,7 @@ inline static int LZ4_NbCommonBytes (register U64 val)
unsigned long r = 0;
_BitScanForward64( &r, val );
return (int)(r>>3);
- #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
return (__builtin_ctzll(val) >> 3);
#else
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
@@ -281,7 +284,7 @@ inline static int LZ4_NbCommonBytes (register U32 val)
unsigned long r = 0;
_BitScanReverse( &r, val );
return (int)(r>>3);
- #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
return (__builtin_clz(val) >> 3);
#else
int r;
@@ -294,7 +297,7 @@ inline static int LZ4_NbCommonBytes (register U32 val)
unsigned long r = 0;
_BitScanForward( &r, val );
return (int)(r>>3);
- #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
return (__builtin_ctz(val) >> 3);
#else
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
@@ -381,7 +384,7 @@ int LZ4_compressCtx(void** ctx,
ip = forwardIp;
forwardIp = ip + step;
- if (expect_false(forwardIp > mflimit)) { goto _last_literals; }
+ if unlikely(forwardIp > mflimit) { goto _last_literals; }
forwardH = LZ4_HASH_VALUE(forwardIp);
ref = base + HashTable[h];
@@ -390,7 +393,7 @@ int LZ4_compressCtx(void** ctx,
} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
// Catch up
- while ((expect_false(ip>anchor) && expect_false(ref>(BYTE*)source) && (ip[-1]==ref[-1]))) { ip--; ref--; }
+ while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; }
// Encode Literal length
length = ip - anchor;
@@ -408,7 +411,7 @@ _next_match:
// Start Counting
ip+=MINMATCH; ref+=MINMATCH; // MinMatch verified
anchor = ip;
- while (expect_true(ip<matchlimit-(STEPSIZE-1)))
+ while likely(ip<matchlimit-(STEPSIZE-1))
{
UARCH diff = AARCH(ref) ^ AARCH(ip);
if (!diff) { ip+=STEPSIZE; ref+=STEPSIZE; continue; }
@@ -532,7 +535,7 @@ int LZ4_compress64kCtx(void** ctx,
} while (A32(ref) != A32(ip));
// Catch up
- while (((ip>anchor) && expect_false(ref>(BYTE*)source) && (ip[-1]==ref[-1]))) { ip--; ref--; }
+ while ((ip>anchor) && (ref>(BYTE*)source) && (ip[-1]==ref[-1])) { ip--; ref--; }
// Encode Literal length
length = ip - anchor;
@@ -576,7 +579,7 @@ _endCount:
// Test next position
ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
- if (expect_true(A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; }
+ if (A32(ref) == A32(ip)) { token = op++; *token=0; goto _next_match; }
// Prepare next loop
anchor = ip++;
@@ -658,7 +661,7 @@ int LZ4_uncompress(const char* source,
// copy literals
cpy = op+length;
- if (expect_false(cpy>oend-COPYLENGTH))
+ if unlikely(cpy>oend-COPYLENGTH)
{
if (cpy > oend) goto _output_error;
memcpy(op, ip, length);
@@ -675,7 +678,7 @@ int LZ4_uncompress(const char* source,
if ((length=(token&ML_MASK)) == ML_MASK) { for (;*ip==255;length+=255) {ip++;} length += *ip++; }
// copy repeated sequence
- if (expect_false(op-ref<STEPSIZE))
+ if unlikely(op-ref<STEPSIZE)
{
#if LZ4_ARCH64
size_t dec2table[]={0, 0, 0, -1, 0, 1, 2, 3};
@@ -764,7 +767,7 @@ int LZ4_uncompress_unknownOutputSize(
if ((length=(token&ML_MASK)) == ML_MASK) { while (ip<iend) { int s = *ip++; length +=s; if (s==255) continue; break; } }
// copy repeated sequence
- if (expect_false(op-ref<STEPSIZE))
+ if unlikely(op-ref<STEPSIZE)
{
#if LZ4_ARCH64
size_t dec2table[]={0, 0, 0, -1, 0, 1, 2, 3};
diff --git a/lz4demo.c b/lz4demo.c
index 3905cfe..0d7b020 100644
--- a/lz4demo.c
+++ b/lz4demo.c
@@ -23,8 +23,8 @@
/*
Note : this is *only* a demo program, an example to show how LZ4 can be used.
It is not considered part of LZ4 compression library.
- The license of the demo program is GPL.
The license of LZ4 is BSD.
+ The license of the demo program is GPL.
*/
//****************************
@@ -340,6 +340,9 @@ int main(int argc, char** argv)
// Modify Block Size (benchmark only)
if ( argument[0] =='B' ) { int B = argument[1] - '0'; int S = 1 << (10 + 2*B); BMK_SetBlocksize(S); continue; }
+ // Modify Nb Iterations (benchmark only)
+ if ( argument[0] =='i' ) { int iters = argument[1] - '0'; BMK_SetNbIterations(iters); continue; }
+
// Test
if ( argument[0] =='t' ) { decode=1; output_filename=nulmark; continue; }
}
@@ -359,7 +362,7 @@ int main(int argc, char** argv)
// No input filename ==> Error
if(!input_filename) { badusage(); return 1; }
- if (bench) return BMK_benchFile(argv+filenamesStart, argc-filenamesStart);
+ if (bench) return BMK_benchFile(argv+filenamesStart, argc-filenamesStart, 0);
// No output filename
if (!output_filename) { badusage(); return 1; }