summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: yann.collet.73@gmail.com <yann.collet.73@gmail.com@650e7d94-2a16-8b24-b05c-7c0b3f6821cd> 2012-02-01 04:19:38 (GMT)
committer: yann.collet.73@gmail.com <yann.collet.73@gmail.com@650e7d94-2a16-8b24-b05c-7c0b3f6821cd> 2012-02-01 04:19:38 (GMT)
commit: 2327aa404e50d23c46d539646e63a6b0682b2dfc (patch)
tree: 91ef54a425396343b82c7414b43180d086ce4b0f
parent: 572cab747fc214656bbf9bded1a0d93787d0c2f6 (diff)
download: lz4-2327aa404e50d23c46d539646e63a6b0682b2dfc.zip
lz4-2327aa404e50d23c46d539646e63a6b0682b2dfc.tar.gz
lz4-2327aa404e50d23c46d539646e63a6b0682b2dfc.tar.bz2
Better detection of Big-Endian and PowerPC CPU
LZ4Demo : correction : produces compatible streams on both big-endian and little-endian systems

git-svn-id: https://lz4.googlecode.com/svn/trunk@52 650e7d94-2a16-8b24-b05c-7c0b3f6821cd
-rw-r--r-- bench.c 16
-rw-r--r-- lz4.c 70
-rw-r--r-- lz4demo.c 49
3 files changed, 88 insertions, 47 deletions
diff --git a/bench.c b/bench.c
index bd5dc1c..f99497e 100644
--- a/bench.c
+++ b/bench.c
@@ -29,7 +29,7 @@
//**************************************
// Includes
//**************************************
-#include <stdio.h> // printf, fopen, fseeko64, ftello64
+#include <stdio.h> // fprintf, fopen, ftello64
#include <stdlib.h> // malloc
#include <sys/timeb.h> // timeb
#include "lz4.h"
@@ -167,11 +167,11 @@ static U64 BMK_GetFileSize(FILE* f)
{
U64 r;
#ifdef _MSC_VER
- _fseeki64(f, 0L, SEEK_END);
+ r = _fseeki64(f, 0L, SEEK_END);
r = (U64) _ftelli64(f);
_fseeki64(f, 0L, SEEK_SET);
#else
- fseeko64(f, 0LL, SEEK_END);
+ r = (U64) fseeko64(f, 0LL, SEEK_END);
r = (U64) ftello64(f);
fseeko64(f, 0LL, SEEK_SET);
#endif
@@ -269,7 +269,7 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
if(readSize != benchedsize)
{
- printf("\nError: problem reading file '%s' !! \n", infilename);
+ DISPLAY("\nError: problem reading file '%s' !! \n", infilename);
free(in_buff);
free(out_buff);
return 13;
@@ -326,6 +326,10 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
if ((double)milliTime < fastestD*nb_loops) fastestD = (double)milliTime/nb_loops;
DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%), %6.1f MB/s , %6.1f MB/s\r", loopNb, infilename, (int)benchedsize, (int)cSize, (double)cSize/(double)benchedsize*100., (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.);
+
+ // CRC Checking
+ crcd = BMK_checksum(in_buff, benchedsize);
+ if (crcc!=crcd) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", infilename, (unsigned)crcc, (unsigned)crcd); break; }
}
DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%), %6.1f MB/s , %6.1f MB/s\n", infilename, (int)benchedsize, (int)cSize, (double)cSize/(double)benchedsize*100., (double)benchedsize / fastestC / 1000., (double)benchedsize / fastestD / 1000.);
@@ -333,10 +337,6 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
totalz += cSize;
totalc += fastestC;
totald += fastestD;
-
- // CRC Checking
- crcd = BMK_checksum(in_buff, benchedsize);
- if (crcc!=crcd) printf("!!! WARNING !!! Invalid Checksum : %x != %x\n", (unsigned)crcc, (unsigned)crcd);
}
free(in_buff);
diff --git a/lz4.c b/lz4.c
index 8591110..4bc306c 100644
--- a/lz4.c
+++ b/lz4.c
@@ -28,16 +28,30 @@
*/
//**************************************
-// Compilation Directives
+// Tuning parameters
//**************************************
-#if __STDC_VERSION__ >= 199901L
+// Increasing this value improves compression ratio
+// Lowering this value reduces memory usage
+// Reduced memory usage typically improves speed, due to cache effects (e.g. L1 is 32KB on Intel, 64KB on AMD)
+// Memory usage formula : N->2^(N+2) Bytes (examples : 12 -> 16KB ; 17 -> 512KB)
+#define COMPRESSIONLEVEL 12
+
+// Uncomment this parameter if your target system does not support hardware bit count
+//#define _FORCE_SW_BITCOUNT
+
+
+
+//**************************************
+// Compiler Options
+//**************************************
+#if __STDC_VERSION__ >= 199901L // C99
/* "restrict" is a known keyword */
#else
#define restrict // Disable restrict
#endif
#ifdef _MSC_VER
-#define inline __forceinline
+#define inline __forceinline // Visual is not C99, but supports inline
#endif
#ifdef __GNUC__
@@ -46,10 +60,10 @@
#define _PACKED
#endif
-#if (__x86_64__ || __ppc64__ || _WIN64 || __LP64__) // Detect 64 bits mode
-#define ARCH64 1
+#ifdef _MSC_VER // Visual Studio
+#define bswap16(i) _byteswap_ushort(i)
#else
-#define ARCH64 0
+#define bswap16(i) (((i)>>8) | ((i)<<8))
#endif
@@ -62,18 +76,6 @@
//**************************************
-// Performance parameter
-//**************************************
-// Increasing this value improves compression ratio
-// Lowering this value reduces memory usage
-// Lowering may also improve speed, typically on reaching cache size limits (L1 32KB for Intel, 64KB for AMD)
-// Memory usage formula for 32 bits systems : N->2^(N+2) Bytes (examples : 17 -> 512KB ; 12 -> 16KB)
-#define HASH_LOG 12
-
-//#define _FORCE_SW_BITCOUNT // Uncomment for better performance if target platform has no hardware support for LowBitCount
-
-
-//**************************************
// Basic Types
//**************************************
#if defined(_MSC_VER) // Visual Studio does not support 'stdint' natively
@@ -107,6 +109,7 @@
#define MAXD_LOG 16
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
+#define HASH_LOG COMPRESSIONLEVEL
#define HASHTABLESIZE (1 << HASH_LOG)
#define HASH_MASK (HASHTABLESIZE - 1)
@@ -147,6 +150,19 @@ typedef struct _U16_S
//**************************************
// Architecture-specific macros
//**************************************
+#if (__x86_64__ || __x86_64 || __amd64__ || __amd64 || __ppc64__ || _WIN64 || __LP64__ || _LP64) // Detects 64 bits mode
+#define ARCH64 1
+#else
+#define ARCH64 0
+#endif
+
+// The following macro auto-detects big-endian CPUs. You can manually override it if the detection is wrong.
+#if (__BIG_ENDIAN__ || _BIG_ENDIAN || _ARCH_PPC || __PPC__ || __PPC || PPC || __powerpc__ || __powerpc || powerpc || ((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) )
+#define CPU_BIG_ENDIAN 1
+#else
+// Little Endian assumed. PDP Endian and other very rare endian formats are unsupported.
+#endif
+
#if ARCH64 // 64-bit
#define STEPSIZE 8
#define UARCH U64
@@ -167,14 +183,14 @@ typedef struct _U16_S
#define INITBASE(base) const int base = 0
#endif
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#if CPU_BIG_ENDIAN
+#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = bswap16(v); d = (s) - v; }
+#define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = bswap16(v); A16(p) = v; p+=2; }
+#define LZ4_NbCommonBytes LZ4_NbCommonBytes_BigEndian
+#else // Little Endian
#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
#define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; }
#define LZ4_NbCommonBytes LZ4_NbCommonBytes_LittleEndian
-#else // Big Endian
-#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { int delta = p[0]; delta += p[1] << 8; d = (s) - delta; }
-#define LZ4_WRITE_LITTLEENDIAN_16(p,v) { int delta = v; *p++ = delta; *p++ = delta>>8; }
-#define LZ4_NbCommonBytes LZ4_NbCommonBytes_BigEndian
#endif
@@ -616,7 +632,7 @@ int LZ4_uncompress(char* source,
if (op-ref<STEPSIZE)
{
#if ARCH64
- size_t dec2table[]={0, 4, 4, 3, 4, 5, 6, 7};
+ size_t dec2table[]={0, 0, 0, -1, 0, 1, 2, 3};
size_t dec2 = dec2table[op-ref];
#else
const int dec2 = 0;
@@ -626,7 +642,7 @@ int LZ4_uncompress(char* source,
*op++ = *ref++;
*op++ = *ref++;
ref -= dec[op-ref];
- A32(op)=A32(ref); op += STEPSIZE-4; ref += STEPSIZE-4;
+ A32(op)=A32(ref); op += STEPSIZE-4;
ref -= dec2;
} else { LZ4_COPYSTEP(ref,op); }
cpy = op + length - (STEPSIZE-4);
@@ -703,7 +719,7 @@ int LZ4_uncompress_unknownOutputSize(
if (op-ref<STEPSIZE)
{
#if ARCH64
- size_t dec2table[]={0, 4, 4, 3, 4, 5, 6, 7};
+ size_t dec2table[]={0, 0, 0, -1, 0, 1, 2, 3};
size_t dec2 = dec2table[op-ref];
#else
const int dec2 = 0;
@@ -713,7 +729,7 @@ int LZ4_uncompress_unknownOutputSize(
*op++ = *ref++;
*op++ = *ref++;
ref -= dec[op-ref];
- A32(op)=A32(ref); op += STEPSIZE-4; ref += STEPSIZE-4;
+ A32(op)=A32(ref); op += STEPSIZE-4;
ref -= dec2;
} else { LZ4_COPYSTEP(ref,op); }
cpy = op + length - (STEPSIZE-4);
diff --git a/lz4demo.c b/lz4demo.c
index c15b242..760f611 100644
--- a/lz4demo.c
+++ b/lz4demo.c
@@ -43,9 +43,13 @@
//**************************************
-// Basic Types
+// Compiler functions
//**************************************
-
+#if defined(_MSC_VER) // Visual Studio
+#define swap32 _byteswap_ulong
+#else // GCC assumed
+#define swap32 __builtin_bswap32
+#endif
//****************************
@@ -67,11 +71,19 @@
//**************************************
-// MACRO
+// Architecture Macros
//**************************************
-#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
+static const int one = 1;
+#define CPU_LITTLE_ENDIAN (*(char*)(&one))
+#define CPU_BIG_ENDIAN (!CPU_LITTLE_ENDIAN)
+#define LITTLE_ENDIAN32(i) if (CPU_BIG_ENDIAN) { i = swap32(i); }
+//**************************************
+// Macros
+//**************************************
+#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
+
//****************************
// Functions
@@ -79,7 +91,7 @@
int usage()
{
DISPLAY( "Usage :\n");
- DISPLAY( " %s [arg] input output\n",BINARY_NAME);
+ DISPLAY( " %s [arg] input output\n", BINARY_NAME);
DISPLAY( "Arguments :\n");
DISPLAY( " -c : compression (default)\n");
DISPLAY( " -d : decompression \n");
@@ -138,6 +150,7 @@ int compress_file(char* input_filename, char* output_filename)
{
unsigned long long filesize = 0;
unsigned long long compressedfilesize = ARCHIVE_MAGICNUMBER_SIZE;
+ unsigned int u32var;
char* in_buff;
char* out_buff;
FILE* finput;
@@ -157,7 +170,9 @@ int compress_file(char* input_filename, char* output_filename)
if (!in_buff || !out_buff) { DISPLAY("Allocation error : not enough memory\n"); return 8; }
// Write Archive Header
- *(unsigned long*)out_buff = ARCHIVE_MAGICNUMBER;
+ u32var = ARCHIVE_MAGICNUMBER;
+ LITTLE_ENDIAN32(u32var);
+ *(unsigned int*)out_buff = u32var;
fwrite(out_buff, 1, ARCHIVE_MAGICNUMBER_SIZE, foutput);
// Main Loop
@@ -171,10 +186,12 @@ int compress_file(char* input_filename, char* output_filename)
// Compress Block
outSize = LZ4_compress(in_buff, out_buff+4, inSize);
- * (unsigned int*) out_buff = outSize;
compressedfilesize += outSize+4;
// Write Block
+ LITTLE_ENDIAN32(outSize);
+ * (unsigned int*) out_buff = outSize;
+ LITTLE_ENDIAN32(outSize);
fwrite(out_buff, 1, outSize+4, foutput);
}
@@ -223,9 +240,15 @@ int decode_file(char* input_filename, char* output_filename)
// Check Archive Header
uselessRet = fread(out_buff, 1, ARCHIVE_MAGICNUMBER_SIZE, finput);
- if (*(unsigned long*)out_buff != ARCHIVE_MAGICNUMBER) { DISPLAY("Unrecognized header : file cannot be decoded\n"); return 6; }
+ nextSize = *(unsigned int*)out_buff;
+ LITTLE_ENDIAN32(nextSize);
+ if (nextSize != ARCHIVE_MAGICNUMBER) { DISPLAY("Unrecognized header : file cannot be decoded\n"); return 6; }
+
+ // First Block
+ *(unsigned int*)in_buff = 0;
uselessRet = fread(in_buff, 1, 4, finput);
- nextSize = *(unsigned long*)in_buff;
+ nextSize = *(unsigned int*)in_buff;
+ LITTLE_ENDIAN32(nextSize);
// Main Loop
while (1)
@@ -234,18 +257,20 @@ int decode_file(char* input_filename, char* output_filename)
uselessRet = fread(in_buff, 1, nextSize, finput);
// Check Next Block
- uselessRet = (unsigned long) fread(&nextSize, 1, 4, finput);
- if( uselessRet==0 ) break;
+ uselessRet = (size_t) fread(&nextSize, 1, 4, finput);
+ if( uselessRet==0 ) break; // Nothing read : file read is completed
+ LITTLE_ENDIAN32(nextSize);
// Decode Block
sinkint = LZ4_uncompress(in_buff, out_buff, CHUNKSIZE);
+ if (sinkint < 0) { DISPLAY("Decoding Failed ! Corrupted input !\n"); return 9; }
filesize += CHUNKSIZE;
// Write Block
fwrite(out_buff, 1, CHUNKSIZE, foutput);
}
- // Last Block
+ // Last Block (whose size is <= CHUNKSIZE, but let LZ4 figure that out)
uselessRet = fread(in_buff, 1, nextSize, finput);
sinkint = LZ4_uncompress_unknownOutputSize(in_buff, out_buff, nextSize, CHUNKSIZE);
filesize += sinkint;