From a2010203173de3f793f4f351ccc7df3377d27a2c Mon Sep 17 00:00:00 2001 From: "yann.collet.73@gmail.com" Date: Tue, 7 Jun 2011 17:57:25 +0000 Subject: Improved compression and decompression speed under GCC compiler git-svn-id: https://lz4.googlecode.com/svn/trunk@11 650e7d94-2a16-8b24-b05c-7c0b3f6821cd --- Makefile | 6 +++--- lz4.c | 69 +++++++++++++++++++++++++++++++--------------------------------- lz4.h | 10 +++++----- main.c | 2 +- 4 files changed, 42 insertions(+), 45 deletions(-) diff --git a/Makefile b/Makefile index d3443a5..ea32feb 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,10 @@ all: lz4.32 lz4.64 lz4.64: lz4.c lz4.h main.c - gcc -g -O3 -I. -Wall lz4.c main.c -o lz4.64 + gcc -g -O3 -I. -Wall -W lz4.c main.c -o lz4.64.exe lz4.32: lz4.c lz4.h main.c - gcc -m32 -g -O3 -I. -Wall lz4.c main.c -o lz4.32 + gcc -m32 -g -O3 -I. -Wall -W lz4.c main.c -o lz4.32.exe clean: - rm -f core *.o lz4.32 lz4.64 + rm -f core *.o lz4.32.exe lz4.64.exe diff --git a/lz4.c b/lz4.c index 8608ea1..ff4f515 100644 --- a/lz4.c +++ b/lz4.c @@ -94,7 +94,6 @@ struct refTables //************************************** #define HASH_FUNCTION(i) ((i * 2654435761U) >> ((MINMATCH*8)-HASH_LOG)) #define HASH_VALUE(p) HASH_FUNCTION(*(U32*)p) -#define HASH_POINTER(p) HashTable[HASH_VALUE(p)] @@ -124,16 +123,16 @@ int LZ4_compressCtx(void** ctx, BYTE** HashTable; BYTE *ip = (BYTE*) source, /* input pointer */ - *anchor = (BYTE*) source, + *anchor = ip, *incompressible = anchor + INCOMPRESSIBLE, - *iend = (BYTE*) source + isize, - *ilimit = iend - MINMATCH - 1; + *iend = ip + isize, + *ilimit = iend - MINMATCH; BYTE *op = (BYTE*) dest, /* output pointer */ *ref, *orun, *l_end; - int len, length, sequence, h; + int len, length; U32 step=1; @@ -149,13 +148,11 @@ int LZ4_compressCtx(void** ctx, // Main Loop while (ip < ilimit) { - sequence = *(U32*)ip; - h = HASH_FUNCTION(sequence); - ref = HashTable[h]; - HashTable[h] = ip; + ref = HashTable[HASH_VALUE(ip)]; + HashTable[HASH_VALUE(ip)] = ip; // Min Match - if (( ((ip-ref) >> MAXD_LOG)) || (*(U32*)ref != sequence)) + if ((ref < ip - MAX_DISTANCE) || (*(U32*)ref != *(U32*)ip)) { if (ip>incompressible) { incompressible += INCOMPRESSIBLE << (step >> 1); step++; } ip+=step; @@ -164,12 +161,12 @@ int LZ4_compressCtx(void** ctx, step=1; // Catch up - while ((ip>anchor) && (*(ip-1)==*(ref-1))) { ip--; ref--; } + while ((ip>anchor) && (ip[-1]==ref[-1])) { ip--; ref--; } // Encode Literal length length = ip - anchor; orun = op++; - if (length>(RUN_MASK-1)) { *orun=(RUN_MASK< 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } + if (length>=(int)RUN_MASK) { *orun=(RUN_MASK< 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } else *orun = (length<(ML_MASK-1)) { *orun+=ML_MASK; len-=ML_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } + if (len>=(int)ML_MASK) { *orun+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; } else *orun += len; // Prepare next loop @@ -209,7 +206,7 @@ _endCount: if (length) { orun=op++; - if (len>(RUN_MASK-1)) { *orun=(RUN_MASK< 254 ; len-=255) *op++ = 255; *op++ = (BYTE) len; } + if (len>=(int)RUN_MASK) { *orun=(RUN_MASK< 254 ; len-=255) *op++ = 255; *op++ = (BYTE) len; } else *orun = (len<0;length-=4) { *(U32*)op = *(U32*)anchor; op+=4; anchor+=4; } op += length; // correction @@ -232,12 +229,12 @@ int LZ4_uncompress(char* source, BYTE *ip = (BYTE*) source; BYTE *op = (BYTE*) dest, - *oend= op + osize, + *olimit = op + osize - 4, *ref, *cpy, runcode; U32 dec[4]={0, 3, 2, 3}; - int len, length; + int length; // Main Loop @@ -245,25 +242,25 @@ int LZ4_uncompress(char* source, { // get runlength runcode = *ip++; - if ((length=(runcode>>ML_BITS)) == RUN_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; } + if ((length=(runcode>>ML_BITS)) == RUN_MASK) { for (;*ip==255;length+=255) {ip++;} length += *ip++; } // copy literals ref = op+length; - if (ref>oend-4) + if (ref > olimit) { - if (ref > oend) goto _output_error; - while(op olimit+4) goto _output_error; + while(op <= olimit) { *(U32*)op=*(U32*)ip; op+=4; ip+=4; } + while(op < ref) *op++=*ip++; break; // Necessarily EOF } - while (opoend-4) + } else { *(U32*)op=*(U32*)ref; op+=4; ref+=4; } + if (cpy > olimit) { - if (cpy > oend) goto _output_error; - while(op=oend) break; // Check EOF + if (cpy > olimit+4) goto _output_error; + while(op < cpy-3) { *(U32*)op=*(U32*)ref; op+=4; ref+=4; } + while(op < cpy) *op++=*ref++; + if (op >= olimit+4) break; // Check EOF continue; } - while(op=iend) break; // check EOF @@ -352,7 +349,7 @@ int LZ4_uncompress_unknownOutputSize( *op++ = *ref++; *op++ = *ref++; ref -= dec[op-ref]; - } + } else { *(U32*)op=*(U32*)ref; op+=4; ref+=4; } if (cpy>oend-4) { if (cpy > oend) goto _output_error; @@ -361,7 +358,7 @@ int LZ4_uncompress_unknownOutputSize( if (op>=oend) break; // Check EOF continue; } - while(op