diff options
author | yann.collet.73@gmail.com <yann.collet.73@gmail.com@650e7d94-2a16-8b24-b05c-7c0b3f6821cd> | 2011-06-05 21:23:42 (GMT) |
---|---|---|
committer | yann.collet.73@gmail.com <yann.collet.73@gmail.com@650e7d94-2a16-8b24-b05c-7c0b3f6821cd> | 2011-06-05 21:23:42 (GMT) |
commit | 075bf1349b23914d237f4e676d71da57ecfa4b7c (patch) | |
tree | de4436c5c700d01f3e9892a67fd92f9bed2e5b1d | |
parent | 6b798d5e40e4c0788f7c51f6e9b0e9c4e966d068 (diff) | |
download | lz4-075bf1349b23914d237f4e676d71da57ecfa4b7c.zip lz4-075bf1349b23914d237f4e676d71da57ecfa4b7c.tar.gz lz4-075bf1349b23914d237f4e676d71da57ecfa4b7c.tar.bz2 |
Greatly improved compression and decompression speed, at the expense of some compression ratio.
Most of the change comes from the performance parameter (HASH_LOG), which is now set to 12 so that the hash table (16KB) fits within the L1 cache of Intel processors (32KB).
You can change it back to 17 to restore the previous compression ratio.
AMD users are invited to try HASH_LOG = 13, since the AMD L1 cache is twice as large (64KB).
git-svn-id: https://lz4.googlecode.com/svn/trunk@10 650e7d94-2a16-8b24-b05c-7c0b3f6821cd
-rw-r--r-- | lz4.c | 42 | ||||
-rw-r--r-- | lz4.h | 17 |
2 files changed, 35 insertions, 24 deletions
@@ -36,13 +36,13 @@ //**************************************
-// Performance parameter <---------------------------------------------------------
+// Performance parameter
//**************************************
// Lowering this value reduce memory usage
// It may also improve speed, especially if you reach L1 cache size (32KB for Intel, 64KB for AMD)
// Expanding memory usage typically improves compression ratio
// Memory usage formula : N->2^(N+2) Bytes (examples : 17 -> 512KB ; 12 -> 16KB)
-#define HASH_LOG 17
+#define HASH_LOG 12
//**************************************
@@ -125,6 +125,7 @@ int LZ4_compressCtx(void** ctx, BYTE *ip = (BYTE*) source, /* input pointer */
*anchor = (BYTE*) source,
+ *incompressible = anchor + INCOMPRESSIBLE,
*iend = (BYTE*) source + isize,
*ilimit = iend - MINMATCH - 1;
@@ -134,7 +135,6 @@ int LZ4_compressCtx(void** ctx, int len, length, sequence, h;
U32 step=1;
- S32 limit=INCOMPRESSIBLE;
// Init
@@ -154,23 +154,23 @@ int LZ4_compressCtx(void** ctx, ref = HashTable[h];
HashTable[h] = ip;
- // Check Min Match
- if (( ((ip-ref) >> MAXD_LOG) != 0) || (*(U32*)ref != sequence))
+ // Min Match
+ if (( ((ip-ref) >> MAXD_LOG)) || (*(U32*)ref != sequence))
{
- if (ip-anchor>limit) { limit <<= 1; step += 1 + (step>>2); }
- ip += step;
+ if (ip>incompressible) { incompressible += INCOMPRESSIBLE << (step >> 1); step++; }
+ ip+=step;
continue;
- }
+ }
+ step=1;
- // catch up
- if (step>1) { HashTable[h] = ref; ip -= (step-1); step=1; continue; }
- limit = INCOMPRESSIBLE;
+ // Catch up
+ while ((ip>anchor) && (*(ip-1)==*(ref-1))) { ip--; ref--; }
// Encode Literal length
- len = length = ip - anchor;
+ length = ip - anchor;
orun = op++;
- if (len>(RUN_MASK-1)) { *orun=(RUN_MASK<<ML_BITS); len-=RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; }
- else *orun = (len<<ML_BITS);
+ if (length>(RUN_MASK-1)) { *orun=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; }
+ else *orun = (length<<ML_BITS);
// Copy Literals
l_end = op + length;
@@ -183,7 +183,16 @@ int LZ4_compressCtx(void** ctx, // Start Counting
ip+=MINMATCH; ref+=MINMATCH; // MinMatch verified
anchor = ip;
- while ((ip<iend) && (*ref == *ip)) { ip++; ref++; } // Ends at *ip!=*ref
+ while (ip<(iend-3))
+ {
+ if (*(U32*)ref == *(U32*)ip) { ip+=4; ref+=4; continue; }
+ if (*(U16*)ref == *(U16*)ip) { ip+=2; ref+=2; }
+ if (*ref == *ip) ip++;
+ goto _endCount;
+ }
+ if ((ip<(iend-1)) && (*(U16*)ref == *(U16*)ip)) { ip+=2; ref+=2; }
+ if ((ip<iend) && (*ref == *ip)) ip++;
+_endCount:
len = (ip - anchor);
// Encode MatchLength
@@ -192,6 +201,7 @@ int LZ4_compressCtx(void** ctx, // Prepare next loop
anchor = ip;
+ incompressible = anchor + INCOMPRESSIBLE;
}
// Encode Last Literals
@@ -222,7 +232,7 @@ int LZ4_uncompress(char* source, BYTE *ip = (BYTE*) source;
BYTE *op = (BYTE*) dest,
- *oend=(BYTE*) dest + osize,
+ *oend= op + osize,
*ref, *cpy,
runcode;
@@ -66,9 +66,9 @@ int LZ4_uncompress_unknownOutputSize (char* source, char* dest, int isize, int m LZ4_uncompress :
return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize)
If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
- This version never writes beyond dest + osize, and is therefore protected against malicious data packets
+ This version never writes beyond dest + maxOutputSize, and is therefore protected against malicious data packets
note 1 : isize is the input size, therefore the compressed size
- note 2 : destination buffer must be already allocated
+ note 2 : destination buffer must already be allocated, with at least maxOutputSize bytes
note 3 : this version is slower by up to 10%, and is therefore not recommended for general use
*/
@@ -88,20 +88,21 @@ LZ4_compressCtx : */
-//****************************
-// Deprecated Functions
-//****************************
+//*********************************
+// Faster Decoding function
+//*********************************
-int LZ4_decode (char* source, char* dest, int isize);
+#define LZ4_uncompress_fast LZ4_decode
+int LZ4_decode (char* source, char* dest, int isize);
/*
-LZ4_decode :
+LZ4_decode : This version is the fastest one, besting LZ4_uncompress by a few %.
return : the number of bytes in decoded buffer dest
note 1 : isize is the input size, therefore the compressed size
note 2 : destination buffer must be already allocated.
The program calling the decoder must know in advance the size of decoded stream to properly allocate the destination buffer
The destination buffer size must be at least "decompressedSize + 3 Bytes"
- This version is unprotected against malicious data packets designed to create buffer overflow errors.
+ This version is **unprotected** against malicious data packets designed to create buffer overflow errors.
It is therefore deprecated, but still present in this version for compatibility.
*/
|