Small speed improvement (compression & decompression), Thanks Maciej Adamczyk for suggestion

Fixed : LZ4_uncompress_unknownOutputSize(), now protected against a malicious attack type, where a crafted input deliberately attempts to make it read outside of input buffer. Thanks Steinar H. Gunderson for report. git-svn-id: https://lz4.googlecode.com/svn/trunk@58 650e7d94-2a16-8b24-b05c-7c0b3f6821cd
author: yann.collet.73@gmail.com <yann.collet.73@gmail.com@650e7d94-2a16-8b24-b05c-7c0b3f6821cd> 2012-03-01 15:19:08 (GMT)
committer: yann.collet.73@gmail.com <yann.collet.73@gmail.com@650e7d94-2a16-8b24-b05c-7c0b3f6821cd> 2012-03-01 15:19:08 (GMT)
commit: 89767cc28059fff5e782c4b8ddd5b0b03ddedb90 (patch)
tree: fd55089bc20c348c5f3402be86725c7eaa42cff4
parent: 3430ee1bc0d90e32c47a036eb39c3b0d16ba5451 (diff)
download: lz4-89767cc28059fff5e782c4b8ddd5b0b03ddedb90.zip
lz4-89767cc28059fff5e782c4b8ddd5b0b03ddedb90.tar.gz
lz4-89767cc28059fff5e782c4b8ddd5b0b03ddedb90.tar.bz2
3 files changed, 57 insertions, 42 deletions
diff --git a/bench.c b/bench.c
index 8d86bbc..6fe0d94 100644
--- a/bench.c
+++ b/bench.c
@@ -33,8 +33,8 @@
 #define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
 #endif
 
-// GCC on does not support _rotl outside of Windows
-#if defined(__GNUC__)
+// GCC does not support _rotl outside of Windows
+#if !defined(_WIN32)
 #define _rotl(x,r) ((x << r) | (x >> (32 - r)))
 #endif
 
@@ -42,8 +42,8 @@
 //**************************************
 // Includes
 //**************************************
-#include <stdio.h>      // fprintf, fopen, ftello64
 #include <stdlib.h>     // malloc
+#include <stdio.h>      // fprintf, fopen, ftello64
 #include <sys/timeb.h>  // timeb
 #include <sys/types.h>  // stat64
 #include <sys/stat.h>   // stat64
@@ -370,6 +370,7 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles)
 		  {
 			for (chunkNb=0; chunkNb<nbChunks; chunkNb++)
 				chunkP[chunkNb].outputSize = compP.decompressionFunction(chunkP[chunkNb].outputBuffer, chunkP[chunkNb].inputBuffer, chunkP[chunkNb].inputSize);
+				//LZ4_uncompress_unknownOutputSize(chunkP[chunkNb].outputBuffer, chunkP[chunkNb].inputBuffer, chunkP[chunkNb].outputSize, chunkP[chunkNb].inputSize);  // For testing
 			nb_loops++;
 		  }
 		  milliTime = BMK_GetMilliSpan(milliTime);
diff --git a/lz4.c b/lz4.c
index cce3b17..28a892e 100644
--- a/lz4.c
+++ b/lz4.c
@@ -92,20 +92,21 @@
 //**************************************
 // Compiler Options
 //**************************************
-#if __STDC_VERSION__ >= 199901L    // C99
-  /* "restrict" is a known keyword */
+#if __STDC_VERSION__ >= 199901L // C99
+/* "restrict" is a known keyword */
 #else
-#define restrict  // Disable restrict
+#define restrict // Disable restrict
 #endif
 
-#ifdef _MSC_VER
-#define inline __forceinline    // Visual is not C99, but supports some kind of inline
+#ifdef _MSC_VER  // Visual Studio
+#define inline __forceinline // Visual is not C99, but supports some kind of inline
+#include <intrin.h>          // _BitScanForward
 #endif
 
-#ifdef _MSC_VER  // Visual Studio
+#ifdef _MSC_VER 
 #define bswap16(x) _byteswap_ushort(x)
 #else
-#define bswap16(x)  ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
+#define bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
 #endif
 
 
@@ -209,6 +210,14 @@ typedef struct _U64_S { U64 v; } U64_S;
 #define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; }
 #endif
 
+#if __GNUC__ >= 3
+# define expect(expr,value)         __builtin_expect ((expr),(value))
+#else
+# define expect(expr,value)         (expr)
+#endif
+
+#define expect_true(expr)  expect ((expr) != 0, 1)
+#define expect_false(expr) expect ((expr) != 0, 0)
 
 //**************************************
 // Local structures
@@ -372,7 +381,7 @@ int LZ4_compressCtx(void** ctx,
 			ip = forwardIp;
 			forwardIp = ip + step;
 
-			if (forwardIp > mflimit) { goto _last_literals; }
+			if (expect_false(forwardIp > mflimit)) { goto _last_literals; }
 
 			forwardH = LZ4_HASH_VALUE(forwardIp);
 			ref = base + HashTable[h];
@@ -381,7 +390,7 @@ int LZ4_compressCtx(void** ctx,
 		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
 
 		// Catch up
-		while ((ip>anchor) && (ref>(BYTE*)source) && (ip[-1]==ref[-1])) { ip--; ref--; }
+		while ((expect_false(ip>anchor) && expect_false(ref>(BYTE*)source) && (ip[-1]==ref[-1]))) { ip--; ref--; }
 
 		// Encode Literal length
 		length = ip - anchor;
@@ -399,7 +408,7 @@ _next_match:
 		// Start Counting
 		ip+=MINMATCH; ref+=MINMATCH;   // MinMatch verified
 		anchor = ip;
-		while (ip<matchlimit-(STEPSIZE-1))
+		while (expect_true(ip<matchlimit-(STEPSIZE-1)))
 		{
 			UARCH diff = AARCH(ref) ^ AARCH(ip);
 			if (!diff) { ip+=STEPSIZE; ref+=STEPSIZE; continue; }
@@ -523,7 +532,7 @@ int LZ4_compress64kCtx(void** ctx,
 		} while (A32(ref) != A32(ip));
 
 		// Catch up
-		while ((ip>anchor) && (ref>(BYTE*)source) && (ip[-1]==ref[-1])) { ip--; ref--; }
+		while (((ip>anchor) && expect_false(ref>(BYTE*)source) && (ip[-1]==ref[-1]))) { ip--; ref--; }
 
 		// Encode Literal length
 		length = ip - anchor;
@@ -567,7 +576,7 @@ _endCount:
 		// Test next position
 		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
 		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
-		if (A32(ref) == A32(ip)) { token = op++; *token=0; goto _next_match; }
+		if (expect_true(A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; }
 
 		// Prepare next loop
 		anchor = ip++;
@@ -618,7 +627,8 @@ int LZ4_compress(const char* source,
 
 // Note : The decoding functions LZ4_uncompress() and LZ4_uncompress_unknownOutputSize()
 //		are safe against "buffer overflow" attack type.
-//		They will never write nor read outside of the provided input and output buffers.
+//		They will never write nor read outside of the provided output buffers.
+//      LZ4_uncompress_unknownOutputSize() also insures that it will never read outside of the input buffer.
 //		A corrupted input will produce an error result, a negative int, indicating the position of the error within input stream.
 
 int LZ4_uncompress(const char* source,
@@ -648,7 +658,7 @@ int LZ4_uncompress(const char* source,
 
 		// copy literals
 		cpy = op+length;
-		if (cpy>oend-COPYLENGTH)
+		if (expect_false(cpy>oend-COPYLENGTH))
 		{
 			if (cpy > oend) goto _output_error;
 			memcpy(op, ip, length);
@@ -665,7 +675,7 @@ int LZ4_uncompress(const char* source,
 		if ((length=(token&ML_MASK)) == ML_MASK) { for (;*ip==255;length+=255) {ip++;} length += *ip++; }
 
 		// copy repeated sequence
-		if (op-ref<STEPSIZE)
+		if (expect_false(op-ref<STEPSIZE))
 		{
 #if LZ4_ARCH64
 			size_t dec2table[]={0, 0, 0, -1, 0, 1, 2, 3};
@@ -719,40 +729,42 @@ int LZ4_uncompress_unknownOutputSize(
 	BYTE* const oend = op + maxOutputSize;
 	BYTE* cpy;
 
-	BYTE token;
-	
-	int	len, length;
 	size_t dec[] ={0, 3, 2, 3, 0, 0, 0, 0};
 
 
 	// Main Loop
 	while (ip<iend)
 	{
+		BYTE token;
+		int length;
+
 		// get runlength
 		token = *ip++;
-		if ((length=(token>>ML_BITS)) == RUN_MASK)  { for (;(len=*ip++)==255;length+=255){} length += len; }
+		if ((length=(token>>ML_BITS)) == RUN_MASK) { int s=255; while ((ip<iend) && (s==255)) { s=*ip++; length += s; } }
 
 		// copy literals
 		cpy = op+length;
-		if (cpy>oend-COPYLENGTH)
+		if ((cpy>oend-COPYLENGTH) || (ip+length>iend-COPYLENGTH))
 		{
 			if (cpy > oend) goto _output_error;
+			if (ip+length > iend) goto _output_error;
 			memcpy(op, ip, length);
 			op += length;
-			break;    // Necessarily EOF
+			ip += length;
+			if (ip<iend) goto _output_error;
+			break;    // Necessarily EOF, due to parsing restrictions
 		}
 		LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;
-		if (ip>=iend) break;    // check EOF
 
 		// get offset
 		LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
 		if (ref < (BYTE* const)dest) goto _output_error;
 
 		// get matchlength
-		if ((length=(token&ML_MASK)) == ML_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; }
+		if ((length=(token&ML_MASK)) == ML_MASK) { while (ip<iend) { int s = *ip++; length +=s; if (s==255) continue; break; } }
 
 		// copy repeated sequence
-		if (op-ref<STEPSIZE)
+		if (expect_false(op-ref<STEPSIZE))
 		{
 #if LZ4_ARCH64
 			size_t dec2table[]={0, 0, 0, -1, 0, 1, 2, 3};
diff --git a/lz4_format_description.txt b/lz4_format_description.txt
index e4a053b..a170dde 100644
--- a/lz4_format_description.txt
+++ b/lz4_format_description.txt
@@ -1,23 +1,25 @@
 LZ4 Format Description
-Last revised: 2012-02-12
+Last revised: 2012-02-27
 Author : Y. Collet
 
 
 
-This is not a formal specification, but intents to provide enough information
-to anyone willing to produce LZ4-compatible compressed streams.
+This small specification intents to provide enough information
+to anyone willing to produce LZ4-compatible compressed streams
+using any programming language.
 
 LZ4 is an LZ77-type compressor with a fixed, byte-oriented encoding.
-There is no entropy encoder backend nor framing layer -- the latter is
-assumed to be handled by other parts of the system.
-
-This document only describes the format, not how the LZ4 compressor nor
-decompressor actually works. The correctness of the decompressor should not
-depend on implementation details of the compressor, and vice versa.
-
 The most important design principle behind LZ4 is simplicity.
-It is meant to create an easy to read and maintain source code.
+It helps to create an easy to read and maintain source code.
 It also helps later on for optimisations, compactness, and speed.
+There is no entropy encoder backend nor framing layer.
+The latter is assumed to be handled by other parts of the system.
+
+This document only describes the format,
+not how the LZ4 compressor nor decompressor actually work.
+The correctness of the decompressor should not depend
+on implementation details of the compressor, and vice versa.
+
 
 
 -- Compressed stream format --
@@ -32,8 +34,8 @@ Therefore each field ranges from 0 to 15.
 
 The first field uses the 4 high-bits of the token.
 It provides the length of literals to follow.
-A literal is a not-compressed byte.
-If it is 0, then there is no literal.
+(Note : a literal is a not-compressed byte).
+If the field value is 0, then there is no literal.
 If it is 15, then we need to add some more bytes to indicate the full length.
 Each additionnal byte then represent a value from 0 to 255,
 which is added to the previous value to produce a total length.
@@ -107,7 +109,7 @@ and stops right after literals.
 
 There is no assumption nor limits to the way the compressor
 searches and selects matches within the source stream.
-It could be a fast scan,  a multi-probe, a full search using BST,
+It could be a fast scan, a multi-probe, a full search using BST,
 standard hash chains or MMC, well whatever.
 
 Advanced parsing strategies can also be implemented, such as lazy match,
@@ -115,5 +117,5 @@ or full optimal parsing.
 
 All these trade-off offer distinctive speed/memory/compression advantages.
 Whatever the method used by the compressor, its result will be decodable
-by any LZ4 decoder if it follows the format described above.
+by any LZ4 decoder if it follows the format specification described above.
author	yann.collet.73@gmail.com <yann.collet.73@gmail.com@650e7d94-2a16-8b24-b05c-7c0b3f6821cd>	2012-03-01 15:19:08 (GMT)
committer	yann.collet.73@gmail.com <yann.collet.73@gmail.com@650e7d94-2a16-8b24-b05c-7c0b3f6821cd>	2012-03-01 15:19:08 (GMT)
commit	89767cc28059fff5e782c4b8ddd5b0b03ddedb90 (patch)
tree	fd55089bc20c348c5f3402be86725c7eaa42cff4
parent	3430ee1bc0d90e32c47a036eb39c3b0d16ba5451 (diff)
download	lz4-89767cc28059fff5e782c4b8ddd5b0b03ddedb90.zip lz4-89767cc28059fff5e782c4b8ddd5b0b03ddedb90.tar.gz lz4-89767cc28059fff5e782c4b8ddd5b0b03ddedb90.tar.bz2