From b1e707f77277441feae978f501ac3455e8bc9b25 Mon Sep 17 00:00:00 2001 From: "yann.collet.73@gmail.com" Date: Wed, 24 Oct 2012 12:59:19 +0000 Subject: - Corrected issue 31 : LZ4 correctly accepts compressing data when the output buffer has exactly the required size (it was a bit over-cautious in previous version). - Added : a fuzzer tool Thanks to Andrew Mahone, for contribution on both points git-svn-id: https://lz4.googlecode.com/svn/trunk@79 650e7d94-2a16-8b24-b05c-7c0b3f6821cd --- Makefile | 25 +++++----- fuzzer.c | 165 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ lz4.c | 18 +++---- 3 files changed, 188 insertions(+), 20 deletions(-) create mode 100644 fuzzer.c diff --git a/Makefile b/Makefile index dbefc12..eca9966 100644 --- a/Makefile +++ b/Makefile @@ -1,22 +1,25 @@ -OS := $(shell uname) +CC=gcc +CFLAGS=-I. -std=c99 -Wall -W -Wundef -Wno-implicit-function-declaration +OS := $(shell uname) ifeq ($(OS),Linux) - OUTPUT32 = lz4demo32 - OUTPUT = lz4demo +EXT = else - OUTPUT32 = LZ4Demo32.exe - OUTPUT = LZ4Demo.exe +EXT =.exe endif default: lz4demo -all: lz4demo lz4demo32 +all: lz4demo lz4demo32 fuzzer -lz4demo: lz4.c lz4.h lz4hc.c lz4hc.h bench.c lz4demo.c - gcc -O3 -I. -std=c99 -Wall -W -Wundef -Wno-implicit-function-declaration lz4hc.c lz4.c bench.c lz4demo.c -o $(OUTPUT) +lz4demo: lz4.c lz4hc.c bench.c lz4demo.c + $(CC) -O3 $(CFLAGS) $^ -o $@$(EXT) -lz4demo32: lz4.c lz4.h lz4hc.c lz4hc.h bench.c lz4demo.c - gcc -m32 -Os -march=native -I. -std=c99 -Wall -W -Wundef -Wno-implicit-function-declaration lz4hc.c lz4.c bench.c lz4demo.c -o $(OUTPUT32) +lz4demo32: lz4.c lz4hc.c bench.c lz4demo.c + $(CC) -m32 -Os -march=native $(CFLAGS) $^ -o $@$(EXT) +fuzzer : lz4.c fuzzer.c + $(CC) -O3 $(CFLAGS) $^ -o $@$(EXT) + clean: - rm -f core *.o $(OUTPUT32) $(OUTPUT) + rm -f core *.o lz4demo$(EXT) lz4demo32$(EXT) fuzzer$(EXT) diff --git a/fuzzer.c b/fuzzer.c new file mode 100644 index 0000000..2f75923 --- /dev/null +++ b/fuzzer.c @@ -0,0 +1,165 @@ +/* + fuzzer.c - Fuzzer test tool for LZ4 + Copyright (C) Andrew Mahone - Yann Collet 2012 + Original code by Andrew Mahone / Modified by Yann Collet + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + You can contact the author at : + - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + - LZ4 source repository : http://code.google.com/p/lz4/ +*/ + +//************************************** +// Remove Visual warning messages +//************************************** +#define _CRT_SECURE_NO_WARNINGS // fgets + + +//************************************** +// Includes +//************************************** +#include +#include // fgets +#include // timeb +#include "lz4.h" + + +//************************************** +// Constants +//************************************** +#define NB_ATTEMPTS (1<<18) +#define LEN ((1<<15)) +#define SEQ_POW 2 +#define NUM_SEQ (1 << SEQ_POW) +#define SEQ_MSK ((NUM_SEQ) - 1) +#define MOD_SEQ(x) ((((x) >> 8) & 255) == 0) +#define NEW_SEQ(x) ((((x) >> 10) %10) == 0) +#define PAGE_SIZE 4096 +#define ROUND_PAGE(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1)) +#define PRIME1 2654435761U +#define PRIME2 2246822519U +#define PRIME3 3266489917U + + + +//********************************************************* +// Functions +//********************************************************* +static int FUZ_GetMilliStart() +{ + struct timeb tb; + int nCount; + ftime( &tb ); + nCount = (int) (tb.millitm + (tb.time & 0xfffff) * 1000); + return nCount; +} + +static int FUZ_GetMilliSpan( int nTimeStart ) +{ + int nSpan = FUZ_GetMilliStart() - nTimeStart; + if ( nSpan < 0 ) + nSpan += 0x100000 * 1000; + return nSpan; +} + + +unsigned int FUZ_rand(unsigned int* src) +{ + *src = ((*src) * PRIME1) + PRIME2; + return *src; +} + + +int test_canary(unsigned char *buf) { + int i; + for (i = 0; i < 2048; i++) + if (buf[i] != buf[i + 2048]) + return 0; + return 1; +} + +//int main(int argc, char *argv[]) { +int main() { + unsigned long long bytes = 0; + unsigned long long cbytes = 0; + unsigned char buf[LEN]; +# undef max +# define max LZ4_compressBound(LEN) +# define avail ROUND_PAGE(max) + const int off_full = avail - max; + unsigned char cbuf[avail + PAGE_SIZE]; + unsigned int seed, cur_seq, seeds[NUM_SEQ], timestamp=FUZ_GetMilliStart(); + int i, j, k, ret, len; + char userInput[30] = {0}; + + printf("starting LZ4 fuzzer\n"); + printf("Select an Initialisation number (default : random) : "); + fflush(stdout); + if ( fgets(userInput, sizeof userInput, stdin) ) + { + if ( sscanf(userInput, "%d", &seed) == 1 ) {} + else seed = FUZ_GetMilliSpan(timestamp); + } + printf("Seed = %u\n", seed); + + for (i = 0; i < 2048; i++) + cbuf[avail + i] = cbuf[avail + 2048 + i] = FUZ_rand(&seed) >> 16; + + for (i = 0; i < NB_ATTEMPTS; i++) { + printf("\r%7i /%7i\r", i, NB_ATTEMPTS); + FUZ_rand(&seed); + for (j = 0; j < NUM_SEQ; j++) { + seeds[j] = FUZ_rand(&seed) << 8; + seeds[j] ^= (FUZ_rand(&seed) >> 8) & 65535; + } + for (j = 0; j < LEN; j++) { + k = FUZ_rand(&seed); + if (j == 0 || NEW_SEQ(k)) + cur_seq = seeds[(FUZ_rand(&seed) >> 16) & SEQ_MSK]; + if (MOD_SEQ(k)) { + k = (FUZ_rand(&seed) >> 16) & SEQ_MSK; + seeds[k] = FUZ_rand(&seed) << 8; + seeds[k] ^= (FUZ_rand(&seed) >> 8) & 65535; + } + buf[j] = FUZ_rand(&cur_seq) >> 16; + } + ret = LZ4_compress_limitedOutput((const char*)buf, (char*)&cbuf[off_full], LEN, max); + len = ret; + + // Test compression with output size being exactly what's necessary + ret = LZ4_compress_limitedOutput((const char*)buf, (char*)&cbuf[avail-len], LEN, len); + if (!test_canary(&cbuf[avail])) { printf("compression overran output buffer: seed %u, len %d, olen %d\n", seed, LEN, len); return 1; } + if (ret == 0) { printf("compression failed despite sufficient space: seed %u, len %d\n", seed, LEN); return 1; } + + // Test compression with just one missing byte into output buffer => should fail + ret = LZ4_compress_limitedOutput((const char*)buf, (char*)&cbuf[avail-(len-1)], LEN, len-1); + if (ret) { printf("compression overran output buffer: seed %u, len %d, olen %d => ret %d", seed, LEN, len-1, ret); return 1; } + if (!test_canary(&cbuf[avail])) { printf("compression overran output buffer: seed %u, len %d, olen %d", seed, LEN, len-1); return 1; } + + /* No longer useful + // Test compression with not enough output size + ret = LZ4_compress_limitedOutput((const char*)buf, (char*)&cbuf[avail-len/2], LEN, len/2); + if (!test_canary(&cbuf[avail])) { printf("compression overran output buffer: seed %u, len %d, olen %d", seed, LEN, len/2); return 1; } + */ + + bytes += LEN; + cbytes += len; + } + printf("all tests completed successfully \n"); + printf("compression ratio: %0.3f%%\n", (double)cbytes/bytes*100); + return 0; +} diff --git a/lz4.c b/lz4.c index 13fb5ac..17c4a04 100644 --- a/lz4.c +++ b/lz4.c @@ -402,7 +402,7 @@ static inline int LZ4_compressCtx(void** ctx, // Encode Literal length length = (int)(ip - anchor); token = op++; - if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) >= oend) return 0; // Check output limit + if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend) return 0; // Check output limit #ifdef _MSC_VER if (length>=(int)RUN_MASK) { @@ -449,7 +449,7 @@ _endCount: // Encode MatchLength len = (int)(ip - anchor); - if unlikely(op + (1 + LASTLITERALS) + (len>>8) >= oend) return 0; // Check output limit + if unlikely(op + (1 + LASTLITERALS) + (len>>8) > oend) return 0; // Check output limit if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; } else *token += len; @@ -473,7 +473,7 @@ _last_literals: // Encode Last Literals { int lastRun = (int)(iend - anchor); - if (((char*)op - dest) + lastRun + 1 + ((lastRun-15)/255) >= maxOutputSize) return 0; + if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0; if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } else *op++ = (lastRun<>8) >= oend) return 0; // Check output limit + if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend) return 0; // Check output limit #ifdef _MSC_VER if (length>=(int)RUN_MASK) { @@ -614,7 +614,7 @@ _endCount: // Encode MatchLength len = (int)(ip - anchor); - if unlikely(op + (1 + LASTLITERALS) + (len>>8) >= oend) return 0; // Check output limit + if unlikely(op + (1 + LASTLITERALS) + (len>>8) > oend) return 0; // Check output limit if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; } else *token += len; @@ -638,7 +638,7 @@ _last_literals: // Encode Last Literals { int lastRun = (int)(iend - anchor); - if (((char*)op - dest) + lastRun + 1 + ((lastRun)>>8) >= maxOutputSize) return 0; + if (op + lastRun + 1 + (lastRun-RUN_MASK+255)/255 > oend) return 0; if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } else *op++ = (lastRun<oend-COPYLENGTH) { - if (cpy > oend) goto _output_error; // Error : request to write outside of destination buffer + if (cpy > oend) goto _output_error; // Error : request to write outside of destination buffer LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH)); while(op