From 03b9c5fcae218b28bb0f7a2a9e5487b86230f391 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 28 Apr 2014 21:45:35 +0100 Subject: Introduce "External Dictionary" de/compression API --- lz4.c | 137 ++++++++++++++------ lz4.h | 47 ++++--- programs/Makefile | 4 +- programs/fullbench.c | 34 +++-- programs/fuzzer.c | 349 +++++++++++++++++++++++++++++++++++++-------------- 5 files changed, 402 insertions(+), 169 deletions(-) diff --git a/lz4.c b/lz4.c index fd229ef..f9820fe 100644 --- a/lz4.c +++ b/lz4.c @@ -264,7 +264,7 @@ typedef struct { typedef enum { notLimited = 0, limited = 1 } limitedOutput_directive; typedef enum { byPtr, byU32, byU16 } tableType_t; -typedef enum { noPrefix = 0, withPrefix = 1 } prefix64k_directive; +typedef enum { noDict = 0, withPrefix64k = 1, withExtDict=2 } dict_directive; typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; typedef enum { full = 0, partial = 1 } earlyEnd_directive; @@ -420,11 +420,11 @@ FORCE_INLINE int LZ4_compress_generic( limitedOutput_directive limitedOutput, tableType_t tableType, - prefix64k_directive prefix) + dict_directive dict) { const BYTE* ip = (const BYTE*) source; - const BYTE* const base = (prefix==withPrefix) ? ((LZ4_Data_Structure*)ctx)->base : (const BYTE*) source; - const BYTE* const lowLimit = ((prefix==withPrefix) ? ((LZ4_Data_Structure*)ctx)->bufferStart : (const BYTE*)source); + const BYTE* const base = (dict==withPrefix64k) ? ((LZ4_Data_Structure*)ctx)->base : (const BYTE*) source; + const BYTE* const lowLimit = ((dict==withPrefix64k) ? ((LZ4_Data_Structure*)ctx)->bufferStart : (const BYTE*)source); const BYTE* anchor = (const BYTE*) source; const BYTE* const iend = ip + inputSize; const BYTE* const mflimit = iend - MFLIMIT; @@ -439,8 +439,8 @@ FORCE_INLINE int LZ4_compress_generic( /* Init conditions */ if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */ - if ((prefix==withPrefix) && (ip != ((LZ4_Data_Structure*)ctx)->nextBlock)) return 0; /* must continue from end of previous block */ - if (prefix==withPrefix) ((LZ4_Data_Structure*)ctx)->nextBlock=iend; /* do it now, due to potential early exit */ + if ((dict==withPrefix64k) && (ip != ((LZ4_Data_Structure*)ctx)->nextBlock)) return 0; /* must continue from end of previous block */ + if (dict==withPrefix64k) ((LZ4_Data_Structure*)ctx)->nextBlock=iend; /* do it now, due to potential early exit */ if ((tableType == byU16) && (inputSize>=(int)LZ4_64KLIMIT)) return 0; /* Size too large (not within 64K limit) */ if (inputSize oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => decode everything */ if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 0 : -1; /* Empty output buffer */ if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1); @@ -792,20 +795,56 @@ FORCE_INLINE int LZ4_decompress_generic( /* get offset */ LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; - if ((prefix64k==noPrefix) && (unlikely(ref < (BYTE* const)dest))) goto _output_error; /* Error : offset outside destination buffer */ + if ((dict==noDict) && (unlikely(ref < (BYTE* const)dest))) goto _output_error; /* Error : offset outside destination buffer */ /* get matchlength */ if ((length=(token&ML_MASK)) == ML_MASK) { - while ((!endOnInput) || (ip oend-LASTLITERALS)) goto _output_error; + + if (length+MINMATCH <= (size_t)(dest-(char*)ref)) + { + ref = dictEnd - (dest-(char*)ref); + memcpy(op, ref, length+MINMATCH); + op += length+MINMATCH; + } + else + { + size_t copySize = (size_t)(dest-(char*)ref); + memcpy(op, dictEnd - copySize, copySize); + op += copySize; + copySize = length+MINMATCH - copySize; + if (copySize > (size_t)((char*)op-dest)) + { + BYTE* const cpy = op + copySize; + const BYTE* ref = (BYTE*)dest; + while (op < cpy) *op++ = *ref++; + } + else + { + memcpy(op, dest, copySize); + op += copySize; + } + } + continue; + } + /* copy repeated sequence */ if (unlikely((op-ref)<(int)STEPSIZE)) { @@ -814,12 +853,9 @@ FORCE_INLINE int LZ4_decompress_generic( op[1] = ref[1]; op[2] = ref[2]; op[3] = ref[3]; - /*op += 4, ref += 4; ref -= dec32table[op-ref]; + op += 4, ref += 4; ref -= dec32table[op-ref]; A32(op) = A32(ref); - op += STEPSIZE-4; ref -= dec64;*/ - ref += dec32table[op-ref]; - A32(op+4) = A32(ref); - op += STEPSIZE; ref -= dec64; + op += STEPSIZE-4; ref -= dec64; } else { LZ4_COPYSTEP(op,ref); } cpy = op + length - (STEPSIZE-4); @@ -847,34 +883,53 @@ _output_error: } -int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) +int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxOutputSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, noDict, NULL, 0); +} + +int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxOutputSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, partial, targetOutputSize, noDict, NULL, 0); +} + +int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) { - return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize, endOnInputSize, noPrefix, full, 0); + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, withPrefix64k, NULL, 0); } -int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int inputSize, int maxOutputSize) +int LZ4_decompress_safe_withDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) { - return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize, endOnInputSize, withPrefix, full, 0); + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, withExtDict, dictStart, dictSize); } -int LZ4_decompress_safe_partial(const char* source, char* dest, int inputSize, int targetOutputSize, int maxOutputSize) +int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize) { - return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize, endOnInputSize, noPrefix, partial, targetOutputSize); + return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, NULL, 0); } -int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int outputSize) +int LZ4_decompress_fast_withDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize) { - return LZ4_decompress_generic(source, dest, 0, outputSize, endOnOutputSize, withPrefix, full, 0); + return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withExtDict, dictStart, dictSize); } -int LZ4_decompress_fast(const char* source, char* dest, int outputSize) +int LZ4_decompress_fast(const char* source, char* dest, int originalSize) { #ifdef _MSC_VER /* This version is faster with Visual */ - return LZ4_decompress_generic(source, dest, 0, outputSize, endOnOutputSize, noPrefix, full, 0); + return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, noDict, NULL, 0); #else - return LZ4_decompress_generic(source, dest, 0, outputSize, endOnOutputSize, withPrefix, full, 0); + return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, NULL, 0); #endif } + + +/************************************** + Obsolete Functions +**************************************/ +/* +These functions are deprecated and should no longer be used. +They are provided here for compatibility with existing user programs. +*/ int LZ4_uncompress (const char* source, char* dest, int outputSize) { return LZ4_decompress_fast(source, dest, outputSize); } int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); } diff --git a/lz4.h b/lz4.h index 3bf0a48..4759b2c 100644 --- a/lz4.h +++ b/lz4.h @@ -42,8 +42,8 @@ extern "C" { Version **************************************/ #define LZ4_VERSION_MAJOR 1 /* for major interface/format changes */ -#define LZ4_VERSION_MINOR 1 /* for minor interface/format changes */ -#define LZ4_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */ +#define LZ4_VERSION_MINOR 2 /* for minor interface/format changes */ +#define LZ4_VERSION_RELEASE 0 /* for tweaks, bug-fixes, or development */ /************************************** @@ -59,7 +59,7 @@ extern "C" { **************************************/ int LZ4_compress (const char* source, char* dest, int inputSize); -int LZ4_decompress_safe (const char* source, char* dest, int inputSize, int maxOutputSize); +int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxOutputSize); /* LZ4_compress() : @@ -128,7 +128,7 @@ int LZ4_decompress_fast (const char* source, char* dest, int originalSize); /* LZ4_decompress_safe_partial() : - This function decompress a compressed block of size 'inputSize' at position 'source' + This function decompress a compressed block of size 'compressedSize' at position 'source' into output buffer 'dest' of size 'maxOutputSize'. The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached, reducing decompression time. @@ -138,7 +138,7 @@ LZ4_decompress_safe_partial() : If the source stream is detected malformed, the function will stop decoding and return a negative result. This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets */ -int LZ4_decompress_safe_partial (const char* source, char* dest, int inputSize, int targetOutputSize, int maxOutputSize); +int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxOutputSize); /* @@ -150,7 +150,7 @@ Note that tables must be aligned on 4-bytes boundaries, otherwise compression wi The allocated memory can be provided to the compressions functions using 'void* state' parameter. LZ4_compress_withState() and LZ4_compress_limitedOutput_withState() are equivalent to previously described functions. -They just use the externally allocated memory area instead of allocating their own (on stack, or on heap). +They just use the externally allocated memory area instead of allocating their own one (on stack, or on heap). */ int LZ4_sizeofState(void); int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); @@ -167,7 +167,7 @@ char* LZ4_slideInputBuffer (void* LZ4_Data); int LZ4_free (void* LZ4_Data); /* -These functions allow the compression of dependent blocks, where each block benefits from prior 64 KB within preceding blocks. +These functions allow the compression of chained blocks, where each block benefits from prior 64 KB within preceding blocks. In order to achieve this, it is necessary to start creating the LZ4 Data Structure, thanks to the function : void* LZ4_create (const char* inputBuffer); @@ -196,11 +196,8 @@ When compression is completed, a call to LZ4_free() will release the memory used */ -int LZ4_sizeofStreamState(void); -int LZ4_resetStreamState(void* state, const char* inputBuffer); - /* -These functions achieve the same result as : +The following functions achieve the same result as : void* LZ4_create (const char* inputBuffer); They are provided here to allow the user program to allocate memory using its own routines. @@ -219,17 +216,33 @@ The same space can be re-used multiple times, just by initializing it each time return value of LZ4_resetStreamState() must be 0 is OK. Any other value means there was an error (typically, pointer is not aligned on 4-bytes boundaries). */ +int LZ4_sizeofStreamState(void); +int LZ4_resetStreamState(void* state, const char* inputBuffer); -int LZ4_decompress_safe_withPrefix64k (const char* source, char* dest, int inputSize, int maxOutputSize); -int LZ4_decompress_fast_withPrefix64k (const char* source, char* dest, int outputSize); - /* *_withPrefix64k() : - These decoding functions work the same as their "normal name" versions, - but can use up to 64KB of data in front of 'char* dest'. - These functions are necessary to decode inter-dependant blocks. + These decoding functions work the same as their "normal" versions, + but can also use up to 64KB of data in front of 'char* dest' + to decode chained blocks. + The last 64KB of previous block must be present there. */ +int LZ4_decompress_safe_withPrefix64k (const char* source, char* dest, int compressedSize, int maxOutputSize); +int LZ4_decompress_fast_withPrefix64k (const char* source, char* dest, int originalSize); + + +/************************************** + Experimental Functions +**************************************/ +/* +*_withDict() : + These decoding functions work the same as their "normal" versions, + but can also use up to 64KB of dictionary data + to decode chained blocks. +*/ +int LZ4_decompress_safe_withDict (const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize); +int LZ4_decompress_fast_withDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize); + /************************************** diff --git a/programs/Makefile b/programs/Makefile index 88924ee..6dba83c 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -64,10 +64,10 @@ lz4c : $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c bench.c xxhash.c lz4io.c lz4cli.c lz4c32: $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c bench.c xxhash.c lz4io.c lz4cli.c $(CC) -m32 $(FLAGS) $^ -o $@$(EXT) -fuzzer : $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c fuzzer.c +fuzzer : $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c xxhash.c fuzzer.c $(CC) $(FLAGS) $^ -o $@$(EXT) -fuzzer32: $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c fuzzer.c +fuzzer32: $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c xxhash.c fuzzer.c $(CC) -m32 $(FLAGS) $^ -o $@$(EXT) fullbench : $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c xxhash.c fullbench.c diff --git a/programs/fullbench.c b/programs/fullbench.c index 6304029..8f01c02 100644 --- a/programs/fullbench.c +++ b/programs/fullbench.c @@ -118,8 +118,8 @@ #define MAX_MEM (1984<<20) #define DEFAULT_CHUNKSIZE (4<<20) -#define ALL_COMPRESSORS -1 -#define ALL_DECOMPRESSORS -1 +#define ALL_COMPRESSORS 0 +#define ALL_DECOMPRESSORS 0 //************************************** @@ -250,9 +250,9 @@ static U64 BMK_GetFileSize(char* infilename) } -//********************************************************* -// Public function -//********************************************************* +/********************************************************* + Benchmark function +*********************************************************/ static inline int local_LZ4_compress_limitedOutput(const char* in, char* out, int inSize) { @@ -457,7 +457,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) void* (*initFunction)(const char*) = NULL; double bestTime = 100000000.; - if ((compressionAlgo != ALL_COMPRESSORS) && (compressionAlgo != cAlgNb)) continue; + if ((compressionAlgo != ALL_COMPRESSORS) && (compressionAlgo != cAlgNb+1)) continue; switch(cAlgNb) { @@ -532,7 +532,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) int (*decompressionFunction)(const char*, char*, int, int); double bestTime = 100000000.; - if ((decompressionAlgo != ALL_DECOMPRESSORS) && (decompressionAlgo != dAlgNb)) continue; + if ((decompressionAlgo != ALL_DECOMPRESSORS) && (decompressionAlgo != dAlgNb+1)) continue; switch(dAlgNb) { @@ -629,8 +629,8 @@ int usage(char* exename) int usage_advanced() { DISPLAY( "\nAdvanced options :\n"); - DISPLAY( " -c# : test only compression function # [%c-%c]\n", MINCOMPRESSIONCHAR, MAXCOMPRESSIONCHAR); - DISPLAY( " -d# : test only decompression function # [%c-%c]\n", MINDECOMPRESSIONCHAR, MAXDECOMPRESSIONCHAR); + DISPLAY( " -c# : test only compression function # [1-%i]\n", NB_COMPRESSION_ALGORITHMS); + DISPLAY( " -d# : test only decompression function # [1-%i]\n", NB_DECOMPRESSION_ALGORITHMS); DISPLAY( " -i# : iteration loops [1-9](default : %i)\n", NBLOOPS); DISPLAY( " -B# : Block size [4-7](default : 7)\n"); //DISPLAY( " -BD : Block dependency (improve compression ratio)\n"); @@ -679,15 +679,23 @@ int main(int argc, char** argv) // Select compression algorithm only case 'c': decompressionTest = 0; - if ((argument[1]>= MINCOMPRESSIONCHAR) && (argument[1]<= MAXCOMPRESSIONCHAR)) - compressionAlgo = argument[1] - '0', argument++; + while ((argument[1]>= '0') && (argument[1]<= '9')) + { + compressionAlgo *= 10; + compressionAlgo += argument[1] - '0'; + argument++; + } break; // Select decompression algorithm only case 'd': compressionTest = 0; - if ((argument[1]>= MINDECOMPRESSIONCHAR) && (argument[1]<= MAXDECOMPRESSIONCHAR)) - decompressionAlgo = argument[1] - '0', argument++; + while ((argument[1]>= '0') && (argument[1]<= '9')) + { + decompressionAlgo *= 10; + decompressionAlgo += argument[1] - '0'; + argument++; + } break; // Display help on usage diff --git a/programs/fuzzer.c b/programs/fuzzer.c index ec1316f..7c2c671 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -1,6 +1,6 @@ /* fuzzer.c - Fuzzer test tool for LZ4 - Copyright (C) Yann Collet - Andrew Mahone 2012-2014 + Copyright (C) Yann Collet 2012-2014 GPL v2 License This program is free software; you can redistribute it and/or modify @@ -22,31 +22,54 @@ - LZ4 source repository : http://code.google.com/p/lz4/ */ -//************************************** -// Remove Visual warning messages -//************************************** +/************************************** + Remove Visual warning messages +**************************************/ #define _CRT_SECURE_NO_WARNINGS // fgets -//************************************** -// Includes -//************************************** +/************************************** + Includes +**************************************/ #include #include // fgets, sscanf #include // timeb #include // strcmp #include "lz4.h" #include "lz4hc.h" +#include "xxhash.h" + + +/************************************** + Basic Types +**************************************/ +#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; +#endif -//************************************** -// Constants -//************************************** +/************************************** + Constants +**************************************/ #ifndef LZ4_VERSION -# define LZ4_VERSION "" +# define LZ4_VERSION "rc118" #endif #define NB_ATTEMPTS (1<<16) +#define COMPRESSIBLE_NOISE_LENGTH (1 << 21) +#define FUZ_MAX_BLOCK_SIZE (1 << 17) +#define FUZ_MAX_DICT_SIZE (1 << 15) #define LEN ((1<<15)) #define SEQ_POW 2 #define NUM_SEQ (1 << SEQ_POW) @@ -60,9 +83,9 @@ #define PRIME3 3266489917U -//********************************************************* -// Functions -//********************************************************* +/********************************************************* + Fuzzer functions +*********************************************************/ static int FUZ_GetMilliStart() { struct timeb tb; @@ -84,18 +107,47 @@ static int FUZ_GetMilliSpan( int nTimeStart ) unsigned int FUZ_rand(unsigned int* src) { - *src = ((*src) * PRIME1) + PRIME2; + *src = XXH32(src, sizeof(src), 0); return *src; } -int test_canary(unsigned char *buf) +#define FUZ_RAND15BITS ((FUZ_rand(seed) >> 3) & 32767) +#define FUZ_RANDLENGTH ( ((FUZ_rand(seed) >> 7) & 3) ? (FUZ_rand(seed) % 14) : (FUZ_rand(seed) & 511) + 15) +void FUZ_fillCompressibleNoiseBuffer(void* buffer, int bufferSize, double proba, U32* seed) { - int i; - for (i = 0; i < 2048; i++) - if (buf[i] != buf[i + 2048]) - return 0; - return 1; + BYTE* BBuffer = (BYTE*)buffer; + int pos = 0; + U32 P32 = (U32)(32768 * proba); + + // First Byte + BBuffer[pos++] = (BYTE)(FUZ_rand(seed)); + + while (pos < bufferSize) + { + // Select : Literal (noise) or copy (within 64K) + if (FUZ_RAND15BITS < P32) + { + // Copy (within 64K) + int ref, d; + int length = FUZ_RANDLENGTH + 4; + int offset = FUZ_RAND15BITS + 1; + if (offset > pos) offset = pos; + if (pos + length > bufferSize) length = bufferSize - pos; + ref = pos - offset; + d = pos + length; + while (pos < d) BBuffer[pos++] = BBuffer[ref++]; + } + else + { + // Literal (noise) + int d; + int length = FUZ_RANDLENGTH; + if (pos + length > bufferSize) length = bufferSize - pos; + d = pos + length; + while (pos < d) BBuffer[pos++] = (BYTE)(FUZ_rand(seed) >> 5); + } + } } @@ -127,21 +179,25 @@ int main(int argc, char** argv) { unsigned long long bytes = 0; unsigned long long cbytes = 0; unsigned long long hcbytes = 0; - unsigned char buf[LEN]; - unsigned char testOut[LEN+1]; + unsigned long long ccbytes = 0; + void* CNBuffer; + char* compressedBuffer; + char* decodedBuffer; # define FUZ_max LZ4_COMPRESSBOUND(LEN) # define FUZ_avail ROUND_PAGE(FUZ_max) - const int off_full = FUZ_avail - FUZ_max; - unsigned char cbuf[FUZ_avail + PAGE_SIZE]; - unsigned int seed, randState, cur_seq=PRIME3, seeds[NUM_SEQ], timestamp=FUZ_GetMilliStart(); - int i, j, k, ret, len, lenHC, attemptNb; + unsigned int seed, randState, timestamp=FUZ_GetMilliStart(); + int ret, attemptNb; char userInput[30] = {0}; -# define FUZ_CHECKTEST(cond, message) if (cond) { printf("Test %i : %s : seed %u, cycle %i \n", testNb, message, seed, attemptNb); goto _output_error; } -# define FUZ_DISPLAYTEST testNb++; no_prompt ? 0 : printf("%2i\b\b", testNb); +# define FUZ_CHECKTEST(cond, ...) if (cond) { printf("Test %i : ", testNb); printf(__VA_ARGS__); \ + printf(" (seed %u, cycle %i) \n", seed, attemptNb); goto _output_error; } +# define FUZ_DISPLAYTEST testNb++; no_prompt ? 0 : printf("%2i\b\b", testNb); void* stateLZ4 = malloc(LZ4_sizeofState()); void* stateLZ4HC = malloc(LZ4_sizeofStateHC()); + void* LZ4continue; + U32 crcOrig, crcCheck; - printf("starting LZ4 fuzzer (%i-bits, %s)\n", (int)(sizeof(size_t)*8), LZ4_VERSION); + // Get Seed + printf("Starting LZ4 fuzzer (%i-bits, %s)\n", (int)(sizeof(size_t)*8), LZ4_VERSION); printf("Select an Initialisation number (default : random) : "); fflush(stdout); if ( no_prompt || fgets(userInput, sizeof userInput, stdin) ) @@ -154,12 +210,20 @@ int main(int argc, char** argv) { //FUZ_SecurityTest(); - for (i = 0; i < 2048; i++) - cbuf[FUZ_avail + i] = cbuf[FUZ_avail + 2048 + i] = FUZ_rand(&randState) >> 16; + // Create compressible test buffer + CNBuffer = malloc(COMPRESSIBLE_NOISE_LENGTH); + FUZ_fillCompressibleNoiseBuffer(CNBuffer, COMPRESSIBLE_NOISE_LENGTH, 0.5, &randState); + compressedBuffer = malloc(LZ4_compressBound(FUZ_MAX_BLOCK_SIZE)); + decodedBuffer = malloc(LZ4_compressBound(FUZ_MAX_BLOCK_SIZE)); + // Test loop for (attemptNb = 0; attemptNb < NB_ATTEMPTS; attemptNb++) { int testNb = 0; + char* dict; + char* block; + int dictSize, blockSize, blockStart, compressedSize, HCcompressedSize; + int blockContinueCompressedSize; // note : promptThrottle is throtting stdout to prevent // Travis-CI's output limit (10MB) and false hangup detection. @@ -167,144 +231,237 @@ int main(int argc, char** argv) { if (!no_prompt || attemptNb == 0 || promptThrottle) { printf("\r%7i /%7i - ", attemptNb, NB_ATTEMPTS); - if (no_prompt) - { - fflush(stdout); - } + if (no_prompt) fflush(stdout); } - for (j = 0; j < NUM_SEQ; j++) { - seeds[j] = FUZ_rand(&randState) << 8; - seeds[j] ^= (FUZ_rand(&randState) >> 8) & 65535; - } - for (j = 0; j < LEN; j++) { - k = FUZ_rand(&randState); - if (j == 0 || NEW_SEQ(k)) - cur_seq = seeds[(FUZ_rand(&randState) >> 16) & SEQ_MSK]; - if (MOD_SEQ(k)) { - k = (FUZ_rand(&randState) >> 16) & SEQ_MSK; - seeds[k] = FUZ_rand(&randState) << 8; - seeds[k] ^= (FUZ_rand(&randState) >> 8) & 65535; - } - buf[j] = FUZ_rand(&cur_seq) >> 16; - } + // Select block to test + blockSize = FUZ_rand(&randState) % FUZ_MAX_BLOCK_SIZE; + blockStart = FUZ_rand(&randState) % (COMPRESSIBLE_NOISE_LENGTH - blockSize); + dictSize = FUZ_rand(&randState) % FUZ_MAX_DICT_SIZE; + if (dictSize > blockStart) dictSize = blockStart; + block = ((char*)CNBuffer) + blockStart; + dict = block - dictSize; + + /* Compression tests */ // Test compression HC - FUZ_DISPLAYTEST; // 1 - ret = LZ4_compressHC_limitedOutput((const char*)buf, (char*)&cbuf[off_full], LEN, FUZ_max); - FUZ_CHECKTEST(ret==0, "LZ4_compressHC_limitedOutput() failed despite sufficient space"); - lenHC = ret; + FUZ_DISPLAYTEST; + ret = LZ4_compressHC(block, compressedBuffer, blockSize); + FUZ_CHECKTEST(ret==0, "LZ4_compressHC() failed"); + HCcompressedSize = ret; // Test compression HC using external state - FUZ_DISPLAYTEST; // 1 - ret = LZ4_compressHC_withStateHC(stateLZ4HC, (const char*)buf, (char*)&cbuf[off_full], LEN); + FUZ_DISPLAYTEST; + ret = LZ4_compressHC_withStateHC(stateLZ4HC, block, compressedBuffer, blockSize); FUZ_CHECKTEST(ret==0, "LZ4_compressHC_withStateHC() failed"); // Test compression using external state - FUZ_DISPLAYTEST; // 2 - ret = LZ4_compress_withState(stateLZ4, (const char*)buf, (char*)&cbuf[off_full], LEN); + FUZ_DISPLAYTEST; + ret = LZ4_compress_withState(stateLZ4, block, compressedBuffer, blockSize); FUZ_CHECKTEST(ret==0, "LZ4_compress_withState() failed"); // Test compression - FUZ_DISPLAYTEST; // 2 - ret = LZ4_compress_limitedOutput((const char*)buf, (char*)&cbuf[off_full], LEN, FUZ_max); - FUZ_CHECKTEST(ret==0, "LZ4_compress_limitedOutput() failed despite sufficient space"); - len = ret; + FUZ_DISPLAYTEST; + ret = LZ4_compress(block, compressedBuffer, blockSize); + FUZ_CHECKTEST(ret==0, "LZ4_compress() failed"); + compressedSize = ret; + + /* Decompression tests */ + + crcOrig = XXH32(block, blockSize, 0); // Test decoding with output size being exactly what's necessary => must work - FUZ_DISPLAYTEST; // 3 - ret = LZ4_decompress_fast((char*)&cbuf[off_full], (char*)testOut, LEN); + FUZ_DISPLAYTEST; + ret = LZ4_decompress_fast(compressedBuffer, decodedBuffer, blockSize); FUZ_CHECKTEST(ret<0, "LZ4_decompress_fast failed despite correct space"); + FUZ_CHECKTEST(ret!=compressedSize, "LZ4_decompress_fast failed : did not fully read compressed data"); + crcCheck = XXH32(decodedBuffer, blockSize, 0); + FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_fast corrupted decoded data"); // Test decoding with one byte missing => must fail - FUZ_DISPLAYTEST; // 4 - ret = LZ4_decompress_fast((char*)&cbuf[off_full], (char*)testOut, LEN-1); + FUZ_DISPLAYTEST; + decodedBuffer[blockSize-1] = 0; + ret = LZ4_decompress_fast(compressedBuffer, decodedBuffer, blockSize-1); FUZ_CHECKTEST(ret>=0, "LZ4_decompress_fast should have failed, due to Output Size being too small"); + FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_fast overrun specified output buffer"); // Test decoding with one byte too much => must fail FUZ_DISPLAYTEST; - ret = LZ4_decompress_fast((char*)&cbuf[off_full], (char*)testOut, LEN+1); + ret = LZ4_decompress_fast(compressedBuffer, decodedBuffer, blockSize+1); FUZ_CHECKTEST(ret>=0, "LZ4_decompress_fast should have failed, due to Output Size being too large"); - // Test decoding with enough output size => must work + // Test decoding with output size exactly what's necessary => must work FUZ_DISPLAYTEST; - ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len, LEN+1); + decodedBuffer[blockSize] = 0; + ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, blockSize); FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe failed despite sufficient space"); + FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe did not regenerate original data"); + FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe overrun specified output buffer size"); + crcCheck = XXH32(decodedBuffer, blockSize, 0); + FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe corrupted decoded data"); - // Test decoding with output size being exactly what's necessary => must work + // Test decoding with more than enough output size => must work FUZ_DISPLAYTEST; - ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len, LEN); - FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe failed despite sufficient space"); + decodedBuffer[blockSize] = 0; + decodedBuffer[blockSize+1] = 0; + ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, blockSize+1); + FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe failed despite amply sufficient space"); + FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe did not regenerate original data"); + //FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe wrote more than target size"); // well, is that an issue ? + FUZ_CHECKTEST(decodedBuffer[blockSize+1], "LZ4_decompress_safe overrun specified output buffer size"); + crcCheck = XXH32(decodedBuffer, blockSize, 0); + FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe corrupted decoded data"); // Test decoding with output size being one byte too short => must fail FUZ_DISPLAYTEST; - ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len, LEN-1); + decodedBuffer[blockSize-1] = 0; + ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, blockSize-1); FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to Output Size being one byte too short"); + FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_safe overrun specified output buffer size"); + + // Test decoding with output size being 10 bytes too short => must fail + FUZ_DISPLAYTEST; + if (blockSize>10) + { + decodedBuffer[blockSize-10] = 0; + ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, blockSize-10); + FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to Output Size being 10 bytes too short"); + FUZ_CHECKTEST(decodedBuffer[blockSize-10], "LZ4_decompress_safe overrun specified output buffer size"); + } // Test decoding with input size being one byte too short => must fail FUZ_DISPLAYTEST; - ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len-1, LEN); - FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to input size being one byte too short"); + ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize-1, blockSize); + FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to input size being one byte too short (blockSize=%i, ret=%i, compressedSize=%i)", blockSize, ret, compressedSize); // Test decoding with input size being one byte too large => must fail FUZ_DISPLAYTEST; - ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len+1, LEN); + decodedBuffer[blockSize] = 0; + ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize+1, blockSize); FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to input size being too large"); - //if (ret>=0) { printf("Test 10 : decompression should have failed, due to input size being too large : seed %u, len %d\n", seed, LEN); goto _output_error; } + FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe overrun specified output buffer size"); // Test partial decoding with target output size being max/2 => must work FUZ_DISPLAYTEST; - ret = LZ4_decompress_safe_partial((char*)&cbuf[off_full], (char*)testOut, len, LEN/2, LEN); + ret = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, compressedSize, blockSize/2, blockSize); FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe_partial failed despite sufficient space"); // Test partial decoding with target output size being just below max => must work FUZ_DISPLAYTEST; - ret = LZ4_decompress_safe_partial((char*)&cbuf[off_full], (char*)testOut, len, LEN-3, LEN); + ret = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, compressedSize, blockSize-3, blockSize); FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe_partial failed despite sufficient space"); + /* Test Compression with limited output size */ + // Test compression with output size being exactly what's necessary (should work) FUZ_DISPLAYTEST; - ret = LZ4_compress_limitedOutput((const char*)buf, (char*)&cbuf[FUZ_avail-len], LEN, len); + ret = LZ4_compress_limitedOutput(block, compressedBuffer, blockSize, compressedSize); FUZ_CHECKTEST(ret==0, "LZ4_compress_limitedOutput() failed despite sufficient space"); - FUZ_CHECKTEST(!test_canary(&cbuf[FUZ_avail]), "compression overran output buffer"); // Test compression with output size being exactly what's necessary and external state (should work) - FUZ_DISPLAYTEST; // 2 - ret = LZ4_compress_limitedOutput_withState(stateLZ4, (const char*)buf, (char*)&cbuf[off_full], LEN, len); + FUZ_DISPLAYTEST; + ret = LZ4_compress_limitedOutput_withState(stateLZ4, block, compressedBuffer, blockSize, compressedSize); FUZ_CHECKTEST(ret==0, "LZ4_compress_limitedOutput_withState() failed despite sufficient space"); - FUZ_CHECKTEST(!test_canary(&cbuf[FUZ_avail]), "compression overran output buffer"); // Test HC compression with output size being exactly what's necessary (should work) FUZ_DISPLAYTEST; - ret = LZ4_compressHC_limitedOutput((const char*)buf, (char*)&cbuf[FUZ_avail-len], LEN, lenHC); + ret = LZ4_compressHC_limitedOutput(block, compressedBuffer, blockSize, HCcompressedSize); FUZ_CHECKTEST(ret==0, "LZ4_compressHC_limitedOutput() failed despite sufficient space"); // Test HC compression with output size being exactly what's necessary (should work) FUZ_DISPLAYTEST; - ret = LZ4_compressHC_limitedOutput_withStateHC(stateLZ4HC, (const char*)buf, (char*)&cbuf[FUZ_avail-len], LEN, lenHC); + ret = LZ4_compressHC_limitedOutput_withStateHC(stateLZ4HC, block, compressedBuffer, blockSize, HCcompressedSize); FUZ_CHECKTEST(ret==0, "LZ4_compressHC_limitedOutput_withStateHC() failed despite sufficient space"); // Test compression with just one missing byte into output buffer => must fail FUZ_DISPLAYTEST; - ret = LZ4_compress_limitedOutput((const char*)buf, (char*)&cbuf[FUZ_avail-(len-1)], LEN, len-1); - FUZ_CHECKTEST(ret, "compression overran output buffer"); - FUZ_CHECKTEST(!test_canary(&cbuf[FUZ_avail]), "compression overran output buffer"); + compressedBuffer[compressedSize-1] = 0; + ret = LZ4_compress_limitedOutput(block, compressedBuffer, blockSize, compressedSize-1); + FUZ_CHECKTEST(ret, "LZ4_compress_limitedOutput should have failed (output buffer too small by 1 byte)"); + FUZ_CHECKTEST(compressedBuffer[compressedSize-1], "LZ4_compress_limitedOutput overran output buffer") // Test HC compression with just one missing byte into output buffer => must fail FUZ_DISPLAYTEST; - ret = LZ4_compressHC_limitedOutput((const char*)buf, (char*)&cbuf[FUZ_avail-(len-1)], LEN, lenHC-1); - FUZ_CHECKTEST(ret, "HC compression overran output buffer"); + compressedBuffer[compressedSize-1] = 0; + ret = LZ4_compressHC_limitedOutput(block, compressedBuffer, blockSize, HCcompressedSize-1); + FUZ_CHECKTEST(ret, "LZ4_compressHC_limitedOutput should have failed (output buffer too small by 1 byte)"); + FUZ_CHECKTEST(compressedBuffer[compressedSize-1], "LZ4_compressHC_limitedOutput overran output buffer") + + /* Dictionary tests */ + + // Compress using dictionary + FUZ_DISPLAYTEST; + LZ4continue = LZ4_create (dict); + LZ4_compress_continue (LZ4continue, dict, compressedBuffer, dictSize); // Just to fill hash tables + blockContinueCompressedSize = LZ4_compress_continue (LZ4continue, block, compressedBuffer, blockSize); + FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_continue failed"); + LZ4_free (LZ4continue); + + // Decompress with dictionary as prefix + FUZ_DISPLAYTEST; + memcpy(decodedBuffer, dict, dictSize); + ret = LZ4_decompress_fast_withPrefix64k(compressedBuffer, decodedBuffer+dictSize, blockSize); + FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_decompress_fast_withPrefix64k did not read all compressed block input"); + crcCheck = XXH32(decodedBuffer+dictSize, blockSize, 0); + FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_fast_withPrefix64k corrupted decoded data"); + + FUZ_DISPLAYTEST; + ret = LZ4_decompress_safe_withPrefix64k(compressedBuffer, decodedBuffer+dictSize, blockContinueCompressedSize, blockSize); + FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe_withPrefix64k did not regenerate original data"); + crcCheck = XXH32(decodedBuffer+dictSize, blockSize, 0); + FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_withPrefix64k corrupted decoded data"); + + // Decompress with dictionary as external + FUZ_DISPLAYTEST; + decodedBuffer[blockSize] = 0; + ret = LZ4_decompress_fast_withDict(compressedBuffer, decodedBuffer, blockSize, dict, dictSize); + FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_decompress_fast_withDict did not read all compressed block input"); + FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_fast_withDict overrun specified output buffer size") + crcCheck = XXH32(decodedBuffer, blockSize, 0); + FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_fast_withDict corrupted decoded data"); + + FUZ_DISPLAYTEST; + decodedBuffer[blockSize] = 0; + ret = LZ4_decompress_safe_withDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize, dict, dictSize); + FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe_withDict did not regenerate original data"); + FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe_withDict overrun specified output buffer size") + crcCheck = XXH32(decodedBuffer, blockSize, 0); + FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_withDict corrupted decoded data"); + + FUZ_DISPLAYTEST; + decodedBuffer[blockSize-1] = 0; + ret = LZ4_decompress_fast_withDict(compressedBuffer, decodedBuffer, blockSize-1, dict, dictSize); + FUZ_CHECKTEST(ret>=0, "LZ4_decompress_fast_withDict should have failed : wrong original size (-1 byte)"); + FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_fast_withDict overrun specified output buffer size"); + + FUZ_DISPLAYTEST; + decodedBuffer[blockSize-1] = 0; + ret = LZ4_decompress_safe_withDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize-1, dict, dictSize); + FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe_withDict should have failed : not enough output size (-1 byte)"); + FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_safe_withDict overrun specified output buffer size"); + + FUZ_DISPLAYTEST; + if (blockSize > 10) + { + decodedBuffer[blockSize-10] = 0; + ret = LZ4_decompress_safe_withDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize-10, dict, dictSize); + FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe_withDict should have failed : not enough output size (-10 byte)"); + FUZ_CHECKTEST(decodedBuffer[blockSize-10], "LZ4_decompress_safe_withDict overrun specified output buffer size (-10 byte) (blockSize=%i)", blockSize); + } + - bytes += LEN; - cbytes += len; - hcbytes += lenHC; - FUZ_rand(&randState); + // Fill stats + bytes += blockSize; + cbytes += compressedSize; + hcbytes += HCcompressedSize; + ccbytes += blockContinueCompressedSize; } printf("\r%7i /%7i - ", attemptNb, NB_ATTEMPTS); printf("all tests completed successfully \n"); printf("compression ratio: %0.3f%%\n", (double)cbytes/bytes*100); printf("HC compression ratio: %0.3f%%\n", (double)hcbytes/bytes*100); + printf("ratio with dict: %0.3f%%\n", (double)ccbytes/bytes*100); if(!no_prompt) getchar(); return 0; -- cgit v0.12