From a2e93db805918a84908373a579c44732d3e6854e Mon Sep 17 00:00:00 2001 From: "yann.collet.73@gmail.com" Date: Thu, 18 Apr 2013 08:53:13 +0000 Subject: Added : function LZ4_compressHC_limitedOutput() Updated : LZ4 Streaming Format.odt, to version 1.4 New : LZ4c now supports Stream Checksum (default) and Skippable chunks Updated : Fuzzer, testing LZ4_compressHC_limitedOutput() git-svn-id: https://lz4.googlecode.com/svn/trunk@93 650e7d94-2a16-8b24-b05c-7c0b3f6821cd --- LZ4 Streaming Format.odt | Bin 80344 -> 90196 bytes Makefile | 2 +- fuzzer.c | 24 +++++++++-- lz4.c | 18 +++------ lz4.h | 10 ++--- lz4c.c | 103 ++++++++++++++++++++++++++++++++++------------- lz4hc.c | 40 ++++++++++++------ lz4hc.h | 16 +++++++- xxhash.c | 4 +- xxhash.h | 2 +- 10 files changed, 153 insertions(+), 66 deletions(-) diff --git a/LZ4 Streaming Format.odt b/LZ4 Streaming Format.odt index 62aeecb..614eb71 100644 Binary files a/LZ4 Streaming Format.odt and b/LZ4 Streaming Format.odt differ diff --git a/Makefile b/Makefile index 745888b..ea60d11 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ lz4c: lz4.c lz4hc.c bench.c xxhash.c lz4c.c lz4c32: lz4.c lz4hc.c bench.c xxhash.c lz4c.c $(CC) -m32 -Os -march=native $(CFLAGS) $^ -o $@$(EXT) -fuzzer : lz4.c fuzzer.c +fuzzer : lz4.c lz4hc.c fuzzer.c $(CC) -O3 $(CFLAGS) $^ -o $@$(EXT) clean: diff --git a/fuzzer.c b/fuzzer.c index 211c805..180f9e9 100644 --- a/fuzzer.c +++ b/fuzzer.c @@ -42,7 +42,7 @@ //************************************** // Constants //************************************** -#define NB_ATTEMPTS (1<<18) +#define NB_ATTEMPTS (1<<17) #define LEN ((1<<15)) #define SEQ_POW 2 #define NUM_SEQ (1 << SEQ_POW) @@ -99,7 +99,7 @@ int FUZ_SecurityTest() char* input; int i, r; - printf("Starting security tests (issue 52)..."); + printf("Starting overflow tests (issue 52)..."); input = (char*) malloc (20<<20); output = (char*) malloc (20<<20); input[0] = 0x0F; @@ -111,7 +111,7 @@ int FUZ_SecurityTest() free(input); free(output); - printf(" Completed (return = %i < 0)\n",r); + printf(" Passed (return = %i < 0)\n",r); return 0; } @@ -120,6 +120,7 @@ int FUZ_SecurityTest() int main() { unsigned long long bytes = 0; unsigned long long cbytes = 0; + unsigned long long hcbytes = 0; unsigned char buf[LEN]; unsigned char testOut[LEN+1]; # define FUZ_max LZ4_COMPRESSBOUND(LEN) @@ -127,7 +128,7 @@ int main() { const int off_full = FUZ_avail - FUZ_max; unsigned char cbuf[FUZ_avail + PAGE_SIZE]; unsigned int seed, cur_seq=PRIME3, seeds[NUM_SEQ], timestamp=FUZ_GetMilliStart(); - int i, j, k, ret, len; + int i, j, k, ret, len, lenHC; char userInput[30] = {0}; printf("starting LZ4 fuzzer\n"); @@ -166,6 +167,11 @@ int main() { buf[j] = FUZ_rand(&cur_seq) >> 16; } + // Test compression HC + ret = LZ4_compressHC_limitedOutput((const char*)buf, (char*)&cbuf[off_full], LEN, FUZ_max); + if (ret == 0) { printf("HC compression failed despite sufficient space: seed %u, len %d\n", seed, LEN); goto _output_error; } + lenHC = ret; + // Test compression ret = LZ4_compress_limitedOutput((const char*)buf, (char*)&cbuf[off_full], LEN, FUZ_max); if (ret == 0) { printf("compression failed despite sufficient space: seed %u, len %d\n", seed, LEN); goto _output_error; } @@ -208,17 +214,27 @@ int main() { if (!test_canary(&cbuf[FUZ_avail])) { printf("compression overran output buffer: seed %u, len %d, olen %d\n", seed, LEN, len); goto _output_error; } if (ret == 0) { printf("compression failed despite sufficient space: seed %u, len %d\n", seed, LEN); goto _output_error; } + // Test HC compression with output size being exactly what's necessary (should work) + ret = LZ4_compressHC_limitedOutput((const char*)buf, (char*)&cbuf[FUZ_avail-len], LEN, lenHC); + if (ret == 0) { printf("HC compression failed despite sufficient space: seed %u, len %d\n", seed, LEN); goto _output_error; } + // Test compression with just one missing byte into output buffer => must fail ret = LZ4_compress_limitedOutput((const char*)buf, (char*)&cbuf[FUZ_avail-(len-1)], LEN, len-1); if (ret) { printf("compression overran output buffer: seed %u, len %d, olen %d => ret %d", seed, LEN, len-1, ret); goto _output_error; } if (!test_canary(&cbuf[FUZ_avail])) { printf("compression overran output buffer: seed %u, len %d, olen %d", seed, LEN, len-1); goto _output_error; } + // Test HC compression with just one missing byte into output buffer => must fail + ret = LZ4_compressHC_limitedOutput((const char*)buf, (char*)&cbuf[FUZ_avail-(len-1)], LEN, lenHC-1); + if (ret) { printf("HC compression overran output buffer: seed %u, len %d, olen %d => ret %d", seed, LEN, lenHC-1, ret); goto _output_error; } + bytes += LEN; cbytes += len; + hcbytes += lenHC; } printf("all tests completed successfully \n"); printf("compression ratio: %0.3f%%\n", (double)cbytes/bytes*100); + printf("HC compression ratio: %0.3f%%\n", (double)hcbytes/bytes*100); getchar(); return 0; diff --git a/lz4.c b/lz4.c index 2660f5d..2fac1a6 100644 --- a/lz4.c +++ b/lz4.c @@ -181,16 +181,10 @@ typedef struct _U64_S { U64 v; } U64_S; #define HASHTABLESIZE (1 << HASH_LOG) #define HASH_MASK (HASHTABLESIZE - 1) -// NOTCOMPRESSIBLE_DETECTIONLEVEL : -// Decreasing this value will make the algorithm skip faster data segments considered "incompressible" -// This may decrease compression ratio dramatically, but will be faster on incompressible data -// Increasing this value will make the algorithm search more before declaring a segment "incompressible" -// This could improve compression a bit, but will be slower on incompressible data -// The default value (6) is recommended -#define NOTCOMPRESSIBLE_DETECTIONLEVEL 6 +#define NOTCOMPRESSIBLE_DETECTIONLEVEL 6 // Increasing this value will make the algorithm slower on incompressible data #define SKIPSTRENGTH (NOTCOMPRESSIBLE_DETECTIONLEVEL>2?NOTCOMPRESSIBLE_DETECTIONLEVEL:2) #define STACKLIMIT 13 -#define HEAPMODE (HASH_LOG>STACKLIMIT) // Defines if memory is allocated into the stack (local variable), or into the heap (malloc()). +#define HEAPMODE (HASH_LOG>STACKLIMIT) // Defines if memory is allocated into the stack (local variable), or into the heap (malloc()). #define COPYLENGTH 8 #define LASTLITERALS 5 #define MFLIMIT (COPYLENGTH+MINMATCH) @@ -338,7 +332,7 @@ static inline int LZ4_NbCommonBytes (register U32 val) static inline int LZ4_compressCtx(void** ctx, const char* source, char* dest, - int isize, + int inputSize, int maxOutputSize) { #if HEAPMODE @@ -351,7 +345,7 @@ static inline int LZ4_compressCtx(void** ctx, const BYTE* ip = (BYTE*) source; INITBASE(base); const BYTE* anchor = ip; - const BYTE* const iend = ip + isize; + const BYTE* const iend = ip + inputSize; const BYTE* const mflimit = iend - MFLIMIT; #define matchlimit (iend - LASTLITERALS) @@ -364,7 +358,7 @@ static inline int LZ4_compressCtx(void** ctx, // Init - if (isize (U32)maxOutputSize) return 0; + if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0; // Check output limit if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } else *op++ = (BYTE)(lastRun<>20)); + if (streamChecksum) XXH32_update(streamChecksumState, in_buff, inSize); // Compress Block - outSize = compressionFunction(in_buff, out_buff+4, inSize); - if (outSize < inSize) compressedfilesize += outSize+4; else compressedfilesize += inSize+4; + outSize = compressionFunction(in_buff, out_buff+4, inSize, inSize-1); + if (outSize > 0) compressedfilesize += outSize+4; else compressedfilesize += inSize+4; if (blockChecksum) compressedfilesize+=4; if (displayLevel) DISPLAY("Read : %i MB ==> %.2f%%\r", (int)(filesize>>20), (double)compressedfilesize/filesize*100); // Write Block - if (outSize < inSize) + if (outSize > 0) { unsigned int checksum; int sizeToWrite; @@ -417,6 +431,14 @@ int compress_file(char* input_filename, char* output_filename, int compressionle sizeCheck = fwrite(out_buff, 1, 4, foutput); if (sizeCheck!=(size_t)(4)) EXM_THROW(37, "Write error : cannot write end of stream"); compressedfilesize += 4; + if (streamChecksum) + { + unsigned int checksum = XXH32_digest(streamChecksumState); + * (unsigned int*) out_buff = LITTLE_ENDIAN_32(checksum); + sizeCheck = fwrite(out_buff, 1, 4, foutput); + if (sizeCheck!=(size_t)(4)) EXM_THROW(37, "Write error : cannot write stream checksum"); + compressedfilesize += 4; + } // Status end = clock(); @@ -497,7 +519,8 @@ unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) int decodedBytes; unsigned int maxBlockSize; size_t sizeCheck; - int blockChecksumFlag; + int blockChecksumFlag, streamChecksumFlag; + void* streamChecksumState=NULL; // Decode stream descriptor nbReadBytes = fread(descriptor, 1, 3, finput); @@ -506,7 +529,7 @@ unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) int version = (descriptor[0] >> 6) & _2BITS; int independance = (descriptor[0] >> 5) & _1BIT; int streamSize = (descriptor[0] >> 3) & _1BIT; - int reserved1 = (descriptor[0] >> 1) & _2BITS; + int reserved1 = (descriptor[0] >> 1) & _1BIT; int dictionary = (descriptor[0] >> 0) & _1BIT; int reserved2 = (descriptor[1] >> 7) & _1BIT; @@ -516,6 +539,7 @@ unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) int checkBits_xxh32; blockChecksumFlag = (descriptor[0] >> 4) & _1BIT; + streamChecksumFlag= (descriptor[0] >> 2) & _1BIT; if (version != 1) EXM_THROW(62, "Wrong version number"); if (independance != 1) EXM_THROW(63, "Does not support block inter-dependence"); @@ -537,6 +561,7 @@ unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) in_buff = (char*)malloc(maxBlockSize); out_buff = (char*)malloc(maxBlockSize); if (!in_buff || !out_buff) EXM_THROW(70, "Allocation error : not enough memory"); + if (streamChecksumFlag) streamChecksumState = XXH32_init(LZ4S_CHECKSUM_SEED); // Main Loop while (1) @@ -561,10 +586,10 @@ unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) { unsigned int checksum = XXH32(in_buff, blockSize, LZ4S_CHECKSUM_SEED); unsigned int readChecksum; - nbReadBytes = fread(&readChecksum, 1, 4, finput); - if( nbReadBytes != 4 ) EXM_THROW(74, "Read error : cannot read next block size"); + sizeCheck = fread(&readChecksum, 1, 4, finput); + if( sizeCheck != 4 ) EXM_THROW(74, "Read error : cannot read next block size"); readChecksum = LITTLE_ENDIAN_32(readChecksum); // Convert to little endian - if (checksum != readChecksum) EXM_THROW(75, "Error : invalid checksum detected"); + if (checksum != readChecksum) EXM_THROW(75, "Error : invalid block checksum detected"); } if (uncompressedFlag) @@ -573,6 +598,7 @@ unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) sizeCheck = fwrite(in_buff, 1, blockSize, foutput); if (sizeCheck != (size_t)blockSize) EXM_THROW(76, "Write error : cannot write data block"); filesize += blockSize; + if (streamChecksumFlag) XXH32_update(streamChecksumState, in_buff, blockSize); } else { @@ -580,6 +606,7 @@ unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) decodedBytes = LZ4_uncompress_unknownOutputSize(in_buff, out_buff, blockSize, maxBlockSize); if (decodedBytes < 0) EXM_THROW(77, "Decoding Failed ! Corrupted input detected !"); filesize += decodedBytes; + if (streamChecksumFlag) XXH32_update(streamChecksumState, out_buff, decodedBytes); // Write Block sizeCheck = fwrite(out_buff, 1, decodedBytes, foutput); @@ -587,6 +614,17 @@ unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) } } + // Stream Checksum + if (streamChecksumFlag) + { + unsigned int checksum = XXH32_digest(streamChecksumState); + unsigned int readChecksum; + sizeCheck = fread(&readChecksum, 1, 4, finput); + if (sizeCheck != 4) EXM_THROW(74, "Read error : cannot read stream checksum"); + readChecksum = LITTLE_ENDIAN_32(readChecksum); // Convert to little endian + if (checksum != readChecksum) EXM_THROW(75, "Error : invalid stream checksum detected"); + } + // Free free(in_buff); free(out_buff); @@ -597,7 +635,8 @@ unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) unsigned long long selectDecoder( FILE* finput, FILE* foutput) { - unsigned int magicNumber; + unsigned int magicNumber, size; + int errorNb; size_t nbReadBytes; // Check Archive Header @@ -605,6 +644,8 @@ unsigned long long selectDecoder( FILE* finput, FILE* foutput) if (nbReadBytes==0) return 0; // EOF if (nbReadBytes != MAGICNUMBER_SIZE) EXM_THROW(41, "Unrecognized header : Magic Number unreadable"); magicNumber = LITTLE_ENDIAN_32(magicNumber); // Convert to Little Endian format + if ((magicNumber & LZ4S_SKIPPABLEMASK) == LZ4S_SKIPPABLE0) magicNumber = LZ4S_SKIPPABLE0; // fold skippable magic numbers + switch(magicNumber) { case LZ4S_MAGICNUMBER: @@ -612,8 +653,15 @@ unsigned long long selectDecoder( FILE* finput, FILE* foutput) case LEGACY_MAGICNUMBER: DISPLAY("Detected : Legacy format \n"); return decodeLegacyStream(finput, foutput); + case LZ4S_SKIPPABLE0: + nbReadBytes = fread(&size, 1, 4, finput); + if (nbReadBytes != 4) EXM_THROW(42, "Stream error : skippable size unreadable"); + size = LITTLE_ENDIAN_32(size); // Convert to Little Endian format + errorNb = fseek(finput, size, SEEK_CUR); + if (errorNb != 0) EXM_THROW(43, "Stream error : cannot skip skippable area"); + return selectDecoder(finput, foutput); default: - if (ftell(finput) == MAGICNUMBER_SIZE) EXM_THROW(42,"Unrecognized header : file cannot be decoded"); + if (ftell(finput) == MAGICNUMBER_SIZE) EXM_THROW(44,"Unrecognized header : file cannot be decoded"); // Wrong magic number at the beginning of 1st stream DISPLAY("Stream followed by unrecognized data\n"); return 0; } @@ -691,14 +739,15 @@ int main(int argc, char** argv) switch(argument[0]) { // Display help on usage - case 'H': usage(exename); return 0; + case 'H': usage(exename); usage_advanced(); return 0; // Compression (default) case 'c': if ((argument[1] >='0') && (argument[1] <='1')) { cLevel=argument[1] - '0'; argument++; } break; case 'h': if (argument[1]=='c') { cLevel=1; argument++; } break; - // disable checksum - case 'x': blockChecksum=0; break; + // Checksum management + case 'x': blockChecksum=1; break; + case 'n': if (argument[1]=='x') { streamChecksum=0; argument++; break; } else { badusage(exename); return 1; } // Use Legacy format (hidden option) case 'l': legacy_format=1; break; diff --git a/lz4hc.c b/lz4hc.c index e3b73d8..99c7770 100644 --- a/lz4hc.c +++ b/lz4hc.c @@ -116,6 +116,7 @@ #include // calloc, free #include // memset, memcpy #include "lz4hc.h" +#include "lz4.h" #define ALLOCATOR(s) calloc(1,s) #define FREEMEM free @@ -499,7 +500,7 @@ _endCount: } -forceinline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** anchor, int ml, const BYTE* ref) +forceinline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** anchor, int matchLength, const BYTE* ref, BYTE* oend) { int length, len; BYTE* token; @@ -507,6 +508,7 @@ forceinline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE // Encode Literal length length = (int)(*ip - *anchor); token = (*op)++; + if ((*op + length + (2 + 1 + LASTLITERALS) + (length>>8)) > oend) return 1; // Check output limit if (length>=(int)RUN_MASK) { *token=(RUN_MASK< 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } else *token = (BYTE)(length<>8) > oend) return 1; // Check output limit if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (len > 254) { len-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)len; } else *token += (BYTE)len; // Prepare next loop - *ip += ml; + *ip += matchLength; *anchor = *ip; return 0; @@ -536,15 +539,17 @@ forceinline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE int LZ4_compressHCCtx(LZ4HC_Data_Structure* ctx, const char* source, char* dest, - int isize) + int inputSize, + int maxOutputSize) { const BYTE* ip = (const BYTE*) source; const BYTE* anchor = ip; - const BYTE* const iend = ip + isize; + const BYTE* const iend = ip + inputSize; const BYTE* const mflimit = iend - MFLIMIT; const BYTE* const matchlimit = (iend - LASTLITERALS); BYTE* op = (BYTE*) dest; + BYTE* const oend = op + maxOutputSize; int ml, ml2, ml3, ml0; const BYTE* ref=NULL; @@ -575,7 +580,7 @@ _Search2: if (ml2 == ml) // No better match { - LZ4_encodeSequence(&ip, &op, &anchor, ml, ref); + if (LZ4_encodeSequence(&ip, &op, &anchor, ml, ref, oend)) return 0; continue; } @@ -627,9 +632,9 @@ _Search3: // ip & ref are known; Now for ml if (start2 < ip+ml) ml = (int)(start2 - ip); // Now, encode 2 sequences - LZ4_encodeSequence(&ip, &op, &anchor, ml, ref); + if (LZ4_encodeSequence(&ip, &op, &anchor, ml, ref, oend)) return 0; ip = start2; - LZ4_encodeSequence(&ip, &op, &anchor, ml2, ref2); + if (LZ4_encodeSequence(&ip, &op, &anchor, ml2, ref2, oend)) return 0; continue; } @@ -651,7 +656,7 @@ _Search3: } } - LZ4_encodeSequence(&ip, &op, &anchor, ml, ref); + if (LZ4_encodeSequence(&ip, &op, &anchor, ml, ref, oend)) return 0; ip = start3; ref = ref3; ml = ml3; @@ -690,7 +695,7 @@ _Search3: ml = (int)(start2 - ip); } } - LZ4_encodeSequence(&ip, &op, &anchor, ml, ref); + if (LZ4_encodeSequence(&ip, &op, &anchor, ml, ref, oend)) return 0; ip = start2; ref = ref2; @@ -707,6 +712,7 @@ _Search3: // Encode Last Literals { int lastRun = (int)(iend - anchor); + if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0; // Check output limit if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } else *op++ = (BYTE)(lastRun<