From 27efcd4d4582add02884698ddca3c2cb96a281c5 Mon Sep 17 00:00:00 2001 From: "yann.collet.73@gmail.com" Date: Mon, 12 Aug 2013 08:35:52 +0000 Subject: Removed dependency to "lz4_encoder.h" and "lz4hc_encoder.h" Improved speed of LZ4_decompress_fast() with GCC Improved speed of LZ4_decompress_safe() for 32-bits Made the fast LZ4 compression compatible with low-memory systems (buffer address < 64K). Thanks Francois Gretief for report and suggestion. Makefile : added fuzzer32 Makefile : added fullbench32 fullbench : added ability to select one specific function to benchmark lz4.c : copy macros follow memcpy() arguments convention Small coding style modifications, hinted by cppCheck. git-svn-id: https://lz4.googlecode.com/svn/trunk@101 650e7d94-2a16-8b24-b05c-7c0b3f6821cd --- Makefile | 10 +- bench.c | 106 ++++++++------- fullbench.c | 109 +++++++++------ fuzzer.c | 41 ++++-- lz4.c | 408 ++++++++++++++++++++++++++++++-------------------------- lz4.h | 2 +- lz4_encoder.h | 262 ------------------------------------ lz4c.c | 21 ++- lz4hc.c | 314 +++++++++++++++++++++++++++++++++++++------ lz4hc_encoder.h | 349 ------------------------------------------------ 10 files changed, 665 insertions(+), 957 deletions(-) delete mode 100644 lz4_encoder.h delete mode 100644 lz4hc_encoder.h diff --git a/Makefile b/Makefile index aafa94f..995b387 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ endif default: lz4c -all: lz4c lz4cs lz4c32 fuzzer fullbench +all: lz4c lz4cs lz4c32 fuzzer fuzzer32 fullbench fullbench32 lz4c: lz4.c lz4hc.c bench.c xxhash.c lz4c.c $(CC) -O3 $(CFLAGS) $^ -o $@$(EXT) @@ -24,8 +24,14 @@ lz4c32: lz4.c lz4hc.c bench.c xxhash.c lz4c.c fuzzer : lz4.c lz4hc.c fuzzer.c $(CC) -O3 $(CFLAGS) $^ -o $@$(EXT) +fuzzer32 : lz4.c lz4hc.c fuzzer.c + $(CC) -m32 -O3 $(CFLAGS) $^ -o $@$(EXT) + fullbench : lz4.c lz4hc.c xxhash.c fullbench.c $(CC) -O3 $(CFLAGS) $^ -o $@$(EXT) +fullbench32 : lz4.c lz4hc.c xxhash.c fullbench.c + $(CC) -m32 -O3 $(CFLAGS) $^ -o $@$(EXT) + clean: - rm -f core *.o lz4c$(EXT) lz4cs$(EXT) lz4c32$(EXT) fuzzer$(EXT) fullbench$(EXT) + rm -f core *.o lz4c$(EXT) lz4cs$(EXT) lz4c32$(EXT) fuzzer$(EXT) fullbench$(EXT) fullbench32$(EXT) diff --git a/bench.c b/bench.c index 07fd091..016a46e 100644 --- a/bench.c +++ b/bench.c @@ -246,17 +246,7 @@ static U64 BMK_GetFileSize(char* infilename) int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel) { int fileIdx=0; - FILE* fileIn; - char* infilename; - U64 largefilesize; - size_t benchedSize; - int nbChunks; - int maxCChunkSize; - size_t readSize; char* orig_buff; - char* compressed_buff; int compressed_buff_size; - struct chunkParameters* chunkP; - U32 crcc, crcd=0; struct compressionParameters compP; U64 totals = 0; @@ -281,40 +271,51 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel) // Loop for each file while (fileIdx largefilesize) benchedSize = (size_t)largefilesize; - if (benchedSize < largefilesize) + inFileSize = BMK_GetFileSize(inFileName); + benchedSize = (size_t) BMK_findMaxMem(inFileSize) / 2; + if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize; + if (benchedSize < inFileSize) { - DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", infilename, (int)(benchedSize>>20)); + DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize>>20)); } // Alloc chunkP = (struct chunkParameters*) malloc(((benchedSize / chunkSize)+1) * sizeof(struct chunkParameters)); orig_buff = (char*)malloc((size_t )benchedSize); nbChunks = (int) (benchedSize / chunkSize) + 1; - maxCChunkSize = LZ4_compressBound(chunkSize); - compressed_buff_size = nbChunks * maxCChunkSize; - compressed_buff = (char*)malloc((size_t )compressed_buff_size); + maxCompressedChunkSize = LZ4_compressBound(chunkSize); + compressedBuffSize = nbChunks * maxCompressedChunkSize; + compressedBuffer = (char*)malloc((size_t )compressedBuffSize); - if (!orig_buff || !compressed_buff) + if (!orig_buff || !compressedBuffer) { DISPLAY("\nError: not enough memory!\n"); free(orig_buff); - free(compressed_buff); + free(compressedBuffer); free(chunkP); - fclose(fileIn); + fclose(inFile); return 12; } @@ -323,51 +324,54 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel) int i; size_t remaining = benchedSize; char* in = orig_buff; - char* out = compressed_buff; + char* out = compressedBuffer; for (i=0; i chunkSize) { chunkP[i].origSize = chunkSize; remaining -= chunkSize; } else { chunkP[i].origSize = (int)remaining; remaining = 0; } - chunkP[i].compressedBuffer = out; out += maxCChunkSize; + chunkP[i].compressedBuffer = out; out += maxCompressedChunkSize; chunkP[i].compressedSize = 0; } } // Fill input buffer - DISPLAY("Loading %s... \r", infilename); - readSize = fread(orig_buff, 1, benchedSize, fileIn); - fclose(fileIn); + DISPLAY("Loading %s... \r", inFileName); + readSize = fread(orig_buff, 1, benchedSize, inFile); + fclose(inFile); if (readSize != benchedSize) { - DISPLAY("\nError: problem reading file '%s' !! \n", infilename); + DISPLAY("\nError: problem reading file '%s' !! \n", inFileName); free(orig_buff); - free(compressed_buff); + free(compressedBuffer); free(chunkP); return 13; } // Calculating input Checksum - crcc = XXH32(orig_buff, (unsigned int)benchedSize,0); + crcOrig = XXH32(orig_buff, (unsigned int)benchedSize,0); // Bench { - int loopNb, nb_loops, chunkNb; + int loopNb, chunkNb; size_t cSize=0; - int milliTime; double fastestC = 100000000., fastestD = 100000000.; double ratio=0.; + U32 crcCheck=0; DISPLAY("\r%79s\r", ""); for (loopNb = 1; loopNb <= nbIterations; loopNb++) { + int nbLoops; + int milliTime; + // Compression - DISPLAY("%1i-%-14.14s : %9i ->\r", loopNb, infilename, (int)benchedSize); - { size_t i; for (i=0; i\r", loopNb, inFileName, (int)benchedSize); + { size_t i; for (i=0; i %9i (%5.2f%%),%7.1f MB/s\r", loopNb, infilename, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000.); + DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%),%7.1f MB/s\r", loopNb, inFileName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000.); // Decompression { size_t i; for (i=0; i %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\r", loopNb, infilename, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.); + if ((double)milliTime < fastestD*nbLoops) fastestD = (double)milliTime/nbLoops; + DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\r", loopNb, inFileName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.); // CRC Checking - crcd = XXH32(orig_buff, (unsigned int)benchedSize,0); - if (crcc!=crcd) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", infilename, (unsigned)crcc, (unsigned)crcd); break; } + crcCheck = XXH32(orig_buff, (unsigned int)benchedSize,0); + if (crcOrig!=crcCheck) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", inFileName, (unsigned)crcOrig, (unsigned)crcCheck); break; } } - if (crcc==crcd) + if (crcOrig==crcCheck) { if (ratio<100.) - DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\n", infilename, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.); + DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\n", inFileName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.); else - DISPLAY("%-16.16s : %9i -> %9i (%5.1f%%),%7.1f MB/s ,%7.1f MB/s \n", infilename, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.); + DISPLAY("%-16.16s : %9i -> %9i (%5.1f%%),%7.1f MB/s ,%7.1f MB/s \n", inFileName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.); } totals += benchedSize; totalz += cSize; @@ -428,7 +432,7 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel) } free(orig_buff); - free(compressed_buff); + free(compressedBuffer); free(chunkP); } diff --git a/fullbench.c b/fullbench.c index 66aa0ed..779e1ba 100644 --- a/fullbench.c +++ b/fullbench.c @@ -116,6 +116,9 @@ #define MAX_MEM (1984<<20) #define DEFAULT_CHUNKSIZE (4<<20) +#define ALL_COMPRESSORS -1 +#define ALL_DECOMPRESSORS -1 + //************************************** // Local structures @@ -145,6 +148,8 @@ static int nbIterations = NBLOOPS; static int BMK_pause = 0; static int compressionTest = 1; static int decompressionTest = 1; +static int compressionAlgo = ALL_COMPRESSORS; +static int decompressionAlgo = ALL_DECOMPRESSORS; void BMK_SetBlocksize(int bsize) { @@ -277,22 +282,16 @@ static inline int local_LZ4_decompress_safe_partial(const char* in, char* out, i int fullSpeedBench(char** fileNamesTable, int nbFiles) { int fileIdx=0; - FILE* fileIn; - char* infilename; - U64 largefilesize; - size_t benchedSize; - int nbChunks; - int maxCChunkSize; - size_t readSize; char* orig_buff; - char* compressed_buff; int compressed_buff_size; - struct chunkParameters* chunkP; - U32 crcc, crcd=0; # define NB_COMPRESSION_ALGORITHMS 4 +# define MINCOMPRESSIONCHAR '0' +# define MAXCOMPRESSIONCHAR '3' static char* compressionNames[] = { "LZ4_compress", "LZ4_compress_limitedOutput", "LZ4_compressHC", "LZ4_compressHC_limitedOutput" }; double totalCTime[NB_COMPRESSION_ALGORITHMS] = {0}; double totalCSize[NB_COMPRESSION_ALGORITHMS] = {0}; # define NB_DECOMPRESSION_ALGORITHMS 5 +# define MINDECOMPRESSIONCHAR '0' +# define MAXDECOMPRESSIONCHAR '4' static char* decompressionNames[] = { "LZ4_decompress_fast", "LZ4_decompress_fast_withPrefix64k", "LZ4_decompress_safe", "LZ4_decompress_safe_withPrefix64k", "LZ4_decompress_safe_partial" }; double totalDTime[NB_DECOMPRESSION_ALGORITHMS] = {0}; @@ -302,31 +301,42 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) // Loop for each file while (fileIdx largefilesize) benchedSize = (size_t)largefilesize; - if (benchedSize < largefilesize) + inFileSize = BMK_GetFileSize(inFileName); + benchedSize = (size_t) BMK_findMaxMem(inFileSize) / 2; + if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize; + if (benchedSize < inFileSize) { - DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", infilename, (int)(benchedSize>>20)); + DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize>>20)); } // Alloc chunkP = (struct chunkParameters*) malloc(((benchedSize / chunkSize)+1) * sizeof(struct chunkParameters)); orig_buff = (char*) malloc((size_t)benchedSize); nbChunks = (int) (benchedSize / chunkSize) + 1; - maxCChunkSize = LZ4_compressBound(chunkSize); - compressed_buff_size = nbChunks * maxCChunkSize; - compressed_buff = (char*)malloc((size_t)compressed_buff_size); + maxCompressedChunkSize = LZ4_compressBound(chunkSize); + compressedBuffSize = nbChunks * maxCompressedChunkSize; + compressed_buff = (char*)malloc((size_t)compressedBuffSize); if(!orig_buff || !compressed_buff) @@ -335,7 +345,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) free(orig_buff); free(compressed_buff); free(chunkP); - fclose(fileIn); + fclose(inFile); return 12; } @@ -350,19 +360,19 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) chunkP[i].id = i; chunkP[i].origBuffer = in; in += chunkSize; if ((int)remaining > chunkSize) { chunkP[i].origSize = chunkSize; remaining -= chunkSize; } else { chunkP[i].origSize = (int)remaining; remaining = 0; } - chunkP[i].compressedBuffer = out; out += maxCChunkSize; + chunkP[i].compressedBuffer = out; out += maxCompressedChunkSize; chunkP[i].compressedSize = 0; } } // Fill input buffer - DISPLAY("Loading %s... \r", infilename); - readSize = fread(orig_buff, 1, benchedSize, fileIn); - fclose(fileIn); + DISPLAY("Loading %s... \r", inFileName); + readSize = fread(orig_buff, 1, benchedSize, inFile); + fclose(inFile); if(readSize != benchedSize) { - DISPLAY("\nError: problem reading file '%s' !! \n", infilename); + DISPLAY("\nError: problem reading file '%s' !! \n", inFileName); free(orig_buff); free(compressed_buff); free(chunkP); @@ -370,7 +380,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) } // Calculating input Checksum - crcc = XXH32(orig_buff, (unsigned int)benchedSize,0); + crcOriginal = XXH32(orig_buff, (unsigned int)benchedSize,0); // Bench @@ -380,7 +390,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) double ratio=0.; DISPLAY("\r%79s\r", ""); - DISPLAY(" %s : \n", infilename); + DISPLAY(" %s : \n", inFileName); // Compression Algorithms for (cAlgNb=0; (cAlgNb < NB_COMPRESSION_ALGORITHMS) && (compressionTest); cAlgNb++) @@ -389,6 +399,8 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) int (*compressionFunction)(const char*, char*, int); double bestTime = 100000000.; + if ((compressionAlgo != ALL_COMPRESSORS) && (compressionAlgo != cAlgNb)) continue; + switch(cAlgNb) { case 0: compressionFunction = LZ4_compress; break; @@ -452,6 +464,8 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) int (*decompressionFunction)(const char*, char*, int, int); double bestTime = 100000000.; + if ((decompressionAlgo != ALL_DECOMPRESSORS) && (decompressionAlgo != dAlgNb)) continue; + switch(dAlgNb) { case 0: decompressionFunction = local_LZ4_decompress_fast; break; @@ -466,8 +480,9 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) { double averageTime; int milliTime; + U32 crcDecoded; - DISPLAY("%1i-%-19.19s : %9i ->\r", loopNb, dName, (int)benchedSize); + DISPLAY("%1i-%-24.24s :%10i ->\r", loopNb, dName, (int)benchedSize); nb_loops = 0; milliTime = BMK_GetMilliStart(); @@ -487,13 +502,14 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) averageTime = (double)milliTime / nb_loops; if (averageTime < bestTime) bestTime = averageTime; - DISPLAY("%1i-%-19.19s : %9i -> %7.1f MB/s\r", loopNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000.); + DISPLAY("%1i-%-24.24s :%10i -> %7.1f MB/s\r", loopNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000.); + + // CRC Checking + crcDecoded = XXH32(orig_buff, (int)benchedSize, 0); + if (crcOriginal!=crcDecoded) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", inFileName, (unsigned)crcOriginal, (unsigned)crcDecoded); exit(1); } } - // CRC Checking - crcd = XXH32(orig_buff, (int)benchedSize, 0); - if (crcc!=crcd) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", infilename, (unsigned)crcc, (unsigned)crcd); exit(1); } - DISPLAY("%-21.21s : %9i -> %7.1f MB/s\n", dName, (int)benchedSize, (double)benchedSize / bestTime / 1000.); + DISPLAY("%-26.26s :%10i -> %7.1f MB/s\n", dName, (int)benchedSize, (double)benchedSize / bestTime / 1000.); totalDTime[dAlgNb] += bestTime; } @@ -514,11 +530,13 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) for (AlgNb = 0; (AlgNb < NB_COMPRESSION_ALGORITHMS) && (compressionTest); AlgNb ++) { char* cName = compressionNames[AlgNb]; + if ((compressionAlgo != ALL_COMPRESSORS) && (compressionAlgo != AlgNb)) continue; DISPLAY("%-21.21s :%10llu ->%10llu (%5.2f%%), %6.1f MB/s\n", cName, (long long unsigned int)totals, (long long unsigned int)totalCSize[AlgNb], (double)totalCSize[AlgNb]/(double)totals*100., (double)totals/totalCTime[AlgNb]/1000.); } for (AlgNb = 0; (AlgNb < NB_DECOMPRESSION_ALGORITHMS) && (decompressionTest); AlgNb ++) { char* dName = decompressionNames[AlgNb]; + if ((decompressionAlgo != ALL_DECOMPRESSORS) && (decompressionAlgo != AlgNb)) continue; DISPLAY("%-21.21s :%10llu -> %6.1f MB/s\n", dName, (long long unsigned int)totals, (double)totals/totalDTime[AlgNb]/1000.); } } @@ -536,16 +554,18 @@ int usage(char* exename) DISPLAY( "Arguments :\n"); DISPLAY( " -c : compression tests only\n"); DISPLAY( " -d : decompression tests only\n"); - DISPLAY( " -H : Help (this text + advanced options)\n"); + DISPLAY( " -H/-h : Help (this text + advanced options)\n"); return 0; } int usage_advanced() { DISPLAY( "\nAdvanced options :\n"); + DISPLAY( " -c# : test only compression function # [%c-%c]\n", MINCOMPRESSIONCHAR, MAXCOMPRESSIONCHAR); + DISPLAY( " -d# : test only compression function # [%c-%c]\n", MINDECOMPRESSIONCHAR, MAXDECOMPRESSIONCHAR); + DISPLAY( " -i# : iteration loops [1-9](default : %i)\n", NBLOOPS); DISPLAY( " -B# : Block size [4-7](default : 7)\n"); //DISPLAY( " -BD : Block dependency (improve compression ratio)\n"); - DISPLAY( " -i# : iteration loops [1-9](default : 6)\n"); return 0; } @@ -584,12 +604,21 @@ int main(int argc, char** argv) switch(argument[0]) { // Select compression algorithm only - case 'c': decompressionTest = 0; break; + case 'c': + decompressionTest = 0; + if ((argument[1]>= MINCOMPRESSIONCHAR) && (argument[1]<= MAXCOMPRESSIONCHAR)) + compressionAlgo = argument[1] - '0', argument++; + break; // Select decompression algorithm only - case 'd': compressionTest = 0; break; + case 'd': + compressionTest = 0; + if ((argument[1]>= MINDECOMPRESSIONCHAR) && (argument[1]<= MAXDECOMPRESSIONCHAR)) + decompressionAlgo = argument[1] - '0', argument++; + break; // Display help on usage + case 'h' : case 'H': usage(exename); usage_advanced(); return 0; // Modify Block Properties diff --git a/fuzzer.c b/fuzzer.c index a10b222..fdbc3c0 100644 --- a/fuzzer.c +++ b/fuzzer.c @@ -101,7 +101,7 @@ int FUZ_SecurityTest() char* input; int i, r; - printf("Overflow test (issue 52)..."); + printf("Overflow test (issue 52)...\n"); input = (char*) malloc (20<<20); output = (char*) malloc (20<<20); input[0] = 0x0F; @@ -132,7 +132,8 @@ int main() { unsigned int seed, randState, cur_seq=PRIME3, seeds[NUM_SEQ], timestamp=FUZ_GetMilliStart(); int i, j, k, ret, len, lenHC, attemptNb; char userInput[30] = {0}; -# define FUZ_CHECKTEST(cond, message) testNb++; if (cond) { printf("Test %i : %s : seed %u, cycle %i \n", testNb, message, seed, attemptNb); goto _output_error; } +# define FUZ_CHECKTEST(cond, message) if (cond) { printf("Test %i : %s : seed %u, cycle %i \n", testNb, message, seed, attemptNb); goto _output_error; } +# define FUZ_DISPLAYTEST testNb++; printf("%2i\b\b", testNb); printf("starting LZ4 fuzzer\n"); printf("Select an Initialisation number (default : random) : "); @@ -145,7 +146,7 @@ int main() { printf("Seed = %u\n", seed); randState = seed; - FUZ_SecurityTest(); + //FUZ_SecurityTest(); for (i = 0; i < 2048; i++) cbuf[FUZ_avail + i] = cbuf[FUZ_avail + 2048 + i] = FUZ_rand(&randState) >> 16; @@ -154,7 +155,7 @@ int main() { { int testNb = 0; - printf("\r%7i /%7i\r", attemptNb, NB_ATTEMPTS); + printf("\r%7i /%7i - ", attemptNb, NB_ATTEMPTS); for (j = 0; j < NUM_SEQ; j++) { seeds[j] = FUZ_rand(&randState) << 8; @@ -173,71 +174,87 @@ int main() { } // Test compression HC + FUZ_DISPLAYTEST; // 1 ret = LZ4_compressHC_limitedOutput((const char*)buf, (char*)&cbuf[off_full], LEN, FUZ_max); FUZ_CHECKTEST(ret==0, "HC compression failed despite sufficient space"); lenHC = ret; // Test compression + FUZ_DISPLAYTEST; // 2 ret = LZ4_compress_limitedOutput((const char*)buf, (char*)&cbuf[off_full], LEN, FUZ_max); FUZ_CHECKTEST(ret==0, "compression failed despite sufficient space"); len = ret; // Test decoding with output size being exactly what's necessary => must work + FUZ_DISPLAYTEST; // 3 ret = LZ4_decompress_fast((char*)&cbuf[off_full], (char*)testOut, LEN); - FUZ_CHECKTEST(ret<0, "decompression failed despite correct space"); + FUZ_CHECKTEST(ret<0, "LZ4_decompress_fast failed despite correct space"); // Test decoding with one byte missing => must fail + FUZ_DISPLAYTEST; // 4 ret = LZ4_decompress_fast((char*)&cbuf[off_full], (char*)testOut, LEN-1); - FUZ_CHECKTEST(ret>=0, "decompression should have failed, due to Output Size being too small"); + FUZ_CHECKTEST(ret>=0, "LZ4_decompress_fast should have failed, due to Output Size being too small"); // Test decoding with one byte too much => must fail + FUZ_DISPLAYTEST; ret = LZ4_decompress_fast((char*)&cbuf[off_full], (char*)testOut, LEN+1); - FUZ_CHECKTEST(ret>=0, "decompression should have failed, due to Output Size being too large"); + FUZ_CHECKTEST(ret>=0, "LZ4_decompress_fast should have failed, due to Output Size being too large"); // Test decoding with enough output size => must work + FUZ_DISPLAYTEST; ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len, LEN+1); - FUZ_CHECKTEST(ret<0, "decompression failed despite sufficient space"); + FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe failed despite sufficient space"); // Test decoding with output size being exactly what's necessary => must work + FUZ_DISPLAYTEST; ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len, LEN); - FUZ_CHECKTEST(ret<0, "decompression failed despite sufficient space"); + FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe failed despite sufficient space"); // Test decoding with output size being one byte too short => must fail + FUZ_DISPLAYTEST; ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len, LEN-1); FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to Output Size being one byte too short"); // Test decoding with input size being one byte too short => must fail + FUZ_DISPLAYTEST; ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len-1, LEN); FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to input size being one byte too short"); // Test decoding with input size being one byte too large => must fail + FUZ_DISPLAYTEST; ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len+1, LEN); - FUZ_CHECKTEST(ret>=0, "decompression should have failed, due to input size being too large"); + FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to input size being too large"); //if (ret>=0) { printf("Test 10 : decompression should have failed, due to input size being too large : seed %u, len %d\n", seed, LEN); goto _output_error; } // Test partial decoding with target output size being max/2 => must work + FUZ_DISPLAYTEST; ret = LZ4_decompress_safe_partial((char*)&cbuf[off_full], (char*)testOut, len, LEN/2, LEN); - FUZ_CHECKTEST(ret<0, "partial decompression failed despite sufficient space"); + FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe_partial failed despite sufficient space"); // Test partial decoding with target output size being just below max => must work + FUZ_DISPLAYTEST; ret = LZ4_decompress_safe_partial((char*)&cbuf[off_full], (char*)testOut, len, LEN-3, LEN); - FUZ_CHECKTEST(ret<0, "partial decompression failed despite sufficient space"); + FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe_partial failed despite sufficient space"); // Test compression with output size being exactly what's necessary (should work) + FUZ_DISPLAYTEST; ret = LZ4_compress_limitedOutput((const char*)buf, (char*)&cbuf[FUZ_avail-len], LEN, len); FUZ_CHECKTEST(!test_canary(&cbuf[FUZ_avail]), "compression overran output buffer"); FUZ_CHECKTEST(ret==0, "compression failed despite sufficient space"); // Test HC compression with output size being exactly what's necessary (should work) + FUZ_DISPLAYTEST; ret = LZ4_compressHC_limitedOutput((const char*)buf, (char*)&cbuf[FUZ_avail-len], LEN, lenHC); FUZ_CHECKTEST(ret==0, "HC compression failed despite sufficient space"); // Test compression with just one missing byte into output buffer => must fail + FUZ_DISPLAYTEST; ret = LZ4_compress_limitedOutput((const char*)buf, (char*)&cbuf[FUZ_avail-(len-1)], LEN, len-1); FUZ_CHECKTEST(ret, "compression overran output buffer"); FUZ_CHECKTEST(!test_canary(&cbuf[FUZ_avail]), "compression overran output buffer"); // Test HC compression with just one missing byte into output buffer => must fail + FUZ_DISPLAYTEST; ret = LZ4_compressHC_limitedOutput((const char*)buf, (char*)&cbuf[FUZ_avail-(len-1)], LEN, lenHC-1); FUZ_CHECKTEST(ret, "HC compression overran output buffer"); diff --git a/lz4.c b/lz4.c index 779d68a..549edad 100644 --- a/lz4.c +++ b/lz4.c @@ -31,10 +31,6 @@ - LZ4 source repository : http://code.google.com/p/lz4/ */ -/* -Note : this source file requires "lz4_encoder.h" -*/ - //************************************** // Tuning parameters //************************************** @@ -46,10 +42,8 @@ Note : this source file requires "lz4_encoder.h" #define MEMORY_USAGE 14 // HEAPMODE : -// Select how default compression function will allocate memory for its hash table, +// Select how default compression functions will allocate memory for their hash table, // in memory stack (0:default, fastest), or in memory heap (1:requires memory allocation (malloc)). -// Default allocation strategy is to use stack (HEAPMODE 0) -// Note : explicit functions *_stack* and *_heap* are unaffected by this setting #define HEAPMODE 0 @@ -113,7 +107,7 @@ Note : this source file requires "lz4_encoder.h" #endif #ifdef _MSC_VER // Visual Studio -# define forceinline static __forceinline +# define FORCE_INLINE static __forceinline # include // For Visual 2005 # if LZ4_ARCH64 // 64-bits # pragma intrinsic(_BitScanForward64) // For Visual 2005 @@ -125,9 +119,9 @@ Note : this source file requires "lz4_encoder.h" # pragma warning(disable : 4127) // disable: C4127: conditional expression is constant #else # ifdef __GNUC__ -# define forceinline static inline __attribute__((always_inline)) +# define FORCE_INLINE static inline __attribute__((always_inline)) # else -# define forceinline static inline +# define FORCE_INLINE static inline # endif #endif @@ -175,6 +169,8 @@ Note : this source file requires "lz4_encoder.h" typedef unsigned long long U64; #endif +typedef const BYTE* Ptr; + #if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS) # define _PACKED __attribute__ ((packed)) #else @@ -232,15 +228,15 @@ typedef struct {size_t v;} _PACKED size_t_S; // Architecture-specific macros //************************************** #define STEPSIZE sizeof(size_t) -#define LZ4_COPYSTEP(s,d) { AARCH(d) = AARCH(s); d+=STEPSIZE; s+=STEPSIZE; } -#define LZ4_COPY8(s,d) { LZ4_COPYSTEP(s,d); if (STEPSIZE<8) LZ4_COPYSTEP(s,d); } -#define LZ4_SECURECOPY(s,d,e) { if ((STEPSIZE==8)&&(d=e; //**************************** @@ -265,7 +260,7 @@ typedef struct {size_t v;} _PACKED size_t_S; //**************************** #if LZ4_ARCH64 -forceinline int LZ4_NbCommonBytes (register U64 val) +FORCE_INLINE int LZ4_NbCommonBytes (register U64 val) { # if defined(LZ4_BIG_ENDIAN) # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) @@ -297,7 +292,7 @@ forceinline int LZ4_NbCommonBytes (register U64 val) #else -forceinline int LZ4_NbCommonBytes (register U32 val) +FORCE_INLINE int LZ4_NbCommonBytes (register U32 val) { # if defined(LZ4_BIG_ENDIAN) # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) @@ -329,201 +324,234 @@ forceinline int LZ4_NbCommonBytes (register U32 val) #endif - -//****************************** +//**************************** // Compression functions -//****************************** - -/* -int LZ4_compress_stack( - const char* source, - char* dest, - int inputSize) - -Compress 'inputSize' bytes from 'source' into an output buffer 'dest'. -Destination buffer must be already allocated, and sized at a minimum of LZ4_compressBound(inputSize). -return : the number of bytes written in buffer 'dest' -*/ -#define FUNCTION_NAME LZ4_compress_stack -#include "lz4_encoder.h" +//**************************** +#define LZ4_HASHLOG (MEMORY_USAGE-2) +typedef enum { notLimited = 0, limited = 1 } limitedOutput_directive; +typedef enum { byPtr, byU32, byU16 } tableType_t; +FORCE_INLINE int LZ4_hashSequence(U32 sequence, tableType_t tableType) +{ + if (tableType == byU16) + return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); + else + return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); +} -/* -int LZ4_compress_stack_limitedOutput( - const char* source, - char* dest, - int inputSize, - int maxOutputSize) +FORCE_INLINE int LZ4_hashPosition(Ptr p, tableType_t tableType) { return LZ4_hashSequence(A32(p), tableType); } -Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'. -If it cannot achieve it, compression will stop, and result of the function will be zero. -return : the number of bytes written in buffer 'dest', or 0 if the compression fails -*/ -#define FUNCTION_NAME LZ4_compress_stack_limitedOutput -#define LIMITED_OUTPUT -#include "lz4_encoder.h" +FORCE_INLINE void LZ4_putPositionOnHash(Ptr p, U32 h, void* tableBase, tableType_t tableType, Ptr srcBase) +{ + switch (tableType) + { + case byPtr: { Ptr* hashTable = (Ptr*) tableBase; hashTable[h] = p; break; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = p-srcBase; break; } + case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); break; } + } +} +FORCE_INLINE void LZ4_putPosition(Ptr p, void* tableBase, tableType_t tableType, Ptr srcBase) +{ + U32 h = LZ4_hashPosition(p, tableType); + LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); +} -/* -int LZ4_compress64k_stack( - const char* source, - char* dest, - int inputSize) +FORCE_INLINE Ptr LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, Ptr srcBase) +{ + if (tableType == byPtr) { Ptr* hashTable = (Ptr*) tableBase; return hashTable[h]; } + if (tableType == byU32) { U32* hashTable = (U32*) tableBase; return hashTable[h] + srcBase; } + { U16* hashTable = (U16*) tableBase; return hashTable[h] + srcBase; } // default, to ensure a return +} -Compress 'inputSize' bytes from 'source' into an output buffer 'dest'. -This function compresses better than LZ4_compress_stack(), on the condition that -'inputSize' must be < to LZ4_64KLIMIT, or the function will fail. -Destination buffer must be already allocated, and sized at a minimum of LZ4_compressBound(inputSize). -return : the number of bytes written in buffer 'dest', or 0 if compression fails -*/ -#define FUNCTION_NAME LZ4_compress64k_stack -#define COMPRESS_64K -#include "lz4_encoder.h" +FORCE_INLINE Ptr LZ4_getPosition(Ptr p, void* tableBase, tableType_t tableType, Ptr srcBase) +{ + U32 h = LZ4_hashPosition(p, tableType); + return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); +} -/* -int LZ4_compress64k_stack_limitedOutput( +FORCE_INLINE int LZ4_compress_generic( + void* ctx, const char* source, char* dest, int inputSize, - int maxOutputSize) + int maxOutputSize, -Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'. -This function compresses better than LZ4_compress_stack_limitedOutput(), on the condition that -'inputSize' must be < to LZ4_64KLIMIT, or the function will fail. -If it cannot achieve it, compression will stop, and result of the function will be zero. -return : the number of bytes written in buffer 'dest', or 0 if the compression fails -*/ -#define FUNCTION_NAME LZ4_compress64k_stack_limitedOutput -#define COMPRESS_64K -#define LIMITED_OUTPUT -#include "lz4_encoder.h" + limitedOutput_directive limitedOutput, + tableType_t tableType) +{ + Ptr ip = (Ptr) source; + Ptr const base = (Ptr) source; + Ptr anchor = (Ptr) source; + Ptr const iend = ip + inputSize; + Ptr const mflimit = iend - MFLIMIT; + Ptr const matchlimit = iend - LASTLITERALS; + BYTE* op = (BYTE*) dest; + BYTE* const oend = op + maxOutputSize; -/* -void* LZ4_createHeapMemory(); -int LZ4_freeHeapMemory(void* ctx); + int length; + const int skipStrength = SKIPSTRENGTH; + U32 forwardH; -Used to allocate and free hashTable memory -to be used by the LZ4_compress_heap* family of functions. -LZ4_createHeapMemory() returns NULL is memory allocation fails. -*/ -void* LZ4_create() { return malloc(HASHTABLESIZE); } -int LZ4_free(void* ctx) { free(ctx); return 0; } + // Init conditions + if (inputSize=LZ4_64KLIMIT)) return 0; // Size too large (not within 64K limit) + // First Byte + LZ4_putPosition(ip, ctx, tableType, base); + ip++; forwardH = LZ4_hashPosition(ip, tableType); -/* -int LZ4_compress_heap( - void* ctx, - const char* source, - char* dest, - int inputSize) + // Main Loop + for ( ; ; ) + { + int findMatchAttempts = (1U << skipStrength) + 3; + Ptr forwardIp = ip; + Ptr ref; + BYTE* token; + + // Find a match + do { + U32 h = forwardH; + int step = findMatchAttempts++ >> skipStrength; + ip = forwardIp; + forwardIp = ip + step; + + if unlikely(forwardIp > mflimit) { goto _last_literals; } + + forwardH = LZ4_hashPosition(forwardIp, tableType); + ref = LZ4_getPositionOnHash(h, ctx, tableType, base); + LZ4_putPositionOnHash(ip, h, ctx, tableType, base); + + } while ((ref + MAX_DISTANCE < ip) || (A32(ref) != A32(ip))); + + // Catch up + while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; } + + // Encode Literal length + length = (int)(ip - anchor); + token = op++; + if ((limitedOutput) && unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend)) return 0; // Check output limit + if (length>=(int)RUN_MASK) + { + int len = length-RUN_MASK; + *token=(RUN_MASK<= 255 ; len-=255) *op++ = 255; + *op++ = (BYTE)len; + } + else *token = (BYTE)(length<>8) > oend)) return 0; // Check output limit + if (length>=(int)ML_MASK) + { + *token += ML_MASK; + length -= ML_MASK; + for (; length > 509 ; length-=510) { *op++ = 255; *op++ = 255; } + if (length >= 255) { length-=255; *op++ = 255; } + *op++ = (BYTE)length; + } + else *token += (BYTE)(length); + // Test end of chunk + if (ip > mflimit) { anchor = ip; break; } -/* -int LZ4_compress64k_heap( - void* ctx, - const char* source, - char* dest, - int inputSize) + // Fill table + LZ4_putPosition(ip-2, ctx, tableType, base); -Compress 'inputSize' bytes from 'source' into an output buffer 'dest'. -The memory used for compression must be created by LZ4_createHeapMemory() and provided by pointer 'ctx'. -'inputSize' must be < to LZ4_64KLIMIT, or the function will fail. -Destination buffer must be already allocated, and sized at a minimum of LZ4_compressBound(inputSize). -return : the number of bytes written in buffer 'dest' -*/ -#define FUNCTION_NAME LZ4_compress64k_heap -#define COMPRESS_64K -#define USE_HEAPMEMORY -#include "lz4_encoder.h" + // Test next position + ref = LZ4_getPosition(ip, ctx, tableType, base); + LZ4_putPosition(ip, ctx, tableType, base); + if ((ref + MAX_DISTANCE >= ip) && (A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; } + // Prepare next loop + anchor = ip++; + forwardH = LZ4_hashPosition(ip, tableType); + } -/* -int LZ4_compress64k_heap_limitedOutput( - void* ctx, - const char* source, - char* dest, - int inputSize, - int maxOutputSize) +_last_literals: + // Encode Last Literals + { + int lastRun = (int)(iend - anchor); + if ((limitedOutput) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; // Check output limit + if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<= 255 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } + else *op++ = (BYTE)(lastRun< compression not done - if (inputSize < LZ4_64KLIMIT) - result = LZ4_compress64k_heap(ctx, source, dest, inputSize); - else result = LZ4_compress_heap(ctx, source, dest, inputSize); - LZ4_free(ctx); - return result; +#if (HEAPMODE) + void* ctx = calloc(1U<<(MEMORY_USAGE-2), 4); // Aligned on 4-bytes boundaries #else - if (inputSize < (int)LZ4_64KLIMIT) return LZ4_compress64k_stack(source, dest, inputSize); - return LZ4_compress_stack(source, dest, inputSize); + U32 ctx[1U<<(MEMORY_USAGE-2)] = {0}; // Ensure data is aligned on 4-bytes boundaries +#endif + int result; + + if (inputSize < (int)LZ4_64KLIMIT) + result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, byU16); + else + result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, (sizeof(void*)==8) ? byU32 : byPtr); + +#if (HEAPMODE) + free(ctx); #endif + return result; } int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) { -#if HEAPMODE - void* ctx = LZ4_create(); - int result; - if (ctx == NULL) return 0; // Failed allocation => compression not done - if (inputSize < LZ4_64KLIMIT) - result = LZ4_compress64k_heap_limitedOutput(ctx, source, dest, inputSize, maxOutputSize); - else result = LZ4_compress_heap_limitedOutput(ctx, source, dest, inputSize, maxOutputSize); - LZ4_free(ctx); - return result; +#if (HEAPMODE) + void* ctx = calloc(1U<<(MEMORY_USAGE-2), 4); // Aligned on 4-bytes boundaries #else - if (inputSize < (int)LZ4_64KLIMIT) return LZ4_compress64k_stack_limitedOutput(source, dest, inputSize, maxOutputSize); - return LZ4_compress_stack_limitedOutput(source, dest, inputSize, maxOutputSize); + U32 ctx[1U<<(MEMORY_USAGE-2)] = {0}; // Ensure data is aligned on 4-bytes boundaries +#endif + int result; + + if (inputSize < (int)LZ4_64KLIMIT) + result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limited, byU16); + else + result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limited, (sizeof(void*)==8) ? byU32 : byPtr); + +#if (HEAPMODE) + free(ctx); #endif + return result; } + //**************************** // Decompression functions //**************************** @@ -537,7 +565,7 @@ typedef enum { full = 0, partial = 1 } earlyEnd_directive; // It shall be instanciated several times, using different sets of directives // Note that it is essential this generic function is really inlined, // in order to remove useless branches during compilation optimisation. -forceinline int LZ4_decompress_generic( +FORCE_INLINE int LZ4_decompress_generic( const char* source, char* dest, int inputSize, // @@ -550,9 +578,9 @@ forceinline int LZ4_decompress_generic( ) { // Local Variables - const BYTE* restrict ip = (const BYTE*) source; - const BYTE* ref; - const BYTE* const iend = ip + inputSize; + Ptr restrict ip = (Ptr) source; + Ptr ref; + Ptr const iend = ip + inputSize; BYTE* op = (BYTE*) dest; BYTE* const oend = op + outputSize; @@ -609,7 +637,7 @@ forceinline int LZ4_decompress_generic( op += length; break; // Necessarily EOF, due to parsing restrictions } - LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy; + LZ4_WILDCOPY(op, ip, cpy); ip -= (op-cpy); op = cpy; // get offset LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; @@ -618,7 +646,7 @@ forceinline int LZ4_decompress_generic( // get matchlength if ((length=(token&ML_MASK)) == ML_MASK) { - for ( ; (!endOnInput) || (ipoend-(COPYLENGTH)-(STEPSIZE-4)) + if unlikely(cpy>oend-COPYLENGTH-(STEPSIZE-4)) { if (cpy > oend-LASTLITERALS) goto _output_error; // Error : last 5 bytes must be literals - LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH)); + LZ4_SECURECOPY(op, ref, (oend-COPYLENGTH)); while(op> ((MINMATCH*8)-HASHLOG)) -#define LZ4_HASHVALUE(p) LZ4_HASH(A32(p)) - - - -//**************************** -// Function code -//**************************** - -int FUNCTION_NAME( -#ifdef USE_HEAPMEMORY - void* ctx, -#endif - const char* source, - char* dest, - int inputSize -#ifdef LIMITED_OUTPUT - ,int maxOutputSize -#endif - ) -{ -#ifdef USE_HEAPMEMORY - CURRENT_H_TYPE* HashTable = (CURRENT_H_TYPE*)ctx; -#else - CURRENT_H_TYPE HashTable[HASHTABLE_NBCELLS] = {0}; -#endif - - const BYTE* ip = (BYTE*) source; - CURRENTBASE(base); - const BYTE* anchor = ip; - const BYTE* const iend = ip + inputSize; - const BYTE* const mflimit = iend - MFLIMIT; -#define matchlimit (iend - LASTLITERALS) - - BYTE* op = (BYTE*) dest; -#ifdef LIMITED_OUTPUT - BYTE* const oend = op + maxOutputSize; -#endif - - int length; - const int skipStrength = SKIPSTRENGTH; - U32 forwardH; - - - // Init - if (inputSize=LZ4_64KLIMIT) return 0; // Size too large (not within 64K limit) -#endif -#ifdef USE_HEAPMEMORY - memset((void*)HashTable, 0, HASHTABLESIZE); -#endif - - // First Byte - HashTable[LZ4_HASHVALUE(ip)] = (CURRENT_H_TYPE)(ip - base); - ip++; forwardH = LZ4_HASHVALUE(ip); - - // Main Loop - for ( ; ; ) - { - int findMatchAttempts = (1U << skipStrength) + 3; - const BYTE* forwardIp = ip; - const BYTE* ref; - BYTE* token; - - // Find a match - do { - U32 h = forwardH; - int step = findMatchAttempts++ >> skipStrength; - ip = forwardIp; - forwardIp = ip + step; - - if unlikely(forwardIp > mflimit) { goto _last_literals; } - - forwardH = LZ4_HASHVALUE(forwardIp); - ref = base + HashTable[h]; - HashTable[h] = (CURRENT_H_TYPE)(ip - base); - - } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); - - // Catch up - while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; } - - // Encode Literal length - length = (int)(ip - anchor); - token = op++; -#ifdef LIMITED_OUTPUT - if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend) return 0; // Check output limit -#endif - if (length>=(int)RUN_MASK) - { - int len = length-RUN_MASK; - *token=(RUN_MASK<= 255 ; len-=255) *op++ = 255; - *op++ = (BYTE)len; - } - else *token = (BYTE)(length<>8) > oend) return 0; // Check output limit -#endif - if (length>=(int)ML_MASK) - { - *token += ML_MASK; - length -= ML_MASK; - for (; length > 509 ; length-=510) { *op++ = 255; *op++ = 255; } - if (length >= 255) { length-=255; *op++ = 255; } - *op++ = (BYTE)length; - } - else *token += (BYTE)(length); - - // Test end of chunk - if (ip > mflimit) { anchor = ip; break; } - - // Fill table - HashTable[LZ4_HASHVALUE(ip-2)] = (CURRENT_H_TYPE)(ip - 2 - base); - - // Test next position - ref = base + HashTable[LZ4_HASHVALUE(ip)]; - HashTable[LZ4_HASHVALUE(ip)] = (CURRENT_H_TYPE)(ip - base); - if ((ref >= ip - MAX_DISTANCE) && (A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; } - - // Prepare next loop - anchor = ip++; - forwardH = LZ4_HASHVALUE(ip); - } - -_last_literals: - // Encode Last Literals - { - int lastRun = (int)(iend - anchor); -#ifdef LIMITED_OUTPUT - if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0; // Check output limit -#endif - if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<= 255 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } - else *op++ = (BYTE)(lastRun< 0) { - unsigned int checksum; int sizeToWrite; * (unsigned int*) out_buff = LITTLE_ENDIAN_32(outSize); if (blockChecksum) { - checksum = XXH32(out_buff+4, outSize, LZ4S_CHECKSUM_SEED); + unsigned int checksum = XXH32(out_buff+4, outSize, LZ4S_CHECKSUM_SEED); * (unsigned int*) (out_buff+4+outSize) = LITTLE_ENDIAN_32(checksum); } sizeToWrite = 4 + outSize + (4*blockChecksum); @@ -462,7 +461,6 @@ int compress_file_blockDependency(char* input_filename, char* output_filename, i } else // Copy Original { - unsigned int checksum; * (unsigned int*) out_buff = LITTLE_ENDIAN_32(inSize|0x80000000); // Add Uncompressed flag sizeCheck = fwrite(out_buff, 1, 4, foutput); if (sizeCheck!=(size_t)(4)) EXM_THROW(34, "Write error : cannot write block header"); @@ -470,7 +468,7 @@ int compress_file_blockDependency(char* input_filename, char* output_filename, i if (sizeCheck!=(size_t)(inSize)) EXM_THROW(35, "Write error : cannot write block"); if (blockChecksum) { - checksum = XXH32(in_start, inSize, LZ4S_CHECKSUM_SEED); + unsigned int checksum = XXH32(in_start, inSize, LZ4S_CHECKSUM_SEED); * (unsigned int*) out_buff = LITTLE_ENDIAN_32(checksum); sizeCheck = fwrite(out_buff, 1, 4, foutput); if (sizeCheck!=(size_t)(4)) EXM_THROW(36, "Write error : cannot write block checksum"); @@ -589,12 +587,11 @@ int compress_file(char* input_filename, char* output_filename, int compressionle // Write Block if (outSize > 0) { - unsigned int checksum; int sizeToWrite; * (unsigned int*) out_buff = LITTLE_ENDIAN_32(outSize); if (blockChecksum) { - checksum = XXH32(out_buff+4, outSize, LZ4S_CHECKSUM_SEED); + unsigned int checksum = XXH32(out_buff+4, outSize, LZ4S_CHECKSUM_SEED); * (unsigned int*) (out_buff+4+outSize) = LITTLE_ENDIAN_32(checksum); } sizeToWrite = 4 + outSize + (4*blockChecksum); @@ -603,7 +600,6 @@ int compress_file(char* input_filename, char* output_filename, int compressionle } else // Copy Original Uncompressed { - unsigned int checksum; * (unsigned int*) out_buff = LITTLE_ENDIAN_32(((unsigned long)readSize)|0x80000000); // Add Uncompressed flag sizeCheck = fwrite(out_buff, 1, 4, foutput); if (sizeCheck!=(size_t)(4)) EXM_THROW(34, "Write error : cannot write block header"); @@ -611,7 +607,7 @@ int compress_file(char* input_filename, char* output_filename, int compressionle if (sizeCheck!=readSize) EXM_THROW(35, "Write error : cannot write block"); if (blockChecksum) { - checksum = XXH32(in_buff, (int)readSize, LZ4S_CHECKSUM_SEED); + unsigned int checksum = XXH32(in_buff, (int)readSize, LZ4S_CHECKSUM_SEED); * (unsigned int*) out_buff = LITTLE_ENDIAN_32(checksum); sizeCheck = fwrite(out_buff, 1, 4, foutput); if (sizeCheck!=(size_t)(4)) EXM_THROW(36, "Write error : cannot write block checksum"); @@ -661,10 +657,7 @@ unsigned long long decodeLegacyStream(FILE* finput, FILE* foutput) unsigned long long filesize = 0; char* in_buff; char* out_buff; - size_t uselessRet; - int sinkint; unsigned int blockSize; - size_t sizeCheck; // Allocate Memory @@ -675,6 +668,10 @@ unsigned long long decodeLegacyStream(FILE* finput, FILE* foutput) // Main Loop while (1) { + size_t uselessRet; + int sinkint; + size_t sizeCheck; + // Block Size uselessRet = fread(&blockSize, 1, 4, finput); if( uselessRet==0 ) break; // Nothing to read : file read is completed diff --git a/lz4hc.c b/lz4hc.c index 8f2b422..79fd3f7 100644 --- a/lz4hc.c +++ b/lz4hc.c @@ -31,11 +31,6 @@ - LZ4 source repository : http://code.google.com/p/lz4/ */ -/* -Note : this source file requires "lz4hc_encoder.h" -*/ - - //************************************** // Memory routines //************************************** @@ -100,7 +95,7 @@ Note : this source file requires "lz4hc_encoder.h" #endif #ifdef _MSC_VER // Visual Studio -# define forceinline static __forceinline +# define FORCE_INLINE static __forceinline # include // For Visual 2005 # if LZ4_ARCH64 // 64-bits # pragma intrinsic(_BitScanForward64) // For Visual 2005 @@ -113,9 +108,9 @@ Note : this source file requires "lz4hc_encoder.h" # pragma warning(disable : 4701) // disable: C4701: potentially uninitialized local variable used #else # ifdef __GNUC__ -# define forceinline static inline __attribute__((always_inline)) +# define FORCE_INLINE static inline __attribute__((always_inline)) # else -# define forceinline static inline +# define FORCE_INLINE static inline # endif #endif @@ -273,7 +268,7 @@ typedef struct //************************************** #if LZ4_ARCH64 -forceinline int LZ4_NbCommonBytes (register U64 val) +FORCE_INLINE int LZ4_NbCommonBytes (register U64 val) { #if defined(LZ4_BIG_ENDIAN) # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) @@ -305,7 +300,7 @@ forceinline int LZ4_NbCommonBytes (register U64 val) #else -forceinline int LZ4_NbCommonBytes (register U32 val) +FORCE_INLINE int LZ4_NbCommonBytes (register U32 val) { #if defined(LZ4_BIG_ENDIAN) # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) @@ -337,7 +332,7 @@ forceinline int LZ4_NbCommonBytes (register U32 val) #endif -forceinline int LZ4_InitHC (LZ4HC_Data_Structure* hc4, const BYTE* base) +FORCE_INLINE int LZ4_InitHC (LZ4HC_Data_Structure* hc4, const BYTE* base) { MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable)); MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); @@ -365,7 +360,7 @@ int LZ4_freeHC (void* LZ4HC_Data) // Update chains up to ip (excluded) -forceinline void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip) +FORCE_INLINE void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip) { U16* chainTable = hc4->chainTable; HTYPE* HashTable = hc4->hashTable; @@ -403,7 +398,7 @@ char* LZ4_slideInputBufferHC(void* LZ4HC_Data) } -forceinline size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit) +FORCE_INLINE size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit) { const BYTE* p1t = p1; @@ -421,7 +416,7 @@ forceinline size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYT } -forceinline int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* const matchlimit, const BYTE** matchpos) +FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* const matchlimit, const BYTE** matchpos) { U16* const chainTable = hc4->chainTable; HTYPE* const HashTable = hc4->hashTable; @@ -489,7 +484,7 @@ forceinline int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const B } -forceinline int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos) +FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos) { U16* const chainTable = hc4->chainTable; HTYPE* const HashTable = hc4->hashTable; @@ -548,37 +543,276 @@ _endCount: } +typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive; -//************************************** -// Compression functions -//************************************** +FORCE_INLINE int LZ4HC_encodeSequence ( + const BYTE** ip, + BYTE** op, + const BYTE** anchor, + int matchLength, + const BYTE* ref, + limitedOutput_directive limitedOutputBuffer, + BYTE* oend) +{ + int length; + BYTE* token; -/* -int LZ4_compressHC( - const char* source, - char* dest, - int inputSize) + // Encode Literal length + length = (int)(*ip - *anchor); + token = (*op)++; + if ((limitedOutputBuffer) && ((*op + length + (2 + 1 + LASTLITERALS) + (length>>8)) > oend)) return 1; // Check output limit + if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK< 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } + else *token = (BYTE)(length<>8) > oend)) return 1; // Check output limit + if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; } + else *token += (BYTE)(length); + + // Prepare next loop + *ip += matchLength; + *anchor = *ip; + + return 0; +} + + +static int LZ4HC_compress_generic ( + void* ctxvoid, + const char* source, char* dest, int inputSize, - int maxOutputSize) + int maxOutputSize, + limitedOutput_directive limit + ) +{ + LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid; + const BYTE* ip = (const BYTE*) source; + const BYTE* anchor = ip; + const BYTE* const iend = ip + inputSize; + const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* const matchlimit = (iend - LASTLITERALS); + + BYTE* op = (BYTE*) dest; + BYTE* const oend = op + maxOutputSize; + + int ml, ml2, ml3, ml0; + const BYTE* ref=NULL; + const BYTE* start2=NULL; + const BYTE* ref2=NULL; + const BYTE* start3=NULL; + const BYTE* ref3=NULL; + const BYTE* start0; + const BYTE* ref0; + + + // Ensure blocks follow each other + if (ip != ctx->end) return 0; + ctx->end += inputSize; + + ip++; + + // Main Loop + while (ip < mflimit) + { + ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref)); + if (!ml) { ip++; continue; } -Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'. -If it cannot achieve it, compression will stop, and result of the function will be zero. -return : the number of bytes written in buffer 'dest', or 0 if the compression fails -*/ -#define FUNCTION_NAME LZ4_compressHC_limitedOutput -#define LIMITED_OUTPUT -#include "lz4hc_encoder.h" + // saved, in case we would skip too much + start0 = ip; + ref0 = ref; + ml0 = ml; + +_Search2: + if (ip+ml < mflimit) + ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2); + else ml2 = ml; + + if (ml2 == ml) // No better match + { + if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; + continue; + } + + if (start0 < ip) + { + if (start2 < ip + ml0) // empirical + { + ip = start0; + ref = ref0; + ml = ml0; + } + } + + // Here, start0==ip + if ((start2 - ip) < 3) // First Match too small : removed + { + ml = ml2; + ip = start2; + ref =ref2; + goto _Search2; + } + +_Search3: + // Currently we have : + // ml2 > ml1, and + // ip1+3 <= ip2 (usually < ip1+ml1) + if ((start2 - ip) < OPTIMAL_ML) + { + int correction; + int new_ml = ml; + if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; + if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH; + correction = new_ml - (int)(start2 - ip); + if (correction > 0) + { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } + // Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) + + if (start2 + ml2 < mflimit) + ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3); + else ml3 = ml2; + + if (ml3 == ml2) // No better match : 2 sequences to encode + { + // ip & ref are known; Now for ml + if (start2 < ip+ml) ml = (int)(start2 - ip); + // Now, encode 2 sequences + if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; + ip = start2; + if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) return 0; + continue; + } + + if (start3 < ip+ml+3) // Not enough space for match 2 : remove it + { + if (start3 >= (ip+ml)) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 + { + if (start2 < ip+ml) + { + int correction = (int)(ip+ml - start2); + start2 += correction; + ref2 += correction; + ml2 -= correction; + if (ml2 < MINMATCH) + { + start2 = start3; + ref2 = ref3; + ml2 = ml3; + } + } + + if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; + ip = start3; + ref = ref3; + ml = ml3; + + start0 = start2; + ref0 = ref2; + ml0 = ml2; + goto _Search2; + } + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + goto _Search3; + } + + // OK, now we have 3 ascending matches; let's write at least the first one + // ip & ref are known; Now for ml + if (start2 < ip+ml) + { + if ((start2 - ip) < (int)ML_MASK) + { + int correction; + if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; + if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH; + correction = ml - (int)(start2 - ip); + if (correction > 0) + { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } + else + { + ml = (int)(start2 - ip); + } + } + if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; + + ip = start2; + ref = ref2; + ml = ml2; + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + + goto _Search3; + + } + + // Encode Last Literals + { + int lastRun = (int)(iend - anchor); + if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; // Check output limit + if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } + else *op++ = (BYTE)(lastRun<>8)) > oend) return 1; // Check output limit -#endif - if (length>=(int)RUN_MASK) { *token=(RUN_MASK< 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } - else *token = (BYTE)(length<>8) > oend) return 1; // Check output limit -#endif - if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; } - else *token += (BYTE)(length); - - // Prepare next loop - *ip += matchLength; - *anchor = *ip; - - return 0; -} - - -int COMBINED_NAME(FUNCTION_NAME,_continue) ( - void* ctxvoid, - const char* source, - char* dest, - int inputSize -#ifdef LIMITED_OUTPUT - ,int maxOutputSize -#endif - ) -{ - LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid; - const BYTE* ip = (const BYTE*) source; - const BYTE* anchor = ip; - const BYTE* const iend = ip + inputSize; - const BYTE* const mflimit = iend - MFLIMIT; - const BYTE* const matchlimit = (iend - LASTLITERALS); - - BYTE* op = (BYTE*) dest; -#ifdef LIMITED_OUTPUT - BYTE* const oend = op + maxOutputSize; -#endif - - int ml, ml2, ml3, ml0; - const BYTE* ref=NULL; - const BYTE* start2=NULL; - const BYTE* ref2=NULL; - const BYTE* start3=NULL; - const BYTE* ref3=NULL; - const BYTE* start0; - const BYTE* ref0; - - // Ensure blocks follow each other - if (ip != ctx->end) return 0; - ctx->end += inputSize; - - ip++; - - // Main Loop - while (ip < mflimit) - { - ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref)); - if (!ml) { ip++; continue; } - - // saved, in case we would skip too much - start0 = ip; - ref0 = ref; - ml0 = ml; - -_Search2: - if (ip+ml < mflimit) - ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2); - else ml2 = ml; - - if (ml2 == ml) // No better match - { - ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend); - continue; - } - - if (start0 < ip) - { - if (start2 < ip + ml0) // empirical - { - ip = start0; - ref = ref0; - ml = ml0; - } - } - - // Here, start0==ip - if ((start2 - ip) < 3) // First Match too small : removed - { - ml = ml2; - ip = start2; - ref =ref2; - goto _Search2; - } - -_Search3: - // Currently we have : - // ml2 > ml1, and - // ip1+3 <= ip2 (usually < ip1+ml1) - if ((start2 - ip) < OPTIMAL_ML) - { - int correction; - int new_ml = ml; - if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; - if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH; - correction = new_ml - (int)(start2 - ip); - if (correction > 0) - { - start2 += correction; - ref2 += correction; - ml2 -= correction; - } - } - // Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) - - if (start2 + ml2 < mflimit) - ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3); - else ml3 = ml2; - - if (ml3 == ml2) // No better match : 2 sequences to encode - { - // ip & ref are known; Now for ml - if (start2 < ip+ml) ml = (int)(start2 - ip); - // Now, encode 2 sequences - ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend); - ip = start2; - ENCODE_SEQUENCE(&ip, &op, &anchor, ml2, ref2, oend); - continue; - } - - if (start3 < ip+ml+3) // Not enough space for match 2 : remove it - { - if (start3 >= (ip+ml)) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 - { - if (start2 < ip+ml) - { - int correction = (int)(ip+ml - start2); - start2 += correction; - ref2 += correction; - ml2 -= correction; - if (ml2 < MINMATCH) - { - start2 = start3; - ref2 = ref3; - ml2 = ml3; - } - } - - ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend); - ip = start3; - ref = ref3; - ml = ml3; - - start0 = start2; - ref0 = ref2; - ml0 = ml2; - goto _Search2; - } - - start2 = start3; - ref2 = ref3; - ml2 = ml3; - goto _Search3; - } - - // OK, now we have 3 ascending matches; let's write at least the first one - // ip & ref are known; Now for ml - if (start2 < ip+ml) - { - if ((start2 - ip) < (int)ML_MASK) - { - int correction; - if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; - if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH; - correction = ml - (int)(start2 - ip); - if (correction > 0) - { - start2 += correction; - ref2 += correction; - ml2 -= correction; - } - } - else - { - ml = (int)(start2 - ip); - } - } - ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend); - - ip = start2; - ref = ref2; - ml = ml2; - - start2 = start3; - ref2 = ref3; - ml2 = ml3; - - goto _Search3; - - } - - // Encode Last Literals - { - int lastRun = (int)(iend - anchor); -#ifdef LIMITED_OUTPUT - if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0; // Check output limit -#endif - if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } - else *op++ = (BYTE)(lastRun<