From 7f436a1215f11b0fb872c34f088b8b5888d0630d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 23 Mar 2015 02:20:42 +0100 Subject: lz4 cli supports frame content size --- NEWS | 5 +- programs/Makefile | 30 ++++---- programs/bench.c | 13 ++-- programs/lz4cli.c | 2 +- programs/lz4io.c | 207 +++++++++++++++++++++++++++++++----------------------- programs/lz4io.h | 2 + 6 files changed, 149 insertions(+), 110 deletions(-) diff --git a/NEWS b/NEWS index 3b16456..aea9cd1 100644 --- a/NEWS +++ b/NEWS @@ -1,13 +1,12 @@ r128: -New : lz4 cli sparse file support +New : lz4cli sparse file support New : command -m, to compress multiple files in a single command -New : lz4frame supports frame content size Fixed : Restored lz4hc compression ratio (was slightly lower since r124) New : lz4 cli supports long commands +New : lz4frame & lz4cli frame content size support New : lz4frame supports skippable frames Changed:Default "make install" directory is /usr/local New : lz4 cli supports "pass-through" mode -New : long commands New : g++ compatibility tests New : datagen can generate sparse files New : scan-build tests diff --git a/programs/Makefile b/programs/Makefile index f62adb1..6aade89 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -160,19 +160,13 @@ test-lz4-sparse: lz4 datagen test-lz4-contentSize: lz4 datagen @echo ---- test original size support ---- - ./datagen -g15M | ./lz4 -v | ./lz4 -t - ./datagen -g15M | ./lz4 -v --frame-content-size | ./lz4 -t + ./datagen -g15M > tmp + ./lz4 -v tmp | ./lz4 -t + ./lz4 -v --frame-content-size tmp | ./lz4 -d > tmp2 + diff -s tmp tmp2 + @rm tmp* -test-lz4: lz4 datagen test-lz4-sparse test-lz4-contentSize - @echo ---- test lz4 basic compression/decompression ---- - ./datagen -g16KB | ./lz4 -9 | ./lz4 -t - ./datagen | ./lz4 | ./lz4 -t - ./datagen -g6M -P99 | ./lz4 -9BD | ./lz4 -t - ./datagen -g17M | ./lz4 -9v | ./lz4 -tq - ./datagen -g33M | ./lz4 --no-frame-crc | ./lz4 -t - ./datagen -g256MB | ./lz4 -vqB4D | ./lz4 -t - 
./datagen -g6GB | ./lz4 -vqB5D | ./lz4 -t - ./datagen -g6GB | ./lz4 -vq9BD | ./lz4 -t +test-lz4-frame-concatenation: lz4 datagen @echo ---- test frame concatenation ---- @echo -n > empty.test @echo hi > nonempty.test @@ -184,6 +178,18 @@ test-lz4: lz4 datagen test-lz4-sparse test-lz4-contentSize sdiff orig.test result.test @rm *.test @echo frame concatenation test completed + +test-lz4: lz4 datagen test-lz4-sparse test-lz4-contentSize test-lz4-frame-concatenation + @echo ---- test lz4 basic compression/decompression ---- + ./datagen -g0 | ./lz4 -v | ./lz4 -t + ./datagen -g16KB | ./lz4 -9 | ./lz4 -t + ./datagen | ./lz4 | ./lz4 -t + ./datagen -g6M -P99 | ./lz4 -9BD | ./lz4 -t + ./datagen -g17M | ./lz4 -9v | ./lz4 -tq + ./datagen -g33M | ./lz4 --no-frame-crc | ./lz4 -t + ./datagen -g256MB | ./lz4 -vqB4D | ./lz4 -t + ./datagen -g6GB | ./lz4 -vqB5D | ./lz4 -t + ./datagen -g6GB | ./lz4 -vq9BD | ./lz4 -t @echo ---- test multiple input files ---- @./datagen -s1 > file1 @./datagen -s2 > file2 diff --git a/programs/bench.c b/programs/bench.c index f0a27ef..e1b5357 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -26,9 +26,11 @@ /************************************** * Compiler Options ***************************************/ -/* Disable some Visual warning messages */ -#define _CRT_SECURE_NO_WARNINGS -#define _CRT_SECURE_NO_DEPRECATE /* VS2005 */ +#if defined(_MSC_VER) || defined(_WIN32) +# define _CRT_SECURE_NO_WARNINGS +# define _CRT_SECURE_NO_DEPRECATE /* VS2005 */ +# define BMK_LEGACY_TIMER 1 /* S_ISREG & gettimeofday() are not supported by MSVC */ +#endif /* Unix Large Files support (>4GB) */ #define _FILE_OFFSET_BITS 64 @@ -38,11 +40,6 @@ # define _LARGEFILE64_SOURCE #endif -/* S_ISREG & gettimeofday() are not supported by MSVC */ -#if defined(_MSC_VER) || defined(_WIN32) -# define BMK_LEGACY_TIMER 1 -#endif - /************************************** * Includes diff --git a/programs/lz4cli.c b/programs/lz4cli.c index 7fe5c3f..6c57864 100644 --- 
a/programs/lz4cli.c +++ b/programs/lz4cli.c @@ -300,7 +300,7 @@ int main(int argc, char** argv) if (!strcmp(argument, "--stdout")) { forceStdout=1; output_filename=stdoutmark; displayLevel=1; continue; } if (!strcmp(argument, "--to-stdout")) { forceStdout=1; output_filename=stdoutmark; displayLevel=1; continue; } if (!strcmp(argument, "--no-frame-crc")) { LZ4IO_setStreamChecksumMode(0); continue; } - if (!strcmp(argument, "--frame-content-size")) { continue; } + if (!strcmp(argument, "--frame-content-size")) { LZ4IO_setContentSize(1); continue; } if (!strcmp(argument, "--sparse-support")) { LZ4IO_setSparseFile(1); continue; } if (!strcmp(argument, "--verbose")) { displayLevel=4; continue; } if (!strcmp(argument, "--quiet")) { if (displayLevel) displayLevel--; continue; } diff --git a/programs/lz4io.c b/programs/lz4io.c index 8d356c1..f5c5e98 100644 --- a/programs/lz4io.c +++ b/programs/lz4io.c @@ -46,10 +46,12 @@ /***************************** * Includes *****************************/ -#include <stdio.h> /* fprintf, fopen, fread, stdin, stdout */ -#include <stdlib.h> /* malloc, free */ -#include <string.h> /* strcmp, strlen */ -#include <time.h> /* clock */ +#include <stdio.h> /* fprintf, fopen, fread, stdin, stdout */ +#include <stdlib.h> /* malloc, free */ +#include <string.h> /* strcmp, strlen */ +#include <time.h> /* clock */ +#include <sys/types.h> /* stat64 */ +#include <sys/stat.h> /* stat64 */ #include "lz4io.h" #include "lz4.h" /* still required for legacy format */ #include "lz4hc.h" /* still required for legacy format */ @@ -126,6 +128,7 @@ static int g_blockChecksum = 0; static int g_streamChecksum = 1; static int g_blockIndependence = 1; static int g_sparseFileSupport = 0; +static int g_contentSizeFlag = 0; static const int minBlockSizeID = 4; static const int maxBlockSizeID = 7; @@ -209,6 +212,13 @@ int LZ4IO_setSparseFile(int enable) return g_sparseFileSupport; } +/* Default setting : 0 (disabled) */ +int LZ4IO_setContentSize(int enable) +{ + g_contentSizeFlag = (enable!=0); + return g_contentSizeFlag; +} + static unsigned LZ4IO_GetMilliSpan(clock_t
nPrevious) { clock_t nCurrent = clock(); @@ -216,6 +226,20 @@ static unsigned LZ4IO_GetMilliSpan(clock_t nPrevious) return nSpan; } +static unsigned long long LZ4IO_GetFileSize(const char* infilename) +{ + int r; +#if defined(_MSC_VER) + struct _stat64 statbuf; + r = _stat64(infilename, &statbuf); +#else + struct stat statbuf; + r = stat(infilename, &statbuf); +#endif + if (r || !S_ISREG(statbuf.st_mode)) return 0; /* No good... */ + return (unsigned long long)statbuf.st_size; +} + /* ************************************************************************ ** ** ********************** LZ4 File / Pipe compression ********************* ** @@ -396,6 +420,11 @@ int LZ4IO_compressFilename(const char* input_filename, const char* output_filena prefs.frameInfo.blockMode = (blockMode_t)g_blockIndependence; prefs.frameInfo.blockSizeID = (blockSizeID_t)g_blockSizeId; prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)g_streamChecksum; + if (g_contentSizeFlag) + { + unsigned long long fileSize = LZ4IO_GetFileSize(input_filename); + prefs.frameInfo.frameOSize = fileSize; /* == 0 if input == stdin */ + } /* Allocate Memory */ in_buff = (char*)malloc(blockSize); @@ -485,7 +514,6 @@ int LZ4IO_compressMultipleFilenames(const char** inFileNamesTable, int ifntSize, } - /* ********************************************************************* */ /* ********************** LZ4 file-stream Decompression **************** */ /* ********************************************************************* */ @@ -557,109 +585,116 @@ static unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) void* outBuff; # define HEADERMAX 20 char headerBuff[HEADERMAX]; - size_t sizeCheck, nextToRead, outBuffSize, inBuffSize; + size_t sizeCheck; + const size_t inBuffSize = 256 KB; + const size_t outBuffSize = 256 KB; LZ4F_decompressionContext_t ctx; LZ4F_errorCode_t errorCode; - LZ4F_frameInfo_t frameInfo; unsigned storedSkips = 0; /* init */ errorCode = LZ4F_createDecompressionContext(&ctx, 
LZ4F_VERSION); - if (LZ4F_isError(errorCode)) EXM_THROW(60, "Allocation error : can't create context : %s", LZ4F_getErrorName(errorCode)); + if (LZ4F_isError(errorCode)) EXM_THROW(60, "Can't create context : %s", LZ4F_getErrorName(errorCode)); LZ4IO_writeLE32(headerBuff, LZ4IO_MAGICNUMBER); /* regenerated here, as it was already read from finput */ - /* Decode stream descriptor */ - outBuffSize = 0; inBuffSize = 0; sizeCheck = MAGICNUMBER_SIZE; - nextToRead = LZ4F_decompress(ctx, NULL, &outBuffSize, headerBuff, &sizeCheck, NULL); - if (LZ4F_isError(nextToRead)) EXM_THROW(61, "Decompression error : %s", LZ4F_getErrorName(nextToRead)); - if (nextToRead > HEADERMAX) EXM_THROW(62, "Header too large (%i>%i)", (int)nextToRead, HEADERMAX); - sizeCheck = fread(headerBuff, 1, nextToRead, finput); - if (sizeCheck!=nextToRead) EXM_THROW(63, "Read error "); - nextToRead = LZ4F_decompress(ctx, NULL, &outBuffSize, headerBuff, &sizeCheck, NULL); - errorCode = LZ4F_getFrameInfo(ctx, &frameInfo, NULL, &inBuffSize); - if (LZ4F_isError(errorCode)) EXM_THROW(64, "can't decode frame header : %s", LZ4F_getErrorName(errorCode)); - /* Allocate Memory */ - outBuffSize = LZ4IO_setBlockSizeID(frameInfo.blockSizeID); - inBuffSize = outBuffSize + 4; - inBuff = malloc(inBuffSize); - outBuff = malloc(outBuffSize); - if (!inBuff || !outBuff) EXM_THROW(65, "Allocation error : not enough memory"); + inBuff = malloc(256 KB); + outBuff = malloc(256 KB); + if (!inBuff || !outBuff) EXM_THROW(61, "Allocation error : not enough memory"); - /* Main Loop */ - while (nextToRead != 0) + /* Init feed with magic number (already consumed from FILE) */ { - size_t decodedBytes = outBuffSize; + size_t inSize = 4; + size_t outSize=0; + LZ4IO_writeLE32(inBuff, LZ4IO_MAGICNUMBER); + errorCode = LZ4F_decompress(ctx, outBuff, &outSize, inBuff, &inSize, NULL); + if (LZ4F_isError(errorCode)) EXM_THROW(62, "Header error : %s", LZ4F_getErrorName(errorCode)); + } - /* Read Block */ - sizeCheck = fread(inBuff, 1, nextToRead, 
finput); - if (sizeCheck!=nextToRead) EXM_THROW(66, "Read error "); - /* Decode Block */ - errorCode = LZ4F_decompress(ctx, outBuff, &decodedBytes, inBuff, &sizeCheck, NULL); - if (LZ4F_isError(errorCode)) EXM_THROW(67, "Decompression error : %s", LZ4F_getErrorName(errorCode)); - if (sizeCheck!=nextToRead) EXM_THROW(67, "Synchronization error"); - nextToRead = errorCode; - filesize += decodedBytes; + /* Main Loop */ + for (;;) + { + size_t readSize; + size_t pos = 0; - /* Write Block */ - if (g_sparseFileSupport) + /* Read input */ + readSize = fread(inBuff, 1, inBuffSize, finput); + if (!readSize) break; /* empty file or stream */ + + while (pos < readSize) { - size_t* const oBuffStartT = (size_t*)outBuff; /* since outBuff is malloc'ed, it's aligned on size_t */ - size_t* oBuffPosT = oBuffStartT; - size_t oBuffSizeT = decodedBytes / sizeT; - size_t* const oBuffEndT = oBuffStartT + oBuffSizeT; - static const size_t bs0T = (32 KB) / sizeT; - while (oBuffPosT < oBuffEndT) + /* Decode Input (at least partially) */ + size_t remaining = readSize - pos; + size_t decodedBytes = outBuffSize; + errorCode = LZ4F_decompress(ctx, outBuff, &decodedBytes, (char*)inBuff+pos, &remaining, NULL); + if (LZ4F_isError(errorCode)) EXM_THROW(66, "Decompression error : %s", LZ4F_getErrorName(errorCode)); + pos += remaining; + + if (decodedBytes) { - size_t seg0SizeT = bs0T; - size_t nb0T; - int seekResult; - if (seg0SizeT > oBuffSizeT) seg0SizeT = oBuffSizeT; - oBuffSizeT -= seg0SizeT; - for (nb0T=0; (nb0T < seg0SizeT) && (oBuffPosT[nb0T] == 0); nb0T++) ; - storedSkips += (unsigned)(nb0T * sizeT); - if (storedSkips > 1 GB) /* avoid int overflow */ + /* Write Block */ + filesize += decodedBytes; + if (g_sparseFileSupport) { - seekResult = fseek(foutput, 1 GB, SEEK_CUR); - if (seekResult != 0) EXM_THROW(68, "1 GB skip error (sparse file)"); - storedSkips -= 1 GB; + size_t* const oBuffStartT = (size_t*)outBuff; /* since outBuff is malloc'ed, it's aligned on size_t */ + size_t* oBuffPosT = 
oBuffStartT; + size_t oBuffSizeT = decodedBytes / sizeT; + size_t* const oBuffEndT = oBuffStartT + oBuffSizeT; + static const size_t bs0T = (32 KB) / sizeT; + while (oBuffPosT < oBuffEndT) + { + size_t seg0SizeT = bs0T; + size_t nb0T; + int seekResult; + if (seg0SizeT > oBuffSizeT) seg0SizeT = oBuffSizeT; + oBuffSizeT -= seg0SizeT; + for (nb0T=0; (nb0T < seg0SizeT) && (oBuffPosT[nb0T] == 0); nb0T++) ; + storedSkips += (unsigned)(nb0T * sizeT); + if (storedSkips > 1 GB) /* avoid int overflow */ + { + seekResult = fseek(foutput, 1 GB, SEEK_CUR); + if (seekResult != 0) EXM_THROW(68, "1 GB skip error (sparse file)"); + storedSkips -= 1 GB; + } + if (nb0T != seg0SizeT) /* not all 0s */ + { + seekResult = fseek(foutput, storedSkips, SEEK_CUR); + if (seekResult) EXM_THROW(68, "Skip error (sparse file)"); + storedSkips = 0; + seg0SizeT -= nb0T; + oBuffPosT += nb0T; + sizeCheck = fwrite(oBuffPosT, sizeT, seg0SizeT, foutput); + if (sizeCheck != seg0SizeT) EXM_THROW(68, "Write error : cannot write decoded block"); + } + oBuffPosT += seg0SizeT; + } + if (decodedBytes & maskT) /* size not multiple of sizeT (necessarily end of block) */ + { + const char* const restStart = (char*)oBuffEndT; + const char* restPtr = restStart; + size_t restSize = decodedBytes & maskT; + const char* const restEnd = restStart + restSize; + for (; (restPtr < restEnd) && (*restPtr == 0); restPtr++) ; + storedSkips += (unsigned) (restPtr - restStart); + if (restPtr != restEnd) + { + int seekResult = fseek(foutput, storedSkips, SEEK_CUR); + if (seekResult) EXM_THROW(68, "Skip error (end of block)"); + storedSkips = 0; + sizeCheck = fwrite(restPtr, 1, restEnd - restPtr, foutput); + if (sizeCheck != (size_t)(restEnd - restPtr)) EXM_THROW(68, "Write error : cannot write decoded end of block"); + } + } } - if (nb0T != seg0SizeT) /* not all 0s */ + else { - seekResult = fseek(foutput, storedSkips, SEEK_CUR); - if (seekResult) EXM_THROW(68, "Skip error (sparse file)"); - storedSkips = 0; - seg0SizeT -= nb0T; - 
oBuffPosT += nb0T; - sizeCheck = fwrite(oBuffPosT, sizeT, seg0SizeT, foutput); - if (sizeCheck != seg0SizeT) EXM_THROW(68, "Write error : cannot write decoded block"); - } - oBuffPosT += seg0SizeT; - } - if (decodedBytes & maskT) /* size not multiple of sizeT (necessarily end of block) */ - { - const char* const restStart = (char*)oBuffEndT; - const char* restPtr = restStart; - size_t restSize = decodedBytes & maskT; - const char* const restEnd = restStart + restSize; - for (; (restPtr < restEnd) && (*restPtr == 0); restPtr++) ; - storedSkips += (unsigned) (restPtr - restStart); - if (restPtr != restEnd) - { - int seekResult = fseek(foutput, storedSkips, SEEK_CUR); - if (seekResult) EXM_THROW(68, "Skip error (end of block)"); - storedSkips = 0; - sizeCheck = fwrite(restPtr, 1, restEnd - restPtr, foutput); - if (sizeCheck != (size_t)(restEnd - restPtr)) EXM_THROW(68, "Write error : cannot write decoded end of block"); + sizeCheck = fwrite(outBuff, 1, decodedBytes, foutput); + if (sizeCheck != decodedBytes) EXM_THROW(68, "Write error : cannot write decoded block"); } } } - else - { - sizeCheck = fwrite(outBuff, 1, decodedBytes, foutput); - if (sizeCheck != decodedBytes) EXM_THROW(68, "Write error : cannot write decoded block"); - } + } if ((g_sparseFileSupport) && (storedSkips>0)) diff --git a/programs/lz4io.h b/programs/lz4io.h index b4d75f7..75a36e1 100644 --- a/programs/lz4io.h +++ b/programs/lz4io.h @@ -82,3 +82,5 @@ int LZ4IO_setNotificationLevel(int level); /* Default setting : 0 (disabled) */ int LZ4IO_setSparseFile(int enable); +/* Default setting : 0 (disabled) */ +int LZ4IO_setContentSize(int enable); -- cgit v0.12