From 97679fa5a17d79f87c0a2d1e2d44c3215d48958b Mon Sep 17 00:00:00 2001 From: Takayuki MATSUOKA Date: Thu, 12 Feb 2015 15:46:02 +0900 Subject: Google Code Issue 155: lz4 cli should support sparse file https://code.google.com/p/lz4/issues/detail?id=155 This is an experimental implementation, just a proof of concept. It works on Linux and Windows. # Build To build the experimental version, define 'LZ4IO_ENABLE_SPARSE_FILE' as in the following command : make lz4programs 'CFLAGS=-O3 -DLZ4IO_ENABLE_SPARSE_FILE=1' ./programs/lz4 -h You will see "EXPERIMENTAL_SPARSE_FILE" as the lz4 revision : "*** LZ4 command line interface 64-bits EXPERIMENTAL_SPARSE_FILE, by Yann Collet (...) ***" # Experiment This experimental version adds the option "-x", which enables sparse file output during decompression. You can use this option like this : ./programs/lz4 -9 -f my-file ./programs/lz4 -d -f -x my-file.lz4 my-file.lz4.out cmp my-file my-file.lz4.out --- programs/lz4cli.c | 17 +++++++++++++++ programs/lz4io.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ programs/lz4io.h | 5 +++++ 3 files changed, 87 insertions(+) diff --git a/programs/lz4cli.c b/programs/lz4cli.c index 0da5dce..5b54143 100644 --- a/programs/lz4cli.c +++ b/programs/lz4cli.c @@ -83,6 +83,11 @@ /***************************** * Constants ******************************/ +#if defined(LZ4IO_ENABLE_SPARSE_FILE) +# undef LZ4_VERSION +# define LZ4_VERSION "EXPERIMENTAL_SPARSE_FILE" +#endif + #define COMPRESSOR_NAME "LZ4 command line interface" #ifndef LZ4_VERSION # define LZ4_VERSION "r126" @@ -186,6 +191,10 @@ static int usage_advanced(void) DISPLAY( " -y : overwrite output without prompting \n"); DISPLAY( " -s : suppress warnings \n"); #endif /* ENABLE_LZ4C_LEGACY_OPTIONS */ +#if defined(LZ4IO_ENABLE_SPARSE_FILE) + DISPLAY( "Experimental : Sparse file\n"); + DISPLAY( " -x : enable sparse file\n"); +#endif /* LZ4IO_ENABLE_SPARSE_FILE */ EXTENDED_HELP; return 0; } @@ -276,6 +285,9 @@ int main(int argc, char** argv) /* Init */ programName = argv[0]; 
LZ4IO_setOverwrite(0); +#if defined(LZ4IO_ENABLE_SPARSE_FILE) + LZ4IO_setSparseFile(0); +#endif /* LZ4IO_ENABLE_SPARSE_FILE */ blockSize = LZ4IO_setBlockSizeID(LZ4_BLOCKSIZEID_DEFAULT); /* lz4cat behavior */ @@ -403,6 +415,11 @@ int main(int argc, char** argv) /* Pause at the end (hidden option) */ case 'p': main_pause=1; BMK_SetPause(); break; +#if defined(LZ4IO_ENABLE_SPARSE_FILE) + /* Experimental : Enable sparse file */ + case 'x': LZ4IO_setSparseFile(1); break; +#endif /* LZ4IO_ENABLE_SPARSE_FILE */ + /* Specific commands for customized versions */ EXTENDED_ARGUMENTS; diff --git a/programs/lz4io.c b/programs/lz4io.c index 023824e..4e583f6 100644 --- a/programs/lz4io.c +++ b/programs/lz4io.c @@ -76,10 +76,18 @@ # endif # define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY) # define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream)) +# if defined(LZ4IO_ENABLE_SPARSE_FILE) +# include <windows.h> +# define SET_SPARSE_FILE_MODE(file) do { DWORD dw; DeviceIoControl((HANDLE) _get_osfhandle(_fileno(file)), FSCTL_SET_SPARSE, 0, 0, 0, 0, &dw, 0); } while(0) +# define fseek _fseeki64 +# endif /* LZ4IO_ENABLE_SPARSE_FILE */ #else # include <unistd.h> /* isatty */ # define SET_BINARY_MODE(file) # define IS_CONSOLE(stdStream) isatty(fileno(stdStream)) +# if defined(LZ4IO_ENABLE_SPARSE_FILE) +# define SET_SPARSE_FILE_MODE(file) +# endif /* LZ4IO_ENABLE_SPARSE_FILE */ #endif @@ -133,6 +141,9 @@ static int globalBlockSizeId = LZ4S_BLOCKSIZEID_DEFAULT; static int blockChecksum = 0; static int streamChecksum = 1; static int blockIndependence = 1; +#if defined(LZ4IO_ENABLE_SPARSE_FILE) +static int sparseFile = 0; +#endif /* LZ4IO_ENABLE_SPARSE_FILE */ static const int minBlockSizeID = 4; static const int maxBlockSizeID = 7; @@ -174,6 +185,28 @@ int LZ4IO_setOverwrite(int yes) return overwrite; } +#if defined(LZ4IO_ENABLE_SPARSE_FILE) +/* Default setting : sparseFile = 0; (Disable) + return : sparse file mode (0:Disable / 1:Enable) */ +int LZ4IO_setSparseFile(int yes) +{ + sparseFile = yes; + 
return sparseFile; +} + +static int isSparse(const void* p, size_t size) +{ + const char* p8 = p; + for(; size; --size) { + if(*p8 != 0) { + return 0; + } + ++p8; + } + return 1; +} +#endif /* LZ4IO_ENABLE_SPARSE_FILE */ + /* blockSizeID : valid values : 4-5-6-7 */ int LZ4IO_setBlockSizeID(int bsid) { @@ -539,6 +572,9 @@ static unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) LZ4F_decompressionContext_t ctx; LZ4F_errorCode_t errorCode; LZ4F_frameInfo_t frameInfo; +#if defined(LZ4IO_ENABLE_SPARSE_FILE) + size_t sparsePending = 0; +#endif /* init */ errorCode = LZ4F_createDecompressionContext(&ctx, LZ4F_VERSION); @@ -580,9 +616,30 @@ static unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) filesize += decodedBytes; /* Write Block */ +#if defined(LZ4IO_ENABLE_SPARSE_FILE) + if(sparseFile) { + if(isSparse(outBuff, decodedBytes)) { + sparsePending += decodedBytes; + continue; + } + if(sparsePending > 0) { + fseek(foutput, sparsePending, SEEK_CUR); + sparsePending = 0; + } + } +#endif sizeCheck = fwrite(outBuff, 1, decodedBytes, foutput); if (sizeCheck != decodedBytes) EXM_THROW(68, "Write error : cannot write decoded block\n"); } +#if defined(LZ4IO_ENABLE_SPARSE_FILE) + if(sparseFile) { + if(sparsePending > 0) { + fseek(foutput, sparsePending-1, SEEK_CUR); + fputc(0, foutput); + sparsePending = 0; + } + } +#endif /* Free */ free(inBuff); @@ -645,6 +702,14 @@ int LZ4IO_decompressFilename(char* input_filename, char* output_filename) start = clock(); get_fileHandle(input_filename, output_filename, &finput, &foutput); +#if defined(LZ4IO_ENABLE_SPARSE_FILE) + if (sparseFile!=0 && foutput!=0) + { + DISPLAY("Experimental : Using sparse file\n"); + SET_SPARSE_FILE_MODE(foutput); + } +#endif /* LZ4IO_ENABLE_SPARSE_FILE */ + /* Loop over multiple streams */ do { diff --git a/programs/lz4io.h b/programs/lz4io.h index 7869a43..a0c6119 100644 --- a/programs/lz4io.h +++ b/programs/lz4io.h @@ -75,3 +75,8 @@ int LZ4IO_setStreamChecksumMode(int xxhash); /* Default 
setting : 0 (no notification) */ int LZ4IO_setNotificationLevel(int level); + +#if defined(LZ4IO_ENABLE_SPARSE_FILE) +/* Default setting : 0 (sparseFile = 0; disable sparse file) */ +int LZ4IO_setSparseFile(int yes); +#endif -- cgit v0.12 From 4a5d92b1d9207b5d1309e1b21ecc1164cc14a149 Mon Sep 17 00:00:00 2001 From: Takayuki MATSUOKA Date: Thu, 12 Feb 2015 16:13:23 +0900 Subject: Adjust coding style --- programs/lz4io.c | 65 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/programs/lz4io.c b/programs/lz4io.c index 4e583f6..9f80531 100644 --- a/programs/lz4io.c +++ b/programs/lz4io.c @@ -79,7 +79,9 @@ # if defined(LZ4IO_ENABLE_SPARSE_FILE) # include <windows.h> # define SET_SPARSE_FILE_MODE(file) do { DWORD dw; DeviceIoControl((HANDLE) _get_osfhandle(_fileno(file)), FSCTL_SET_SPARSE, 0, 0, 0, 0, &dw, 0); } while(0) -# define fseek _fseeki64 +# if defined(_MSC_VER) && (_MSC_VER >= 1400) +# define fseek _fseeki64 +# endif # endif /* LZ4IO_ENABLE_SPARSE_FILE */ #else # include <unistd.h> /* isatty */ @@ -197,13 +199,15 @@ int LZ4IO_setSparseFile(int yes) static int isSparse(const void* p, size_t size) { const char* p8 = p; - for(; size; --size) { - if(*p8 != 0) { - return 0; - } - ++p8; - } - return 1; + for(; size; --size) + { + if(*p8 != 0) + { + return 0; + } + ++p8; + } + return 1; } #endif /* LZ4IO_ENABLE_SPARSE_FILE */ @@ -574,7 +578,7 @@ static unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) LZ4F_frameInfo_t frameInfo; #if defined(LZ4IO_ENABLE_SPARSE_FILE) size_t sparsePending = 0; -#endif +#endif /* LZ4IO_ENABLE_SPARSE_FILE */ /* init */ errorCode = LZ4F_createDecompressionContext(&ctx, LZ4F_VERSION); @@ -617,29 +621,34 @@ static unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) /* Write Block */ #if defined(LZ4IO_ENABLE_SPARSE_FILE) - if(sparseFile) { - if(isSparse(outBuff, decodedBytes)) { - sparsePending += decodedBytes; - continue; - } - if(sparsePending > 0) { - fseek(foutput, sparsePending, 
SEEK_CUR); - sparsePending = 0; - } - } -#endif + if(sparseFile) + { + if(isSparse(outBuff, decodedBytes)) + { + sparsePending += decodedBytes; + continue; + } + if(sparsePending > 0) + { + fseek(foutput, sparsePending, SEEK_CUR); + sparsePending = 0; + } + } +#endif /* LZ4IO_ENABLE_SPARSE_FILE */ sizeCheck = fwrite(outBuff, 1, decodedBytes, foutput); if (sizeCheck != decodedBytes) EXM_THROW(68, "Write error : cannot write decoded block\n"); } #if defined(LZ4IO_ENABLE_SPARSE_FILE) - if(sparseFile) { - if(sparsePending > 0) { - fseek(foutput, sparsePending-1, SEEK_CUR); - fputc(0, foutput); - sparsePending = 0; - } - } -#endif + if(sparseFile) + { + if(sparsePending > 0) + { + fseek(foutput, sparsePending-1, SEEK_CUR); + fputc(0, foutput); + sparsePending = 0; + } + } +#endif /* LZ4IO_ENABLE_SPARSE_FILE */ /* Free */ free(inBuff); -- cgit v0.12 From 01a24afdcf867a901076eaa87434b3dcb39d526f Mon Sep 17 00:00:00 2001 From: Takayuki MATSUOKA Date: Thu, 12 Feb 2015 19:58:49 +0900 Subject: Improve isSparse() --- programs/lz4io.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/programs/lz4io.c b/programs/lz4io.c index 9f80531..b1067ac 100644 --- a/programs/lz4io.c +++ b/programs/lz4io.c @@ -62,6 +62,27 @@ #include "lz4frame.h" +/************************************** + Basic Types +**************************************/ +#if defined(LZ4IO_ENABLE_SPARSE_FILE) +#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ +# include <stdint.h> + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; +#endif +#endif /* LZ4IO_ENABLE_SPARSE_FILE */ + + /**************************** * OS-specific Includes *****************************/ @@ -198,6 +219,7 @@ int LZ4IO_setSparseFile(int yes) static int isSparse(const 
void* p, size_t size) { +#if 0 const char* p8 = p; for(; size; --size) { @@ -208,6 +230,30 @@ static int isSparse(const void* p, size_t size) ++p8; } return 1; +#else + const U64* p64 = (const U64*) p; + const char* p8 = (const char*) p; + const size_t n = size / sizeof(*p64); + size_t i; + + for (i = 0; i < n; ++i) + { + if (p64[i] != 0) + { + return 0; + } + } + + for(i = n * sizeof(*p64); i < size; ++i) + { + if (p8[i] != 0) + { + return 0; + } + } + + return 1; +#endif } #endif /* LZ4IO_ENABLE_SPARSE_FILE */ -- cgit v0.12 From b372f45093aa8d86f7a99b7dd5e2deaeffb10033 Mon Sep 17 00:00:00 2001 From: Takayuki MATSUOKA Date: Thu, 12 Feb 2015 20:18:16 +0900 Subject: Add Neil's method to isSparse() --- programs/lz4io.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/programs/lz4io.c b/programs/lz4io.c index b1067ac..c69275d 100644 --- a/programs/lz4io.c +++ b/programs/lz4io.c @@ -220,6 +220,7 @@ int LZ4IO_setSparseFile(int yes) static int isSparse(const void* p, size_t size) { #if 0 + /* naive */ const char* p8 = p; for(; size; --size) { @@ -230,7 +231,8 @@ static int isSparse(const void* p, size_t size) ++p8; } return 1; -#else +#elif 0 + /* xz method */ const U64* p64 = (const U64*) p; const char* p8 = (const char*) p; const size_t n = size / sizeof(*p64); @@ -253,6 +255,10 @@ static int isSparse(const void* p, size_t size) } return 1; +#else + /* Neil's */ + const char* buf = (const char*) p; + return buf[0] == 0 && !memcmp(buf, buf + 1, size - 1); #endif } #endif /* LZ4IO_ENABLE_SPARSE_FILE */ -- cgit v0.12 From eed7952101fbb740018cd00d7d2958d192da8344 Mon Sep 17 00:00:00 2001 From: Takayuki MATSUOKA Date: Thu, 12 Feb 2015 21:00:50 +0900 Subject: Add GNU coreutil's is_nul() method to isSparse() See original source http://git.savannah.gnu.org/cgit/coreutils.git/tree/src/system.h --- programs/lz4io.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/programs/lz4io.c b/programs/lz4io.c index 
c69275d..7315d69 100644 --- a/programs/lz4io.c +++ b/programs/lz4io.c @@ -255,10 +255,24 @@ static int isSparse(const void* p, size_t size) } return 1; -#else +#elif 0 /* Neil's */ const char* buf = (const char*) p; return buf[0] == 0 && !memcmp(buf, buf + 1, size - 1); +#else + /* GNU Core Utilities : coreutils/src/system.h / is_nul() */ + const U64* wp = (const U64*) p; + const char* cbuf = (const char*) p; + const char* cp; + + // Find first nonzero *word*, or the word with the sentinel. + while(*wp++ == 0) ; + + // Find the first nonzero *byte*, or the sentinel. + cp = (const char*) (wp - 1); + while(*cp++ == 0) ; + + return cbuf + size < cp; #endif } #endif /* LZ4IO_ENABLE_SPARSE_FILE */ @@ -652,7 +666,12 @@ static unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) outBuffSize = LZ4IO_setBlockSizeID(frameInfo.blockSizeID); inBuffSize = outBuffSize + 4; inBuff = (char*)malloc(inBuffSize); +#if defined(LZ4IO_ENABLE_SPARSE_FILE) + outBuff = (char*)malloc(outBuffSize+1); + outBuff[outBuffSize] = 1; /* sentinel */ +#else /* LZ4IO_ENABLE_SPARSE_FILE */ outBuff = (char*)malloc(outBuffSize); +#endif /* LZ4IO_ENABLE_SPARSE_FILE */ if (!inBuff || !outBuff) EXM_THROW(65, "Allocation error : not enough memory"); /* Main Loop */ -- cgit v0.12 From de5c930c901539082c57193b390fa3666a732883 Mon Sep 17 00:00:00 2001 From: Takayuki MATSUOKA Date: Thu, 12 Feb 2015 21:26:32 +0900 Subject: Fix sentinel size miscalculation Since the is_nul()-style word-wise comparison reads the buffer in U64 strides, the output buffer must reserve sizeof(U64) bytes for the sentinel. 
--- programs/lz4io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/lz4io.c b/programs/lz4io.c index 7315d69..03d93af 100644 --- a/programs/lz4io.c +++ b/programs/lz4io.c @@ -667,7 +667,7 @@ static unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) inBuffSize = outBuffSize + 4; inBuff = (char*)malloc(inBuffSize); #if defined(LZ4IO_ENABLE_SPARSE_FILE) - outBuff = (char*)malloc(outBuffSize+1); + outBuff = (char*)malloc(outBuffSize+sizeof(U64)); outBuff[outBuffSize] = 1; /* sentinel */ #else /* LZ4IO_ENABLE_SPARSE_FILE */ outBuff = (char*)malloc(outBuffSize); -- cgit v0.12 From dcdd628d5ec8a78401dbe277151e7654e300eafc Mon Sep 17 00:00:00 2001 From: Takayuki MATSUOKA Date: Thu, 12 Feb 2015 23:50:14 +0900 Subject: Fix sentinel bit pattern Set all bits of U64. --- programs/lz4io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/lz4io.c b/programs/lz4io.c index 03d93af..109db98 100644 --- a/programs/lz4io.c +++ b/programs/lz4io.c @@ -668,7 +668,7 @@ static unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) inBuff = (char*)malloc(inBuffSize); #if defined(LZ4IO_ENABLE_SPARSE_FILE) outBuff = (char*)malloc(outBuffSize+sizeof(U64)); - outBuff[outBuffSize] = 1; /* sentinel */ + *(U64*) &outBuff[outBuffSize] = (U64) -1; /* sentinel */ #else /* LZ4IO_ENABLE_SPARSE_FILE */ outBuff = (char*)malloc(outBuffSize); #endif /* LZ4IO_ENABLE_SPARSE_FILE */ -- cgit v0.12