From 12ab41571ef7fd11b8b2013aa943beae373cef8a Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 11 Mar 2015 19:42:37 +0100 Subject: Preliminary support for sparse files --- programs/Makefile | 9 ++++-- programs/lz4cli.c | 4 +++ programs/lz4io.c | 91 ++++++++++++++++++++++++++++++++++--------------------- programs/lz4io.h | 8 +++-- 4 files changed, 74 insertions(+), 38 deletions(-) diff --git a/programs/Makefile b/programs/Makefile index 2c883d0..b9bb5b3 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -143,10 +143,11 @@ test-lz4: lz4 datagen ./datagen -g16KB | ./lz4 -9 | ./lz4 -vdq > $(VOID) ./datagen | ./lz4 | ./lz4 -vdq > $(VOID) ./datagen -g6M -P100 | ./lz4 -9BD | ./lz4 -vdq > $(VOID) - ./datagen -g17M | ./lz4 -9v | ./lz4 -vdq > $(VOID) + ./datagen -g17M | ./lz4 -9v | ./lz4 -dq > $(VOID) ./datagen -g256MB | ./lz4 -vqB4D | ./lz4 -vdq > $(VOID) ./datagen -g6GB | ./lz4 -vqB5D | ./lz4 -vdq > $(VOID) # test frame concatenation with null-length frame + @echo "*** test frame concatenation ***" @echo -n > empty.test @echo hi > nonempty.test cat nonempty.test empty.test nonempty.test > orig.test @@ -158,12 +159,16 @@ test-lz4: lz4 datagen @rm *.test @echo frame concatenation test completed # test frame concatenation with null-length frame - @echo test multiple input files + @echo "*** test multiple input files ***" @./datagen -s1 > file1 @./datagen -s2 > file2 @./datagen -s3 > file3 ./lz4 -f -m file1 file2 file3 @rm file1 file2 file3 file1.lz4 file2.lz4 file3.lz4 + @echo "*** test sparse file support ***" + ./datagen -g50M -P100 | ./lz4 -B4 | ./lz4 -dvX > tmp + ls -ls tmp + @rm tmp test-lz4c: lz4c datagen diff --git a/programs/lz4cli.c b/programs/lz4cli.c index 10b980f..7ecfa93 100644 --- a/programs/lz4cli.c +++ b/programs/lz4cli.c @@ -176,6 +176,7 @@ static int usage_advanced(void) DISPLAY( " -BD : Block dependency (improve compression ratio)\n"); /* DISPLAY( " -BX : enable block checksum (default:disabled)\n"); *//* Option currently inactive */ DISPLAY( " -Sx : 
disable stream checksum (default:enabled)\n"); + DISPLAY( " -X : enable sparse file (default:disabled)(experimental)\n"); DISPLAY( "Benchmark arguments :\n"); DISPLAY( " -b : benchmark file(s)\n"); DISPLAY( " -i# : iteration loops [1-9](default : 3), benchmark mode only\n"); @@ -390,6 +391,9 @@ int main(int argc, char** argv) /* Modify Stream properties */ case 'S': if (argument[1]=='x') { LZ4IO_setStreamChecksumMode(0); argument++; break; } else { badusage(); } + /* Enable Sparse File support (experimental) */ + case 'X': LZ4IO_setSparseFile(1); break; + /* Benchmark */ case 'b': bench=1; multiple_inputs=1; if (inFileNames == NULL) diff --git a/programs/lz4io.c b/programs/lz4io.c index fd98247..6e977f1 100644 --- a/programs/lz4io.c +++ b/programs/lz4io.c @@ -104,11 +104,13 @@ * Macros ***************************************/ #define DISPLAY(...) fprintf(stderr, __VA_ARGS__) -#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } -#define DISPLAYUPDATE(l, ...) if (displayLevel>=l) { \ - if ((LZ4IO_GetMilliSpan(g_time) > refreshRate) || (displayLevel>=4)) \ +#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } +static int g_displayLevel = 0; /* 0 : no display ; 1: errors ; 2 : + result + interaction + warnings ; 3 : + progression; 4 : + information */ + +#define DISPLAYUPDATE(l, ...) 
if (g_displayLevel>=l) { \ + if ((LZ4IO_GetMilliSpan(g_time) > refreshRate) || (g_displayLevel>=4)) \ { g_time = clock(); DISPLAY(__VA_ARGS__); \ - if (displayLevel>=4) fflush(stdout); } } + if (g_displayLevel>=4) fflush(stdout); } } static const unsigned refreshRate = 150; static clock_t g_time = 0; @@ -116,12 +118,12 @@ static clock_t g_time = 0; /************************************** * Local Parameters ***************************************/ -static int displayLevel = 0; /* 0 : no display ; 1: errors ; 2 : + result + interaction + warnings ; 3 : + progression; 4 : + information */ -static int overwrite = 1; -static int globalBlockSizeId = LZ4S_BLOCKSIZEID_DEFAULT; -static int blockChecksum = 0; -static int streamChecksum = 1; -static int blockIndependence = 1; +static int g_overwrite = 1; +static int g_blockSizeId = LZ4S_BLOCKSIZEID_DEFAULT; +static int g_blockChecksum = 0; +static int g_streamChecksum = 1; +static int g_blockIndependence = 1; +static int g_sparseFileSupport = 0; static const int minBlockSizeID = 4; static const int maxBlockSizeID = 7; @@ -158,8 +160,8 @@ static const int maxBlockSizeID = 7; /* Default setting : overwrite = 1; return : overwrite mode (0/1) */ int LZ4IO_setOverwrite(int yes) { - overwrite = (yes!=0); - return overwrite; + g_overwrite = (yes!=0); + return g_overwrite; } /* blockSizeID : valid values : 4-5-6-7 */ @@ -167,35 +169,42 @@ int LZ4IO_setBlockSizeID(int bsid) { static const int blockSizeTable[] = { 64 KB, 256 KB, 1 MB, 4 MB }; if ((bsid < minBlockSizeID) || (bsid > maxBlockSizeID)) return -1; - globalBlockSizeId = bsid; - return blockSizeTable[globalBlockSizeId-minBlockSizeID]; + g_blockSizeId = bsid; + return blockSizeTable[g_blockSizeId-minBlockSizeID]; } int LZ4IO_setBlockMode(LZ4IO_blockMode_t blockMode) { - blockIndependence = (blockMode == LZ4IO_blockIndependent); - return blockIndependence; + g_blockIndependence = (blockMode == LZ4IO_blockIndependent); + return g_blockIndependence; } /* Default setting : no 
checksum */ int LZ4IO_setBlockChecksumMode(int xxhash) { - blockChecksum = (xxhash != 0); - return blockChecksum; + g_blockChecksum = (xxhash != 0); + return g_blockChecksum; } /* Default setting : checksum enabled */ int LZ4IO_setStreamChecksumMode(int xxhash) { - streamChecksum = (xxhash != 0); - return streamChecksum; + g_streamChecksum = (xxhash != 0); + return g_streamChecksum; } /* Default setting : 0 (no notification) */ int LZ4IO_setNotificationLevel(int level) { - displayLevel = level; - return displayLevel; + g_displayLevel = level; + return g_displayLevel; +} + +/* Default setting : 0 (disabled) ; return : sparse file mode (0/1) */ +int LZ4IO_setSparseFile(int enable) +{ + g_sparseFileSupport = (enable!=0); + return g_sparseFileSupport; } static unsigned LZ4IO_GetMilliSpan(clock_t nPrevious) @@ -242,12 +251,12 @@ static int get_fileHandle(const char* input_filename, const char* output_filenam if (*pfoutput!=0) { fclose(*pfoutput); - if (!overwrite) + if (!g_overwrite) { char ch; DISPLAYLEVEL(2, "Warning : %s already exists\n", output_filename); DISPLAYLEVEL(2, "Overwrite ? 
(Y/N) : "); - if (displayLevel <= 1) EXM_THROW(11, "Operation aborted : %s already exists", output_filename); /* No interaction possible */ + if (g_displayLevel <= 1) EXM_THROW(11, "Operation aborted : %s already exists", output_filename); /* No interaction possible */ ch = (char)getchar(); if ((ch!='Y') && (ch!='y')) EXM_THROW(11, "Operation aborted : %s already exists", output_filename); } @@ -299,7 +308,7 @@ int LZ4IO_compressFilename_Legacy(const char* input_filename, const char* output if (compressionlevel < 3) compressionFunction = LZ4_compress; else compressionFunction = LZ4_compressHC; get_fileHandle(input_filename, output_filename, &finput, &foutput); - if ((displayLevel==2) && (compressionlevel==1)) displayLevel=3; + if ((g_displayLevel==2) && (compressionlevel==1)) g_displayLevel=3; /* Allocate Memory */ in_buff = (char*)malloc(LEGACY_BLOCKSIZE); @@ -374,18 +383,18 @@ int LZ4IO_compressFilename(const char* input_filename, const char* output_filena /* Init */ start = clock(); memset(&prefs, 0, sizeof(prefs)); - if ((displayLevel==2) && (compressionLevel>=3)) displayLevel=3; + if ((g_displayLevel==2) && (compressionLevel>=3)) g_displayLevel=3; errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION); if (LZ4F_isError(errorCode)) EXM_THROW(30, "Allocation error : can't create LZ4F context : %s", LZ4F_getErrorName(errorCode)); get_fileHandle(input_filename, output_filename, &finput, &foutput); - blockSize = LZ4S_GetBlockSize_FromBlockId (globalBlockSizeId); + blockSize = LZ4S_GetBlockSize_FromBlockId (g_blockSizeId); /* Set compression parameters */ prefs.autoFlush = 1; prefs.compressionLevel = compressionLevel; - prefs.frameInfo.blockMode = (blockMode_t)blockIndependence; - prefs.frameInfo.blockSizeID = (blockSizeID_t)globalBlockSizeId; - prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)streamChecksum; + prefs.frameInfo.blockMode = (blockMode_t)g_blockIndependence; + prefs.frameInfo.blockSizeID = (blockSizeID_t)g_blockSizeId; + 
prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)g_streamChecksum; /* Allocate Memory */ in_buff = (char*)malloc(blockSize); @@ -541,8 +550,8 @@ static unsigned long long decodeLegacyStream(FILE* finput, FILE* foutput) static unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) { unsigned long long filesize = 0; - char* inBuff; - char* outBuff; + void* inBuff; + void* outBuff; # define HEADERMAX 20 char headerBuff[HEADERMAX]; size_t sizeCheck, nextToRead, outBuffSize, inBuffSize; @@ -569,8 +578,8 @@ static unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) /* Allocate Memory */ outBuffSize = LZ4IO_setBlockSizeID(frameInfo.blockSizeID); inBuffSize = outBuffSize + 4; - inBuff = (char*)malloc(inBuffSize); - outBuff = (char*)malloc(outBuffSize); + inBuff = malloc(inBuffSize); + outBuff = malloc(outBuffSize); if (!inBuff || !outBuff) EXM_THROW(65, "Allocation error : not enough memory"); /* Main Loop */ @@ -590,6 +599,20 @@ static unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) filesize += decodedBytes; /* Write Block */ + if (g_sparseFileSupport) + { + size_t* sPtr = (size_t*)outBuff; + size_t toCheckLength = decodedBytes / sizeof(size_t); + size_t checked; + size_t skippedLength; + for (checked=0; (checked < toCheckLength) && (sPtr[checked] == 0); checked++) ; + skippedLength = checked * sizeof(size_t); + if ((skippedLength > 0) && (skippedLength == decodedBytes)) skippedLength--; /* ensure 1 byte at least is written ; never underflow on an empty block */ + if (fseek(foutput, (long)skippedLength, SEEK_CUR) != 0) EXM_THROW(68, "Skip error (sparse file)\n"); + /* write only the bytes located after the skipped zeros ; skipped bytes count as written for the size check below */ + sizeCheck = skippedLength + fwrite((char*)outBuff + skippedLength, 1, decodedBytes - skippedLength, foutput); + } + else sizeCheck = fwrite(outBuff, 1, decodedBytes, foutput); if (sizeCheck != decodedBytes) EXM_THROW(68, "Write error : cannot write decoded block\n"); } diff --git a/programs/lz4io.h b/programs/lz4io.h index f99e8bb..12c9a6a 100644 --- a/programs/lz4io.h +++ b/programs/lz4io.h @@ -70,11 +70,15 @@ int LZ4IO_setBlockSizeID(int blockSizeID); typedef enum { 
LZ4IO_blockLinked=0, LZ4IO_blockIndependent} LZ4IO_blockMode_t; int LZ4IO_setBlockMode(LZ4IO_blockMode_t blockMode); -/* Default setting : no checksum */ +/* Default setting : no block checksum */ int LZ4IO_setBlockChecksumMode(int xxhash); -/* Default setting : checksum enabled */ +/* Default setting : stream checksum enabled */ int LZ4IO_setStreamChecksumMode(int xxhash); /* Default setting : 0 (no notification) */ int LZ4IO_setNotificationLevel(int level); + +/* Default setting : 0 (disabled) */ +int LZ4IO_setSparseFile(int enable); + -- cgit v0.12