diff options
author | Yann Collet <Cyan4973@users.noreply.github.com> | 2018-10-02 17:37:20 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-10-02 17:37:20 (GMT) |
commit | a963621eb0938a991c417ec75cbfe85bee684fdd (patch) | |
tree | 020611d385b7f3531062d610daa307141d715558 | |
parent | b18b6e53e16258cbdc4c24a387d9d4338006f827 (diff) | |
parent | 294293d8990b8f25ebc8660ef95dd9b1485c5d37 (diff) | |
download | lz4-a963621eb0938a991c417ec75cbfe85bee684fdd.zip lz4-a963621eb0938a991c417ec75cbfe85bee684fdd.tar.gz lz4-a963621eb0938a991c417ec75cbfe85bee684fdd.tar.bz2 |
Merge pull request #586 from blezsan/custom_block_sizes
support custom block sizes
-rw-r--r-- | programs/lz4cli.c | 14 | ||||
-rw-r--r-- | programs/lz4io.c | 26 | ||||
-rw-r--r-- | programs/lz4io.h | 4 | ||||
-rw-r--r-- | tests/Makefile | 8 | ||||
-rw-r--r-- | tests/checkFrame.c | 306 | ||||
-rwxr-xr-x | tests/test_custom_block_sizes.sh | 72 |
6 files changed, 417 insertions, 13 deletions
diff --git a/programs/lz4cli.c b/programs/lz4cli.c index 46a95cd..3709f50 100644 --- a/programs/lz4cli.c +++ b/programs/lz4cli.c @@ -134,7 +134,8 @@ static int usage_advanced(const char* exeName) DISPLAY( " -r : operate recursively on directories (sets also -m) \n"); #endif DISPLAY( " -l : compress using Legacy format (Linux kernel compression)\n"); - DISPLAY( " -B# : Block size [4-7] (default : 7) \n"); + DISPLAY( " -B# : cut file into blocks of size # bytes [32+] \n"); + DISPLAY( " or predefined block size [4-7] (default: 7) \n"); DISPLAY( " -BD : Block dependency (improve compression ratio) \n"); DISPLAY( " -BX : enable block checksum (default:disabled) \n"); DISPLAY( "--no-frame-crc : disable stream checksum (default:enabled) \n"); @@ -146,8 +147,6 @@ static int usage_advanced(const char* exeName) DISPLAY( " -b# : benchmark file(s), using # compression level (default : 1) \n"); DISPLAY( " -e# : test all compression levels from -bX to # (default : 1)\n"); DISPLAY( " -i# : minimum evaluation time in seconds (default : 3s) \n"); - DISPLAY( " -B# : cut file into independent blocks of size # bytes [32+] \n"); - DISPLAY( " or predefined block size [4-7] (default: 7) \n"); if (g_lz4c_legacy_commands) { DISPLAY( "Legacy arguments : \n"); DISPLAY( " -c0 : fast compression \n"); @@ -497,11 +496,12 @@ int main(int argc, const char** argv) DISPLAYLEVEL(2, "using blocks of size %u KB \n", (U32)(blockSize>>10)); } else { if (B < 32) badusage(exeName); - BMK_setBlockSize(B); - if (B >= 1024) { - DISPLAYLEVEL(2, "bench: using blocks of size %u KB \n", (U32)(B>>10)); + blockSize = LZ4IO_setBlockSize(B); + BMK_setBlockSize(blockSize); + if (blockSize >= 1024) { + DISPLAYLEVEL(2, "using blocks of size %u KB \n", (U32)(blockSize>>10)); } else { - DISPLAYLEVEL(2, "bench: using blocks of size %u bytes \n", (U32)(B)); + DISPLAYLEVEL(2, "using blocks of size %u bytes \n", (U32)(blockSize)); } } break; diff --git a/programs/lz4io.c b/programs/lz4io.c index cdc4c27..a35928d 100644 --- a/programs/lz4io.c +++ b/programs/lz4io.c @@ -110,6 +110,7 @@ static clock_t g_time = 0; static int g_overwrite = 1; static int g_testMode = 0; static int g_blockSizeId = LZ4IO_BLOCKSIZEID_DEFAULT; +static size_t g_blockSize = 0; static int g_blockChecksum = 0; static int g_streamChecksum = 1; static int g_blockIndependence = 1; @@ -178,7 +179,25 @@ size_t LZ4IO_setBlockSizeID(unsigned bsid) static const unsigned maxBlockSizeID = 7; if ((bsid < minBlockSizeID) || (bsid > maxBlockSizeID)) return 0; g_blockSizeId = bsid; - return blockSizeTable[g_blockSizeId-minBlockSizeID]; + g_blockSize = blockSizeTable[g_blockSizeId-minBlockSizeID]; + return g_blockSize; +} + +size_t LZ4IO_setBlockSize(size_t blockSize) +{ + static const size_t minBlockSize = 32; + static const size_t maxBlockSize = 4 MB; + unsigned bsid = 0; + if (blockSize < minBlockSize) blockSize = minBlockSize; + if (blockSize > maxBlockSize) blockSize = maxBlockSize; + g_blockSize = blockSize; + blockSize--; + /* find which of { 64k, 256k, 1MB, 4MB } is closest to blockSize */ + while (blockSize >>= 2) + bsid++; + if (bsid < 7) bsid = 7; + g_blockSizeId = bsid-3; + return g_blockSize; } int LZ4IO_setBlockMode(LZ4IO_blockMode_t blockMode) @@ -237,7 +256,6 @@ void LZ4IO_setRemoveSrcFile(unsigned flag) { g_removeSrcFile = (flag>0); } ** ********************** LZ4 File / Pipe compression ********************* ** ** ************************************************************************ */ -static int LZ4IO_GetBlockSize_FromBlockId (int id) { return (1 << (8 + (2 * id))); } static int LZ4IO_isSkippableMagicNumber(unsigned int magic) { return (magic & LZ4IO_SKIPPABLEMASK) == LZ4IO_SKIPPABLE0; } @@ -499,7 +517,7 @@ static LZ4F_CDict* LZ4IO_createCDict(void) { static cRess_t LZ4IO_createCResources(void) { - const size_t blockSize = (size_t)LZ4IO_GetBlockSize_FromBlockId (g_blockSizeId); + const size_t blockSize = g_blockSize; cRess_t ress; LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&(ress.ctx), LZ4F_VERSION); @@ -543,7 +561,7 @@ static int LZ4IO_compressFilename_extRess(cRess_t ress, const char* srcFileName, void* const srcBuffer = ress.srcBuffer; void* const dstBuffer = ress.dstBuffer; const size_t dstBufferSize = ress.dstBufferSize; - const size_t blockSize = (size_t)LZ4IO_GetBlockSize_FromBlockId (g_blockSizeId); + const size_t blockSize = g_blockSize; size_t readSize; LZ4F_compressionContext_t ctx = ress.ctx; /* just a pointer */ LZ4F_preferences_t prefs; diff --git a/programs/lz4io.h b/programs/lz4io.h index 22c5e3e..33de41f 100644 --- a/programs/lz4io.h +++ b/programs/lz4io.h @@ -78,6 +78,10 @@ int LZ4IO_setTestMode(int yes); return : 0 if error, blockSize if OK */ size_t LZ4IO_setBlockSizeID(unsigned blockSizeID); +/* blockSize : valid values : 32 -> 4MB + return : 0 if error, actual blocksize if OK */ +size_t LZ4IO_setBlockSize(size_t blockSize); + /* Default setting : independent blocks */ typedef enum { LZ4IO_blockLinked=0, LZ4IO_blockIndependent} LZ4IO_blockMode_t; int LZ4IO_setBlockMode(LZ4IO_blockMode_t blockMode); diff --git a/tests/Makefile b/tests/Makefile index 3de111b..989ef85 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -63,7 +63,7 @@ NB_LOOPS ?= -i1 default: all -all: fullbench fuzzer frametest roundTripTest datagen +all: fullbench fuzzer frametest roundTripTest datagen checkFrame all32: CFLAGS+=-m32 all32: all @@ -109,6 +109,9 @@ roundTripTest : lz4.o lz4hc.o xxhash.o roundTripTest.c datagen : $(PRGDIR)/datagen.c datagencli.c $(CC) $(FLAGS) -I$(PRGDIR) $^ -o $@$(EXT) +checkFrame : lz4frame.o lz4.o lz4hc.o xxhash.o checkFrame.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + clean: @$(MAKE) -C $(LZ4DIR) $@ > $(VOID) @$(MAKE) -C $(PRGDIR) $@ > $(VOID) @@ -118,7 +121,8 @@ clean: fuzzer$(EXT) fuzzer32$(EXT) \ frametest$(EXT) frametest32$(EXT) \ fasttest$(EXT) roundTripTest$(EXT) \ - datagen$(EXT) checkTag$(EXT) + datagen$(EXT) checkTag$(EXT) \ + frameTest$(EXT) @rm -fR $(TESTDIR) @echo Cleaning completed diff --git a/tests/checkFrame.c b/tests/checkFrame.c new file mode 100644 index 0000000..f7e7a50 --- /dev/null +++ b/tests/checkFrame.c @@ -0,0 +1,306 @@ + /* + checkFrame - verify frame headers + Copyright (C) Yann Collet 2014-2016 + + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + You can contact the author at : + - LZ4 homepage : http://www.lz4.org + - LZ4 source repository : https://github.com/lz4/lz4 + */ + + /*-************************************ + * Compiler specific + **************************************/ + #ifdef _MSC_VER /* Visual Studio */ + # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ + # pragma warning(disable : 4146) /* disable: C4146: minus unsigned expression */ + #endif + + + /*-************************************ + * Includes + **************************************/ + #include "util.h" /* U32 */ + #include <stdlib.h> /* malloc, free */ + #include <stdio.h> /* fprintf */ + #include <string.h> /* strcmp */ + #include <time.h> /* clock_t, clock(), CLOCKS_PER_SEC */ + #include <assert.h> + #include "lz4frame.h" /* include multiple times to test correctness/safety */ + #include "lz4frame.h" + #define LZ4F_STATIC_LINKING_ONLY + #include "lz4frame.h" + #include "lz4frame.h" + #include "lz4.h" /* LZ4_VERSION_STRING */ + #define XXH_STATIC_LINKING_ONLY + #include "xxhash.h" /* XXH64 */ + + + /*-************************************ + * Constants + **************************************/ + #define KB *(1U<<10) + #define MB *(1U<<20) + #define GB *(1U<<30) + + + /*-************************************ + * Macros + **************************************/ + #define DISPLAY(...) fprintf(stderr, __VA_ARGS__) + #define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } + + /************************************** + * Exceptions + ***************************************/ + #ifndef DEBUG + # define DEBUG 0 + #endif + #define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__); + #define EXM_THROW(error, ...) \ +{ \ + DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \ + DISPLAYLEVEL(1, "Error %i : ", error); \ + DISPLAYLEVEL(1, __VA_ARGS__); \ + DISPLAYLEVEL(1, " \n"); \ + return(error); \ +} + + + +/*-*************************************** +* Local Parameters +*****************************************/ +static U32 no_prompt = 0; +static U32 displayLevel = 2; +static U32 use_pause = 0; + + +/*-******************************************************* +* Fuzzer functions +*********************************************************/ +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) +#define MAX(a,b) ( (a) > (b) ? (a) : (b) ) + +typedef struct { + void* srcBuffer; + size_t srcBufferSize; + void* dstBuffer; + size_t dstBufferSize; + LZ4F_decompressionContext_t ctx; +} cRess_t; + +static int createCResources(cRess_t *ress) +{ + ress->srcBufferSize = 4 MB; + ress->srcBuffer = malloc(ress->srcBufferSize); + ress->dstBufferSize = 4 MB; + ress->dstBuffer = malloc(ress->dstBufferSize); + + if (!ress->srcBuffer || !ress->dstBuffer) { + free(ress->srcBuffer); + free(ress->dstBuffer); + EXM_THROW(20, "Allocation error : not enough memory"); + } + + if (LZ4F_isError( LZ4F_createDecompressionContext(&(ress->ctx), LZ4F_VERSION) )) { + free(ress->srcBuffer); + free(ress->dstBuffer); + EXM_THROW(21, "Unable to create decompression context"); + } + return 0; +} + +static void freeCResources(cRess_t ress) +{ + free(ress.srcBuffer); + free(ress.dstBuffer); + + (void) LZ4F_freeDecompressionContext(ress.ctx); +} + +int frameCheck(cRess_t ress, FILE* const srcFile, unsigned bsid, size_t blockSize) +{ + LZ4F_errorCode_t nextToLoad = 0; + size_t curblocksize = 0; + int partialBlock = 0; + + /* Main Loop */ + for (;;) { + size_t readSize; + size_t pos = 0; + size_t decodedBytes = ress.dstBufferSize; + size_t remaining; + LZ4F_frameInfo_t frameInfo; + + /* Read input */ + readSize = fread(ress.srcBuffer, 1, ress.srcBufferSize, srcFile); + if (!readSize) break; /* reached end of file or stream */ + + while (pos < readSize) { /* still to read */ + /* Decode Input (at least partially) */ + if (!nextToLoad) { + /* LZ4F_decompress returned 0 : starting new frame */ + curblocksize = 0; + remaining = readSize - pos; + nextToLoad = LZ4F_getFrameInfo(ress.ctx, &frameInfo, (char*)(ress.srcBuffer)+pos, &remaining); + if (LZ4F_isError(nextToLoad)) EXM_THROW(22, "Error getting frame info: %s", LZ4F_getErrorName(nextToLoad)); /* XXX */ + if (frameInfo.blockSizeID != bsid) EXM_THROW(23, "Block size ID %u != expected %u", frameInfo.blockSizeID, bsid); + pos += remaining; + /* nextToLoad should be block header size */ + remaining = nextToLoad; + decodedBytes = ress.dstBufferSize; + nextToLoad = LZ4F_decompress(ress.ctx, ress.dstBuffer, &decodedBytes, (char*)(ress.srcBuffer)+pos, &remaining, NULL); + if (LZ4F_isError(nextToLoad)) EXM_THROW(24, "Decompression error : %s", LZ4F_getErrorName(nextToLoad)); + pos += remaining; + } + decodedBytes = ress.dstBufferSize; + /* nextToLoad should be just enough to cover the next block */ + if (nextToLoad > (readSize - pos)) { + /* block is not fully contained in current buffer */ + partialBlock = 1; + remaining = readSize - pos; + } else { + if (partialBlock) { + partialBlock = 0; + } + remaining = nextToLoad; + } + nextToLoad = LZ4F_decompress(ress.ctx, ress.dstBuffer, &decodedBytes, (char*)(ress.srcBuffer)+pos, &remaining, NULL); + if (LZ4F_isError(nextToLoad)) EXM_THROW(24, "Decompression error : %s", LZ4F_getErrorName(nextToLoad)); + curblocksize += decodedBytes; + pos += remaining; + if (!partialBlock) { + /* detect small block due to end of frame; the final 4-byte frame checksum could be left in the buffer */ + if ((curblocksize != 0) && (nextToLoad > 4)) { + if (curblocksize != blockSize) + EXM_THROW(25, "Block size %zu != expected %zu, pos %zu\n", curblocksize, blockSize, pos); + } + curblocksize = 0; + } + } + } + /* can be out because readSize == 0, which could be an fread() error */ + if (ferror(srcFile)) EXM_THROW(26, "Read error"); + + if (nextToLoad!=0) EXM_THROW(27, "Unfinished stream"); + + return 0; +} + +int FUZ_usage(const char* programName) +{ + DISPLAY( "Usage :\n"); + DISPLAY( " %s [args] filename\n", programName); + DISPLAY( "\n"); + DISPLAY( "Arguments :\n"); + DISPLAY( " -b# : expected blocksizeID [4-7] (required)\n"); + DISPLAY( " -B# : expected blocksize [32-4194304] (required)\n"); + DISPLAY( " -v : verbose\n"); + DISPLAY( " -h : display help and exit\n"); + return 0; +} + + +int main(int argc, const char** argv) +{ + int argNb; + int bsid=0; + size_t blockSize=0; + const char* const programName = argv[0]; + + /* Check command line */ + for (argNb=1; argNb<argc; argNb++) { + const char* argument = argv[argNb]; + + if(!argument) continue; /* Protection if argument empty */ + + /* Decode command (note : aggregated short commands are allowed) */ + if (argument[0]=='-') { + if (!strcmp(argument, "--no-prompt")) { + no_prompt=1; + displayLevel=1; + continue; + } + argument++; + + while (*argument!=0) { + switch(*argument) + { + case 'h': + return FUZ_usage(programName); + case 'v': + argument++; + displayLevel++; + break; + case 'q': + argument++; + displayLevel--; + break; + case 'p': /* pause at the end */ + argument++; + use_pause = 1; + break; + + case 'b': + argument++; + bsid=0; + while ((*argument>='0') && (*argument<='9')) { + bsid *= 10; + bsid += *argument - '0'; + argument++; + } + break; + + case 'B': + argument++; + blockSize=0; + while ((*argument>='0') && (*argument<='9')) { + blockSize *= 10; + blockSize += *argument - '0'; + argument++; + } + break; + + default: + ; + return FUZ_usage(programName); + } + } + } else { + int err; + FILE *srcFile; + cRess_t ress; + if (bsid == 0 || blockSize == 0) + return FUZ_usage(programName); + DISPLAY("Starting frame checker (%i-bits, %s)\n", (int)(sizeof(size_t)*8), LZ4_VERSION_STRING); + err = createCResources(&ress); + if (err) return (err); + srcFile = fopen(argument, "rb"); + if ( srcFile==NULL ) { + freeCResources(ress); + EXM_THROW(1, "%s: %s \n", argument, strerror(errno)); + } + err = frameCheck(ress, srcFile, bsid, blockSize); + freeCResources(ress); + fclose(srcFile); + return (err); + } + } + return 0; +} diff --git a/tests/test_custom_block_sizes.sh b/tests/test_custom_block_sizes.sh new file mode 100755 index 0000000..aba6733 --- /dev/null +++ b/tests/test_custom_block_sizes.sh @@ -0,0 +1,72 @@ +#/usr/bin/env sh +set -e + +LZ4=../lz4 +CHECKFRAME=./checkFrame +DATAGEN=./datagen + +failures="" + +TMPFILE=/tmp/test_custom_block_sizes.$$ +TMPFILE1=/tmp/test_custom_block_sizes1.$$ +TMPFILE2=/tmp/test_custom_block_sizes2.$$ +$DATAGEN -g12345678 > $TMPFILE1 +$DATAGEN -g12345678 > $TMPFILE2 + +echo Testing -B31 +$LZ4 -f -B31 $TMPFILE1 && failures="31 (should fail) " + +for blocksize in 32 65535 65536 +do + echo Testing -B$blocksize + $LZ4 -f -B$blocksize $TMPFILE1 + $LZ4 -f -B$blocksize $TMPFILE2 + cat $TMPFILE1.lz4 $TMPFILE2.lz4 > $TMPFILE.lz4 + $CHECKFRAME -B$blocksize -b4 $TMPFILE.lz4 || failures="$failures $blocksize " +done + +for blocksize in 65537 262143 262144 +do + echo Testing -B$blocksize + $LZ4 -f -B$blocksize $TMPFILE1 + $LZ4 -f -B$blocksize $TMPFILE2 + cat $TMPFILE1.lz4 $TMPFILE2.lz4 > $TMPFILE.lz4 + $CHECKFRAME -B$blocksize -b5 $TMPFILE.lz4 || failures="$failures $blocksize " +done + +for blocksize in 262145 1048575 1048576 +do + echo Testing -B$blocksize + $LZ4 -f -B$blocksize $TMPFILE1 + $LZ4 -f -B$blocksize $TMPFILE2 + cat $TMPFILE1.lz4 $TMPFILE2.lz4 > $TMPFILE.lz4 + $CHECKFRAME -B$blocksize -b6 $TMPFILE.lz4 || failures="$failures $blocksize " +done + +for blocksize in 1048577 4194303 4194304 +do + echo Testing -B$blocksize + $LZ4 -f -B$blocksize $TMPFILE1 + $LZ4 -f -B$blocksize $TMPFILE2 + cat $TMPFILE1.lz4 $TMPFILE2.lz4 > $TMPFILE.lz4 + $CHECKFRAME -B$blocksize -b7 $TMPFILE.lz4 || failures="$failures $blocksize " +done + +for blocksize in 4194305 10485760 +do + echo Testing -B$blocksize + $LZ4 -f -B$blocksize $TMPFILE1 + $LZ4 -f -B$blocksize $TMPFILE2 + cat $TMPFILE1.lz4 $TMPFILE2.lz4 > $TMPFILE.lz4 + $CHECKFRAME -B4194304 -b7 $TMPFILE.lz4 || failures="$failures $blocksize " +done + +rm $TMPFILE.lz4 $TMPFILE1 $TMPFILE1.lz4 $TMPFILE2 $TMPFILE2.lz4 +if [ "$failures" == "" ] +then + echo ---- All tests passed + exit 0 +else + echo ---- The following tests had failures: $failures + exit 1 +fi |