From 64ad395d27ad4f935cbcf9a5ed21699fbf1ce89c Mon Sep 17 00:00:00 2001 From: Blaise Sanouillet <43636048+blezsan@users.noreply.github.com> Date: Fri, 28 Sep 2018 08:02:49 -0700 Subject: support custom block sizes --- programs/lz4cli.c | 14 +- programs/lz4io.c | 25 +++- programs/lz4io.h | 4 + tests/Makefile | 8 +- tests/checkFrame.c | 269 +++++++++++++++++++++++++++++++++++++++ tests/test_custom_block_sizes.sh | 72 +++++++++++ 6 files changed, 380 insertions(+), 12 deletions(-) create mode 100644 tests/checkFrame.c create mode 100755 tests/test_custom_block_sizes.sh diff --git a/programs/lz4cli.c b/programs/lz4cli.c index 46a95cd..759f336 100644 --- a/programs/lz4cli.c +++ b/programs/lz4cli.c @@ -134,7 +134,8 @@ static int usage_advanced(const char* exeName) DISPLAY( " -r : operate recursively on directories (sets also -m) \n"); #endif DISPLAY( " -l : compress using Legacy format (Linux kernel compression)\n"); - DISPLAY( " -B# : Block size [4-7] (default : 7) \n"); + DISPLAY( " -B# : cut file into independent blocks of size # bytes [32+] \n"); + DISPLAY( " or predefined block size [4-7] (default: 7) \n"); DISPLAY( " -BD : Block dependency (improve compression ratio) \n"); DISPLAY( " -BX : enable block checksum (default:disabled) \n"); DISPLAY( "--no-frame-crc : disable stream checksum (default:enabled) \n"); @@ -146,8 +147,6 @@ static int usage_advanced(const char* exeName) DISPLAY( " -b# : benchmark file(s), using # compression level (default : 1) \n"); DISPLAY( " -e# : test all compression levels from -bX to # (default : 1)\n"); DISPLAY( " -i# : minimum evaluation time in seconds (default : 3s) \n"); - DISPLAY( " -B# : cut file into independent blocks of size # bytes [32+] \n"); - DISPLAY( " or predefined block size [4-7] (default: 7) \n"); if (g_lz4c_legacy_commands) { DISPLAY( "Legacy arguments : \n"); DISPLAY( " -c0 : fast compression \n"); @@ -497,11 +496,12 @@ int main(int argc, const char** argv) DISPLAYLEVEL(2, "using blocks of size %u KB \n", (U32)(blockSize>>10)); } else { if (B < 32) badusage(exeName); - BMK_setBlockSize(B); - if (B >= 1024) { - DISPLAYLEVEL(2, "bench: using blocks of size %u KB \n", (U32)(B>>10)); + blockSize = LZ4IO_setBlockSize(B); + BMK_setBlockSize(blockSize); + if (blockSize >= 1024) { + DISPLAYLEVEL(2, "using blocks of size %u KB \n", (U32)(blockSize>>10)); } else { - DISPLAYLEVEL(2, "bench: using blocks of size %u bytes \n", (U32)(B)); + DISPLAYLEVEL(2, "using blocks of size %u bytes \n", (U32)(blockSize)); } } break; diff --git a/programs/lz4io.c b/programs/lz4io.c index cdc4c27..17ff9a1 100644 --- a/programs/lz4io.c +++ b/programs/lz4io.c @@ -110,6 +110,7 @@ static clock_t g_time = 0; static int g_overwrite = 1; static int g_testMode = 0; static int g_blockSizeId = LZ4IO_BLOCKSIZEID_DEFAULT; +static int g_blockSize = 0; static int g_blockChecksum = 0; static int g_streamChecksum = 1; static int g_blockIndependence = 1; @@ -178,7 +179,25 @@ size_t LZ4IO_setBlockSizeID(unsigned bsid) static const unsigned maxBlockSizeID = 7; if ((bsid < minBlockSizeID) || (bsid > maxBlockSizeID)) return 0; g_blockSizeId = bsid; - return blockSizeTable[g_blockSizeId-minBlockSizeID]; + g_blockSize = blockSizeTable[g_blockSizeId-minBlockSizeID]; + return g_blockSize; +} + +size_t LZ4IO_setBlockSize(size_t blockSize) +{ + static const size_t minBlockSize = 32; + static const size_t maxBlockSize = 4 MB; + unsigned bsid = 0; + if (blockSize < minBlockSize) blockSize = minBlockSize; + if (blockSize > maxBlockSize) blockSize = maxBlockSize; + g_blockSize = blockSize; + blockSize--; + /* find which of { 64k, 256k, 1MB, 4MB } is closest to blockSize */ + while (blockSize >>= 2) + bsid++; + if (bsid < 7) bsid = 7; + g_blockSizeId = bsid-3; + return g_blockSize; } int LZ4IO_setBlockMode(LZ4IO_blockMode_t blockMode) @@ -499,7 +518,7 @@ static LZ4F_CDict* LZ4IO_createCDict(void) { static cRess_t LZ4IO_createCResources(void) { - const size_t blockSize = (size_t)LZ4IO_GetBlockSize_FromBlockId (g_blockSizeId); + const size_t blockSize = g_blockSize; cRess_t ress; LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&(ress.ctx), LZ4F_VERSION); @@ -543,7 +562,7 @@ static int LZ4IO_compressFilename_extRess(cRess_t ress, const char* srcFileName, void* const srcBuffer = ress.srcBuffer; void* const dstBuffer = ress.dstBuffer; const size_t dstBufferSize = ress.dstBufferSize; - const size_t blockSize = (size_t)LZ4IO_GetBlockSize_FromBlockId (g_blockSizeId); + const size_t blockSize = g_blockSize; size_t readSize; LZ4F_compressionContext_t ctx = ress.ctx; /* just a pointer */ LZ4F_preferences_t prefs; diff --git a/programs/lz4io.h b/programs/lz4io.h index 22c5e3e..33de41f 100644 --- a/programs/lz4io.h +++ b/programs/lz4io.h @@ -78,6 +78,10 @@ int LZ4IO_setTestMode(int yes); return : 0 if error, blockSize if OK */ size_t LZ4IO_setBlockSizeID(unsigned blockSizeID); +/* blockSize : valid values : 32 -> 4MB + return : 0 if error, actual blocksize if OK */ +size_t LZ4IO_setBlockSize(size_t blockSize); + /* Default setting : independent blocks */ typedef enum { LZ4IO_blockLinked=0, LZ4IO_blockIndependent} LZ4IO_blockMode_t; int LZ4IO_setBlockMode(LZ4IO_blockMode_t blockMode); diff --git a/tests/Makefile b/tests/Makefile index 3de111b..989ef85 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -63,7 +63,7 @@ NB_LOOPS ?= -i1 default: all -all: fullbench fuzzer frametest roundTripTest datagen +all: fullbench fuzzer frametest roundTripTest datagen checkFrame all32: CFLAGS+=-m32 all32: all @@ -109,6 +109,9 @@ roundTripTest : lz4.o lz4hc.o xxhash.o roundTripTest.c datagen : $(PRGDIR)/datagen.c datagencli.c $(CC) $(FLAGS) -I$(PRGDIR) $^ -o $@$(EXT) +checkFrame : lz4frame.o lz4.o lz4hc.o xxhash.o checkFrame.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + clean: @$(MAKE) -C $(LZ4DIR) $@ > $(VOID) @$(MAKE) -C $(PRGDIR) $@ > $(VOID) @@ -118,7 +121,8 @@ clean: fuzzer$(EXT) fuzzer32$(EXT) \ frametest$(EXT) frametest32$(EXT) \ fasttest$(EXT) roundTripTest$(EXT) \ - datagen$(EXT) checkTag$(EXT) + datagen$(EXT) checkTag$(EXT) \ + frameTest$(EXT) @rm -fR $(TESTDIR) @echo Cleaning completed diff --git a/tests/checkFrame.c b/tests/checkFrame.c new file mode 100644 index 0000000..ace51c7 --- /dev/null +++ b/tests/checkFrame.c @@ -0,0 +1,269 @@ +/* + checkFrame - verify frame headers + Copyright (C) Yann Collet 2014-2016 + + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + You can contact the author at : + - LZ4 homepage : http://www.lz4.org + - LZ4 source repository : https://github.com/lz4/lz4 +*/ + +/*-************************************ +* Compiler specific +**************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4146) /* disable: C4146: minus unsigned expression */ +#endif + + +/*-************************************ +* Includes +**************************************/ +#include "util.h" /* U32 */ +#include /* malloc, free */ +#include /* fprintf */ +#include /* strcmp */ +#include /* clock_t, clock(), CLOCKS_PER_SEC */ +#include +#include "lz4frame.h" /* include multiple times to test correctness/safety */ +#include "lz4frame.h" +#define LZ4F_STATIC_LINKING_ONLY +#include "lz4frame.h" +#include "lz4frame.h" +#include "lz4.h" /* LZ4_VERSION_STRING */ +#define XXH_STATIC_LINKING_ONLY +#include "xxhash.h" /* XXH64 */ + + +/*-************************************ +* Constants +**************************************/ +#define KB *(1U<<10) +#define MB *(1U<<20) +#define GB *(1U<<30) + + +/*-************************************ +* Macros +**************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } + +/************************************** +* Exceptions +***************************************/ +#ifndef DEBUG +# define DEBUG 0 +#endif +#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__); +#define EXM_THROW(error, ...) \ +{ \ + DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \ + DISPLAYLEVEL(1, "Error %i : ", error); \ + DISPLAYLEVEL(1, __VA_ARGS__); \ + DISPLAYLEVEL(1, " \n"); \ + exit(error); \ +} + + + +/*-*************************************** +* Local Parameters +*****************************************/ +static U32 no_prompt = 0; +static U32 displayLevel = 2; +static U32 use_pause = 0; + + +/*-******************************************************* +* Fuzzer functions +*********************************************************/ +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) +#define MAX(a,b) ( (a) > (b) ? (a) : (b) ) + +void frameCheck(const char *srcFileName, unsigned bsid, size_t blockSize) +{ + FILE *srcFile; + LZ4F_decompressionContext_t dctx = NULL; + size_t srcBufferSize = 4 MB; + void *srcBuffer = malloc(srcBufferSize); + size_t dstBufferSize = 4 MB; + void *dstBuffer = malloc(dstBufferSize); + LZ4F_errorCode_t nextToLoad = 0; + size_t curblocksize = 0; + int partialBlock = 0; + + if (!srcBuffer || !dstBuffer) EXM_THROW(31, "Allocation error : not enough memory"); + srcFile = fopen(srcFileName, "rb"); + if ( srcFile==NULL ) EXM_THROW(1, "%s: %s \n", srcFileName, strerror(errno)); + + if (LZ4F_isError( LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION) )) { + EXM_THROW(32, "Unable to create decompression context"); + } + /* Main Loop */ + for (;;) { + size_t readSize; + size_t pos = 0; + size_t decodedBytes = dstBufferSize; + size_t remaining; + LZ4F_frameInfo_t frameInfo; + + /* Read input */ + readSize = fread(srcBuffer, 1, srcBufferSize, srcFile); + if (!readSize) break; /* reached end of file or stream */ + + while (pos < readSize) { /* still to read */ + /* Decode Input (at least partially) */ + if (!nextToLoad) { + /* LZ4F_decompress returned 0 : starting new frame */ + curblocksize = 0; + remaining = readSize - pos; + nextToLoad = LZ4F_getFrameInfo(dctx, &frameInfo, (char*)(srcBuffer)+pos, &remaining); + if (LZ4F_isError(nextToLoad)) EXM_THROW(66, "Error getting frame info: %s", LZ4F_getErrorName(nextToLoad)); /* XXX */ + if (frameInfo.blockSizeID != bsid) EXM_THROW(66, "Block size ID %u != expected %u", frameInfo.blockSizeID, bsid); + pos += remaining; + /* nextToLoad should be block header size */ + remaining = nextToLoad; + decodedBytes = dstBufferSize; + nextToLoad = LZ4F_decompress(dctx, dstBuffer, &decodedBytes, (char*)(srcBuffer)+pos, &remaining, NULL); + if (LZ4F_isError(nextToLoad)) EXM_THROW(66, "Decompression error : %s", LZ4F_getErrorName(nextToLoad)); + pos += remaining; + } + decodedBytes = dstBufferSize; + /* nextToLoad should be just enough to cover the next block */ + if (nextToLoad > (readSize - pos)) { + /* block is not fully contained in current buffer */ + partialBlock = 1; + remaining = readSize - pos; + } else { + if (partialBlock) { + partialBlock = 0; + } + remaining = nextToLoad; + } + nextToLoad = LZ4F_decompress(dctx, dstBuffer, &decodedBytes, (char*)(srcBuffer)+pos, &remaining, NULL); + if (LZ4F_isError(nextToLoad)) EXM_THROW(66, "Decompression error : %s", LZ4F_getErrorName(nextToLoad)); + curblocksize += decodedBytes; + pos += remaining; + if (!partialBlock) { + /* detect small block due to end of frame; the final 4-byte frame checksum could be left in the buffer */ + if ((curblocksize != 0) && (nextToLoad > 4)) { + /* allow 4-byte fudge */ + if ((MAX(curblocksize, blockSize) - MIN(curblocksize, blockSize)) > 4) + EXM_THROW(66, "Block size %zu != expected %zu", curblocksize, blockSize); + } + curblocksize = 0; + } + } + } + /* can be out because readSize == 0, which could be an fread() error */ + if (ferror(srcFile)) EXM_THROW(67, "Read error"); + + if (nextToLoad!=0) EXM_THROW(68, "Unfinished stream"); + +} + +int FUZ_usage(const char* programName) +{ + DISPLAY( "Usage :\n"); + DISPLAY( " %s [args] filename\n", programName); + DISPLAY( "\n"); + DISPLAY( "Arguments :\n"); + DISPLAY( " -b# : expected blocksizeID [4-7] (required)\n"); + DISPLAY( " -B# : expected blocksize [4-7] (required)\n"); + DISPLAY( " -v : verbose\n"); + DISPLAY( " -h : display help and exit\n"); + return 0; +} + + +int main(int argc, const char** argv) +{ + int argNb; + int bsid=0; + size_t blockSize=0; + const char* const programName = argv[0]; + + /* Check command line */ + for (argNb=1; argNb='0') && (*argument<='9')) { + bsid *= 10; + bsid += *argument - '0'; + argument++; + } + break; + + case 'B': + argument++; + blockSize=0; + while ((*argument>='0') && (*argument<='9')) { + blockSize *= 10; + blockSize += *argument - '0'; + argument++; + } + break; + + default: + ; + return FUZ_usage(programName); + } + } + } else { + if (bsid == 0 || blockSize == 0) + return FUZ_usage(programName); + DISPLAY("Starting frame checker (%i-bits, %s)\n", (int)(sizeof(size_t)*8), LZ4_VERSION_STRING); + frameCheck(argument, bsid, blockSize); + } + } + return 0; +} diff --git a/tests/test_custom_block_sizes.sh b/tests/test_custom_block_sizes.sh new file mode 100755 index 0000000..7317b65 --- /dev/null +++ b/tests/test_custom_block_sizes.sh @@ -0,0 +1,72 @@ +#/usr/bin/env sh +set -e + +LZ4=../lz4 +CHECKFRAME=./checkFrame +DATAGEN=./datagen + +failures="" + +TMPFILE=/tmp/test_custom_block_sizes.$$ +TMPFILE1=/tmp/test_custom_block_sizes1.$$ +TMPFILE2=/tmp/test_custom_block_sizes2.$$ +$DATAGEN -g12345678 > $TMPFILE1 +$DATAGEN -g12345678 > $TMPFILE2 + +echo Testing -B32 +$LZ4 -f -B32 $TMPFILE && failures="31 (should fail) " + +for blocksize in 512 65535 65536 +do + echo Testing -B$blocksize + $LZ4 -f -B$blocksize $TMPFILE1 + $LZ4 -f -B$blocksize $TMPFILE2 + cat $TMPFILE1.lz4 $TMPFILE2.lz4 > $TMPFILE.lz4 + $CHECKFRAME -B$blocksize -b4 $TMPFILE.lz4 || failures="$failures $blocksize " +done + +for blocksize in 65537 262143 262144 +do + echo Testing -B$blocksize + $LZ4 -f -B$blocksize $TMPFILE1 + $LZ4 -f -B$blocksize $TMPFILE2 + cat $TMPFILE1.lz4 $TMPFILE2.lz4 > $TMPFILE.lz4 + $CHECKFRAME -B$blocksize -b5 $TMPFILE.lz4 || failures="$failures $blocksize " +done + +for blocksize in 262145 1048575 1048576 +do + echo Testing -B$blocksize + $LZ4 -f -B$blocksize $TMPFILE1 + $LZ4 -f -B$blocksize $TMPFILE2 + cat $TMPFILE1.lz4 $TMPFILE2.lz4 > $TMPFILE.lz4 + $CHECKFRAME -B$blocksize -b6 $TMPFILE.lz4 || failures="$failures $blocksize " +done + +for blocksize in 1048577 4194303 4194304 +do + echo Testing -B$blocksize + $LZ4 -f -B$blocksize $TMPFILE1 + $LZ4 -f -B$blocksize $TMPFILE2 + cat $TMPFILE1.lz4 $TMPFILE2.lz4 > $TMPFILE.lz4 + $CHECKFRAME -B$blocksize -b7 $TMPFILE.lz4 || failures="$failures $blocksize " +done + +for blocksize in 4194305 10485760 +do + echo Testing -B$blocksize + $LZ4 -f -B$blocksize $TMPFILE1 + $LZ4 -f -B$blocksize $TMPFILE2 + cat $TMPFILE1.lz4 $TMPFILE2.lz4 > $TMPFILE.lz4 + $CHECKFRAME -B4194304 -b7 $TMPFILE.lz4 || failures="$failures $blocksize " +done + +rm $TMPFILE.lz4 $TMPFILE1 $TMPFILE1.lz4 $TMPFILE2 $TMPFILE2.lz4 +if [ "$failures" == "" ] +then + echo ---- All tests passed + exit 0 +else + echo ---- The following tests had failures: $failures + exit 1 +fi -- cgit v0.12