diff options
Diffstat (limited to 'programs')
-rw-r--r-- | programs/Makefile | 118 | ||||
-rw-r--r-- | programs/bench.c | 51 | ||||
-rw-r--r-- | programs/bench.h | 23 | ||||
-rw-r--r-- | programs/datagen.c | 329 | ||||
-rw-r--r-- | programs/datagen.h | 40 | ||||
-rw-r--r-- | programs/datagencli.c | 193 | ||||
-rw-r--r-- | programs/frametest.c | 260 | ||||
-rw-r--r-- | programs/fullbench.c | 204 | ||||
-rw-r--r-- | programs/fuzzer.c | 109 | ||||
-rw-r--r-- | programs/lz4.1 | 197 | ||||
-rw-r--r-- | programs/lz4c.1 | 33 | ||||
-rw-r--r-- | programs/lz4cat.1 | 32 | ||||
-rw-r--r-- | programs/lz4cli.c | 117 | ||||
-rw-r--r-- | programs/lz4io.c | 419 | ||||
-rw-r--r-- | programs/lz4io.h | 29 |
15 files changed, 1452 insertions, 702 deletions
diff --git a/programs/Makefile b/programs/Makefile index 543eb7c..6aade89 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -19,8 +19,8 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # You can contact the author at : -# - LZ4 source repository : http://code.google.com/p/lz4/ -# - LZ4 forum froup : https://groups.google.com/forum/#!forum/lz4c +# - LZ4 source repository : https://github.com/Cyan4973/lz4 +# - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c # ########################################################################## # lz4 : Command Line Utility, supporting gzip-like arguments # lz4c : CLU, supporting also legacy lz4demo arguments @@ -31,10 +31,10 @@ # fullbench32: Same as fullbench, but forced to compile in 32-bits mode # ########################################################################## -RELEASE?= r126 +RELEASE?= r128 DESTDIR?= -PREFIX ?= /usr +PREFIX ?= /usr/local CFLAGS ?= -O3 CFLAGS += -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wstrict-prototypes -pedantic -DLZ4_VERSION=\"$(RELEASE)\" FLAGS = -I../lib $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) @@ -92,10 +92,9 @@ frametest: $(LZ4DIR)/lz4frame.c $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c $(LZ4DIR)/xxha frametest32: $(LZ4DIR)/lz4frame.c $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c $(LZ4DIR)/xxhash.c frametest.c $(CC) -m32 $(FLAGS) $^ -o $@$(EXT) -datagen : datagen.c +datagen : datagen.c datagencli.c $(CC) $(FLAGS) $^ -o $@$(EXT) - clean: @rm -f core *.o *.test \ lz4$(EXT) lz4c$(EXT) lz4c32$(EXT) \ @@ -113,22 +112,26 @@ ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU)) install: lz4 lz4c @echo Installing binaries @install -d -m 755 $(DESTDIR)$(BINDIR)/ $(DESTDIR)$(MANDIR)/ - @install -m 755 lz4$(EXT) $(DESTDIR)$(BINDIR)/lz4$(EXT) - @ln -sf lz4$(EXT) $(DESTDIR)$(BINDIR)/lz4cat - @install -m 755 lz4c$(EXT) $(DESTDIR)$(BINDIR)/lz4c$(EXT) + @install -m 755 lz4 $(DESTDIR)$(BINDIR)/lz4 + @ln -sf lz4 $(DESTDIR)$(BINDIR)/lz4cat + @ln -sf lz4 
$(DESTDIR)$(BINDIR)/unlz4 + @install -m 755 lz4c $(DESTDIR)$(BINDIR)/lz4c @echo Installing man pages @install -m 644 lz4.1 $(DESTDIR)$(MANDIR)/lz4.1 - @install -m 644 lz4c.1 $(DESTDIR)$(MANDIR)/lz4c.1 - @install -m 644 lz4cat.1 $(DESTDIR)$(MANDIR)/lz4cat.1 + @ln -sf lz4.1 $(DESTDIR)$(MANDIR)/lz4c.1 + @ln -sf lz4.1 $(DESTDIR)$(MANDIR)/lz4cat.1 + @ln -sf lz4.1 $(DESTDIR)$(MANDIR)/unlz4.1 @echo lz4 installation completed uninstall: rm -f $(DESTDIR)$(BINDIR)/lz4cat - [ -x $(DESTDIR)$(BINDIR)/lz4$(EXT) ] && rm -f $(DESTDIR)$(BINDIR)/lz4$(EXT) - [ -x $(DESTDIR)$(BINDIR)/lz4c$(EXT) ] && rm -f $(DESTDIR)$(BINDIR)/lz4c$(EXT) + rm -f $(DESTDIR)$(BINDIR)/unlz4 + [ -x $(DESTDIR)$(BINDIR)/lz4 ] && rm -f $(DESTDIR)$(BINDIR)/lz4 + [ -x $(DESTDIR)$(BINDIR)/lz4c ] && rm -f $(DESTDIR)$(BINDIR)/lz4c [ -f $(DESTDIR)$(MANDIR)/lz4.1 ] && rm -f $(DESTDIR)$(MANDIR)/lz4.1 - [ -f $(DESTDIR)$(MANDIR)/lz4c.1 ] && rm -f $(DESTDIR)$(MANDIR)/lz4c.1 - [ -f $(DESTDIR)$(MANDIR)/lz4cat.1 ] && rm -f $(DESTDIR)$(MANDIR)/lz4cat.1 + rm -f $(DESTDIR)$(MANDIR)/lz4c.1 + rm -f $(DESTDIR)$(MANDIR)/lz4cat.1 + rm -f $(DESTDIR)$(MANDIR)/unlz4.1 @echo lz4 programs successfully uninstalled test: test-lz4 test-lz4c test-frametest test-fullbench test-fuzzer test-mem @@ -139,13 +142,32 @@ test-all: test test32 test-travis: $(TRAVIS_TARGET) -test-lz4: lz4 datagen - ./datagen -g16KB | ./lz4 -9 | ./lz4 -vdq > $(VOID) - ./datagen | ./lz4 | ./lz4 -vdq > $(VOID) - ./datagen -g6M -p100 | ./lz4 -9BD | ./lz4 -vdq > $(VOID) - ./datagen -g256MB | ./lz4 -vqB4D | ./lz4 -vdq > $(VOID) - ./datagen -g6GB | ./lz4 -vqB5D | ./lz4 -vdq > $(VOID) -# test frame concatenation with null-length frame +test-lz4-sparse: lz4 datagen + @echo ---- test sparse file support ---- + ./datagen -g50M -P100 | ./lz4 -B4D | ./lz4 -dv --sparse-support > tmpB4 + ./datagen -g50M -P100 | ./lz4 -B5D | ./lz4 -dv --sparse-support > tmpB5 + ./datagen -g50M -P100 | ./lz4 -B6D | ./lz4 -dv --sparse-support > tmpB6 + ./datagen -g50M -P100 | ./lz4 -B7D | ./lz4 -dv 
--sparse-support > tmpB7 + ls -ls tmp* + ./datagen -g50M -P100 | diff -s - tmpB4 + ./datagen -g50M -P100 | diff -s - tmpB5 + ./datagen -g50M -P100 | diff -s - tmpB6 + ./datagen -g50M -P100 | diff -s - tmpB7 + ./datagen -s1 -g1200007 -P100 | ./lz4 | ./lz4 -dv --sparse-support > tmpOdd # Odd size file (to not finish on an exact nb of blocks) + ./datagen -s1 -g1200007 -P100 | diff -s - tmpOdd + ls -ls tmpOdd + @rm tmp* + +test-lz4-contentSize: lz4 datagen + @echo ---- test original size support ---- + ./datagen -g15M > tmp + ./lz4 -v tmp | ./lz4 -t + ./lz4 -v --frame-content-size tmp | ./lz4 -d > tmp2 + diff -s tmp tmp2 + @rm tmp* + +test-lz4-frame-concatenation: lz4 datagen + @echo ---- test frame concatenation ---- @echo -n > empty.test @echo hi > nonempty.test cat nonempty.test empty.test nonempty.test > orig.test @@ -156,20 +178,40 @@ test-lz4: lz4 datagen sdiff orig.test result.test @rm *.test @echo frame concatenation test completed -# test frame concatenation with null-length frame +test-lz4: lz4 datagen test-lz4-sparse test-lz4-contentSize test-lz4-frame-concatenation + @echo ---- test lz4 basic compression/decompression ---- + ./datagen -g0 | ./lz4 -v | ./lz4 -t + ./datagen -g16KB | ./lz4 -9 | ./lz4 -t + ./datagen | ./lz4 | ./lz4 -t + ./datagen -g6M -P99 | ./lz4 -9BD | ./lz4 -t + ./datagen -g17M | ./lz4 -9v | ./lz4 -tq + ./datagen -g33M | ./lz4 --no-frame-crc | ./lz4 -t + ./datagen -g256MB | ./lz4 -vqB4D | ./lz4 -t + ./datagen -g6GB | ./lz4 -vqB5D | ./lz4 -t + ./datagen -g6GB | ./lz4 -vq9BD | ./lz4 -t + @echo ---- test multiple input files ---- + @./datagen -s1 > file1 + @./datagen -s2 > file2 + @./datagen -s3 > file3 + ./lz4 -f -m file1 file2 file3 + ls -l file* + @rm file1 file2 file3 file1.lz4 file2.lz4 file3.lz4 + @echo ---- test pass-through ---- + ./datagen | ./lz4 -tf test-lz4c: lz4c datagen - ./datagen -g256MB | ./lz4c -l -v | ./lz4c -vdq > $(VOID) - -test-lz4c32: lz4 lz4c32 lz4 datagen - ./datagen -g16KB | ./lz4c32 -9 | ./lz4c32 -vdq > $(VOID) - 
./datagen -g16KB | ./lz4c32 -9 | ./lz4 -vdq > $(VOID) - ./datagen | ./lz4c32 | ./lz4c32 -vdq > $(VOID) - ./datagen | ./lz4c32 | ./lz4 -vdq > $(VOID) - ./datagen -g256MB | ./lz4c32 -vqB4D | ./lz4c32 -vdq > $(VOID) - ./datagen -g256MB | ./lz4c32 -vqB4D | ./lz4 -vdq > $(VOID) - ./datagen -g6GB | ./lz4c32 -vqB5D | ./lz4c32 -vdq > $(VOID) + ./datagen -g256MB | ./lz4c -l -v | ./lz4c -t + +test-lz4c32: lz4 lz4c32 datagen + ./datagen -g16KB | ./lz4c32 -9 | ./lz4c32 -t + ./datagen -g16KB | ./lz4c32 -9 | ./lz4 -t + ./datagen | ./lz4c32 | ./lz4c32 -t + ./datagen | ./lz4c32 | ./lz4 -t + ./datagen -g256MB | ./lz4c32 -vqB4D | ./lz4c32 -t + ./datagen -g256MB | ./lz4c32 -vqB4D | ./lz4 -t + ./datagen -g6GB | ./lz4c32 -vqB5D | ./lz4c32 -t + ./datagen -g6GB | ./lz4c32 -vq9BD | ./lz4 -t test-fullbench: fullbench ./fullbench --no-prompt $(TEST_FILES) @@ -190,13 +232,15 @@ test-frametest32: frametest32 ./frametest32 test-mem: lz4 datagen fuzzer frametest + valgrind --leak-check=yes ./datagen -g50M > $(VOID) ./datagen -g16KB > tmp - valgrind --leak-check=yes ./lz4 -9 -BD -f tmp /dev/null + valgrind --leak-check=yes ./lz4 -9 -BD -f tmp $(VOID) ./datagen -g16MB > tmp - valgrind --leak-check=yes ./lz4 -9 -B5D -f tmp /dev/null + valgrind --leak-check=yes ./lz4 -9 -B5D -f tmp tmp2 ./datagen -g256MB > tmp - valgrind --leak-check=yes ./lz4 -B4D -f -vq tmp /dev/null - rm tmp + valgrind --leak-check=yes ./lz4 -t tmp2 + valgrind --leak-check=yes ./lz4 -B4D -f -vq tmp $(VOID) + rm tmp* valgrind --leak-check=yes ./fuzzer -i64 -t1 valgrind --leak-check=yes ./frametest -i256 diff --git a/programs/bench.c b/programs/bench.c index 02e56c9..e1b5357 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -1,6 +1,7 @@ /* - bench.c - Demo program to benchmark open-source compression algorithm - Copyright (C) Yann Collet 2012-2014 + bench.c - Demo program to benchmark open-source compression algorithms + Copyright (C) Yann Collet 2012-2015 + GPL v2 License This program is free software; you can redistribute 
it and/or modify @@ -18,16 +19,18 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. You can contact the author at : - - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html - - LZ4 source repository : http://code.google.com/p/lz4/ + - LZ4 source repository : https://github.com/Cyan4973/lz4 + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ /************************************** * Compiler Options ***************************************/ -/* Disable some Visual warning messages */ -#define _CRT_SECURE_NO_WARNINGS -#define _CRT_SECURE_NO_DEPRECATE /* VS2005 */ +#if defined(_MSC_VER) || defined(_WIN32) +# define _CRT_SECURE_NO_WARNINGS +# define _CRT_SECURE_NO_DEPRECATE /* VS2005 */ +# define BMK_LEGACY_TIMER 1 /* S_ISREG & gettimeofday() are not supported by MSVC */ +#endif /* Unix Large Files support (>4GB) */ #define _FILE_OFFSET_BITS 64 @@ -37,11 +40,6 @@ # define _LARGEFILE64_SOURCE #endif -/* S_ISREG & gettimeofday() are not supported by MSVC */ -#if defined(_MSC_VER) || defined(_WIN32) -# define BMK_LEGACY_TIMER 1 -#endif - /************************************** * Includes @@ -141,15 +139,15 @@ static int chunkSize = DEFAULT_CHUNKSIZE; static int nbIterations = NBLOOPS; static int BMK_pause = 0; -void BMK_SetBlocksize(int bsize) { chunkSize = bsize; } +void BMK_setBlocksize(int bsize) { chunkSize = bsize; } -void BMK_SetNbIterations(int nbLoops) +void BMK_setNbIterations(int nbLoops) { nbIterations = nbLoops; DISPLAY("- %i iterations -\n", nbIterations); } -void BMK_SetPause(void) { BMK_pause = 1; } +void BMK_setPause(void) { BMK_pause = 1; } /********************************************************* @@ -206,16 +204,21 @@ static size_t BMK_findMaxMem(U64 requiredMem) while (!testmem) { - requiredMem -= step; + if (requiredMem > step) requiredMem -= step; + else requiredMem >>= 1; testmem = (BYTE*) malloc ((size_t)requiredMem); } - free (testmem); - return (size_t) (requiredMem - step); + + /* keep some space available 
*/ + if (requiredMem > step) requiredMem -= step; + else requiredMem >>= 1; + + return (size_t)requiredMem; } -static U64 BMK_GetFileSize(char* infilename) +static U64 BMK_GetFileSize(const char* infilename) { int r; #if defined(_MSC_VER) @@ -234,7 +237,7 @@ static U64 BMK_GetFileSize(char* infilename) * Public function **********************************************************/ -int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel) +int BMK_benchFiles(const char** fileNamesTable, int nbFiles, int cLevel) { int fileIdx=0; char* orig_buff; @@ -265,7 +268,7 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel) while (fileIdx<nbFiles) { FILE* inFile; - char* inFileName; + const char* inFileName; U64 inFileSize; size_t benchedSize; int nbChunks; @@ -286,7 +289,9 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel) /* Memory allocation & restrictions */ inFileSize = BMK_GetFileSize(inFileName); + if (inFileSize==0) { DISPLAY( "file is empty\n"); return 11; } benchedSize = (size_t) BMK_findMaxMem(inFileSize * 2) / 2; + if (benchedSize==0) { DISPLAY( "not enough memory\n"); return 11; } if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize; if (benchedSize < inFileSize) { @@ -295,11 +300,11 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel) /* Alloc */ chunkP = (struct chunkParameters*) malloc(((benchedSize / (size_t)chunkSize)+1) * sizeof(struct chunkParameters)); - orig_buff = (char*)malloc((size_t )benchedSize); + orig_buff = (char*)malloc((size_t)benchedSize); nbChunks = (int) ((int)benchedSize / chunkSize) + 1; maxCompressedChunkSize = LZ4_compressBound(chunkSize); compressedBuffSize = nbChunks * maxCompressedChunkSize; - compressedBuffer = (char*)malloc((size_t )compressedBuffSize); + compressedBuffer = (char*)malloc((size_t)compressedBuffSize); if (!orig_buff || !compressedBuffer) diff --git a/programs/bench.h b/programs/bench.h index d42df68..c04fb17 100644 --- a/programs/bench.h +++ 
b/programs/bench.h @@ -1,6 +1,6 @@ /* bench.h - Demo program to benchmark open-source compression algorithm - Copyright (C) Yann Collet 2012-2014 + Copyright (C) Yann Collet 2012-2015 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,26 +17,17 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. You can contact the author at : - - LZ4 source repository : http://code.google.com/p/lz4/ - - LZ4 public forum : https://group.google.com/forum/#!forum/lz4c + - LZ4 source repository : https://github.com/Cyan4973/lz4 + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ #pragma once -#if defined (__cplusplus) -extern "C" { -#endif - /* Main function */ -int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel); +int BMK_benchFiles(const char** fileNamesTable, int nbFiles, int cLevel); /* Set Parameters */ -void BMK_SetBlocksize(int bsize); -void BMK_SetNbIterations(int nbLoops); -void BMK_SetPause(void); - - +void BMK_setBlocksize(int bsize); +void BMK_setNbIterations(int nbLoops); +void BMK_setPause(void); -#if defined (__cplusplus) -} -#endif diff --git a/programs/datagen.c b/programs/datagen.c index 0f07477..bccb21e 100644 --- a/programs/datagen.c +++ b/programs/datagen.c @@ -19,26 +19,20 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
You can contact the author at : - - LZ4 source repository : http://code.google.com/p/lz4 - - LZ4 source mirror : https://github.com/Cyan4973/lz4 - - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c + - ZSTD source repository : https://github.com/Cyan4973/zstd + - Public forum : https://groups.google.com/forum/#!forum/lz4c */ /************************************** - Remove Visual warning messages +* Includes **************************************/ -#define _CRT_SECURE_NO_WARNINGS // fgets +#include <stdlib.h> /* malloc */ +#include <stdio.h> /* FILE, fwrite */ +#include <string.h> /* memcpy */ /************************************** - Includes -**************************************/ -#include <stdio.h> // fgets, sscanf -#include <string.h> // strcmp - - -/************************************** - Basic Types +* Basic Types **************************************/ #if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ # include <stdint.h> @@ -57,230 +51,173 @@ /************************************** - Constants +* OS-specific Includes **************************************/ -#ifndef LZ4_VERSION -# define LZ4_VERSION "r125" +#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__) +# include <fcntl.h> /* _O_BINARY */ +# include <io.h> /* _setmode, _isatty */ +# define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY) +#else +# define SET_BINARY_MODE(file) #endif -#define KB *(1 <<10) -#define MB *(1 <<20) -#define GB *(1U<<30) - -#define CDG_SIZE_DEFAULT (64 KB) -#define CDG_SEED_DEFAULT 0 -#define CDG_COMPRESSIBILITY_DEFAULT 50 -#define PRIME1 2654435761U -#define PRIME2 2246822519U - /************************************** - Macros +* Constants **************************************/ -#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) -#define DISPLAYLEVEL(l, ...) 
if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } - +#define KB *(1 <<10) -/************************************** - Local Parameters -**************************************/ -static unsigned no_prompt = 0; -static char* programName; -static unsigned displayLevel = 2; +#define PRIME1 2654435761U +#define PRIME2 2246822519U /********************************************************* - functions +* Local Functions *********************************************************/ - -#define CDG_rotl32(x,r) ((x << r) | (x >> (32 - r))) -static unsigned int CDG_rand(U32* src) +#define RDG_rotl32(x,r) ((x << r) | (x >> (32 - r))) +static unsigned int RDG_rand(U32* src) { U32 rand32 = *src; rand32 *= PRIME1; - rand32 += PRIME2; - rand32 = CDG_rotl32(rand32, 13); + rand32 ^= PRIME2; + rand32 = RDG_rotl32(rand32, 13); *src = rand32; return rand32; } -#define CDG_RAND15BITS ((CDG_rand(seed) >> 3) & 32767) -#define CDG_RANDLENGTH ( ((CDG_rand(seed) >> 7) & 3) ? (CDG_rand(seed) % 14) : (CDG_rand(seed) & 511) + 15) -#define CDG_RANDCHAR (((CDG_rand(seed) >> 9) & 63) + '0') -static void CDG_generate(U64 size, U32* seed, double proba) +#define LTSIZE 8192 +#define LTMASK (LTSIZE-1) +static void* RDG_createLiteralDistrib(double ld) { - BYTE fullbuff[32 KB + 128 KB + 1]; - BYTE* buff = fullbuff + 32 KB; - U64 total=0; - U32 P32 = (U32)(32768 * proba); - U32 pos=1; - U32 genBlockSize = 128 KB; + BYTE* lt = (BYTE*)malloc(LTSIZE); + U32 i = 0; + BYTE character = '0'; + BYTE firstChar = '('; + BYTE lastChar = '}'; - // Build initial prefix - fullbuff[0] = CDG_RANDCHAR; - while (pos<32 KB) + if (ld==0.0) { - // Select : Literal (char) or Match (within 32K) - if (CDG_RAND15BITS < P32) - { - // Copy (within 64K) - U32 d; - int ref; - int length = CDG_RANDLENGTH + 4; - U32 offset = CDG_RAND15BITS + 1; - if (offset > pos) offset = pos; - ref = pos - offset; - d = pos + length; - while (pos < d) fullbuff[pos++] = fullbuff[ref++]; - } - else + character = 0; + firstChar = 0; + lastChar =255; + } + while 
(i<LTSIZE) + { + U32 weight = (U32)((double)(LTSIZE - i) * ld) + 1; + U32 end; + if (weight + i > LTSIZE) weight = LTSIZE-i; + end = i + weight; + while (i < end) lt[i++] = character; + character++; + if (character > lastChar) character = firstChar; + } + return lt; +} + +static char RDG_genChar(U32* seed, const void* ltctx) +{ + const BYTE* lt = (const BYTE*)ltctx; + U32 id = RDG_rand(seed) & LTMASK; + return lt[id]; +} + +#define RDG_DICTSIZE (32 KB) +#define RDG_RAND15BITS ((RDG_rand(seed) >> 3) & 32767) +#define RDG_RANDLENGTH ( ((RDG_rand(seed) >> 7) & 7) ? (RDG_rand(seed) & 15) : (RDG_rand(seed) & 511) + 15) +void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, void* litTable, unsigned* seedPtr) +{ + BYTE* buffPtr = (BYTE*)buffer; + const U32 matchProba32 = (U32)(32768 * matchProba); + size_t pos = prefixSize; + void* ldctx = litTable; + U32* seed = seedPtr; + + /* special case */ + while (matchProba >= 1.0) + { + size_t size0 = RDG_rand(seed) & 3; + size0 = (size_t)1 << (16 + size0 * 2); + size0 += RDG_rand(seed) & (size0-1); /* because size0 is power of 2*/ + if (buffSize < pos + size0) { - // Literal (noise) - U32 d = pos + CDG_RANDLENGTH; - while (pos < d) fullbuff[pos++] = CDG_RANDCHAR; + memset(buffPtr+pos, 0, buffSize-pos); + return; } + memset(buffPtr+pos, 0, size0); + pos += size0; + buffPtr[pos-1] = RDG_genChar(seed, ldctx); } - // Generate compressible data - pos = 0; - while (total < size) + /* init */ + if (pos==0) buffPtr[0] = RDG_genChar(seed, ldctx), pos=1; + + /* Generate compressible data */ + while (pos < buffSize) { - if (size-total < 128 KB) genBlockSize = (U32)(size-total); - total += genBlockSize; - buff[genBlockSize] = 0; - pos = 0; - while (pos<genBlockSize) + /* Select : Literal (char) or Match (within 32K) */ + if (RDG_RAND15BITS < matchProba32) + { + /* Copy (within 32K) */ + size_t match; + size_t d; + int length = RDG_RANDLENGTH + 4; + U32 offset = RDG_RAND15BITS + 1; + if (offset > pos) offset = 
(U32)pos; + match = pos - offset; + d = pos + length; + if (d > buffSize) d = buffSize; + while (pos < d) buffPtr[pos++] = buffPtr[match++]; + } + else { - // Select : Literal (char) or Match (within 32K) - if (CDG_RAND15BITS < P32) - { - // Copy (within 64K) - int ref; - U32 d; - int length = CDG_RANDLENGTH + 4; - U32 offset = CDG_RAND15BITS + 1; - if (pos + length > genBlockSize ) length = genBlockSize - pos; - ref = pos - offset; - d = pos + length; - while (pos < d) buff[pos++] = buff[ref++]; - } - else - { - // Literal (noise) - U32 d; - int length = CDG_RANDLENGTH; - if (pos + length > genBlockSize) length = genBlockSize - pos; - d = pos + length; - while (pos < d) buff[pos++] = CDG_RANDCHAR; - } + /* Literal (noise) */ + size_t d; + size_t length = RDG_RANDLENGTH; + d = pos + length; + if (d > buffSize) d = buffSize; + while (pos < d) buffPtr[pos++] = RDG_genChar(seed, ldctx); } - // output datagen - pos=0; - for (;pos+512<=genBlockSize;pos+=512) - printf("%512.512s", buff+pos); - for (;pos<genBlockSize;pos++) printf("%c", buff[pos]); - // Regenerate prefix - memcpy(fullbuff, buff + 96 KB, 32 KB); } } -int CDG_usage(void) +void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed) { - DISPLAY( "Compressible data generator\n"); - DISPLAY( "Usage :\n"); - DISPLAY( " %s [size] [args]\n", programName); - DISPLAY( "\n"); - DISPLAY( "Arguments :\n"); - DISPLAY( " -g# : generate # data (default:%i)\n", CDG_SIZE_DEFAULT); - DISPLAY( " -s# : Select seed (default:%i)\n", CDG_SEED_DEFAULT); - DISPLAY( " -p# : Select compressibility in %% (default:%i%%)\n", CDG_COMPRESSIBILITY_DEFAULT); - DISPLAY( " -h : display help and exit\n"); - return 0; + void* ldctx; + if (litProba==0.0) litProba = matchProba / 4.5; + ldctx = RDG_createLiteralDistrib(litProba); + RDG_genBlock(buffer, size, 0, matchProba, ldctx, &seed); + free(ldctx); } -int main(int argc, char** argv) +#define RDG_BLOCKSIZE (128 KB) +void RDG_genOut(unsigned long long size, 
double matchProba, double litProba, unsigned seed) { - int argNb; - int proba = CDG_COMPRESSIBILITY_DEFAULT; - U64 size = CDG_SIZE_DEFAULT; - U32 seed = CDG_SEED_DEFAULT; - - // Check command line - programName = argv[0]; - for(argNb=1; argNb<argc; argNb++) - { - char* argument = argv[argNb]; - - if(!argument) continue; // Protection if argument empty + BYTE buff[RDG_DICTSIZE + RDG_BLOCKSIZE]; + U64 total = 0; + size_t genBlockSize = RDG_BLOCKSIZE; + void* ldctx; - // Decode command (note : aggregated commands are allowed) - if (*argument=='-') - { - if (!strcmp(argument, "--no-prompt")) { no_prompt=1; continue; } + /* init */ + if (litProba==0.0) litProba = matchProba / 4.5; + ldctx = RDG_createLiteralDistrib(litProba); + SET_BINARY_MODE(stdout); - argument++; - while (*argument!=0) - { - switch(*argument) - { - case 'h': - return CDG_usage(); - case 'g': - argument++; - size=0; - while ((*argument>='0') && (*argument<='9')) - { - size *= 10; - size += *argument - '0'; - argument++; - } - if (*argument=='K') { size <<= 10; argument++; } - if (*argument=='M') { size <<= 20; argument++; } - if (*argument=='G') { size <<= 30; argument++; } - if (*argument=='B') { argument++; } - break; - case 's': - argument++; - seed=0; - while ((*argument>='0') && (*argument<='9')) - { - seed *= 10; - seed += *argument - '0'; - argument++; - } - break; - case 'p': - argument++; - proba=0; - while ((*argument>='0') && (*argument<='9')) - { - proba *= 10; - proba += *argument - '0'; - argument++; - } - if (proba<0) proba=0; - if (proba>100) proba=100; - break; - case 'v': - displayLevel = 4; - argument++; - break; - default: ; - } - } + /* Generate dict */ + RDG_genBlock(buff, RDG_DICTSIZE, 0, matchProba, ldctx, &seed); - } + /* Generate compressible data */ + while (total < size) + { + RDG_genBlock(buff, RDG_DICTSIZE+RDG_BLOCKSIZE, RDG_DICTSIZE, matchProba, ldctx, &seed); + if (size-total < RDG_BLOCKSIZE) genBlockSize = (size_t)(size-total); + total += genBlockSize; + fwrite(buff, 
1, genBlockSize, stdout); + /* update dict */ + memcpy(buff, buff + RDG_BLOCKSIZE, RDG_DICTSIZE); } - // Get Seed - DISPLAYLEVEL(4, "Data Generator %s \n", LZ4_VERSION); - DISPLAYLEVEL(3, "Seed = %u \n", seed); - if (proba!=CDG_COMPRESSIBILITY_DEFAULT) DISPLAYLEVEL(3, "Compressibility : %i%%\n", proba); - - CDG_generate(size, &seed, ((double)proba) / 100); - - return 0; + free(ldctx); } diff --git a/programs/datagen.h b/programs/datagen.h new file mode 100644 index 0000000..631d146 --- /dev/null +++ b/programs/datagen.h @@ -0,0 +1,40 @@ +/* + datagen.h - compressible data generator header + Copyright (C) Yann Collet 2012-2015 + + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + You can contact the author at : + - ZSTD source repository : https://github.com/Cyan4973/zstd + - Public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + + +#include <stddef.h> /* size_t */ + +void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed); +void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed); +/* RDG_genOut + Generate 'size' bytes of compressible data into stdout. + Compressibility can be controlled using 'matchProba'. + 'LitProba' is optional, and affect variability of bytes. If litProba==0.0, default value is used. 
+ Generated data can be selected using 'seed'. + If (matchProba, litProba and seed) are equal, the function always generate the same content. + + RDG_genBuffer + Same as RDG_genOut, but generate data into provided buffer +*/ diff --git a/programs/datagencli.c b/programs/datagencli.c new file mode 100644 index 0000000..601cb0a --- /dev/null +++ b/programs/datagencli.c @@ -0,0 +1,193 @@ +/* + datagencli.c + compressible data command line generator + Copyright (C) Yann Collet 2012-2015 + + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + You can contact the author at : + - ZSTD source repository : https://github.com/Cyan4973/zstd + - Public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + +/************************************** +* Includes +**************************************/ +#include <stdio.h> /* fprintf, stderr */ +#include "datagen.h" /* RDG_generate */ + + +/************************************** +* Basic Types +**************************************/ +#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ +# include <stdint.h> + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; +#endif + + +/************************************** +* Constants +**************************************/ +#ifndef ZSTD_VERSION +# define ZSTD_VERSION "r1" +#endif + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define SIZE_DEFAULT (64 KB) +#define SEED_DEFAULT 0 +#define COMPRESSIBILITY_DEFAULT 50 + + +/************************************** +* Macros +**************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) 
if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } +static unsigned displayLevel = 2; + + +/********************************************************* +* Command line +*********************************************************/ +static int usage(char* programName) +{ + DISPLAY( "Compressible data generator\n"); + DISPLAY( "Usage :\n"); + DISPLAY( " %s [size] [args]\n", programName); + DISPLAY( "\n"); + DISPLAY( "Arguments :\n"); + DISPLAY( " -g# : generate # data (default:%i)\n", SIZE_DEFAULT); + DISPLAY( " -s# : Select seed (default:%i)\n", SEED_DEFAULT); + DISPLAY( " -P# : Select compressibility in %% (default:%i%%)\n", COMPRESSIBILITY_DEFAULT); + DISPLAY( " -h : display help and exit\n"); + DISPLAY( "Special values :\n"); + DISPLAY( " -P0 : generate incompressible noise\n"); + DISPLAY( " -P100 : generate sparse files\n"); + return 0; +} + + +int main(int argc, char** argv) +{ + int argNb; + double proba = (double)COMPRESSIBILITY_DEFAULT / 100; + double litProba = 0.0; + U64 size = SIZE_DEFAULT; + U32 seed = SEED_DEFAULT; + char* programName; + + /* Check command line */ + programName = argv[0]; + for(argNb=1; argNb<argc; argNb++) + { + char* argument = argv[argNb]; + + if(!argument) continue; /* Protection if argument empty */ + + /* Handle commands. 
Aggregated commands are allowed */ + if (*argument=='-') + { + argument++; + while (*argument!=0) + { + switch(*argument) + { + case 'h': + return usage(programName); + case 'g': + argument++; + size=0; + while ((*argument>='0') && (*argument<='9')) + { + size *= 10; + size += *argument - '0'; + argument++; + } + if (*argument=='K') { size <<= 10; argument++; } + if (*argument=='M') { size <<= 20; argument++; } + if (*argument=='G') { size <<= 30; argument++; } + if (*argument=='B') { argument++; } + break; + case 's': + argument++; + seed=0; + while ((*argument>='0') && (*argument<='9')) + { + seed *= 10; + seed += *argument - '0'; + argument++; + } + break; + case 'P': + argument++; + proba=0.0; + while ((*argument>='0') && (*argument<='9')) + { + proba *= 10; + proba += *argument - '0'; + argument++; + } + if (proba>100.) proba=100.; + proba /= 100.; + break; + case 'L': /* hidden argument : Literal distribution probability */ + argument++; + litProba=0.; + while ((*argument>='0') && (*argument<='9')) + { + litProba *= 10; + litProba += *argument - '0'; + argument++; + } + if (litProba>100.) litProba=100.; + litProba /= 100.; + break; + case 'v': + displayLevel = 4; + argument++; + break; + default: + return usage(programName); + } + } + + } + } + + DISPLAYLEVEL(4, "Data Generator %s \n", ZSTD_VERSION); + DISPLAYLEVEL(3, "Seed = %u \n", seed); + if (proba!=COMPRESSIBILITY_DEFAULT) DISPLAYLEVEL(3, "Compressibility : %i%%\n", (U32)(proba*100)); + + RDG_genOut(size, proba, litProba, seed); + DISPLAYLEVEL(1, "\n"); + + return 0; +} diff --git a/programs/frametest.c b/programs/frametest.c index 71490a6..2a087ec 100644 --- a/programs/frametest.c +++ b/programs/frametest.c @@ -1,6 +1,7 @@ /* frameTest - test tool for lz4frame - Copyright (C) Yann Collet 2014 + Copyright (C) Yann Collet 2014-2015 + GPL v2 License This program is free software; you can redistribute it and/or modify @@ -18,38 +19,43 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
You can contact the author at : - - LZ4 source repository : http://code.google.com/p/lz4/ + - LZ4 source repository : https://github.com/Cyan4973/lz4 - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ /************************************** - Compiler specific +* Compiler specific **************************************/ -#define _CRT_SECURE_NO_WARNINGS // fgets #ifdef _MSC_VER /* Visual Studio */ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ # pragma warning(disable : 4146) /* disable: C4146: minus unsigned expression */ #endif -#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) -#ifdef __GNUC__ -# pragma GCC diagnostic ignored "-Wmissing-braces" /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */ -# pragma GCC diagnostic ignored "-Wmissing-field-initializers" /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */ + +/* S_ISREG & gettimeofday() are not supported by MSVC */ +#if defined(_MSC_VER) || defined(_WIN32) +# define FUZ_LEGACY_TIMER 1 #endif /************************************** - Includes +* Includes **************************************/ -#include <stdlib.h> // free -#include <stdio.h> // fgets, sscanf -#include <sys/timeb.h> // timeb -#include <string.h> // strcmp +#include <stdlib.h> /* malloc, free */ +#include <stdio.h> /* fprintf */ +#include <string.h> /* strcmp */ #include "lz4frame_static.h" -#include "xxhash.h" // XXH64 +#include "xxhash.h" /* XXH64 */ + +/* Use ftime() if gettimeofday() is not available on your target */ +#if defined(FUZ_LEGACY_TIMER) +# include <sys/timeb.h> /* timeb, ftime */ +#else +# include <sys/time.h> /* gettimeofday */ +#endif /************************************** - Basic Types +* Basic Types **************************************/ #if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ # include <stdint.h> @@ -67,13 
+73,26 @@ typedef unsigned long long U64; #endif +/* unoptimized version; solves endianess & alignment issues */ +static void FUZ_writeLE32 (void* dstVoidPtr, U32 value32) +{ + BYTE* dstPtr = (BYTE*)dstVoidPtr; + dstPtr[0] = (BYTE)value32; + dstPtr[1] = (BYTE)(value32 >> 8); + dstPtr[2] = (BYTE)(value32 >> 16); + dstPtr[3] = (BYTE)(value32 >> 24); +} + + /************************************** - Constants +* Constants **************************************/ #ifndef LZ4_VERSION # define LZ4_VERSION "" #endif +#define LZ4F_MAGIC_SKIPPABLE_START 0x184D2A50U + #define KB *(1U<<10) #define MB *(1U<<20) #define GB *(1U<<30) @@ -87,7 +106,7 @@ static const U32 prime2 = 2246822519U; /************************************** - Macros +* Macros **************************************/ #define DISPLAY(...) fprintf(stderr, __VA_ARGS__) #define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } @@ -100,7 +119,7 @@ static U32 g_time = 0; /***************************************** - Local Parameters +* Local Parameters *****************************************/ static U32 no_prompt = 0; static char* programName; @@ -109,8 +128,10 @@ static U32 pause = 0; /********************************************************* - Fuzzer functions +* Fuzzer functions *********************************************************/ +#if defined(FUZ_LEGACY_TIMER) + static U32 FUZ_GetMilliStart(void) { struct timeb tb; @@ -120,6 +141,19 @@ static U32 FUZ_GetMilliStart(void) return nCount; } +#else + +static U32 FUZ_GetMilliStart(void) +{ + struct timeval tv; + U32 nCount; + gettimeofday(&tv, NULL); + nCount = (U32) (tv.tv_usec/1000 + (tv.tv_sec & 0xfffff) * 1000); + return nCount; +} + +#endif + static U32 FUZ_GetMilliSpan(U32 nTimeStart) { @@ -151,15 +185,15 @@ static void FUZ_fillCompressibleNoiseBuffer(void* buffer, unsigned bufferSize, d unsigned pos = 0; U32 P32 = (U32)(32768 * proba); - // First Byte + /* First Byte */ BBuffer[pos++] = (BYTE)(FUZ_rand(seed)); while (pos < bufferSize) { 
- // Select : Literal (noise) or copy (within 64K) + /* Select : Literal (noise) or copy (within 64K) */ if (FUZ_RAND15BITS < P32) { - // Copy (within 64K) + /* Copy (within 64K) */ unsigned match, end; unsigned length = FUZ_RANDLENGTH + 4; unsigned offset = FUZ_RAND15BITS + 1; @@ -171,7 +205,7 @@ static void FUZ_fillCompressibleNoiseBuffer(void* buffer, unsigned bufferSize, d } else { - // Literal (noise) + /* Literal (noise) */ unsigned end; unsigned length = FUZ_RANDLENGTH; if (pos + length > bufferSize) length = bufferSize - pos; @@ -203,11 +237,12 @@ int basicTests(U32 seed, double compressibility) void* decodedBuffer; U32 randState = seed; size_t cSize, testSize; - LZ4F_preferences_t prefs = { 0 }; + LZ4F_preferences_t prefs; LZ4F_decompressionContext_t dCtx; U64 crcOrig; // Create compressible test buffer + memset(&prefs, 0, sizeof(prefs)); CNBuffer = malloc(COMPRESSIBLE_NOISE_LENGTH); compressedBuffer = malloc(LZ4F_compressFrameBound(COMPRESSIBLE_NOISE_LENGTH, NULL)); decodedBuffer = malloc(COMPRESSIBLE_NOISE_LENGTH); @@ -236,6 +271,7 @@ int basicTests(U32 seed, double compressibility) DISPLAYLEVEL(3, "Single Block : \n"); errorCode = LZ4F_decompress(dCtx, decodedBuffer, &decodedBufferSize, compressedBuffer, &compressedBufferSize, NULL); + if (LZ4F_isError(errorCode)) goto _output_error; crcDest = XXH64(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, 1); if (crcDest != crcOrig) goto _output_error; DISPLAYLEVEL(3, "Regenerated %i bytes \n", (int)decodedBufferSize); @@ -345,6 +381,131 @@ int basicTests(U32 seed, double compressibility) if (LZ4F_isError(cSize)) goto _output_error; DISPLAYLEVEL(3, "Compressed %i bytes into a %i bytes frame \n", (int)testSize, (int)cSize); + { + size_t errorCode; + BYTE* const ostart = (BYTE*)compressedBuffer; + BYTE* op = ostart; + LZ4F_compressionContext_t cctx; + errorCode = LZ4F_createCompressionContext(&cctx, LZ4F_VERSION); + if (LZ4F_isError(errorCode)) goto _output_error; + + DISPLAYLEVEL(3, "compress without frameSize : \n"); 
+ memset(&(prefs.frameInfo), 0, sizeof(prefs.frameInfo)); + errorCode = LZ4F_compressBegin(cctx, compressedBuffer, testSize, &prefs); + if (LZ4F_isError(errorCode)) goto _output_error; + op += errorCode; + errorCode = LZ4F_compressUpdate(cctx, op, LZ4F_compressBound(testSize, &prefs), CNBuffer, testSize, NULL); + if (LZ4F_isError(errorCode)) goto _output_error; + op += errorCode; + errorCode = LZ4F_compressEnd(cctx, compressedBuffer, testSize, NULL); + if (LZ4F_isError(errorCode)) goto _output_error; + DISPLAYLEVEL(3, "Compressed %i bytes into a %i bytes frame \n", (int)testSize, (int)(op-ostart)); + + DISPLAYLEVEL(3, "compress with frameSize : \n"); + prefs.frameInfo.frameOSize = testSize; + op = ostart; + errorCode = LZ4F_compressBegin(cctx, compressedBuffer, testSize, &prefs); + if (LZ4F_isError(errorCode)) goto _output_error; + op += errorCode; + errorCode = LZ4F_compressUpdate(cctx, op, LZ4F_compressBound(testSize, &prefs), CNBuffer, testSize, NULL); + if (LZ4F_isError(errorCode)) goto _output_error; + op += errorCode; + errorCode = LZ4F_compressEnd(cctx, compressedBuffer, testSize, NULL); + if (LZ4F_isError(errorCode)) goto _output_error; + DISPLAYLEVEL(3, "Compressed %i bytes into a %i bytes frame \n", (int)testSize, (int)(op-ostart)); + + DISPLAYLEVEL(3, "compress with wrong frameSize : \n"); + prefs.frameInfo.frameOSize = testSize+1; + op = ostart; + errorCode = LZ4F_compressBegin(cctx, compressedBuffer, testSize, &prefs); + if (LZ4F_isError(errorCode)) goto _output_error; + op += errorCode; + errorCode = LZ4F_compressUpdate(cctx, op, LZ4F_compressBound(testSize, &prefs), CNBuffer, testSize, NULL); + if (LZ4F_isError(errorCode)) goto _output_error; + op += errorCode; + errorCode = LZ4F_compressEnd(cctx, op, testSize, NULL); + if (LZ4F_isError(errorCode)) { DISPLAYLEVEL(3, "Error correctly detected : %s \n", LZ4F_getErrorName(errorCode)); } + else + goto _output_error; + + errorCode = LZ4F_freeCompressionContext(cctx); + if (LZ4F_isError(errorCode)) goto 
_output_error; + } + + DISPLAYLEVEL(3, "Skippable frame test : \n"); + { + size_t decodedBufferSize = COMPRESSIBLE_NOISE_LENGTH; + unsigned maxBits = FUZ_highbit((U32)decodedBufferSize); + BYTE* op = (BYTE*)decodedBuffer; + BYTE* const oend = (BYTE*)decodedBuffer + COMPRESSIBLE_NOISE_LENGTH; + BYTE* ip = (BYTE*)compressedBuffer; + BYTE* iend = (BYTE*)compressedBuffer + cSize + 8; + + LZ4F_errorCode_t errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION); + if (LZ4F_isError(errorCode)) goto _output_error; + + /* generate skippable frame */ + FUZ_writeLE32(ip, LZ4F_MAGIC_SKIPPABLE_START); + FUZ_writeLE32(ip+4, (U32)cSize); + + DISPLAYLEVEL(3, "random segment sizes : \n"); + while (ip < iend) + { + unsigned nbBits = FUZ_rand(&randState) % maxBits; + size_t iSize = (FUZ_rand(&randState) & ((1<<nbBits)-1)) + 1; + size_t oSize = oend-op; + if (iSize > (size_t)(iend-ip)) iSize = iend-ip; + errorCode = LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL); + if (LZ4F_isError(errorCode)) goto _output_error; + op += oSize; + ip += iSize; + } + DISPLAYLEVEL(3, "Skipped %i bytes \n", (int)decodedBufferSize); + + /* generate zero-size skippable frame */ + DISPLAYLEVEL(3, "zero-size skippable frame\n"); + ip = (BYTE*)compressedBuffer; + op = (BYTE*)decodedBuffer; + FUZ_writeLE32(ip, LZ4F_MAGIC_SKIPPABLE_START+1); + FUZ_writeLE32(ip+4, 0); + iend = ip+8; + + while (ip < iend) + { + unsigned nbBits = FUZ_rand(&randState) % maxBits; + size_t iSize = (FUZ_rand(&randState) & ((1<<nbBits)-1)) + 1; + size_t oSize = oend-op; + if (iSize > (size_t)(iend-ip)) iSize = iend-ip; + errorCode = LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL); + if (LZ4F_isError(errorCode)) goto _output_error; + op += oSize; + ip += iSize; + } + DISPLAYLEVEL(3, "Skipped %i bytes \n", (int)(ip - (BYTE*)compressedBuffer - 8)); + + DISPLAYLEVEL(3, "Skippable frame header complete in first call \n"); + ip = (BYTE*)compressedBuffer; + op = (BYTE*)decodedBuffer; + FUZ_writeLE32(ip, 
LZ4F_MAGIC_SKIPPABLE_START+2); + FUZ_writeLE32(ip+4, 10); + iend = ip+18; + while (ip < iend) + { + size_t iSize = 10; + size_t oSize = 10; + if (iSize > (size_t)(iend-ip)) iSize = iend-ip; + errorCode = LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL); + if (LZ4F_isError(errorCode)) goto _output_error; + op += oSize; + ip += iSize; + } + DISPLAYLEVEL(3, "Skipped %i bytes \n", (int)(ip - (BYTE*)compressedBuffer - 8)); + + /* release memory */ + errorCode = LZ4F_freeDecompressionContext(dCtx); + if (LZ4F_isError(errorCode)) goto _output_error; + } + DISPLAY("Basic tests completed \n"); _end: free(CNBuffer); @@ -391,7 +552,7 @@ int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, double compressi # define CHECK(cond, ...) if (cond) { DISPLAY("Error => "); DISPLAY(__VA_ARGS__); \ DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); goto _output_error; } - // Create buffers + /* Create buffers */ result = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION); CHECK(LZ4F_isError(result), "Allocation failed (error %i)", (int)result); result = LZ4F_createCompressionContext(&cCtx, LZ4F_VERSION); @@ -400,14 +561,14 @@ int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, double compressi CHECK(srcBuffer==NULL, "srcBuffer Allocation failed"); compressedBuffer = malloc(LZ4F_compressFrameBound(srcDataLength, NULL)); CHECK(compressedBuffer==NULL, "compressedBuffer Allocation failed"); - decodedBuffer = malloc(srcDataLength); + decodedBuffer = calloc(1, srcDataLength); /* calloc avoids decodedBuffer being considered "garbage" by scan-build */ CHECK(decodedBuffer==NULL, "decodedBuffer Allocation failed"); FUZ_fillCompressibleNoiseBuffer(srcBuffer, srcDataLength, compressibility, &coreRand); - // jump to requested testNb + /* jump to requested testNb */ for (testNb =0; testNb < startTest; testNb++) (void)FUZ_rand(&coreRand); // sync randomizer - // main fuzzer loop + /* main fuzzer test loop */ for ( ; testNb < nbTests; testNb++) { U32 randState = coreRand ^ 
prime1; @@ -415,28 +576,41 @@ int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, double compressi unsigned BMId = FUZ_rand(&randState) & 1; unsigned CCflag = FUZ_rand(&randState) & 1; unsigned autoflush = (FUZ_rand(&randState) & 7) == 2; - LZ4F_preferences_t prefs = { 0 }; - LZ4F_compressOptions_t cOptions = { 0 }; - LZ4F_decompressOptions_t dOptions = { 0 }; + LZ4F_preferences_t prefs; + LZ4F_compressOptions_t cOptions; + LZ4F_decompressOptions_t dOptions; unsigned nbBits = (FUZ_rand(&randState) % (FUZ_highbit(srcDataLength-1) - 1)) + 1; size_t srcSize = (FUZ_rand(&randState) & ((1<<nbBits)-1)) + 1; size_t srcStart = FUZ_rand(&randState) % (srcDataLength - srcSize); + U64 frameContentSize = ((FUZ_rand(&randState) & 0xF) == 1) ? srcSize : 0; size_t cSize; U64 crcOrig, crcDecoded; LZ4F_preferences_t* prefsPtr = &prefs; - (void)FUZ_rand(&coreRand); // update rand seed + (void)FUZ_rand(&coreRand); /* update seed */ + memset(&prefs, 0, sizeof(prefs)); + memset(&cOptions, 0, sizeof(cOptions)); + memset(&dOptions, 0, sizeof(dOptions)); prefs.frameInfo.blockMode = (blockMode_t)BMId; prefs.frameInfo.blockSizeID = (blockSizeID_t)BSId; prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)CCflag; + prefs.frameInfo.frameOSize = frameContentSize; prefs.autoFlush = autoflush; prefs.compressionLevel = FUZ_rand(&randState) % 5; - if ((FUZ_rand(&randState)&0xF) == 1) prefsPtr = NULL; + if ((FUZ_rand(&randState) & 0xF) == 1) prefsPtr = NULL; DISPLAYUPDATE(2, "\r%5u ", testNb); - crcOrig = XXH64((BYTE*)srcBuffer+srcStart, (U32)srcSize, 1); + crcOrig = XXH64((BYTE*)srcBuffer+srcStart, srcSize, 1); - if ((FUZ_rand(&randState)&0xF) == 2) + if ((FUZ_rand(&randState) & 0xFFF) == 0) + { + /* create a skippable frame (rare case) */ + BYTE* op = (BYTE*)compressedBuffer; + FUZ_writeLE32(op, LZ4F_MAGIC_SKIPPABLE_START + (FUZ_rand(&randState) & 15)); + FUZ_writeLE32(op+4, srcSize); + cSize = srcSize+8; + } + else if ((FUZ_rand(&randState) & 0xF) == 2) { cSize = 
LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(srcSize, prefsPtr), (char*)srcBuffer + srcStart, srcSize, prefsPtr); CHECK(LZ4F_isError(cSize), "LZ4F_compressFrame failed : error %i (%s)", (int)cSize, LZ4F_getErrorName(cSize)); @@ -483,6 +657,7 @@ int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, double compressi const BYTE* const iend = ip + cSize; BYTE* op = (BYTE*)decodedBuffer; BYTE* const oend = op + srcDataLength; + size_t totalOut = 0; unsigned maxBits = FUZ_highbit((U32)cSize); unsigned nonContiguousDst = (FUZ_rand(&randState) & 3) == 1; nonContiguousDst += FUZ_rand(&randState) & nonContiguousDst; /* 0=>0; 1=>1,2 */ @@ -497,22 +672,23 @@ int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, double compressi if (oSize > (size_t)(oend-op)) oSize = oend-op; dOptions.stableDst = FUZ_rand(&randState) & 1; if (nonContiguousDst==2) dOptions.stableDst = 0; - //if (ip == compressedBuffer+62073) DISPLAY("oSize : %i : pos %i \n", (int)oSize, (int)(op-(BYTE*)decodedBuffer)); result = LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, &dOptions); - //if (op+oSize >= (BYTE*)decodedBuffer+94727) DISPLAY("iSize : %i : pos %i \n", (int)iSize, (int)(ip-(BYTE*)compressedBuffer)); - //if ((int)result<0) DISPLAY("iSize : %i : pos %i \n", (int)iSize, (int)(ip-(BYTE*)compressedBuffer)); if (result == (size_t)-ERROR_checksum_invalid) locateBuffDiff((BYTE*)srcBuffer+srcStart, decodedBuffer, srcSize, nonContiguousDst); CHECK(LZ4F_isError(result), "Decompression failed (error %i:%s)", (int)result, LZ4F_getErrorName((LZ4F_errorCode_t)result)); XXH64_update(&xxh64, op, (U32)oSize); + totalOut += oSize; op += oSize; ip += iSize; op += nonContiguousDst; - if (nonContiguousDst==2) op = decodedBuffer; // overwritten destination + if (nonContiguousDst==2) op = (BYTE*)decodedBuffer; /* overwritten destination */ } CHECK(result != 0, "Frame decompression failed (error %i)", (int)result); - crcDecoded = XXH64_digest(&xxh64); - if (crcDecoded != crcOrig) 
locateBuffDiff((BYTE*)srcBuffer+srcStart, decodedBuffer, srcSize, nonContiguousDst); - CHECK(crcDecoded != crcOrig, "Decompression corruption"); + if (totalOut) /* otherwise, it's a skippable frame */ + { + crcDecoded = XXH64_digest(&xxh64); + if (crcDecoded != crcOrig) locateBuffDiff((BYTE*)srcBuffer+srcStart, decodedBuffer, srcSize, nonContiguousDst); + CHECK(crcDecoded != crcOrig, "Decompression corruption"); + } } } diff --git a/programs/fullbench.c b/programs/fullbench.c index b785924..0c6e05e 100644 --- a/programs/fullbench.c +++ b/programs/fullbench.c @@ -19,17 +19,16 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. You can contact the author at : - - LZ4 source repository : http://code.google.com/p/lz4 - - LZ4 source mirror : https://github.com/Cyan4973/lz4 + - LZ4 source repository : https://github.com/Cyan4973/lz4 - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ -//************************************** -// Compiler Options -//************************************** -// Disable some Visual warning messages +/************************************** +* Compiler Options +**************************************/ +/* Disable some Visual warning messages */ #define _CRT_SECURE_NO_WARNINGS -#define _CRT_SECURE_NO_DEPRECATE // VS2005 +#define _CRT_SECURE_NO_DEPRECATE /* VS2005 */ // Unix Large Files support (>4GB) #if (defined(__sun__) && (!defined(__LP64__))) // Sun Solaris 32-bits requires specific definitions @@ -45,9 +44,9 @@ #endif -//************************************** -// Includes -//************************************** +/************************************** +* Includes +**************************************/ #include <stdlib.h> // malloc #include <stdio.h> // fprintf, fopen, ftello64 #include <sys/types.h> // stat64 @@ -68,10 +67,10 @@ #include "xxhash.h" -//************************************** -// Compiler Options -//************************************** -// S_ISREG & gettimeofday() are not supported by MSVC 
+/************************************** +* Compiler Options +**************************************/ +/* S_ISREG & gettimeofday() are not supported by MSVC */ #if !defined(S_ISREG) # define S_ISREG(x) (((x) & S_IFMT) == S_IFREG) #endif @@ -82,9 +81,9 @@ #endif -//************************************** -// Basic Types -//************************************** +/************************************** +* Basic Types +**************************************/ #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 # include <stdint.h> typedef uint8_t BYTE; @@ -101,9 +100,9 @@ #endif -//**************************** -// Constants -//**************************** +/************************************** +* Constants +**************************************/ #define PROGRAM_DESCRIPTION "LZ4 speed analyzer" #ifndef LZ4_VERSION # define LZ4_VERSION "" @@ -114,17 +113,21 @@ #define NBLOOPS 6 #define TIMELOOP 2500 +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + #define KNUTH 2654435761U -#define MAX_MEM (1984<<20) -#define DEFAULT_CHUNKSIZE (4<<20) +#define MAX_MEM (1984 MB) +#define DEFAULT_CHUNKSIZE (4 MB) #define ALL_COMPRESSORS 0 #define ALL_DECOMPRESSORS 0 -//************************************** -// Local structures -//************************************** +/************************************** +* Local structures +**************************************/ struct chunkParameters { U32 id; @@ -135,9 +138,9 @@ struct chunkParameters }; -//************************************** -// MACRO -//************************************** +/************************************** +* MACRO +**************************************/ #define DISPLAY(...) fprintf(stderr, __VA_ARGS__) #define PROGRESS(...) no_prompt ? 
0 : DISPLAY(__VA_ARGS__) @@ -217,21 +220,26 @@ static int BMK_GetMilliSpan( int nTimeStart ) static size_t BMK_findMaxMem(U64 requiredMem) { - size_t step = (64U<<20); // 64 MB + size_t step = 64 MB; BYTE* testmem=NULL; - requiredMem = (((requiredMem >> 25) + 1) << 26); + requiredMem = (((requiredMem >> 26) + 1) << 26); + requiredMem += 2*step; if (requiredMem > MAX_MEM) requiredMem = MAX_MEM; - requiredMem += 2*step; while (!testmem) { - requiredMem -= step; + if (requiredMem > step) requiredMem -= step; + else requiredMem >>= 1; testmem = (BYTE*) malloc ((size_t)requiredMem); } - free (testmem); - return (size_t) (requiredMem - step); + + /* keep some space available */ + if (requiredMem > step) requiredMem -= step; + else requiredMem >>= 1; + + return (size_t)requiredMem; } @@ -251,8 +259,127 @@ static U64 BMK_GetFileSize(char* infilename) /********************************************************* - Benchmark function +* Benchmark function *********************************************************/ +#ifdef __SSSE3__ + +#include <tmmintrin.h> + +/* Idea proposed by Terje Mathisen */ +static BYTE stepSize16[17] = {16,16,16,15,16,15,12,14,16,9,10,11,12,13,14,15,16}; +static __m128i replicateTable[17] = { + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1}, + {0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0}, + {0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3}, + {0,1,2,3,4,0,1,2,3,4,0,1,2,3,4,0}, + {0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3}, + {0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1}, + {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7}, + {0,1,2,3,4,5,6,7,8,0,1,2,3,4,5,6}, + {0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5}, + {0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4}, + {0,1,2,3,4,5,6,7,8,9,10,11,0,1,2,3}, + {0,1,2,3,4,5,6,7,8,9,10,11,12,0,1,2}, + {0,1,2,3,4,5,6,7,8,9,10,11,12,13,0,1}, + {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0}, + {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}}; +static BYTE stepSize32[17] = {32,32,32,30,32,30,30,28,32,27,30,22,24,26,28,30,16}; +static __m128i replicateTable2[17] = { + 
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1}, + {1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1}, + {0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3}, + {1,2,3,4,0,1,2,3,4,0,1,2,3,4,0,1}, + {4,5,0,1,2,3,4,5,0,1,2,3,4,5,0,1}, + {2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3}, + {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7}, + {7,8,0,1,2,3,4,5,6,7,8,0,1,2,3,4}, + {6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1}, + {5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9}, + {4,5,6,7,8,9,10,11,0,1,2,3,4,5,6,7}, + {3,4,5,6,7,8,9,10,11,12,0,1,2,3,4,5}, + {2,3,4,5,6,7,8,9,10,11,12,13,0,1,2,3}, + {1,2,3,4,5,6,7,8,9,10,11,12,13,14,0,1}, + {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}}; + +U32 lz4_decode_sse(BYTE* dest, BYTE* src, U32 srcLength) +{ + BYTE* d = dest, *e = src+srcLength; + unsigned token, lit_len, mat_len; + __m128i a; + BYTE* dstore, *msrc; + + if (!srcLength) return 0; + goto start; + + do { + U32 step; + unsigned mat_offset = src[0] + (src[1] << 8); + src += 2; + msrc = d - mat_offset; + if (mat_len == 15) { + do { + token = *src++; + mat_len += token; + } while (token == 255); + } + mat_len += 4; + + dstore = d; + d += mat_len; + + if (mat_offset <= 16) + { // Bulk store only! 
+ __m128i a2; + a = _mm_loadu_si128((const __m128i *)msrc); + a2 = _mm_shuffle_epi8(a, replicateTable2[mat_offset]); + a = _mm_shuffle_epi8(a, replicateTable[mat_offset]); + step = stepSize32[mat_offset]; + do { + _mm_storeu_si128((__m128i *)dstore, a); + _mm_storeu_si128((__m128i *)(dstore+16), a2); + dstore += step; + } while (dstore < d); + } + else + { + do + { + a = _mm_loadu_si128((const __m128i *)msrc); + _mm_storeu_si128((__m128i *)dstore, a); + msrc += sizeof(a); + dstore += sizeof(a); + } while (dstore < d); + } +start: + token = *src++; + lit_len = token >> 4; + mat_len = token & 15; + if (token >= 0xf0) { // lit_len == 15 + do { + token = *src++; + lit_len += token; + } while (token == 255); + } + dstore = d; + msrc = src; + d += lit_len; + src += lit_len; + do { + a = _mm_loadu_si128((const __m128i *)msrc); + _mm_storeu_si128((__m128i *)dstore, a); + msrc += sizeof(a); + dstore += sizeof(a); + } while (dstore < d); + } while (src < e); + + return (U32)(d-dest); +} +#endif // __SSSE3__ + static int local_LZ4_compress_limitedOutput(const char* in, char* out, int inSize) { @@ -345,6 +472,7 @@ static int local_LZ4_saveDictHC(const char* in, char* out, int inSize) static int local_LZ4_decompress_fast(const char* in, char* out, int inSize, int outSize) { (void)inSize; + //lz4_decode_sse((BYTE*)out, (BYTE*)in, inSize); LZ4_decompress_fast(in, out, outSize); return outSize; } @@ -446,7 +574,9 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) // Memory allocation & restrictions inFileSize = BMK_GetFileSize(inFileName); + if (inFileSize==0) { DISPLAY( "file is empty\n"); return 11; } benchedSize = (size_t) BMK_findMaxMem(inFileSize) / 2; + if (benchedSize==0) { DISPLAY( "not enough memory\n"); return 11; } if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize; if (benchedSize < inFileSize) { @@ -567,7 +697,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) milliTime = BMK_GetMilliStart(); while(BMK_GetMilliSpan(milliTime) < 
TIMELOOP) { - if (initFunction!=NULL) ctx = initFunction(chunkP[0].origBuffer); + if (initFunction!=NULL) ctx = (LZ4_stream_t*)initFunction(chunkP[0].origBuffer); for (chunkNb=0; chunkNb<nbChunks; chunkNb++) { chunkP[chunkNb].compressedSize = compressionFunction(chunkP[chunkNb].origBuffer, chunkP[chunkNb].compressedBuffer, chunkP[chunkNb].origSize); @@ -677,7 +807,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) PROGRESS("%1i- %-29.29s :%10i -> %7.1f MB/s\r", loopNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000.); - // CRC Checking + /* CRC Checking */ crcDecoded = XXH32(orig_buff, (int)benchedSize, 0); if (crcOriginal!=crcDecoded) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", inFileName, (unsigned)crcOriginal, (unsigned)crcDecoded); exit(1); } } diff --git a/programs/fuzzer.c b/programs/fuzzer.c index 6d3b077..3d3cf8e 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -25,7 +25,7 @@ */ /************************************** -* Remove Visual warning messages +* Compiler options **************************************/ #ifdef _MSC_VER /* Visual Studio */ # define _CRT_SECURE_NO_WARNINGS /* fgets */ @@ -34,21 +34,32 @@ # pragma warning(disable : 4310) /* disable: C4310: constant char value > 127 */ #endif +/* S_ISREG & gettimeofday() are not supported by MSVC */ +#if defined(_MSC_VER) || defined(_WIN32) +# define FUZ_LEGACY_TIMER 1 +#endif + /************************************** -* Includes +* Includes **************************************/ #include <stdlib.h> #include <stdio.h> /* fgets, sscanf */ -#include <sys/timeb.h> /* timeb */ #include <string.h> /* strcmp */ #include "lz4.h" #include "lz4hc.h" #include "xxhash.h" +/* Use ftime() if gettimeofday() is not available on your target */ +#if defined(FUZ_LEGACY_TIMER) +# include <sys/timeb.h> /* timeb, ftime */ +#else +# include <sys/time.h> /* gettimeofday */ +#endif + /************************************** -* Basic Types +* Basic Types 
**************************************/ #if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ # include <stdint.h> @@ -67,7 +78,7 @@ typedef unsigned long long U64; /************************************** -* Constants +* Constants **************************************/ #ifndef LZ4_VERSION # define LZ4_VERSION "" @@ -88,7 +99,7 @@ typedef unsigned long long U64; /***************************************** -* Macros +* Macros *****************************************/ #define DISPLAY(...) fprintf(stderr, __VA_ARGS__) #define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } @@ -98,8 +109,10 @@ static U32 g_time = 0; /********************************************************* - Fuzzer functions +* Fuzzer functions *********************************************************/ +#if defined(FUZ_LEGACY_TIMER) + static U32 FUZ_GetMilliStart(void) { struct timeb tb; @@ -109,6 +122,20 @@ static U32 FUZ_GetMilliStart(void) return nCount; } +#else + +static U32 FUZ_GetMilliStart(void) +{ + struct timeval tv; + U32 nCount; + gettimeofday(&tv, NULL); + nCount = (U32) (tv.tv_usec/1000 + (tv.tv_sec & 0xfffff) * 1000); + return nCount; +} + +#endif + + static U32 FUZ_GetMilliSpan(U32 nTimeStart) { U32 nCurrent = FUZ_GetMilliStart(); @@ -177,7 +204,7 @@ static void FUZ_fillCompressibleNoiseBuffer(void* buffer, size_t bufferSize, dou #define BLOCKSIZE_I134 (32 MB) static int FUZ_AddressOverflow(void) { - char* buffers[MAX_NB_BUFF_I134+1] = {0}; + char* buffers[MAX_NB_BUFF_I134+1]; int i, nbBuff=0; int highAddress = 0; @@ -296,6 +323,7 @@ static int FUZ_test(U32 seed, const U32 nbCycles, const U32 startCycle, const do U32 crcOrig, crcCheck; U32 coreRandState = seed; U32 randState = coreRandState ^ PRIME3; + int result = 0; // init @@ -661,7 +689,6 @@ static int FUZ_test(U32 seed, const U32 nbCycles, const U32 startCycle, const do // unalloc { - int result = 0; _exit: free(CNBuffer); free(compressedBuffer); @@ -753,7 +780,7 @@ static void 
FUZ_unitTests(void) FUZ_CHECKTEST(result!=(int)messageSize, "ringBuffer : LZ4_decompress_safe() test failed"); XXH64_update(&xxhNew, testVerify + dNext, messageSize); - crcNew = crcOrig = XXH64_digest(&xxhNew); + crcNew = XXH64_digest(&xxhNew); FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() decompression corruption"); // prepare next message @@ -916,7 +943,7 @@ static void FUZ_unitTests(void) FUZ_CHECKTEST(result!=(int)messageSize, "ringBuffer : LZ4_decompress_safe() test failed"); XXH64_update(&xxhNew, testVerify + dNext, messageSize); - crcNew = crcOrig = XXH64_digest(&xxhNew); + crcNew = XXH64_digest(&xxhNew); FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() decompression corruption"); // prepare next message @@ -959,10 +986,10 @@ static void FUZ_unitTests(void) FUZ_CHECKTEST(result!=(int)messageSize, "ringBuffer : LZ4_decompress_safe() test failed"); XXH64_update(&xxhNew, testVerify + dNext, messageSize); - crcNew = crcOrig = XXH64_digest(&xxhNew); + crcNew = XXH64_digest(&xxhNew); FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() decompression corruption"); - // prepare next message + /* prepare next message */ dNext += messageSize; totalMessageSize += messageSize; messageSize = (FUZ_rand(&randState) & maxMessageSizeMask) + 1; @@ -970,62 +997,6 @@ static void FUZ_unitTests(void) if (dNext + messageSize > dBufferSize) dNext = 0; } } - - // long stream test ; Warning : very long test ! 
- if (1) - { - XXH64_state_t crcOrigState; - XXH64_state_t crcNewState; - const U64 totalTestSize = 6ULL << 30; - U64 totalTestDone = 0; - size_t oldStart = 0; - size_t oldSize = 0; - U32 segNb = 1; - - DISPLAY("Long HC streaming test (%u MB)\n", (U32)(totalTestSize >> 20)); - LZ4_resetStreamHC(&sHC, 0); - - XXH64_reset(&crcOrigState, 0); - XXH64_reset(&crcNewState, 0); - - while (totalTestDone < totalTestSize) - { - size_t testSize = (FUZ_rand(&randState) & 65535) + 1; - size_t testStart = FUZ_rand(&randState) & 65535; - - FUZ_displayUpdate((U32)(totalTestDone >> 20)); - - if (testStart == oldStart + oldSize) // Corner case not covered by this test (LZ4_decompress_safe_usingDict() limitation) - testStart++; - - XXH64_update(&crcOrigState, testInput + testStart, testSize); - crcOrig = XXH64_digest(&crcOrigState); - - result = LZ4_compressHC_limitedOutput_continue(&sHC, testInput + testStart, testCompressed, (int)testSize, LZ4_compressBound((int)testSize)); - FUZ_CHECKTEST(result==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result); - - result = LZ4_decompress_safe_usingDict(testCompressed, testVerify, result, (int)testSize, testInput + oldStart, (int)oldSize); - FUZ_CHECKTEST(result!=(int)testSize, "LZ4_decompress_safe_usingDict() dictionary decompression part %u failed", segNb); - - XXH64_update(&crcNewState, testVerify, testSize); - crcNew = XXH64_digest(&crcNewState); - if (crcOrig!=crcNew) - { - size_t c=0; - while (testVerify[c] == testInput[testStart+c]) c++; - DISPLAY("Bad decompression at %u / %u \n", (U32)c, (U32)testSize); - } - FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_usingDict() part %u corruption", segNb); - - oldStart = testStart; - oldSize = testSize; - totalTestDone += testSize; - - segNb ++; - } - - DISPLAY("\r"); - } } printf("All unit tests completed successfully \n"); diff --git a/programs/lz4.1 b/programs/lz4.1 index 6ae8d3c..8bab7a3 100644 --- a/programs/lz4.1 +++ b/programs/lz4.1 @@ 
-1,79 +1,202 @@ \" \" lz4.1: This is a manual page for 'lz4' program. This file is part of the \" lz4 <https://code.google.com/p/lz4/> project. +\" Author: Yann Collet \" \" No hyphenation .hy 0 .nr HY 0 -.TH lz4 "1" "2014-02-27" "lz4" "User Commands" +.TH lz4 "1" "2015-03-21" "lz4" "User Commands" .SH NAME -\fBlz4\fR - Extremely fast compression algorithm +\fBlz4, unlz4, lz4cat\fR \- Compress or decompress .lz4 files .SH SYNOPSIS .TP 5 \fBlz4\fR [\fBOPTIONS\fR] [-|INPUT-FILE] <OUTPUT-FILE> +.PP +.B unlz4 +is equivalent to +.BR "lz4 \-d" +.br +.B lz4cat +is equivalent to +.BR "lz4 \-dc" +.br +.PP +When writing scripts that need to decompress files, +it is recommended to always use the name +.B lz4 +with appropriate arguments +.RB ( "lz4 \-d" +or +.BR "lz4 \-dc" ) +instead of the names +.B unlz4 +and +.BR lz4cat . + .SH DESCRIPTION .PP -\fBlz4\fR is an extremely fast lossless compression algorithm. It is based on -the \fBLZ77\fR family of compression scheme. At the compression speed of 400 -MB/s per core, \fBlz4\fR is also scalable with multi-core CPUs. It features -an extremely fast decoder, with speed in multiple GB/s per core, typically -reaching the RAM speed limits on multi-core systems. \fBlz4\fR supports -following options +\fBlz4\fR is an extremely fast lossless compression algorithm, +based on \fBbyte-aligned LZ77\fR family of compression scheme. +\fBlz4\fR offers compression speeds of 400 MB/s per core, linearly scalable with multi-core CPUs. +It features an extremely fast decoder, with speed in multiple GB/s per core, +typically reaching RAM speed limit on multi-core systems. +.B lz4 +supports a command line syntax similar to +.BR gzip (1). +The native file format is the +.B .lz4 +format. + +.SS "Concatenation of .lz4 files" +It is possible to concatenate +.B .lz4 +files as is. +.B lz4 +will decompress such files as if they were a single +.B .lz4 +file. +.PP .SH OPTIONS +. 
+.SS "Short command concatenation" +In some cases, some options can be expressed using short command +.B "-x" +or long command +.B "--long-word" . +Short commands can be concatenated together. For example, +.B "-d -c" +is equivalent to +.B "-dc" . +Long commands cannot be concatenated. +They must be clearly separated by a space. +. +.SS "Operation mode" +If multiple operation mode options are given, +the last one takes effect. +.TP +.BR \-z ", " \-\-compress +Compress. +This is the default operation mode +when no operation mode option is specified, +no other operation mode is implied from the command name +(for example, +.B unlz4 +implies +.B \-\-decompress ), +nor from the input file name +(for example, a file extension +.B .lz4 +implies +.B \-\-decompress +by default). +.B -z +can also be used to force compression of an already compressed +.B .lz4 +file. +.TP +.BR \-d ", " \-\-decompress ", " \-\-uncompress +Decompress. +.B --decompress +is also the default operation when the input filename has an +.B .lz4 +extension. +.TP +.BR \-t ", " \-\-test +Test the integrity of compressed +.B .lz4 +files. +The decompressed data is discarded. +No files are created or removed. +. +.SS "Operation modifiers" .TP .B \-1 fast compression (default) .TP .B \-9 high compression + .TP -.B \-d - decompression -.TP -.B \-f - overwrite output without prompting +.BR \-f ", " --force + This option has several effects: +.RS +.IP \(bu 3 +If the target file already exists, +overwrite it without prompting. +.IP \(bu 3 +When used with +.B \-\-decompress +and +.B lz4 +cannot recognize the type of the source file, +copy the source file as is to standard output. +This allows +.B lz4cat +.B \-\-force +to be used like +.BR cat (1) +for files that have not been compressed with +.BR lz4 . 
+.RE + .TP -.B \-h/\-H - display help/long help and exit +.BR \-c ", " \--stdout ", " \--to-stdout + force write to standard output, even if it is the console + .TP -.B \-V - display Version number and exit +.BR \-m + Multiple file names. + By default, the second filename is used as the output filename for the compressed file. + With +.B -m +, you can specify any number of input filenames, each of them will be compressed +with the resulting compressed file named +.B filename.lz4 +. + .TP -.B \-v - verbose mode +.B \-B# + block size [4-7](default : 7) + B4= 64KB ; B5= 256KB ; B6= 1MB ; B7= 4MB .TP -.B \-q - suppress warnings; specify twice to suppress errors too +.B \-BD + block dependency (improve compression ratio) .TP -.B \-c - force write to standard output, even if it is the console +.B \--no-frame-crc + disable stream checksum (default:enabled) .TP -.B \-t - test compressed file integrity +.B \--frame-content-size + compressed frame includes original size (default:not present) .TP -.B \-z - force compression +.B \--sparse-support + enable sparse file (default:disabled)(experimental) .TP .B \-l use Legacy format (useful for Linux Kernel compression) +. +.SS "Other options" .TP -.B \-B# - block size [4-7](default : 7) - B4= 64KB ; B5= 256KB ; B6= 1MB ; B7= 4MB +.BR \-v ", " --verbose + verbose mode .TP -.B \-BD - block dependency (improve compression ratio) +.BR \-q ", " --quiet + suppress warnings; specify twice to suppress errors too +.TP +.B \-h/\-H + display help/long help and exit .TP -.B \-BX - enable block checksum (default:disabled) +.BR \-V ", " \--version + display Version number and exit .TP -.B \-Sx - disable stream checksum (default:enabled) +.BR \-k ", " \--keep + Don't delete source file. +This is default behavior anyway, so this option is just for compatibility with gzip/xz. 
.TP .B \-b benchmark file(s) @@ -82,7 +205,7 @@ following options iteration loops [1-9](default : 3), benchmark mode only .SH BUGS -Report bugs at:- https://code.google.com/p/lz4/ +Report bugs at: https://github.com/Cyan4973/lz4 .SH AUTHOR Yann Collet diff --git a/programs/lz4c.1 b/programs/lz4c.1 deleted file mode 100644 index fed6c8b..0000000 --- a/programs/lz4c.1 +++ /dev/null @@ -1,33 +0,0 @@ -\" -\" lz4c.1: This is a manual page for 'lz4c' program. This file is part of the -\" lz4 <https://code.google.com/p/lz4/> project. -\" - -\" No hyphenation -.hy 0 -.nr HY 0 - -.TH lz4c "1" "2014-04-15" "lz4c" "User Commands" -.SH NAME -\fBlz4\fR - Extremely fast compression algorithm - -.SH SYNOPSIS -.TP 5 -\fBlz4c\fR [\fBOPTIONS\fR] [-|INPUT-FILE] <OUTPUT-FILE> - -.SH DESCRIPTION -.PP -\fBlz4c\fR is the legacy version of \fBlz4\fR. -As such, it supports older supplementary legacy commands. -\fBlz4c\fR is now deprecated. -It is recommended to use \fBlz4\fR instead whenever possible. - -To get a list of commands specific to lz4c, do : -lz4c -h - - -.SH BUGS -Report bugs at:- https://code.google.com/p/lz4/ - -.SH AUTHOR -Yann Collet
\ No newline at end of file diff --git a/programs/lz4cat.1 b/programs/lz4cat.1 deleted file mode 100644 index 64ddbc8..0000000 --- a/programs/lz4cat.1 +++ /dev/null @@ -1,32 +0,0 @@ -\" -\" lz4cat.1: This is a manual page for 'lz4cat' program. This file is part of -\" the lz4 <https://code.google.com/p/lz4/> project. -\" - -\" No hyphenation -.hy 0 -.nr HY 0 - -.TH lz4cat "1" "2014-06-20" "lz4cat" "User Commands" -.SH NAME -\fBlz4cat\fR - Utility based on LZ4 - -.SH SYNOPSIS -.TP 5 -\fBlz4cat\fR [\fBOPTIONS\fR] [-|INPUT-FILE] - -.SH DESCRIPTION -.PP -\fBlz4cat\fR is an utility based on \fBlz4\fR, an extremely fast lossless compression algorithm. - -\fBlz4cat\fR decompress input file or stream, redirecting its output to the console. -It is equivalent to \fBlz4 -cd\fR, - -Available options are the same as \fBlz4\fR ones (man lz4). - - -.SH BUGS -Report bugs at:- https://code.google.com/p/lz4/ - -.SH AUTHOR -Yann Collet diff --git a/programs/lz4cli.c b/programs/lz4cli.c index 0da5dce..6c57864 100644 --- a/programs/lz4cli.c +++ b/programs/lz4cli.c @@ -1,6 +1,6 @@ /* LZ4cli - LZ4 Command Line Interface - Copyright (C) Yann Collet 2011-2014 + Copyright (C) Yann Collet 2011-2015 GPL v2 License @@ -19,7 +19,7 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
You can contact the author at : - - LZ4 source repository : http://code.google.com/p/lz4/ + - LZ4 source repository : https://github.com/Cyan4973/lz4 - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ /* @@ -59,23 +59,20 @@ #include <stdlib.h> /* exit, calloc, free */ #include <string.h> /* strcmp, strlen */ #include "bench.h" /* BMK_benchFile, BMK_SetNbIterations, BMK_SetBlocksize, BMK_SetPause */ -#include "lz4io.h" +#include "lz4io.h" /* LZ4IO_compressFilename, LZ4IO_decompressFilename, LZ4IO_compressMultipleFilenames */ /**************************** * OS-specific Includes *****************************/ #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__) -# include <fcntl.h> /* _O_BINARY */ -# include <io.h> /* _setmode, _isatty */ +# include <io.h> /* _isatty */ # ifdef __MINGW32__ int _fileno(FILE *stream); /* MINGW somehow forgets to include this prototype into <stdio.h> */ # endif -# define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY) # define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream)) #else # include <unistd.h> /* isatty */ -# define SET_BINARY_MODE(file) # define IS_CONSOLE(stdStream) isatty(fileno(stdStream)) #endif @@ -85,12 +82,13 @@ ******************************/ #define COMPRESSOR_NAME "LZ4 command line interface" #ifndef LZ4_VERSION -# define LZ4_VERSION "r126" +# define LZ4_VERSION "r128" #endif #define AUTHOR "Yann Collet" #define WELCOME_MESSAGE "*** %s %i-bits %s, by %s (%s) ***\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), LZ4_VERSION, AUTHOR, __DATE__ #define LZ4_EXTENSION ".lz4" -#define LZ4_CAT "lz4cat" +#define LZ4CAT "lz4cat" +#define UNLZ4 "unlz4" #define KB *(1U<<10) #define MB *(1U<<20) @@ -136,10 +134,10 @@ static char* programName; #define EXTENDED_FORMAT #define DEFAULT_COMPRESSOR LZ4IO_compressFilename #define DEFAULT_DECOMPRESSOR LZ4IO_decompressFilename -int LZ4IO_compressFilename_Legacy(char* input_filename, char* output_filename, int 
compressionlevel); /* hidden function */ +int LZ4IO_compressFilename_Legacy(const char* input_filename, const char* output_filename, int compressionlevel); /* hidden function */ -/**************************** +/***************************** * Functions *****************************/ static int usage(void) @@ -170,11 +168,14 @@ static int usage_advanced(void) DISPLAY( " -q : suppress warnings; specify twice to suppress errors too\n"); DISPLAY( " -c : force write to standard output, even if it is the console\n"); DISPLAY( " -t : test compressed file integrity\n"); + DISPLAY( " -m : multiple input files (implies automatic output filenames)\n"); DISPLAY( " -l : compress using Legacy format (Linux kernel compression)\n"); DISPLAY( " -B# : Block size [4-7](default : 7)\n"); DISPLAY( " -BD : Block dependency (improve compression ratio)\n"); /* DISPLAY( " -BX : enable block checksum (default:disabled)\n"); *//* Option currently inactive */ - DISPLAY( " -Sx : disable stream checksum (default:enabled)\n"); + DISPLAY( "--no-frame-crc : disable stream checksum (default:enabled)\n"); + DISPLAY( "--frame-content-size : compressed frame includes original size (default:not present)\n"); + DISPLAY( "--sparse-support : enable sparse file (default:disabled)(experimental)\n"); DISPLAY( "Benchmark arguments :\n"); DISPLAY( " -b : benchmark file(s)\n"); DISPLAY( " -i# : iteration loops [1-9](default : 3), benchmark mode only\n"); @@ -184,7 +185,6 @@ static int usage_advanced(void) DISPLAY( " -c1 : high compression\n"); DISPLAY( " -hc : high compression\n"); DISPLAY( " -y : overwrite output without prompting \n"); - DISPLAY( " -s : suppress warnings \n"); #endif /* ENABLE_LZ4C_LEGACY_OPTIONS */ EXTENDED_HELP; return 0; @@ -261,25 +261,28 @@ int main(int argc, char** argv) cLevel=0, decode=0, bench=0, - filenamesStart=2, legacy_format=0, forceStdout=0, forceCompress=0, - main_pause=0; - char* input_filename=0; - char* output_filename=0; + main_pause=0, + multiple_inputs=0; + const char* 
input_filename=0; + const char* output_filename=0; char* dynNameSpace=0; + const char** inFileNames = NULL; + unsigned ifnIdx=0; char nullOutput[] = NULL_OUTPUT; char extension[] = LZ4_EXTENSION; - int blockSize; + int blockSize; /* Init */ programName = argv[0]; LZ4IO_setOverwrite(0); blockSize = LZ4IO_setBlockSizeID(LZ4_BLOCKSIZEID_DEFAULT); - /* lz4cat behavior */ - if (!strcmp(programName, LZ4_CAT)) { decode=1; forceStdout=1; output_filename=stdoutmark; displayLevel=1; } + /* lz4cat predefined behavior */ + if (!strcmp(programName, LZ4CAT)) { decode=1; forceStdout=1; output_filename=stdoutmark; displayLevel=1; } + if (!strcmp(programName, UNLZ4)) { decode=1; } /* command switches */ for(i=1; i<argc; i++) @@ -288,7 +291,23 @@ int main(int argc, char** argv) if(!argument) continue; /* Protection if argument empty */ - /* Decode command (note : aggregated commands are allowed) */ + /* long commands (--long-word) */ + if (!strcmp(argument, "--compress")) { forceCompress = 1; continue; } + if (!strcmp(argument, "--decompress")) { decode = 1; continue; } + if (!strcmp(argument, "--uncompress")) { decode = 1; continue; } + if (!strcmp(argument, "--test")) { decode = 1; LZ4IO_setOverwrite(1); output_filename=nulmark; continue; } + if (!strcmp(argument, "--force")) { LZ4IO_setOverwrite(1); continue; } + if (!strcmp(argument, "--stdout")) { forceStdout=1; output_filename=stdoutmark; displayLevel=1; continue; } + if (!strcmp(argument, "--to-stdout")) { forceStdout=1; output_filename=stdoutmark; displayLevel=1; continue; } + if (!strcmp(argument, "--no-frame-crc")) { LZ4IO_setStreamChecksumMode(0); continue; } + if (!strcmp(argument, "--frame-content-size")) { LZ4IO_setContentSize(1); continue; } + if (!strcmp(argument, "--sparse-support")) { LZ4IO_setSparseFile(1); continue; } + if (!strcmp(argument, "--verbose")) { displayLevel=4; continue; } + if (!strcmp(argument, "--quiet")) { if (displayLevel) displayLevel--; continue; } + if (!strcmp(argument, "--version")) { 
DISPLAY(WELCOME_MESSAGE); return 0; } + if (!strcmp(argument, "--keep")) { continue; } /* keep source file (default anyway, so useless) (for xz/lzma compatibility) */ + + /* Short commands (note : aggregated short commands are allowed) */ if (argument[0]=='-') { /* '-' means stdin/stdout */ @@ -308,7 +327,6 @@ int main(int argc, char** argv) if ((argument[0]=='c') && (argument[1]=='1')) { cLevel=9; argument++; continue; } /* -c1 (high compression) */ if ((argument[0]=='h') && (argument[1]=='c')) { cLevel=9; argument++; continue; } /* -hc (high compression) */ if (*argument=='y') { LZ4IO_setOverwrite(1); continue; } /* -y (answer 'yes' to overwrite permission) */ - if (*argument=='s') { displayLevel=1; continue; } /* -s (silent mode) */ #endif /* ENABLE_LZ4C_LEGACY_OPTIONS */ if ((*argument>='0') && (*argument<='9')) @@ -353,7 +371,7 @@ int main(int argc, char** argv) case 'v': displayLevel=4; break; /* Quiet mode */ - case 'q': displayLevel--; break; + case 'q': if (displayLevel) displayLevel--; break; /* keep source file (default anyway, so useless) (for xz/lzma compatibility) */ case 'k': break; @@ -372,36 +390,42 @@ int main(int argc, char** argv) { int B = argument[1] - '0'; blockSize = LZ4IO_setBlockSizeID(B); - BMK_SetBlocksize(blockSize); + BMK_setBlocksize(blockSize); argument++; break; } case 'D': LZ4IO_setBlockMode(LZ4IO_blockLinked); argument++; break; - case 'X': LZ4IO_setBlockChecksumMode(1); argument ++; break; /* currently disables */ + case 'X': LZ4IO_setBlockChecksumMode(1); argument ++; break; /* currently disabled */ default : exitBlockProperties=1; } if (exitBlockProperties) break; } break; - /* Modify Stream properties */ - case 'S': if (argument[1]=='x') { LZ4IO_setStreamChecksumMode(0); argument++; break; } else { badusage(); } - /* Benchmark */ - case 'b': bench=1; break; + case 'b': bench=1; multiple_inputs=1; + if (inFileNames == NULL) + inFileNames = (const char**) malloc(argc * sizeof(char*)); + break; + + /* Treat non-option args as 
input files. See https://code.google.com/p/lz4/issues/detail?id=151 */ + case 'm': multiple_inputs=1; + if (inFileNames == NULL) + inFileNames = (const char**) malloc(argc * sizeof(char*)); + break; /* Modify Nb Iterations (benchmark only) */ case 'i': if ((argument[1] >='1') && (argument[1] <='9')) { int iters = argument[1] - '0'; - BMK_SetNbIterations(iters); + BMK_setNbIterations(iters); argument++; } break; /* Pause at the end (hidden option) */ - case 'p': main_pause=1; BMK_SetPause(); break; + case 'p': main_pause=1; BMK_setPause(); break; /* Specific commands for customized versions */ EXTENDED_ARGUMENTS; @@ -413,29 +437,36 @@ int main(int argc, char** argv) continue; } - /* first provided filename is input */ - if (!input_filename) { input_filename=argument; filenamesStart=i; continue; } + /* Store in *inFileNames[] if -m is used. */ + if (multiple_inputs) { inFileNames[ifnIdx++]=argument; continue; } + + /* Store first non-option arg in input_filename to preserve original cli logic. */ + if (!input_filename) { input_filename=argument; continue; } - /* second provided filename is output */ + /* Second non-option arg in output_filename to preserve original cli logic. */ if (!output_filename) { output_filename=argument; if (!strcmp (output_filename, nullOutput)) output_filename = nulmark; continue; } + + /* 3rd non-option arg should not exist */ + DISPLAYLEVEL(1, "Warning : %s won't be used ! Do you want multiple input files (-m) ? 
\n", argument); } DISPLAYLEVEL(3, WELCOME_MESSAGE); if (!decode) DISPLAYLEVEL(4, "Blocks size : %i KB\n", blockSize>>10); /* No input filename ==> use stdin */ + if (multiple_inputs) input_filename = inFileNames[0], output_filename = (char*)(inFileNames[0]); if(!input_filename) { input_filename=stdinmark; } /* Check if input or output are defined as console; trigger an error in this case */ if (!strcmp(input_filename, stdinmark) && IS_CONSOLE(stdin) ) badusage(); /* Check if benchmark is selected */ - if (bench) return BMK_benchFile(argv+filenamesStart, argc-filenamesStart, cLevel); + if (bench) return BMK_benchFiles(inFileNames, ifnIdx, cLevel); /* No output filename ==> try to select one automatically (when possible) */ while (!output_filename) @@ -450,9 +481,9 @@ int main(int argc, char** argv) { size_t l = strlen(input_filename); dynNameSpace = (char*)calloc(1,l+5); + strcpy(dynNameSpace, input_filename); + strcat(dynNameSpace, LZ4_EXTENSION); output_filename = dynNameSpace; - strcpy(output_filename, input_filename); - strcpy(output_filename+l, LZ4_EXTENSION); DISPLAYLEVEL(2, "Compressed filename will be : %s \n", output_filename); break; } @@ -461,12 +492,12 @@ int main(int argc, char** argv) size_t outl; size_t inl = strlen(input_filename); dynNameSpace = (char*)calloc(1,inl+1); - output_filename = dynNameSpace; - strcpy(output_filename, input_filename); + strcpy(dynNameSpace, input_filename); outl = inl; if (inl>4) - while ((outl >= inl-4) && (input_filename[outl] == extension[outl-inl+4])) output_filename[outl--]=0; + while ((outl >= inl-4) && (input_filename[outl] == extension[outl-inl+4])) dynNameSpace[outl--]=0; if (outl != inl-5) { DISPLAYLEVEL(1, "Cannot determine an output filename\n"); badusage(); } + output_filename = dynNameSpace; DISPLAYLEVEL(2, "Decoding file %s \n", output_filename); } } @@ -491,11 +522,15 @@ int main(int argc, char** argv) } else { - DEFAULT_COMPRESSOR(input_filename, output_filename, cLevel); + if (multiple_inputs) + 
LZ4IO_compressMultipleFilenames(inFileNames, ifnIdx, LZ4_EXTENSION, cLevel); + else + DEFAULT_COMPRESSOR(input_filename, output_filename, cLevel); } } if (main_pause) waitEnter(); free(dynNameSpace); + free((void*)inFileNames); return 0; } diff --git a/programs/lz4io.c b/programs/lz4io.c index fa1f0f9..f5c5e98 100644 --- a/programs/lz4io.c +++ b/programs/lz4io.c @@ -1,6 +1,7 @@ /* LZ4io.c - LZ4 File/Stream Interface - Copyright (C) Yann Collet 2011-2014 + Copyright (C) Yann Collet 2011-2015 + GPL v2 License This program is free software; you can redistribute it and/or modify @@ -18,7 +19,7 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. You can contact the author at : - - LZ4 source repository : http://code.google.com/p/lz4/ + - LZ4 source repository : https://github.com/Cyan4973/lz4 - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ /* @@ -31,56 +32,51 @@ /************************************** * Compiler Options -***************************************/ +**************************************/ #ifdef _MSC_VER /* Visual Studio */ # define _CRT_SECURE_NO_WARNINGS # define _CRT_SECURE_NO_DEPRECATE /* VS2005 */ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ #endif -#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) -#ifdef __GNUC__ -# pragma GCC diagnostic ignored "-Wmissing-braces" /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */ -# pragma GCC diagnostic ignored "-Wmissing-field-initializers" /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */ -#endif - #define _LARGE_FILES /* Large file support on 32-bits AIX */ #define _FILE_OFFSET_BITS 64 /* Large file support on 32-bits unix */ -#define _POSIX_SOURCE 1 /* for fileno() within <stdio.h> on unix */ -/**************************** +/***************************** * Includes *****************************/ -#include 
<stdio.h> /* fprintf, fopen, fread, _fileno, stdin, stdout */ -#include <stdlib.h> /* malloc, free */ -#include <string.h> /* strcmp, strlen */ -#include <time.h> /* clock */ +#include <stdio.h> /* fprintf, fopen, fread, stdin, stdout */ +#include <stdlib.h> /* malloc, free */ +#include <string.h> /* strcmp, strlen */ +#include <time.h> /* clock */ +#include <sys/types.h> /* stat64 */ +#include <sys/stat.h> /* stat64 */ #include "lz4io.h" #include "lz4.h" /* still required for legacy format */ #include "lz4hc.h" /* still required for legacy format */ #include "lz4frame.h" -/**************************** +/****************************** * OS-specific Includes -*****************************/ +******************************/ #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__) # include <fcntl.h> /* _O_BINARY */ -# include <io.h> /* _setmode, _isatty */ -# ifdef __MINGW32__ - int _fileno(FILE *stream); /* MINGW somehow forgets to include this windows declaration into <stdio.h> */ -# endif +# include <io.h> /* _setmode, _fileno, _get_osfhandle */ # define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY) -# define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream)) +# include <Windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */ +# define SET_SPARSE_FILE_MODE(file) { DWORD dw; DeviceIoControl((HANDLE) _get_osfhandle(_fileno(file)), FSCTL_SET_SPARSE, 0, 0, 0, 0, &dw, 0); } +# if defined(_MSC_VER) && (_MSC_VER >= 1400) /* Avoid MSVC fseek()'s 2GiB barrier */ +# define fseek _fseeki64 +# endif #else -# include <unistd.h> /* isatty */ # define SET_BINARY_MODE(file) -# define IS_CONSOLE(stdStream) isatty(fileno(stdStream)) +# define SET_SPARSE_FILE_MODE(file) #endif -/**************************** +/***************************** * Constants *****************************/ #define KB *(1 <<10) @@ -93,43 +89,46 @@ #define _4BITS 0x0F #define _8BITS 0xFF -#define MAGICNUMBER_SIZE 4 -#define LZ4S_MAGICNUMBER 0x184D2204 -#define 
LZ4S_SKIPPABLE0 0x184D2A50 -#define LZ4S_SKIPPABLEMASK 0xFFFFFFF0 -#define LEGACY_MAGICNUMBER 0x184C2102 +#define MAGICNUMBER_SIZE 4 +#define LZ4IO_MAGICNUMBER 0x184D2204 +#define LZ4IO_SKIPPABLE0 0x184D2A50 +#define LZ4IO_SKIPPABLEMASK 0xFFFFFFF0 +#define LEGACY_MAGICNUMBER 0x184C2102 #define CACHELINE 64 #define LEGACY_BLOCKSIZE (8 MB) #define MIN_STREAM_BUFSIZE (192 KB) -#define LZ4S_BLOCKSIZEID_DEFAULT 7 -#define LZ4S_CHECKSUM_SEED 0 -#define LZ4S_EOS 0 -#define LZ4S_MAXHEADERSIZE (MAGICNUMBER_SIZE+2+8+4+1) +#define LZ4IO_BLOCKSIZEID_DEFAULT 7 + +#define sizeT sizeof(size_t) +#define maskT (sizeT - 1) /************************************** * Macros -***************************************/ +**************************************/ #define DISPLAY(...) fprintf(stderr, __VA_ARGS__) -#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } -#define DISPLAYUPDATE(l, ...) if (displayLevel>=l) { \ - if ((LZ4IO_GetMilliSpan(g_time) > refreshRate) || (displayLevel>=4)) \ +#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } +static int g_displayLevel = 0; /* 0 : no display ; 1: errors ; 2 : + result + interaction + warnings ; 3 : + progression; 4 : + information */ + +#define DISPLAYUPDATE(l, ...) 
if (g_displayLevel>=l) { \ + if ((LZ4IO_GetMilliSpan(g_time) > refreshRate) || (g_displayLevel>=4)) \ { g_time = clock(); DISPLAY(__VA_ARGS__); \ - if (displayLevel>=4) fflush(stdout); } } + if (g_displayLevel>=4) fflush(stdout); } } static const unsigned refreshRate = 150; static clock_t g_time = 0; /************************************** * Local Parameters -***************************************/ -static int displayLevel = 0; /* 0 : no display ; 1: errors ; 2 : + result + interaction + warnings ; 3 : + progression; 4 : + information */ -static int overwrite = 1; -static int globalBlockSizeId = LZ4S_BLOCKSIZEID_DEFAULT; -static int blockChecksum = 0; -static int streamChecksum = 1; -static int blockIndependence = 1; +**************************************/ +static int g_overwrite = 1; +static int g_blockSizeId = LZ4IO_BLOCKSIZEID_DEFAULT; +static int g_blockChecksum = 0; +static int g_streamChecksum = 1; +static int g_blockIndependence = 1; +static int g_sparseFileSupport = 0; +static int g_contentSizeFlag = 0; static const int minBlockSizeID = 4; static const int maxBlockSizeID = 7; @@ -152,11 +151,10 @@ static const int maxBlockSizeID = 7; /************************************** * Version modifiers -***************************************/ +**************************************/ #define EXTENDED_ARGUMENTS #define EXTENDED_HELP #define EXTENDED_FORMAT -#define DEFAULT_COMPRESSOR compress_file #define DEFAULT_DECOMPRESSOR decodeLZ4S @@ -167,8 +165,8 @@ static const int maxBlockSizeID = 7; /* Default setting : overwrite = 1; return : overwrite mode (0/1) */ int LZ4IO_setOverwrite(int yes) { - overwrite = (yes!=0); - return overwrite; + g_overwrite = (yes!=0); + return g_overwrite; } /* blockSizeID : valid values : 4-5-6-7 */ @@ -176,35 +174,49 @@ int LZ4IO_setBlockSizeID(int bsid) { static const int blockSizeTable[] = { 64 KB, 256 KB, 1 MB, 4 MB }; if ((bsid < minBlockSizeID) || (bsid > maxBlockSizeID)) return -1; - globalBlockSizeId = bsid; - return 
blockSizeTable[globalBlockSizeId-minBlockSizeID]; + g_blockSizeId = bsid; + return blockSizeTable[g_blockSizeId-minBlockSizeID]; } int LZ4IO_setBlockMode(LZ4IO_blockMode_t blockMode) { - blockIndependence = (blockMode == LZ4IO_blockIndependent); - return blockIndependence; + g_blockIndependence = (blockMode == LZ4IO_blockIndependent); + return g_blockIndependence; } /* Default setting : no checksum */ int LZ4IO_setBlockChecksumMode(int xxhash) { - blockChecksum = (xxhash != 0); - return blockChecksum; + g_blockChecksum = (xxhash != 0); + return g_blockChecksum; } /* Default setting : checksum enabled */ int LZ4IO_setStreamChecksumMode(int xxhash) { - streamChecksum = (xxhash != 0); - return streamChecksum; + g_streamChecksum = (xxhash != 0); + return g_streamChecksum; } /* Default setting : 0 (no notification) */ int LZ4IO_setNotificationLevel(int level) { - displayLevel = level; - return displayLevel; + g_displayLevel = level; + return g_displayLevel; +} + +/* Default setting : 0 (disabled) */ +int LZ4IO_setSparseFile(int enable) +{ + g_sparseFileSupport = (enable!=0); + return g_sparseFileSupport; +} + +/* Default setting : 0 (disabled) */ +int LZ4IO_setContentSize(int enable) +{ + g_contentSizeFlag = (enable!=0); + return g_contentSizeFlag; } static unsigned LZ4IO_GetMilliSpan(clock_t nPrevious) @@ -214,16 +226,30 @@ static unsigned LZ4IO_GetMilliSpan(clock_t nPrevious) return nSpan; } +static unsigned long long LZ4IO_GetFileSize(const char* infilename) +{ + int r; +#if defined(_MSC_VER) + struct _stat64 statbuf; + r = _stat64(infilename, &statbuf); +#else + struct stat statbuf; + r = stat(infilename, &statbuf); +#endif + if (r || !S_ISREG(statbuf.st_mode)) return 0; /* No good... 
*/ + return (unsigned long long)statbuf.st_size; +} + -/* ************************************************************************ */ -/* ********************** LZ4 File / Pipe compression ********************* */ -/* ************************************************************************ */ +/* ************************************************************************ ** +** ********************** LZ4 File / Pipe compression ********************* ** +** ************************************************************************ */ -static int LZ4S_GetBlockSize_FromBlockId (int id) { return (1 << (8 + (2 * id))); } -static int LZ4S_isSkippableMagicNumber(unsigned int magic) { return (magic & LZ4S_SKIPPABLEMASK) == LZ4S_SKIPPABLE0; } +static int LZ4IO_GetBlockSize_FromBlockId (int id) { return (1 << (8 + (2 * id))); } +static int LZ4IO_isSkippableMagicNumber(unsigned int magic) { return (magic & LZ4IO_SKIPPABLEMASK) == LZ4IO_SKIPPABLE0; } -static int get_fileHandle(char* input_filename, char* output_filename, FILE** pfinput, FILE** pfoutput) +static int get_fileHandle(const char* input_filename, const char* output_filename, FILE** pfinput, FILE** pfoutput) { if (!strcmp (input_filename, stdinmark)) @@ -251,12 +277,12 @@ static int get_fileHandle(char* input_filename, char* output_filename, FILE** pf if (*pfoutput!=0) { fclose(*pfoutput); - if (!overwrite) + if (!g_overwrite) { char ch; DISPLAYLEVEL(2, "Warning : %s already exists\n", output_filename); DISPLAYLEVEL(2, "Overwrite ? 
(Y/N) : "); - if (displayLevel <= 1) EXM_THROW(11, "Operation aborted : %s already exists", output_filename); /* No interaction possible */ + if (g_displayLevel <= 1) EXM_THROW(11, "Operation aborted : %s already exists", output_filename); /* No interaction possible */ ch = (char)getchar(); if ((ch!='Y') && (ch!='y')) EXM_THROW(11, "Operation aborted : %s already exists", output_filename); } @@ -272,15 +298,14 @@ static int get_fileHandle(char* input_filename, char* output_filename, FILE** pf - /*************************************** - * Legacy Compression - * *************************************/ +* Legacy Compression +***************************************/ /* unoptimized version; solves endianess & alignment issues */ static void LZ4IO_writeLE32 (void* p, unsigned value32) { - unsigned char* dstPtr = p; + unsigned char* dstPtr = (unsigned char*)p; dstPtr[0] = (unsigned char)value32; dstPtr[1] = (unsigned char)(value32 >> 8); dstPtr[2] = (unsigned char)(value32 >> 16); @@ -290,7 +315,7 @@ static void LZ4IO_writeLE32 (void* p, unsigned value32) /* LZ4IO_compressFilename_Legacy : * This function is intentionally "hidden" (not published in .h) * It generates compressed streams using the old 'legacy' format */ -int LZ4IO_compressFilename_Legacy(char* input_filename, char* output_filename, int compressionlevel) +int LZ4IO_compressFilename_Legacy(const char* input_filename, const char* output_filename, int compressionlevel) { int (*compressionFunction)(const char*, char*, int); unsigned long long filesize = 0; @@ -308,7 +333,7 @@ int LZ4IO_compressFilename_Legacy(char* input_filename, char* output_filename, i if (compressionlevel < 3) compressionFunction = LZ4_compress; else compressionFunction = LZ4_compressHC; get_fileHandle(input_filename, output_filename, &finput, &foutput); - if ((displayLevel==2) && (compressionlevel==1)) displayLevel=3; + if ((g_displayLevel==2) && (compressionlevel==1)) g_displayLevel=3; /* Allocate Memory */ in_buff = 
(char*)malloc(LEGACY_BLOCKSIZE); @@ -360,11 +385,11 @@ int LZ4IO_compressFilename_Legacy(char* input_filename, char* output_filename, i } -/*********************************************** - * Compression using Frame format - * ********************************************/ +/********************************************* +* Compression using Frame format +*********************************************/ -int LZ4IO_compressFilename(char* input_filename, char* output_filename, int compressionLevel) +int LZ4IO_compressFilename(const char* input_filename, const char* output_filename, int compressionLevel) { unsigned long long filesize = 0; unsigned long long compressedfilesize = 0; @@ -377,23 +402,29 @@ int LZ4IO_compressFilename(char* input_filename, char* output_filename, int comp size_t sizeCheck, headerSize, readSize, outBuffSize; LZ4F_compressionContext_t ctx; LZ4F_errorCode_t errorCode; - LZ4F_preferences_t prefs = {0}; + LZ4F_preferences_t prefs; /* Init */ start = clock(); - if ((displayLevel==2) && (compressionLevel>=3)) displayLevel=3; + memset(&prefs, 0, sizeof(prefs)); + if ((g_displayLevel==2) && (compressionLevel>=3)) g_displayLevel=3; errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION); if (LZ4F_isError(errorCode)) EXM_THROW(30, "Allocation error : can't create LZ4F context : %s", LZ4F_getErrorName(errorCode)); get_fileHandle(input_filename, output_filename, &finput, &foutput); - blockSize = LZ4S_GetBlockSize_FromBlockId (globalBlockSizeId); + blockSize = LZ4IO_GetBlockSize_FromBlockId (g_blockSizeId); /* Set compression parameters */ prefs.autoFlush = 1; prefs.compressionLevel = compressionLevel; - prefs.frameInfo.blockMode = blockIndependence; - prefs.frameInfo.blockSizeID = globalBlockSizeId; - prefs.frameInfo.contentChecksumFlag = streamChecksum; + prefs.frameInfo.blockMode = (blockMode_t)g_blockIndependence; + prefs.frameInfo.blockSizeID = (blockSizeID_t)g_blockSizeId; + prefs.frameInfo.contentChecksumFlag = 
(contentChecksum_t)g_streamChecksum; + if (g_contentSizeFlag) + { + unsigned long long fileSize = LZ4IO_GetFileSize(input_filename); + prefs.frameInfo.frameOSize = fileSize; /* == 0 if input == stdin */ + } /* Allocate Memory */ in_buff = (char*)malloc(blockSize); @@ -462,13 +493,34 @@ int LZ4IO_compressFilename(char* input_filename, char* output_filename, int comp } +#define FNSPACE 30 +int LZ4IO_compressMultipleFilenames(const char** inFileNamesTable, int ifntSize, const char* suffix, int compressionlevel) +{ + int i; + char* outFileName = (char*)malloc(FNSPACE); + size_t ofnSize = FNSPACE; + const size_t suffixSize = strlen(suffix); + + for (i=0; i<ifntSize; i++) + { + size_t ifnSize = strlen(inFileNamesTable[i]); + if (ofnSize <= ifnSize+suffixSize+1) { free(outFileName); ofnSize = ifnSize + 20; outFileName = (char*)malloc(ofnSize); } + strcpy(outFileName, inFileNamesTable[i]); + strcat(outFileName, suffix); + LZ4IO_compressFilename(inFileNamesTable[i], outFileName, compressionlevel); + } + free(outFileName); + return 0; +} + + /* ********************************************************************* */ -/* ********************** LZ4 File / Stream decoding ******************* */ +/* ********************** LZ4 file-stream Decompression **************** */ /* ********************************************************************* */ static unsigned LZ4IO_readLE32 (const void* s) { - const unsigned char* srcPtr = s; + const unsigned char* srcPtr = (const unsigned char*)s; unsigned value32 = srcPtr[0]; value32 += (srcPtr[1]<<8); value32 += (srcPtr[2]<<16); @@ -506,15 +558,16 @@ static unsigned long long decodeLegacyStream(FILE* finput, FILE* foutput) /* Read Block */ sizeCheck = fread(in_buff, 1, blockSize, finput); + if (sizeCheck!=blockSize) EXM_THROW(52, "Read error : cannot access compressed block !"); /* Decode Block */ decodeSize = LZ4_decompress_safe(in_buff, out_buff, blockSize, LEGACY_BLOCKSIZE); - if (decodeSize < 0) EXM_THROW(52, "Decoding Failed ! 
Corrupted input detected !"); + if (decodeSize < 0) EXM_THROW(53, "Decoding Failed ! Corrupted input detected !"); filesize += decodeSize; /* Write Block */ sizeCheck = fwrite(out_buff, 1, decodeSize, foutput); - if (sizeCheck != (size_t)decodeSize) EXM_THROW(53, "Write error : cannot write decoded block into output\n"); + if (sizeCheck != (size_t)decodeSize) EXM_THROW(54, "Write error : cannot write decoded block into output\n"); } /* Free */ @@ -528,57 +581,131 @@ static unsigned long long decodeLegacyStream(FILE* finput, FILE* foutput) static unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) { unsigned long long filesize = 0; - char* inBuff; - char* outBuff; + void* inBuff; + void* outBuff; # define HEADERMAX 20 char headerBuff[HEADERMAX]; - size_t sizeCheck, nextToRead, outBuffSize, inBuffSize; + size_t sizeCheck; + const size_t inBuffSize = 256 KB; + const size_t outBuffSize = 256 KB; LZ4F_decompressionContext_t ctx; LZ4F_errorCode_t errorCode; - LZ4F_frameInfo_t frameInfo; + unsigned storedSkips = 0; /* init */ errorCode = LZ4F_createDecompressionContext(&ctx, LZ4F_VERSION); - if (LZ4F_isError(errorCode)) EXM_THROW(60, "Allocation error : can't create context : %s", LZ4F_getErrorName(errorCode)); - LZ4IO_writeLE32(headerBuff, LZ4S_MAGICNUMBER); /* regenerated here, as it was already read from finput */ - - /* Decode stream descriptor */ - outBuffSize = 0; inBuffSize = 0; sizeCheck = MAGICNUMBER_SIZE; - nextToRead = LZ4F_decompress(ctx, NULL, &outBuffSize, headerBuff, &sizeCheck, NULL); - if (LZ4F_isError(nextToRead)) EXM_THROW(61, "Decompression error : %s", LZ4F_getErrorName(nextToRead)); - if (nextToRead > HEADERMAX) EXM_THROW(62, "Header too large (%i>%i)", (int)nextToRead, HEADERMAX); - sizeCheck = fread(headerBuff, 1, nextToRead, finput); - if (sizeCheck!=nextToRead) EXM_THROW(63, "Read error "); - nextToRead = LZ4F_decompress(ctx, NULL, &outBuffSize, headerBuff, &sizeCheck, NULL); - errorCode = LZ4F_getFrameInfo(ctx, &frameInfo, NULL, 
&inBuffSize); - if (LZ4F_isError(errorCode)) EXM_THROW(64, "can't decode frame header : %s", LZ4F_getErrorName(errorCode)); + if (LZ4F_isError(errorCode)) EXM_THROW(60, "Can't create context : %s", LZ4F_getErrorName(errorCode)); + LZ4IO_writeLE32(headerBuff, LZ4IO_MAGICNUMBER); /* regenerated here, as it was already read from finput */ /* Allocate Memory */ - outBuffSize = LZ4IO_setBlockSizeID(frameInfo.blockSizeID); - inBuffSize = outBuffSize + 4; - inBuff = (char*)malloc(inBuffSize); - outBuff = (char*)malloc(outBuffSize); - if (!inBuff || !outBuff) EXM_THROW(65, "Allocation error : not enough memory"); + inBuff = malloc(256 KB); + outBuff = malloc(256 KB); + if (!inBuff || !outBuff) EXM_THROW(61, "Allocation error : not enough memory"); + + /* Init feed with magic number (already consumed from FILE) */ + { + size_t inSize = 4; + size_t outSize=0; + LZ4IO_writeLE32(inBuff, LZ4IO_MAGICNUMBER); + errorCode = LZ4F_decompress(ctx, outBuff, &outSize, inBuff, &inSize, NULL); + if (LZ4F_isError(errorCode)) EXM_THROW(62, "Header error : %s", LZ4F_getErrorName(errorCode)); + } + /* Main Loop */ - while (nextToRead != 0) + for (;;) { - size_t decodedBytes = outBuffSize; + size_t readSize; + size_t pos = 0; - /* Read Block */ - sizeCheck = fread(inBuff, 1, nextToRead, finput); - if (sizeCheck!=nextToRead) EXM_THROW(66, "Read error "); + /* Read input */ + readSize = fread(inBuff, 1, inBuffSize, finput); + if (!readSize) break; /* empty file or stream */ - /* Decode Block */ - errorCode = LZ4F_decompress(ctx, outBuff, &decodedBytes, inBuff, &sizeCheck, NULL); - if (LZ4F_isError(errorCode)) EXM_THROW(67, "Decompression error : %s", LZ4F_getErrorName(errorCode)); - if (sizeCheck!=nextToRead) EXM_THROW(67, "Synchronization error"); - nextToRead = errorCode; - filesize += decodedBytes; + while (pos < readSize) + { + /* Decode Input (at least partially) */ + size_t remaining = readSize - pos; + size_t decodedBytes = outBuffSize; + errorCode = LZ4F_decompress(ctx, outBuff, 
&decodedBytes, (char*)inBuff+pos, &remaining, NULL); + if (LZ4F_isError(errorCode)) EXM_THROW(66, "Decompression error : %s", LZ4F_getErrorName(errorCode)); + pos += remaining; + + if (decodedBytes) + { + /* Write Block */ + filesize += decodedBytes; + if (g_sparseFileSupport) + { + size_t* const oBuffStartT = (size_t*)outBuff; /* since outBuff is malloc'ed, it's aligned on size_t */ + size_t* oBuffPosT = oBuffStartT; + size_t oBuffSizeT = decodedBytes / sizeT; + size_t* const oBuffEndT = oBuffStartT + oBuffSizeT; + static const size_t bs0T = (32 KB) / sizeT; + while (oBuffPosT < oBuffEndT) + { + size_t seg0SizeT = bs0T; + size_t nb0T; + int seekResult; + if (seg0SizeT > oBuffSizeT) seg0SizeT = oBuffSizeT; + oBuffSizeT -= seg0SizeT; + for (nb0T=0; (nb0T < seg0SizeT) && (oBuffPosT[nb0T] == 0); nb0T++) ; + storedSkips += (unsigned)(nb0T * sizeT); + if (storedSkips > 1 GB) /* avoid int overflow */ + { + seekResult = fseek(foutput, 1 GB, SEEK_CUR); + if (seekResult != 0) EXM_THROW(68, "1 GB skip error (sparse file)"); + storedSkips -= 1 GB; + } + if (nb0T != seg0SizeT) /* not all 0s */ + { + seekResult = fseek(foutput, storedSkips, SEEK_CUR); + if (seekResult) EXM_THROW(68, "Skip error (sparse file)"); + storedSkips = 0; + seg0SizeT -= nb0T; + oBuffPosT += nb0T; + sizeCheck = fwrite(oBuffPosT, sizeT, seg0SizeT, foutput); + if (sizeCheck != seg0SizeT) EXM_THROW(68, "Write error : cannot write decoded block"); + } + oBuffPosT += seg0SizeT; + } + if (decodedBytes & maskT) /* size not multiple of sizeT (necessarily end of block) */ + { + const char* const restStart = (char*)oBuffEndT; + const char* restPtr = restStart; + size_t restSize = decodedBytes & maskT; + const char* const restEnd = restStart + restSize; + for (; (restPtr < restEnd) && (*restPtr == 0); restPtr++) ; + storedSkips += (unsigned) (restPtr - restStart); + if (restPtr != restEnd) + { + int seekResult = fseek(foutput, storedSkips, SEEK_CUR); + if (seekResult) EXM_THROW(68, "Skip error (end of block)"); + 
storedSkips = 0; + sizeCheck = fwrite(restPtr, 1, restEnd - restPtr, foutput); + if (sizeCheck != (size_t)(restEnd - restPtr)) EXM_THROW(68, "Write error : cannot write decoded end of block"); + } + } + } + else + { + sizeCheck = fwrite(outBuff, 1, decodedBytes, foutput); + if (sizeCheck != decodedBytes) EXM_THROW(68, "Write error : cannot write decoded block"); + } + } + } - /* Write Block */ - sizeCheck = fwrite(outBuff, 1, decodedBytes, foutput); - if (sizeCheck != decodedBytes) EXM_THROW(68, "Write error : cannot write decoded block\n"); + } + + if ((g_sparseFileSupport) && (storedSkips>0)) + { + int seekResult; + storedSkips --; + seekResult = fseek(foutput, storedSkips, SEEK_CUR); + if (seekResult != 0) EXM_THROW(69, "Final skip error (sparse file)\n"); + memset(outBuff, 0, 1); + sizeCheck = fwrite(outBuff, 1, 1, foutput); + if (sizeCheck != 1) EXM_THROW(69, "Write error : cannot write last zero\n"); } /* Free */ @@ -591,6 +718,28 @@ static unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) } +static unsigned long long LZ4IO_passThrough(FILE* finput, FILE* foutput, unsigned char U32store[MAGICNUMBER_SIZE]) +{ + void* buffer = malloc(64 KB); + size_t read = 1, sizeCheck; + unsigned long long total = MAGICNUMBER_SIZE; + + sizeCheck = fwrite(U32store, 1, MAGICNUMBER_SIZE, foutput); + if (sizeCheck != MAGICNUMBER_SIZE) EXM_THROW(50, "Pass-through error at start"); + + while (read) + { + read = fread(buffer, 1, 64 KB, finput); + total += read; + sizeCheck = fwrite(buffer, 1, read, foutput); + if (sizeCheck != read) EXM_THROW(50, "Pass-through error"); + } + + free(buffer); + return total; +} + + #define ENDOFSTREAM ((unsigned long long)-1) static unsigned long long selectDecoder( FILE* finput, FILE* foutput) { @@ -598,22 +747,26 @@ static unsigned long long selectDecoder( FILE* finput, FILE* foutput) unsigned magicNumber, size; int errorNb; size_t nbReadBytes; + static unsigned nbCalls = 0; + + /* init */ + nbCalls++; /* Check Archive Header */ nbReadBytes 
= fread(U32store, 1, MAGICNUMBER_SIZE, finput); if (nbReadBytes==0) return ENDOFSTREAM; /* EOF */ if (nbReadBytes != MAGICNUMBER_SIZE) EXM_THROW(40, "Unrecognized header : Magic Number unreadable"); magicNumber = LZ4IO_readLE32(U32store); /* Little Endian format */ - if (LZ4S_isSkippableMagicNumber(magicNumber)) magicNumber = LZ4S_SKIPPABLE0; /* fold skippable magic numbers */ + if (LZ4IO_isSkippableMagicNumber(magicNumber)) magicNumber = LZ4IO_SKIPPABLE0; /* fold skippable magic numbers */ switch(magicNumber) { - case LZ4S_MAGICNUMBER: + case LZ4IO_MAGICNUMBER: return DEFAULT_DECOMPRESSOR(finput, foutput); case LEGACY_MAGICNUMBER: DISPLAYLEVEL(4, "Detected : Legacy format \n"); return decodeLegacyStream(finput, foutput); - case LZ4S_SKIPPABLE0: + case LZ4IO_SKIPPABLE0: DISPLAYLEVEL(4, "Skipping detected skippable area \n"); nbReadBytes = fread(U32store, 1, 4, finput); if (nbReadBytes != 4) EXM_THROW(42, "Stream error : skippable size unreadable"); @@ -623,14 +776,19 @@ static unsigned long long selectDecoder( FILE* finput, FILE* foutput) return selectDecoder(finput, foutput); EXTENDED_FORMAT; default: - if (ftell(finput) == MAGICNUMBER_SIZE) EXM_THROW(44,"Unrecognized header : file cannot be decoded"); /* Wrong magic number at the beginning of 1st stream */ + if (nbCalls == 1) /* just started */ + { + if (g_overwrite) + return LZ4IO_passThrough(finput, foutput, U32store); + EXM_THROW(44,"Unrecognized header : file cannot be decoded"); /* Wrong magic number at the beginning of 1st stream */ + } DISPLAYLEVEL(2, "Stream followed by unrecognized data\n"); return ENDOFSTREAM; } } -int LZ4IO_decompressFilename(char* input_filename, char* output_filename) +int LZ4IO_decompressFilename(const char* input_filename, const char* output_filename) { unsigned long long filesize = 0, decodedSize=0; FILE* finput; @@ -642,6 +800,9 @@ int LZ4IO_decompressFilename(char* input_filename, char* output_filename) start = clock(); get_fileHandle(input_filename, output_filename, &finput, 
&foutput); + /* sparse file */ + if (g_sparseFileSupport && foutput) { SET_SPARSE_FILE_MODE(foutput); } + /* Loop over multiple streams */ do { diff --git a/programs/lz4io.h b/programs/lz4io.h index 7869a43..75a36e1 100644 --- a/programs/lz4io.h +++ b/programs/lz4io.h @@ -1,6 +1,6 @@ /* LZ4io.h - LZ4 File/Stream Interface - Copyright (C) Yann Collet 2011-2014 + Copyright (C) Yann Collet 2011-2015 GPL v2 License This program is free software; you can redistribute it and/or modify @@ -18,7 +18,7 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. You can contact the author at : - - LZ4 source repository : http://code.google.com/p/lz4/ + - LZ4 source repository : https://github.com/Cyan4973/lz4 - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ /* @@ -29,17 +29,18 @@ - The license of this source file is GPLv2. */ +#pragma once /* ************************************************** */ /* Special input/output values */ /* ************************************************** */ #define NULL_OUTPUT "null" -static char stdinmark[] = "stdin"; -static char stdoutmark[] = "stdout"; +static char const stdinmark[] = "stdin"; +static char const stdoutmark[] = "stdout"; #ifdef _WIN32 -static char nulmark[] = "nul"; +static char const nulmark[] = "nul"; #else -static char nulmark[] = "/dev/null"; +static char const nulmark[] = "/dev/null"; #endif @@ -47,8 +48,10 @@ static char nulmark[] = "/dev/null"; /* ****************** Functions ********************* */ /* ************************************************** */ -int LZ4IO_compressFilename (char* input_filename, char* output_filename, int compressionlevel); -int LZ4IO_decompressFilename(char* input_filename, char* output_filename); +int LZ4IO_compressFilename (const char* input_filename, const char* output_filename, int compressionlevel); +int LZ4IO_decompressFilename(const char* input_filename, const char* output_filename); + +int LZ4IO_compressMultipleFilenames(const char** inFileNamesTable, int 
ifntSize, const char* suffix, int compressionlevel); /* ************************************************** */ @@ -67,11 +70,17 @@ int LZ4IO_setBlockSizeID(int blockSizeID); typedef enum { LZ4IO_blockLinked=0, LZ4IO_blockIndependent} LZ4IO_blockMode_t; int LZ4IO_setBlockMode(LZ4IO_blockMode_t blockMode); -/* Default setting : no checksum */ +/* Default setting : no block checksum */ int LZ4IO_setBlockChecksumMode(int xxhash); -/* Default setting : checksum enabled */ +/* Default setting : stream checksum enabled */ int LZ4IO_setStreamChecksumMode(int xxhash); /* Default setting : 0 (no notification) */ int LZ4IO_setNotificationLevel(int level); + +/* Default setting : 0 (disabled) */ +int LZ4IO_setSparseFile(int enable); + +/* Default setting : 0 (disabled) */ +int LZ4IO_setContentSize(int enable); |