diff options
author | Przemyslaw Skibinski <inikep@gmail.com> | 2016-11-06 10:11:55 (GMT) |
---|---|---|
committer | Przemyslaw Skibinski <inikep@gmail.com> | 2016-11-06 10:11:55 (GMT) |
commit | 112499a73a0f367677d8df486a30007ca94101b1 (patch) | |
tree | 6057917cbd5ac8599b22736ef2db83ad48ec9654 | |
parent | fbede33fd7f98f62d89031b4ee29cffdc90cceb8 (diff) | |
parent | 58d7a619a599e2a4ee94ab4406f9bb490d0ee84d (diff) | |
download | lz4-112499a73a0f367677d8df486a30007ca94101b1.zip lz4-112499a73a0f367677d8df486a30007ca94101b1.tar.gz lz4-112499a73a0f367677d8df486a30007ca94101b1.tar.bz2 |
Merge remote-tracking branch 'refs/remotes/lz4/dev' into dev
-rw-r--r-- | NEWS | 5 | ||||
-rw-r--r-- | lib/lz4.c | 25 | ||||
-rw-r--r-- | lib/lz4frame.h | 35 | ||||
-rw-r--r-- | lib/lz4frame_static.h | 2 | ||||
-rw-r--r-- | programs/README.md | 12 | ||||
-rw-r--r-- | programs/lz4.1 | 35 | ||||
-rw-r--r-- | programs/lz4cli.c | 4 | ||||
-rw-r--r-- | tests/Makefile | 13 | ||||
-rw-r--r-- | tests/fasttest.c | 138 |
9 files changed, 226 insertions, 43 deletions
@@ -1,10 +1,13 @@ v1.7.2 Changed : moved to versioning; package, cli and library have same version number -Improved: Small decompression speed boost (+4%) +Improved: Small decompression speed boost +Improved: Small compression speed improvement on 64-bits systems Improved: Performance on ARMv6 and ARMv7 Added : Debianization, by Evgeniy Polyakov Makefile: Generates object files (*.o) for faster (re)compilation on low power systems Fix : cli : crash on some invalid inputs +Fix : cli : -t correctly validates lz4-compressed files, by Nick Terrell +Fix : better ratio on 64-bits big-endian targets r131 New : Dos/DJGPP target, thanks to Louis Santillan (#114) @@ -415,27 +415,32 @@ int LZ4_sizeofState() { return LZ4_STREAMSIZE; } static U32 LZ4_hashSequence(U32 sequence, tableType_t const tableType) { if (tableType == byU16) - return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); + return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); else - return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); + return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); } -static const U64 prime5bytes = 889523592379ULL; -static U32 LZ4_hashSequence64(size_t sequence, tableType_t const tableType) +static U32 LZ4_hashSequence64(U64 sequence, tableType_t const tableType) { + static const U64 prime5bytes = 889523592379ULL; + static const U64 prime8bytes = 11400714785074694791ULL; const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG; - const U32 hashMask = (1<<hashLog) - 1; - return ((sequence * prime5bytes) >> (40 - hashLog)) & hashMask; + if (LZ4_isLittleEndian()) + return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog)); + else + return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog)); } static U32 LZ4_hashSequenceT(size_t sequence, tableType_t const tableType) { - if (LZ4_64bits()) - return LZ4_hashSequence64(sequence, tableType); + if (LZ4_64bits()) return LZ4_hashSequence64(sequence, tableType); return LZ4_hashSequence((U32)sequence, tableType); } -static U32 LZ4_hashPosition(const void* p, tableType_t tableType) { return LZ4_hashSequenceT(LZ4_read_ARCH(p), tableType); } +static U32 LZ4_hashPosition(const void* p, tableType_t tableType) +{ + return LZ4_hashSequenceT(LZ4_read_ARCH(p), tableType); +} static void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t const tableType, const BYTE* srcBase) { @@ -1361,7 +1366,7 @@ int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch lz4sd->prefixEnd += originalSize; } else { lz4sd->extDictSize = lz4sd->prefixSize; - lz4sd->externalDict = (BYTE*)dest - lz4sd->extDictSize; + lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; result = LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize); diff --git a/lib/lz4frame.h b/lib/lz4frame.h index 63abc60..63d61ad 100644 --- a/lib/lz4frame.h +++ b/lib/lz4frame.h @@ -45,13 +45,11 @@ extern "C" { #endif -/*-************************************ -* Dependency -**************************************/ +/* --- Dependency --- */ #include <stddef.h> /* size_t */ /*-*************************************************************** -* Export parameters +* Compiler specifics *****************************************************************/ /*! * LZ4_DLL_EXPORT : @@ -67,6 +65,15 @@ extern "C" { # define LZ4FLIB_API #endif +#if defined(_MSC_VER) +# define LZ4F_DEPRECATE(x) __declspec(deprecated) x +#elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 6)) +# define LZ4F_DEPRECATE(x) x __attribute__((deprecated)) +#else +# define LZ4F_DEPRECATE(x) x /* no deprecation warning for this compiler */ +#endif + + /*-************************************ * Error management **************************************/ @@ -81,7 +88,7 @@ LZ4FLIB_API const char* LZ4F_getErrorName(LZ4F_errorCode_t code); /* return er **************************************/ /* #define LZ4F_DISABLE_OBSOLETE_ENUMS */ /* uncomment to disable obsolete enums */ #ifndef LZ4F_DISABLE_OBSOLETE_ENUMS -# define LZ4F_OBSOLETE_ENUM(x) ,x +# define LZ4F_OBSOLETE_ENUM(x) , LZ4F_DEPRECATE(x) = LZ4F_##x #else # define LZ4F_OBSOLETE_ENUM(x) #endif @@ -92,30 +99,30 @@ typedef enum { LZ4F_max256KB=5, LZ4F_max1MB=6, LZ4F_max4MB=7 - LZ4F_OBSOLETE_ENUM(max64KB = LZ4F_max64KB) - LZ4F_OBSOLETE_ENUM(max256KB = LZ4F_max256KB) - LZ4F_OBSOLETE_ENUM(max1MB = LZ4F_max1MB) - LZ4F_OBSOLETE_ENUM(max4MB = LZ4F_max4MB) + LZ4F_OBSOLETE_ENUM(max64KB) + LZ4F_OBSOLETE_ENUM(max256KB) + LZ4F_OBSOLETE_ENUM(max1MB) + LZ4F_OBSOLETE_ENUM(max4MB) } LZ4F_blockSizeID_t; typedef enum { LZ4F_blockLinked=0, LZ4F_blockIndependent - LZ4F_OBSOLETE_ENUM(blockLinked = LZ4F_blockLinked) - LZ4F_OBSOLETE_ENUM(blockIndependent = LZ4F_blockIndependent) + LZ4F_OBSOLETE_ENUM(blockLinked) + LZ4F_OBSOLETE_ENUM(blockIndependent) } LZ4F_blockMode_t; typedef enum { LZ4F_noContentChecksum=0, LZ4F_contentChecksumEnabled - LZ4F_OBSOLETE_ENUM(noContentChecksum = LZ4F_noContentChecksum) - LZ4F_OBSOLETE_ENUM(contentChecksumEnabled = LZ4F_contentChecksumEnabled) + LZ4F_OBSOLETE_ENUM(noContentChecksum) + LZ4F_OBSOLETE_ENUM(contentChecksumEnabled) } LZ4F_contentChecksum_t; typedef enum { LZ4F_frame=0, LZ4F_skippableFrame - LZ4F_OBSOLETE_ENUM(skippableFrame = LZ4F_skippableFrame) + LZ4F_OBSOLETE_ENUM(skippableFrame) } LZ4F_frameType_t; #ifndef LZ4F_DISABLE_OBSOLETE_ENUMS diff --git a/lib/lz4frame_static.h b/lib/lz4frame_static.h index fab3def..0c154a3 100644 --- a/lib/lz4frame_static.h +++ b/lib/lz4frame_static.h @@ -64,7 +64,7 @@ extern "C" { ITEM(ERROR_headerChecksum_invalid) ITEM(ERROR_contentChecksum_invalid) \ ITEM(ERROR_maxCode) -//#define LZ4F_DISABLE_OLD_ENUMS /* uncomment to disable deprecated enums */ +#define LZ4F_DISABLE_OLD_ENUMS /* comment to enable deprecated enums */ #ifndef LZ4F_DISABLE_OLD_ENUMS # define LZ4F_GENERATE_ENUM(ENUM) LZ4F_##ENUM, ENUM = LZ4F_##ENUM, #else diff --git a/programs/README.md b/programs/README.md index 7a5d2e2..b67408f 100644 --- a/programs/README.md +++ b/programs/README.md @@ -2,20 +2,20 @@ Command Line Interface for LZ4 library ============================================ Command Line Interface (CLI) can be created using the `make` command without any additional parameters. -There are however other Makefile targets that create different variations of CLI: -- `lz4` : default CLI supporting gzip-like arguments -- `lz4c` : Same as `lz4` with additional support for decompression of legacy lz4 versions +There are also multiple targets that create different variations of CLI: +- `lz4` : default CLI, with a command line syntax close to gzip +- `lz4c` : Same as `lz4` with additional support legacy lz4 commands (incompatible with gzip) - `lz4c32` : Same as `lz4c`, but forced to compile in 32-bits mode #### Aggregation of parameters -CLI supports aggregation of parameters i.e. `-b1`, `-e18`, and `-i1` can be joined into `-b1e18i1`. +CLI supports aggregation of parameters i.e. `-b1`, `-e18`, and `-i1` can be joined into `-b1e18i1`. #### Benchmark in Command Line Interface CLI includes in-memory compression benchmark module for lz4. -The benchmark is conducted using a given filename. +The benchmark is conducted using a given filename. The file is read into memory. It makes benchmark more precise as it eliminates I/O overhead. @@ -26,7 +26,7 @@ The `-i` parameter selects a number of iterations used for each of tested levels #### Usage of Command Line Interface -The full list of options can be obtained with `-h` or `-H` parameter: +The full list of commands can be obtained with `-h` or `-H` parameter: ``` Usage : lz4 [arg] [input] [output] diff --git a/programs/lz4.1 b/programs/lz4.1 index 51a37f1..529d36a 100644 --- a/programs/lz4.1 +++ b/programs/lz4.1 @@ -50,12 +50,16 @@ The native file format is the format. .B lz4 -supports a command line syntax similar but not identical to +supports a command line syntax similar \fIbut not identical\fR to .BR gzip (1). Differences are : -\fBlz4\fR preserve original files ; -\fBlz4 file1 file2\fR means : compress file1 \fIinto\fR file2 ; -\fBlz4 file\fR shows real-time statistics during compression . + \fBlz4\fR preserves original files + \fBlz4\fR compresses a single file by default (use \fB-m\fR for multiple files) + \fBlz4 file1 file2\fR means : compress file1 \fIinto\fR file2 + When no destination name is provided, compressed file name receives a \fB.lz4\fR suffix + When no destination name is provided, if \fBstdout\fR is \fInot\fR the console, it becomes the output (like a silent \fB-c\fR) + Therefore \fBlz4 file > /dev/null\fR will not create \fBfile.lz4\fR + \fBlz4 file\fR shows real-time statistics during compression (use \fB-q\fR to silent them) Default behaviors can be modified by opt-in commands, described below. \fBlz4 --quiet --multiple\fR more closely mimics \fBgzip\fR behavior. @@ -121,7 +125,7 @@ Decompress. .B --decompress is also the default operation when the input filename has an .B .lz4 -extensionq +extension. .TP .BR \-t ", " \-\-test Test the integrity of compressed @@ -129,6 +133,11 @@ Test the integrity of compressed files. The decompressed data is discarded. No files are created nor removed. + +.TP +.BR \-b# +Benchmark mode, using # compression level. + . .SS "Operation modifiers" .TP @@ -215,12 +224,22 @@ hence for a file. It won't work with unknown source size, such as stdin or pipe. .BR \-k ", " \--keep Don't delete source file. This is default behavior anyway, so this option is just for compatibility with gzip/xz. + +. +.SS "Benchmark mode" +.TP +.B \-b# + benchmark file(s), using # compression level .TP -.B \-b - benchmark file(s) +.B \-e# + benchmark multiple compression levels, from b# to e# (included) .TP .B \-i# - iteration loops [1-9](default : 3), benchmark mode only + minimum evaluation in seconds [1-9] (default : 3) +.TP +.B \-r + operate recursively on directories + .SH BUGS Report bugs at: https://github.com/Cyan4973/lz4/issues diff --git a/programs/lz4cli.c b/programs/lz4cli.c index b707795..e49bbcb 100644 --- a/programs/lz4cli.c +++ b/programs/lz4cli.c @@ -569,7 +569,7 @@ int main(int argc, const char** argv) if (multiple_inputs) operationResult = LZ4IO_decompressMultipleFilenames(inFileNames, ifnIdx, LZ4_EXTENSION); else - DEFAULT_DECOMPRESSOR(input_filename, output_filename); + operationResult = DEFAULT_DECOMPRESSOR(input_filename, output_filename); } else { /* compression is default action */ if (legacy_format) { @@ -579,7 +579,7 @@ int main(int argc, const char** argv) if (multiple_inputs) operationResult = LZ4IO_compressMultipleFilenames(inFileNames, ifnIdx, LZ4_EXTENSION, cLevel); else - DEFAULT_COMPRESSOR(input_filename, output_filename, cLevel); + operationResult = DEFAULT_COMPRESSOR(input_filename, output_filename, cLevel); } } diff --git a/tests/Makefile b/tests/Makefile index f8d7015..2da6408 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -97,6 +97,9 @@ frametest: $(LZ4DIR)/lz4frame.o $(LZ4DIR)/lz4.o $(LZ4DIR)/lz4hc.o $(LZ4DIR)/xxha frametest32: $(LZ4DIR)/lz4frame.c $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c $(LZ4DIR)/xxhash.c frametest.c $(CC) -m32 $(FLAGS) $^ -o $@$(EXT) +fasttest: $(LZ4DIR)/lz4.o fasttest.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + datagen : $(PRGDIR)/datagen.c datagencli.c $(CC) $(FLAGS) -I$(PRGDIR) $^ -o $@$(EXT) @@ -119,7 +122,7 @@ versionsTest: #FreeBSD targets ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU FreeBSD)) -test: test-lz4 test-lz4c test-frametest test-fullbench test-fuzzer test-mem +test: test-lz4 test-lz4c test-fasttest test-frametest test-fullbench test-fuzzer test-mem test32: test-lz4c32 test-frametest32 test-fullbench32 test-fuzzer32 test-mem32 @@ -220,6 +223,11 @@ test-lz4: lz4 datagen test-lz4-basic test-lz4-multiple test-lz4-sparse test-lz4- ./datagen | $(PRGDIR)/lz4 -tf && false || true ./datagen | $(PRGDIR)/lz4 -d > $(VOID) && false || true ./datagen | $(PRGDIR)/lz4 -df > $(VOID) + @echo "\n ---- test cli ----" + $(PRGDIR)/lz4 file-does-not-exist && false || true + $(PRGDIR)/lz4 -f file-does-not-exist && false || true + $(PRGDIR)/lz4 -fm file1-dne file2-dne && false || true + $(PRGDIR)/lz4 -fm file1-dne file2-dne && false || true test-lz4c: lz4c datagen @echo "\n ---- test lz4c version ----" @@ -262,6 +270,9 @@ test-frametest: frametest test-frametest32: frametest32 ./frametest32 $(FUZZER_TIME) +test-fasttest: fasttest + ./fasttest + test-mem: lz4 datagen fuzzer frametest fullbench @echo "\n ---- valgrind tests : memory analyzer ----" valgrind --leak-check=yes --error-exitcode=1 ./datagen -g50M > $(VOID) diff --git a/tests/fasttest.c b/tests/fasttest.c new file mode 100644 index 0000000..a405542 --- /dev/null +++ b/tests/fasttest.c @@ -0,0 +1,138 @@ +/************************************** + * Compiler Options + **************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define _CRT_SECURE_NO_WARNINGS // for MSVC +# define snprintf sprintf_s +#endif +#ifdef __GNUC__ +# pragma GCC diagnostic ignored "-Wmissing-braces" /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */ +#endif + + +/************************************** + * Includes + **************************************/ +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include "lz4.h" + + +/* Returns non-zero on failure. */ +int test_compress(const char *input, int inSize, char *output, int outSize) +{ + LZ4_stream_t lz4Stream_body = { 0 }; + LZ4_stream_t* lz4Stream = &lz4Stream_body; + + int inOffset = 0; + int outOffset = 0; + + if (inSize & 3) return -1; + + while (inOffset < inSize) { + const int length = inSize >> 2; + if (inSize > 1024) return -2; + if (outSize - (outOffset + 8) < LZ4_compressBound(length)) return -3; + { + const int outBytes = LZ4_compress_continue( + lz4Stream, input + inOffset, output + outOffset + 8, length); + if(outBytes <= 0) return -4; + memcpy(output + outOffset, &length, 4); /* input length */ + memcpy(output + outOffset + 4, &outBytes, 4); /* output length */ + inOffset += length; + outOffset += outBytes + 8; + } + } + if (outOffset + 8 > outSize) return -5; + memset(output + outOffset, 0, 4); + memset(output + outOffset + 4, 0, 4); + return 0; +} + +void swap(void **a, void **b) { + void *tmp = *a; + *a = *b; + *b = tmp; +} + +/* Returns non-zero on failure. Not a safe function. */ +int test_decompress(const char *uncompressed, const char *compressed) +{ + char outBufferA[1024]; + char spacing; /* So prefixEnd != dest */ + char outBufferB[1024]; + char *output = outBufferA; + char *lastOutput = outBufferB; + LZ4_streamDecode_t lz4StreamDecode_body = { 0 }; + LZ4_streamDecode_t* lz4StreamDecode = &lz4StreamDecode_body; + int offset = 0; + int unOffset = 0; + int lastBytes = 0; + + (void)spacing; + + for(;;) { + int32_t bytes; + int32_t unBytes; + /* Read uncompressed size and compressed size */ + memcpy(&unBytes, compressed + offset, 4); + memcpy(&bytes, compressed + offset + 4, 4); + offset += 8; + /* Check if we reached end of stream or error */ + if(bytes == 0 && unBytes == 0) return 0; + if(bytes <= 0 || unBytes <= 0 || unBytes > 1024) return 1; + + /* Put the last output in the dictionary */ + LZ4_setStreamDecode(lz4StreamDecode, lastOutput, lastBytes); + /* Decompress */ + bytes = LZ4_decompress_fast_continue( + lz4StreamDecode, compressed + offset, output, unBytes); + if(bytes <= 0) return 2; + /* Check result */ + { + int r = memcmp(uncompressed + unOffset, output, unBytes); + if (r) return 3; + } + swap((void**)&output, (void**)&lastOutput); + offset += bytes; + unOffset += unBytes; + lastBytes = unBytes; + } +} + + +int main(int argc, char **argv) +{ + char input[] = + "Hello Hello Hello Hello Hello Hello Hello Hello!" + "Hello Hello Hello Hello Hello Hello Hello Hello!" + "Hello Hello Hello Hello Hello Hello Hello Hello!" + "Hello Hello Hello Hello Hello Hello Hello Hello!" + "Hello Hello Hello Hello Hello Hello Hello Hello!" + "Hello Hello Hello Hello Hello Hello Hello Hello!" + "Hello Hello Hello Hello Hello Hello Hello Hello!" + "Hello Hello Hello Hello Hello Hello Hello Hello!" + "Hello Hello Hello Hello Hello Hello Hello Hello!" + "Hello Hello Hello Hello Hello Hello Hello Hello!" + "Hello Hello Hello Hello Hello Hello Hello Hello!" + "Hello Hello Hello Hello Hello Hello Hello Hello!" + "Hello Hello Hello Hello Hello Hello Hello Hello!" + "Hello Hello Hello Hello Hello Hello Hello Hello!" + "Hello Hello Hello Hello Hello Hello Hello Hello!" + "Hello Hello Hello Hello Hello Hello Hello Hello"; + char output[LZ4_COMPRESSBOUND(4096)]; + int r; + + (void)argc; + (void)argv; + + if ((r = test_compress(input, sizeof(input), output, sizeof(output)))) { + return r; + } + if ((r = test_decompress(input, output))) { + return r; + } + return 0; +} |