summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoryann.collet.73@gmail.com <yann.collet.73@gmail.com@650e7d94-2a16-8b24-b05c-7c0b3f6821cd>2013-04-18 08:53:13 (GMT)
committeryann.collet.73@gmail.com <yann.collet.73@gmail.com@650e7d94-2a16-8b24-b05c-7c0b3f6821cd>2013-04-18 08:53:13 (GMT)
commita2e93db805918a84908373a579c44732d3e6854e (patch)
tree28486565469e1a4f4d92c5304819916969091fb5
parentcbfd031d301222123d185320a55a923f9363f781 (diff)
downloadlz4-a2e93db805918a84908373a579c44732d3e6854e.zip
lz4-a2e93db805918a84908373a579c44732d3e6854e.tar.gz
lz4-a2e93db805918a84908373a579c44732d3e6854e.tar.bz2
Added : function LZ4_compressHC_limitedOutput()
Updated : LZ4 Streaming Format.odt, to version 1.4 New : LZ4c now supports Stream Checksum (default) and Skippable chunks Updated : Fuzzer, testing LZ4_compressHC_limitedOutput() git-svn-id: https://lz4.googlecode.com/svn/trunk@93 650e7d94-2a16-8b24-b05c-7c0b3f6821cd
-rw-r--r--LZ4 Streaming Format.odtbin80344 -> 90196 bytes
-rw-r--r--Makefile2
-rw-r--r--fuzzer.c24
-rw-r--r--lz4.c18
-rw-r--r--lz4.h10
-rw-r--r--lz4c.c103
-rw-r--r--lz4hc.c40
-rw-r--r--lz4hc.h16
-rw-r--r--xxhash.c4
-rw-r--r--xxhash.h2
10 files changed, 153 insertions, 66 deletions
diff --git a/LZ4 Streaming Format.odt b/LZ4 Streaming Format.odt
index 62aeecb..614eb71 100644
--- a/LZ4 Streaming Format.odt
+++ b/LZ4 Streaming Format.odt
Binary files differ
diff --git a/Makefile b/Makefile
index 745888b..ea60d11 100644
--- a/Makefile
+++ b/Makefile
@@ -18,7 +18,7 @@ lz4c: lz4.c lz4hc.c bench.c xxhash.c lz4c.c
lz4c32: lz4.c lz4hc.c bench.c xxhash.c lz4c.c
$(CC) -m32 -Os -march=native $(CFLAGS) $^ -o $@$(EXT)
-fuzzer : lz4.c fuzzer.c
+fuzzer : lz4.c lz4hc.c fuzzer.c
$(CC) -O3 $(CFLAGS) $^ -o $@$(EXT)
clean:
diff --git a/fuzzer.c b/fuzzer.c
index 211c805..180f9e9 100644
--- a/fuzzer.c
+++ b/fuzzer.c
@@ -42,7 +42,7 @@
//**************************************
// Constants
//**************************************
-#define NB_ATTEMPTS (1<<18)
+#define NB_ATTEMPTS (1<<17)
#define LEN ((1<<15))
#define SEQ_POW 2
#define NUM_SEQ (1 << SEQ_POW)
@@ -99,7 +99,7 @@ int FUZ_SecurityTest()
char* input;
int i, r;
- printf("Starting security tests (issue 52)...");
+ printf("Starting overflow tests (issue 52)...");
input = (char*) malloc (20<<20);
output = (char*) malloc (20<<20);
input[0] = 0x0F;
@@ -111,7 +111,7 @@ int FUZ_SecurityTest()
free(input);
free(output);
- printf(" Completed (return = %i < 0)\n",r);
+ printf(" Passed (return = %i < 0)\n",r);
return 0;
}
@@ -120,6 +120,7 @@ int FUZ_SecurityTest()
int main() {
unsigned long long bytes = 0;
unsigned long long cbytes = 0;
+ unsigned long long hcbytes = 0;
unsigned char buf[LEN];
unsigned char testOut[LEN+1];
# define FUZ_max LZ4_COMPRESSBOUND(LEN)
@@ -127,7 +128,7 @@ int main() {
const int off_full = FUZ_avail - FUZ_max;
unsigned char cbuf[FUZ_avail + PAGE_SIZE];
unsigned int seed, cur_seq=PRIME3, seeds[NUM_SEQ], timestamp=FUZ_GetMilliStart();
- int i, j, k, ret, len;
+ int i, j, k, ret, len, lenHC;
char userInput[30] = {0};
printf("starting LZ4 fuzzer\n");
@@ -166,6 +167,11 @@ int main() {
buf[j] = FUZ_rand(&cur_seq) >> 16;
}
+ // Test compression HC
+ ret = LZ4_compressHC_limitedOutput((const char*)buf, (char*)&cbuf[off_full], LEN, FUZ_max);
+ if (ret == 0) { printf("HC compression failed despite sufficient space: seed %u, len %d\n", seed, LEN); goto _output_error; }
+ lenHC = ret;
+
// Test compression
ret = LZ4_compress_limitedOutput((const char*)buf, (char*)&cbuf[off_full], LEN, FUZ_max);
if (ret == 0) { printf("compression failed despite sufficient space: seed %u, len %d\n", seed, LEN); goto _output_error; }
@@ -208,17 +214,27 @@ int main() {
if (!test_canary(&cbuf[FUZ_avail])) { printf("compression overran output buffer: seed %u, len %d, olen %d\n", seed, LEN, len); goto _output_error; }
if (ret == 0) { printf("compression failed despite sufficient space: seed %u, len %d\n", seed, LEN); goto _output_error; }
+ // Test HC compression with output size being exactly what's necessary (should work)
+ ret = LZ4_compressHC_limitedOutput((const char*)buf, (char*)&cbuf[FUZ_avail-len], LEN, lenHC);
+ if (ret == 0) { printf("HC compression failed despite sufficient space: seed %u, len %d\n", seed, LEN); goto _output_error; }
+
// Test compression with just one missing byte into output buffer => must fail
ret = LZ4_compress_limitedOutput((const char*)buf, (char*)&cbuf[FUZ_avail-(len-1)], LEN, len-1);
if (ret) { printf("compression overran output buffer: seed %u, len %d, olen %d => ret %d", seed, LEN, len-1, ret); goto _output_error; }
if (!test_canary(&cbuf[FUZ_avail])) { printf("compression overran output buffer: seed %u, len %d, olen %d", seed, LEN, len-1); goto _output_error; }
+ // Test HC compression with just one missing byte into output buffer => must fail
+ ret = LZ4_compressHC_limitedOutput((const char*)buf, (char*)&cbuf[FUZ_avail-(len-1)], LEN, lenHC-1);
+ if (ret) { printf("HC compression overran output buffer: seed %u, len %d, olen %d => ret %d", seed, LEN, lenHC-1, ret); goto _output_error; }
+
bytes += LEN;
cbytes += len;
+ hcbytes += lenHC;
}
printf("all tests completed successfully \n");
printf("compression ratio: %0.3f%%\n", (double)cbytes/bytes*100);
+ printf("HC compression ratio: %0.3f%%\n", (double)hcbytes/bytes*100);
getchar();
return 0;
diff --git a/lz4.c b/lz4.c
index 2660f5d..2fac1a6 100644
--- a/lz4.c
+++ b/lz4.c
@@ -181,16 +181,10 @@ typedef struct _U64_S { U64 v; } U64_S;
#define HASHTABLESIZE (1 << HASH_LOG)
#define HASH_MASK (HASHTABLESIZE - 1)
-// NOTCOMPRESSIBLE_DETECTIONLEVEL :
-// Decreasing this value will make the algorithm skip faster data segments considered "incompressible"
-// This may decrease compression ratio dramatically, but will be faster on incompressible data
-// Increasing this value will make the algorithm search more before declaring a segment "incompressible"
-// This could improve compression a bit, but will be slower on incompressible data
-// The default value (6) is recommended
-#define NOTCOMPRESSIBLE_DETECTIONLEVEL 6
+#define NOTCOMPRESSIBLE_DETECTIONLEVEL 6 // Increasing this value will make the algorithm slower on incompressible data
#define SKIPSTRENGTH (NOTCOMPRESSIBLE_DETECTIONLEVEL>2?NOTCOMPRESSIBLE_DETECTIONLEVEL:2)
#define STACKLIMIT 13
-#define HEAPMODE (HASH_LOG>STACKLIMIT) // Defines if memory is allocated into the stack (local variable), or into the heap (malloc()).
+#define HEAPMODE (HASH_LOG>STACKLIMIT) // Defines if memory is allocated into the stack (local variable), or into the heap (malloc()).
#define COPYLENGTH 8
#define LASTLITERALS 5
#define MFLIMIT (COPYLENGTH+MINMATCH)
@@ -338,7 +332,7 @@ static inline int LZ4_NbCommonBytes (register U32 val)
static inline int LZ4_compressCtx(void** ctx,
const char* source,
char* dest,
- int isize,
+ int inputSize,
int maxOutputSize)
{
#if HEAPMODE
@@ -351,7 +345,7 @@ static inline int LZ4_compressCtx(void** ctx,
const BYTE* ip = (BYTE*) source;
INITBASE(base);
const BYTE* anchor = ip;
- const BYTE* const iend = ip + isize;
+ const BYTE* const iend = ip + inputSize;
const BYTE* const mflimit = iend - MFLIMIT;
#define matchlimit (iend - LASTLITERALS)
@@ -364,7 +358,7 @@ static inline int LZ4_compressCtx(void** ctx,
// Init
- if (isize<MINLENGTH) goto _last_literals;
+ if (inputSize<MINLENGTH) goto _last_literals;
#if HEAPMODE
if (*ctx == NULL)
{
@@ -496,7 +490,7 @@ _last_literals:
// Encode Last Literals
{
int lastRun = (int)(iend - anchor);
- if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0;
+ if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0; // Check output limit
if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
else *op++ = (BYTE)(lastRun<<ML_BITS);
memcpy(op, anchor, iend - anchor);
diff --git a/lz4.h b/lz4.h
index 2785444..0e6a60c 100644
--- a/lz4.h
+++ b/lz4.h
@@ -50,21 +50,21 @@ extern "C" {
// Simple Functions
//****************************
-int LZ4_compress (const char* source, char* dest, int input_size);
-int LZ4_uncompress (const char* source, char* dest, int output_size);
+int LZ4_compress (const char* source, char* dest, int inputSize);
+int LZ4_uncompress (const char* source, char* dest, int outputSize);
/*
LZ4_compress() :
- Compresses 'input_size' bytes from 'source' into 'dest'.
+ Compresses 'inputSize' bytes from 'source' into 'dest'.
Destination buffer must be already allocated,
and must be sized to handle worst cases situations (input data not compressible)
Worst case size evaluation is provided by function LZ4_compressBound()
- input_size : Max supported value is ~1.9GB
+ inputSize : Max supported value is ~1.9GB
return : the number of bytes written in buffer dest
or 0 if the compression fails
LZ4_uncompress() :
- output_size : is the original (uncompressed) size
+ outputSize : is the original (uncompressed) size
return : the number of bytes read in the source buffer (in other words, the compressed size)
If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction.
note : This function never writes outside of provided buffers, and never modifies input buffer.
diff --git a/lz4c.c b/lz4c.c
index ebbb357..85c074a 100644
--- a/lz4c.c
+++ b/lz4c.c
@@ -92,6 +92,9 @@
#define LZ4S_BLOCKSIZEID_DEFAULT 7
#define LZ4S_CHECKSUM_SEED 0
+#define LZ4S_SKIPPABLE0 0x184D2A50
+#define LZ4S_SKIPPABLEMASK 0xFFFFFFF0
+
#define LEGACY_MAGICNUMBER 0x184C2102
#define LEGACY_BLOCKSIZE (8<<20) // 8 MB
@@ -134,8 +137,8 @@ char nulmark[] = "/dev/null";
//**************************************
static int overwrite = 0;
static int blockSizeId = LZ4S_BLOCKSIZEID_DEFAULT;
-static int blockChecksum = 1;
-
+static int blockChecksum = 0;
+static int streamChecksum = 1;
//**************************************
// Exceptions
@@ -163,19 +166,26 @@ int usage(char* exename)
DISPLAY( "Arguments :\n");
DISPLAY( " -c0/-c : Fast compression (default) \n");
DISPLAY( " -c1/-hc: High compression \n");
- DISPLAY( " -B# : Block size [4-7](default:7)\n");
- DISPLAY( " -x : disable checksum\n");
- DISPLAY( " -y : overwrite without prompting \n");
DISPLAY( " -d : decompression \n");
- DISPLAY( " -t : check compressed file \n");
- DISPLAY( " -b# : benchmark files, using # [0-1] compression level\n");
- DISPLAY( " -i# : nb of loops [1-9](default : 3), benchmark mode only\n");
- DISPLAY( " -H : Help (this text)\n");
+ DISPLAY( " -y : overwrite without prompting \n");
+ DISPLAY( " -H : Help (this text + advanced options)\n");
DISPLAY( "input : can be 'stdin' (pipe) or a filename\n");
DISPLAY( "output : can be 'stdout'(pipe) or a filename or 'null'\n");
return 0;
}
+int usage_advanced()
+{
+ DISPLAY( "\nAdvanced options :\n");
+ DISPLAY( " -t : test compressed file \n");
+ DISPLAY( " -B# : Block size [4-7](default : 7)\n");
+ DISPLAY( " -x : enable block checksum (default:disabled)\n");
+ DISPLAY( " -nx : disable stream checksum (default:enabled)\n");
+ DISPLAY( " -b# : benchmark files, using # [0-1] compression level\n");
+ DISPLAY( " -i# : iteration loops [1-9](default : 3), benchmark mode only\n");
+ return 0;
+}
+
int badusage(char* exename)
{
@@ -317,7 +327,7 @@ int legacy_compress_file(char* input_filename, char* output_filename, int compre
int compress_file(char* input_filename, char* output_filename, int compressionlevel)
{
- int (*compressionFunction)(const char*, char*, int);
+ int (*compressionFunction)(const char*, char*, int, int);
unsigned long long filesize = 0;
unsigned long long compressedfilesize = 0;
unsigned int checkbits;
@@ -330,15 +340,16 @@ int compress_file(char* input_filename, char* output_filename, int compressionle
clock_t start, end;
int blockSize;
size_t sizeCheck, header_size;
+ void* streamChecksumState=NULL;
// Init
start = clock();
switch (compressionlevel)
{
- case 0 : compressionFunction = LZ4_compress; break;
- case 1 : compressionFunction = LZ4_compressHC; break;
- default : compressionFunction = LZ4_compress;
+ case 0 : compressionFunction = LZ4_compress_limitedOutput; break;
+ case 1 : compressionFunction = LZ4_compressHC_limitedOutput; break;
+ default : compressionFunction = LZ4_compress_limitedOutput;
}
errorcode = get_fileHandle(input_filename, output_filename, &finput, &foutput);
if (errorcode) return errorcode;
@@ -346,13 +357,15 @@ int compress_file(char* input_filename, char* output_filename, int compressionle
// Allocate Memory
in_buff = (char*)malloc(blockSize);
- out_buff = (char*)malloc(LZ4_compressBound(blockSize));
+ out_buff = (char*)malloc(blockSize+CACHELINE);
if (!in_buff || !out_buff) EXM_THROW(31, "Allocation error : not enough memory");
+ if (streamChecksum) streamChecksumState = XXH32_init(LZ4S_CHECKSUM_SEED);
// Write Archive Header
*(unsigned int*)out_buff = LITTLE_ENDIAN_32(LZ4S_MAGICNUMBER); // Magic Number, in Little Endian convention
*(out_buff+4) = 0x60; // Version("01"), Block independence
*(out_buff+4) |= blockChecksum << 4;
+ *(out_buff+4) |= streamChecksum << 2;
*(out_buff+5) = (char)(blockSizeId<<4); // Block Size
checkbits = XXH32((out_buff+4), 2, LZ4S_CHECKSUM_SEED);
checkbits = LZ4S_GetCheckBits_FromXXH(checkbits);
@@ -371,15 +384,16 @@ int compress_file(char* input_filename, char* output_filename, int compressionle
if( inSize<=0 ) break;
filesize += inSize;
if (displayLevel) DISPLAY("Read : %i MB \r", (int)(filesize>>20));
+ if (streamChecksum) XXH32_update(streamChecksumState, in_buff, inSize);
// Compress Block
- outSize = compressionFunction(in_buff, out_buff+4, inSize);
- if (outSize < inSize) compressedfilesize += outSize+4; else compressedfilesize += inSize+4;
+ outSize = compressionFunction(in_buff, out_buff+4, inSize, inSize-1);
+ if (outSize > 0) compressedfilesize += outSize+4; else compressedfilesize += inSize+4;
if (blockChecksum) compressedfilesize+=4;
if (displayLevel) DISPLAY("Read : %i MB ==> %.2f%%\r", (int)(filesize>>20), (double)compressedfilesize/filesize*100);
// Write Block
- if (outSize < inSize)
+ if (outSize > 0)
{
unsigned int checksum;
int sizeToWrite;
@@ -417,6 +431,14 @@ int compress_file(char* input_filename, char* output_filename, int compressionle
sizeCheck = fwrite(out_buff, 1, 4, foutput);
if (sizeCheck!=(size_t)(4)) EXM_THROW(37, "Write error : cannot write end of stream");
compressedfilesize += 4;
+ if (streamChecksum)
+ {
+ unsigned int checksum = XXH32_digest(streamChecksumState);
+ * (unsigned int*) out_buff = LITTLE_ENDIAN_32(checksum);
+ sizeCheck = fwrite(out_buff, 1, 4, foutput);
+ if (sizeCheck!=(size_t)(4)) EXM_THROW(37, "Write error : cannot write stream checksum");
+ compressedfilesize += 4;
+ }
// Status
end = clock();
@@ -497,7 +519,8 @@ unsigned long long decodeLZ4S(FILE* finput, FILE* foutput)
int decodedBytes;
unsigned int maxBlockSize;
size_t sizeCheck;
- int blockChecksumFlag;
+ int blockChecksumFlag, streamChecksumFlag;
+ void* streamChecksumState=NULL;
// Decode stream descriptor
nbReadBytes = fread(descriptor, 1, 3, finput);
@@ -506,7 +529,7 @@ unsigned long long decodeLZ4S(FILE* finput, FILE* foutput)
int version = (descriptor[0] >> 6) & _2BITS;
int independance = (descriptor[0] >> 5) & _1BIT;
int streamSize = (descriptor[0] >> 3) & _1BIT;
- int reserved1 = (descriptor[0] >> 1) & _2BITS;
+ int reserved1 = (descriptor[0] >> 1) & _1BIT;
int dictionary = (descriptor[0] >> 0) & _1BIT;
int reserved2 = (descriptor[1] >> 7) & _1BIT;
@@ -516,6 +539,7 @@ unsigned long long decodeLZ4S(FILE* finput, FILE* foutput)
int checkBits_xxh32;
blockChecksumFlag = (descriptor[0] >> 4) & _1BIT;
+ streamChecksumFlag= (descriptor[0] >> 2) & _1BIT;
if (version != 1) EXM_THROW(62, "Wrong version number");
if (independance != 1) EXM_THROW(63, "Does not support block inter-dependence");
@@ -537,6 +561,7 @@ unsigned long long decodeLZ4S(FILE* finput, FILE* foutput)
in_buff = (char*)malloc(maxBlockSize);
out_buff = (char*)malloc(maxBlockSize);
if (!in_buff || !out_buff) EXM_THROW(70, "Allocation error : not enough memory");
+ if (streamChecksumFlag) streamChecksumState = XXH32_init(LZ4S_CHECKSUM_SEED);
// Main Loop
while (1)
@@ -561,10 +586,10 @@ unsigned long long decodeLZ4S(FILE* finput, FILE* foutput)
{
unsigned int checksum = XXH32(in_buff, blockSize, LZ4S_CHECKSUM_SEED);
unsigned int readChecksum;
- nbReadBytes = fread(&readChecksum, 1, 4, finput);
- if( nbReadBytes != 4 ) EXM_THROW(74, "Read error : cannot read next block size");
+ sizeCheck = fread(&readChecksum, 1, 4, finput);
+ if( sizeCheck != 4 ) EXM_THROW(74, "Read error : cannot read next block size");
readChecksum = LITTLE_ENDIAN_32(readChecksum); // Convert to little endian
- if (checksum != readChecksum) EXM_THROW(75, "Error : invalid checksum detected");
+ if (checksum != readChecksum) EXM_THROW(75, "Error : invalid block checksum detected");
}
if (uncompressedFlag)
@@ -573,6 +598,7 @@ unsigned long long decodeLZ4S(FILE* finput, FILE* foutput)
sizeCheck = fwrite(in_buff, 1, blockSize, foutput);
if (sizeCheck != (size_t)blockSize) EXM_THROW(76, "Write error : cannot write data block");
filesize += blockSize;
+ if (streamChecksumFlag) XXH32_update(streamChecksumState, in_buff, blockSize);
}
else
{
@@ -580,6 +606,7 @@ unsigned long long decodeLZ4S(FILE* finput, FILE* foutput)
decodedBytes = LZ4_uncompress_unknownOutputSize(in_buff, out_buff, blockSize, maxBlockSize);
if (decodedBytes < 0) EXM_THROW(77, "Decoding Failed ! Corrupted input detected !");
filesize += decodedBytes;
+ if (streamChecksumFlag) XXH32_update(streamChecksumState, out_buff, decodedBytes);
// Write Block
sizeCheck = fwrite(out_buff, 1, decodedBytes, foutput);
@@ -587,6 +614,17 @@ unsigned long long decodeLZ4S(FILE* finput, FILE* foutput)
}
}
+ // Stream Checksum
+ if (streamChecksumFlag)
+ {
+ unsigned int checksum = XXH32_digest(streamChecksumState);
+ unsigned int readChecksum;
+ sizeCheck = fread(&readChecksum, 1, 4, finput);
+ if (sizeCheck != 4) EXM_THROW(74, "Read error : cannot read stream checksum");
+ readChecksum = LITTLE_ENDIAN_32(readChecksum); // Convert to little endian
+ if (checksum != readChecksum) EXM_THROW(75, "Error : invalid stream checksum detected");
+ }
+
// Free
free(in_buff);
free(out_buff);
@@ -597,7 +635,8 @@ unsigned long long decodeLZ4S(FILE* finput, FILE* foutput)
unsigned long long selectDecoder( FILE* finput, FILE* foutput)
{
- unsigned int magicNumber;
+ unsigned int magicNumber, size;
+ int errorNb;
size_t nbReadBytes;
// Check Archive Header
@@ -605,6 +644,8 @@ unsigned long long selectDecoder( FILE* finput, FILE* foutput)
if (nbReadBytes==0) return 0; // EOF
if (nbReadBytes != MAGICNUMBER_SIZE) EXM_THROW(41, "Unrecognized header : Magic Number unreadable");
magicNumber = LITTLE_ENDIAN_32(magicNumber); // Convert to Little Endian format
+ if ((magicNumber & LZ4S_SKIPPABLEMASK) == LZ4S_SKIPPABLE0) magicNumber = LZ4S_SKIPPABLE0; // fold skippable magic numbers
+
switch(magicNumber)
{
case LZ4S_MAGICNUMBER:
@@ -612,8 +653,15 @@ unsigned long long selectDecoder( FILE* finput, FILE* foutput)
case LEGACY_MAGICNUMBER:
DISPLAY("Detected : Legacy format \n");
return decodeLegacyStream(finput, foutput);
+ case LZ4S_SKIPPABLE0:
+ nbReadBytes = fread(&size, 1, 4, finput);
+ if (nbReadBytes != 4) EXM_THROW(42, "Stream error : skippable size unreadable");
+ size = LITTLE_ENDIAN_32(size); // Convert to Little Endian format
+ errorNb = fseek(finput, size, SEEK_CUR);
+ if (errorNb != 0) EXM_THROW(43, "Stream error : cannot skip skippable area");
+ return selectDecoder(finput, foutput);
default:
- if (ftell(finput) == MAGICNUMBER_SIZE) EXM_THROW(42,"Unrecognized header : file cannot be decoded");
+ if (ftell(finput) == MAGICNUMBER_SIZE) EXM_THROW(44,"Unrecognized header : file cannot be decoded"); // Wrong magic number at the beginning of 1st stream
DISPLAY("Stream followed by unrecognized data\n");
return 0;
}
@@ -691,14 +739,15 @@ int main(int argc, char** argv)
switch(argument[0])
{
// Display help on usage
- case 'H': usage(exename); return 0;
+ case 'H': usage(exename); usage_advanced(); return 0;
// Compression (default)
case 'c': if ((argument[1] >='0') && (argument[1] <='1')) { cLevel=argument[1] - '0'; argument++; } break;
case 'h': if (argument[1]=='c') { cLevel=1; argument++; } break;
- // disable checksum
- case 'x': blockChecksum=0; break;
+ // Checksum management
+ case 'x': blockChecksum=1; break;
+ case 'n': if (argument[1]=='x') { streamChecksum=0; argument++; break; } else { badusage(exename); return 1; }
// Use Legacy format (hidden option)
case 'l': legacy_format=1; break;
diff --git a/lz4hc.c b/lz4hc.c
index e3b73d8..99c7770 100644
--- a/lz4hc.c
+++ b/lz4hc.c
@@ -116,6 +116,7 @@
#include <stdlib.h> // calloc, free
#include <string.h> // memset, memcpy
#include "lz4hc.h"
+#include "lz4.h"
#define ALLOCATOR(s) calloc(1,s)
#define FREEMEM free
@@ -499,7 +500,7 @@ _endCount:
}
-forceinline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** anchor, int ml, const BYTE* ref)
+forceinline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** anchor, int matchLength, const BYTE* ref, BYTE* oend)
{
int length, len;
BYTE* token;
@@ -507,6 +508,7 @@ forceinline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE
// Encode Literal length
length = (int)(*ip - *anchor);
token = (*op)++;
+ if ((*op + length + (2 + 1 + LASTLITERALS) + (length>>8)) > oend) return 1; // Check output limit
if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; }
else *token = (BYTE)(length<<ML_BITS);
@@ -517,12 +519,13 @@ forceinline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE
LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref));
// Encode MatchLength
- len = (int)(ml-MINMATCH);
+ len = (int)(matchLength-MINMATCH);
+ if (*op + (1 + LASTLITERALS) + (length>>8) > oend) return 1; // Check output limit
if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (len > 254) { len-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)len; }
else *token += (BYTE)len;
// Prepare next loop
- *ip += ml;
+ *ip += matchLength;
*anchor = *ip;
return 0;
@@ -536,15 +539,17 @@ forceinline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE
int LZ4_compressHCCtx(LZ4HC_Data_Structure* ctx,
const char* source,
char* dest,
- int isize)
+ int inputSize,
+ int maxOutputSize)
{
const BYTE* ip = (const BYTE*) source;
const BYTE* anchor = ip;
- const BYTE* const iend = ip + isize;
+ const BYTE* const iend = ip + inputSize;
const BYTE* const mflimit = iend - MFLIMIT;
const BYTE* const matchlimit = (iend - LASTLITERALS);
BYTE* op = (BYTE*) dest;
+ BYTE* const oend = op + maxOutputSize;
int ml, ml2, ml3, ml0;
const BYTE* ref=NULL;
@@ -575,7 +580,7 @@ _Search2:
if (ml2 == ml) // No better match
{
- LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);
+ if (LZ4_encodeSequence(&ip, &op, &anchor, ml, ref, oend)) return 0;
continue;
}
@@ -627,9 +632,9 @@ _Search3:
// ip & ref are known; Now for ml
if (start2 < ip+ml) ml = (int)(start2 - ip);
// Now, encode 2 sequences
- LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);
+ if (LZ4_encodeSequence(&ip, &op, &anchor, ml, ref, oend)) return 0;
ip = start2;
- LZ4_encodeSequence(&ip, &op, &anchor, ml2, ref2);
+ if (LZ4_encodeSequence(&ip, &op, &anchor, ml2, ref2, oend)) return 0;
continue;
}
@@ -651,7 +656,7 @@ _Search3:
}
}
- LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);
+ if (LZ4_encodeSequence(&ip, &op, &anchor, ml, ref, oend)) return 0;
ip = start3;
ref = ref3;
ml = ml3;
@@ -690,7 +695,7 @@ _Search3:
ml = (int)(start2 - ip);
}
}
- LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);
+ if (LZ4_encodeSequence(&ip, &op, &anchor, ml, ref, oend)) return 0;
ip = start2;
ref = ref2;
@@ -707,6 +712,7 @@ _Search3:
// Encode Last Literals
{
int lastRun = (int)(iend - anchor);
+ if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0; // Check output limit
if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
else *op++ = (BYTE)(lastRun<<ML_BITS);
memcpy(op, anchor, iend - anchor);
@@ -718,15 +724,23 @@ _Search3:
}
-int LZ4_compressHC(const char* source,
+int LZ4_compressHC_limitedOutput(const char* source,
char* dest,
- int isize)
+ int inputSize,
+ int maxOutputSize)
{
void* ctx = LZ4HC_Create((const BYTE*)source);
- int result = LZ4_compressHCCtx(ctx, source, dest, isize);
+ int result = LZ4_compressHCCtx(ctx, source, dest, inputSize, maxOutputSize);
LZ4HC_Free (&ctx);
return result;
}
+int LZ4_compressHC(const char* source,
+ char* dest,
+ int inputSize)
+{
+ return LZ4_compressHC_limitedOutput(source, dest, inputSize, LZ4_compressBound(inputSize)+1);
+}
+
diff --git a/lz4hc.h b/lz4hc.h
index 30b0063..0e4ab44 100644
--- a/lz4hc.h
+++ b/lz4hc.h
@@ -39,7 +39,7 @@ extern "C" {
#endif
-int LZ4_compressHC (const char* source, char* dest, int isize);
+int LZ4_compressHC (const char* source, char* dest, int inputSize);
/*
LZ4_compressHC :
@@ -49,6 +49,20 @@ LZ4_compressHC :
Worst case size evaluation is provided by function LZ4_compressBound() (see "lz4.h")
*/
+int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
+
+/*
+LZ4_compress_limitedOutput() :
+ Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
+ If it cannot achieve it, compression will stop, and result of the function will be zero.
+ This function never writes outside of provided output buffer.
+
+ inputSize : Max supported value is ~1.9GB
+ maxOutputSize : is maximum allowed size into the destination buffer (which must be already allocated)
+ return : the number of output bytes written in buffer 'dest'
+ or 0 if the compression fails
+*/
+
/* Note :
Decompression functions are provided within regular LZ4 source code (see "lz4.h") (BSD license)
diff --git a/xxhash.c b/xxhash.c
index 95a7955..32b6994 100644
--- a/xxhash.c
+++ b/xxhash.c
@@ -1,6 +1,6 @@
/*
xxHash - Fast Hash algorithm
- Copyright (C) 2012, Yann Collet.
+ Copyright (C) 2012-2013, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
@@ -135,7 +135,7 @@ static inline unsigned int XXH_swap32 (unsigned int x) {
unsigned int XXH32(const void* input, int len, unsigned int seed)
{
-#if 1
+#if 0
// Simple version, good for code maintenance, but unfortunately slow for small inputs
void* state = XXH32_init(seed);
XXH32_update(state, input, len);
diff --git a/xxhash.h b/xxhash.h
index a1d9dee..c525e0b 100644
--- a/xxhash.h
+++ b/xxhash.h
@@ -1,7 +1,7 @@
/*
xxHash - Fast Hash algorithm
Header File
- Copyright (C) 2012, Yann Collet.
+ Copyright (C) 2012-2013, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without