diff options
author | Yann Collet <yann.collet.73@gmail.com> | 2014-06-10 05:10:08 (GMT) |
---|---|---|
committer | Yann Collet <yann.collet.73@gmail.com> | 2014-06-10 05:10:08 (GMT) |
commit | 661e4ddb78ce89d5de3ad0824e6abb161044aa06 (patch) | |
tree | b09695f94c4f48b68a2cae4369b2c4bb266d0fa5 | |
parent | f0e6bf45ca509851b51992ecf28b1415c2367440 (diff) | |
download | lz4-661e4ddb78ce89d5de3ad0824e6abb161044aa06.zip lz4-661e4ddb78ce89d5de3ad0824e6abb161044aa06.tar.gz lz4-661e4ddb78ce89d5de3ad0824e6abb161044aa06.tar.bz2 |
lz4io : reduced memory usage in streaming mode
-rwxr-xr-x | lz4.c | 55 |
-rw-r--r-- | programs/Makefile | 12 |
-rw-r--r-- | programs/datagen.c | 44 |
-rw-r--r-- | programs/lz4io.c | 16 |
4 files changed, 84 insertions, 43 deletions
@@ -472,40 +472,45 @@ static int LZ4_compress_generic( /* Main Loop */ for ( ; ; ) { - int searchMatchNb = (1U << skipStrength) + 3; const BYTE* forwardIp = ip; const BYTE* ref; BYTE* token; + { + int step=1; + int searchMatchNb = (1U << skipStrength) + 3; - /* Find a match */ - do { - int step = searchMatchNb++ >> skipStrength; - U32 h = forwardH; - ip = forwardIp; - forwardIp += step; + /* Find a match */ + do { + U32 h = forwardH; + ip = forwardIp; + forwardIp += step; + step = searchMatchNb++ >> skipStrength; + if (unlikely (step>8)) step=8; // slows down uncompressible data; required for valid forwardIp - ref = LZ4_getPositionOnHash(h, ctx, tableType, base); - if (dict==usingExtDict) - { - if (ref<(const BYTE*)source) - { - refDelta = dictDelta; - lowLimit = dictionary; - } - else + if (unlikely(ip > mflimit)) goto _last_literals; + + ref = LZ4_getPositionOnHash(h, ctx, tableType, base); + if (dict==usingExtDict) { - refDelta = 0; - lowLimit = (const BYTE*)source; + if (ref<(const BYTE*)source) + { + refDelta = dictDelta; + lowLimit = dictionary; + } + else + { + refDelta = 0; + lowLimit = (const BYTE*)source; + } } - } - if (unlikely(ip > mflimit)) goto _last_literals; - forwardH = LZ4_hashPosition(forwardIp, tableType); - LZ4_putPositionOnHash(ip, h, ctx, tableType, base); + forwardH = LZ4_hashPosition(forwardIp, tableType); + LZ4_putPositionOnHash(ip, h, ctx, tableType, base); - } while ( ((tableType==byU16)? 0 : (ref + MAX_DISTANCE < ip)) || (A32(ref+refDelta) != A32(ip)) ); + } while ( ((tableType==byU16)? 
0 : (ref + MAX_DISTANCE < ip)) || (A32(ref+refDelta) != A32(ip)) ); + } /* Catch up */ - while ((ip>anchor) && (ref+refDelta > lowLimit) && (unlikely(ip[-1]==ref[refDelta-1]))) { ip--; ref--; } // refDelta costs some performance + while ((ip>anchor) && (ref+refDelta > lowLimit) && (unlikely(ip[-1]==ref[refDelta-1]))) { ip--; ref--; } { /* Encode Literal length */ @@ -558,7 +563,7 @@ _next_match: if ((outputLimited) && (unlikely(op + (1 + LASTLITERALS) + (matchLength>>8) > olimit))) return 0; /* Check output limit */ *token += ML_MASK; matchLength -= ML_MASK; - for (; matchLength > 509 ; matchLength-=510) { *op++ = 255; *op++ = 255; } + for (; matchLength >= 510 ; matchLength-=510) { *op++ = 255; *op++ = 255; } if (matchLength >= 255) { matchLength-=255; *op++ = 255; } *op++ = (BYTE)matchLength; } diff --git a/programs/Makefile b/programs/Makefile index 53e4eb2..9fb6cd9 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -133,15 +133,17 @@ test-32: test-lz4 test-lz4c32 test-fullbench32 test-fuzzer32 test-mem32 test-lz4: lz4 datagen ./datagen | ./lz4 | ./lz4 -vdq > $(VOID) - ./datagen -g256MB | ./lz4 -B4D | ./lz4 -vdq > $(VOID) - ./datagen -g6GB | ./lz4 -vqB5D | ./lz4 -vdq > $(VOID) + ./datagen -g256MB | ./lz4 -vqB4D | ./lz4 -vdq > $(VOID) + ./datagen -g6GB | ./lz4 -vqB5D | ./lz4 -vdq > $(VOID) test-lz4c: lz4c datagen -test-lz4c32: lz4c32 datagen +test-lz4c32: lz4c32 lz4 datagen ./datagen | ./lz4c32 | ./lz4c32 -vdq > $(VOID) - ./datagen -g256MB | ./lz4c32 -B4D | ./lz4c32 -vdq > $(VOID) - ./datagen -g6GB | ./lz4c32 -vqB5D | ./lz4c32 -vdq > $(VOID) + ./datagen | ./lz4c32 | ./lz4 -vdq > $(VOID) + ./datagen -g256MB | ./lz4c32 -vqB4D | ./lz4c32 -vdq > $(VOID) + ./datagen -g256MB | ./lz4c32 -vqB4D | ./lz4 -vdq > $(VOID) + ./datagen -g6GB | ./lz4c32 -vqB5D | ./lz4c32 -vdq > $(VOID) test-fullbench: fullbench ./fullbench --no-prompt $(BENCH_NB) $(TEST_FILES) diff --git a/programs/datagen.c b/programs/datagen.c index 05eb7f0..0109d54 100644 --- a/programs/datagen.c +++ 
b/programs/datagen.c @@ -109,30 +109,58 @@ static unsigned int CDG_rand(U32* src) #define CDG_RANDCHAR (((CDG_rand(seed) >> 9) & 63) + '0') static void CDG_generate(U64 size, U32* seed, double proba) { - BYTE buff[128 KB + 1]; + BYTE fullbuff[32 KB + 128 KB + 1]; + BYTE* buff = fullbuff + 32 KB; U64 total=0; U32 P32 = (U32)(32768 * proba); U32 pos=0; U32 genBlockSize = 128 KB; + // Build initial prefix + while (pos<32 KB) + { + // Select : Literal (char) or Match (within 32K) + if (CDG_RAND15BITS < P32) + { + // Copy (within 64K) + U32 d; + int ref; + int length = CDG_RANDLENGTH + 4; + U32 offset = CDG_RAND15BITS + 1; + if (offset > pos) offset = pos; + ref = pos - offset; + d = pos + length; + while (pos < d) fullbuff[pos++] = fullbuff[ref++]; + } + else + { + // Literal (noise) + U32 d; + int length = CDG_RANDLENGTH; + d = pos + length; + while (pos < d) fullbuff[pos++] = CDG_RANDCHAR; + } + } + + // Generate compressible data + pos = 0; while (total < size) { if (size-total < 128 KB) genBlockSize = (U32)(size-total); total += genBlockSize; buff[genBlockSize] = 0; - *buff = CDG_RANDCHAR; - pos = 1; + pos = 0; while (pos<genBlockSize) { // Select : Literal (char) or Match (within 32K) if (CDG_RAND15BITS < P32) { // Copy (within 64K) - U32 ref, d; + int ref; + U32 d; int length = CDG_RANDLENGTH + 4; U32 offset = CDG_RAND15BITS + 1; - if (offset > pos) offset = pos; - if (pos + length > 128 KB ) length = 128 KB - pos; + if (pos + length > genBlockSize ) length = genBlockSize - pos; ref = pos - offset; d = pos + length; while (pos < d) buff[pos++] = buff[ref++]; @@ -142,7 +170,7 @@ static void CDG_generate(U64 size, U32* seed, double proba) // Literal (noise) U32 d; int length = CDG_RANDLENGTH; - if (pos + length > 128 KB) length = 128 KB - pos; + if (pos + length > genBlockSize) length = genBlockSize - pos; d = pos + length; while (pos < d) buff[pos++] = CDG_RANDCHAR; } @@ -150,6 +178,8 @@ static void CDG_generate(U64 size, U32* seed, double proba) pos=0; for 
(;pos+512<=genBlockSize;pos+=512) printf("%512.512s", buff+pos); for (;pos<genBlockSize;pos++) printf("%c", buff[pos]); + // Regenerate prefix + memcpy(fullbuff, buff + 96 KB, 32 KB); } } diff --git a/programs/lz4io.c b/programs/lz4io.c index e035f01..a0dae68 100644 --- a/programs/lz4io.c +++ b/programs/lz4io.c @@ -370,11 +370,12 @@ static int compress_file_blockDependency2(char* input_filename, char* output_fil void* (*initFunction) (); int (*compressionFunction)(void*, const char*, char*, int, int); int (*freeFunction) (void*); + int (*nextBlockFunction) (void*, char*, int); void* ctx; unsigned long long filesize = 0; unsigned long long compressedfilesize = 0; unsigned int checkbits; - char* in_buff, *in_blockStart, *in_end; + char* in_buff, *in_blockStart; char* out_buff; FILE* finput; FILE* foutput; @@ -389,18 +390,18 @@ static int compress_file_blockDependency2(char* input_filename, char* output_fil initFunction = LZ4_createStream; compressionFunction = LZ4_compress_limitedOutput_continue; + nextBlockFunction = LZ4_moveDict; freeFunction = LZ4_free; get_fileHandle(input_filename, output_filename, &finput, &foutput); blockSize = LZ4S_GetBlockSize_FromBlockId (blockSizeId); // Allocate Memory - inputBufferSize = blockSize + 64 KB; - if (inputBufferSize < MIN_STREAM_BUFSIZE) inputBufferSize = MIN_STREAM_BUFSIZE; + inputBufferSize = 64 KB + blockSize; in_buff = (char*)malloc(inputBufferSize); out_buff = (char*)malloc(blockSize+CACHELINE); if (!in_buff || !out_buff) EXM_THROW(31, "Allocation error : not enough memory"); - in_blockStart = in_buff; in_end = in_buff + inputBufferSize; + in_blockStart = in_buff + 64 KB; if (streamChecksum) streamChecksumState = XXH32_init(LZ4S_CHECKSUM_SEED); ctx = initFunction(); @@ -426,7 +427,6 @@ static int compress_file_blockDependency2(char* input_filename, char* output_fil unsigned int inSize; // Read Block - if ((in_blockStart+blockSize) > in_end) in_blockStart = in_buff; inSize = (unsigned int) fread(in_blockStart, (size_t)1, 
(size_t)blockSize, finput); if( inSize==0 ) break; // No more input : end of compression filesize += inSize; @@ -468,7 +468,11 @@ static int compress_file_blockDependency2(char* input_filename, char* output_fil if (sizeCheck!=(size_t)(4)) EXM_THROW(36, "Write error : cannot write block checksum"); } } - in_blockStart += inSize; + { + size_t sizeToMove = 64 KB; + if (inSize < 64 KB) sizeToMove = inSize; + nextBlockFunction(ctx, in_blockStart - sizeToMove, sizeToMove); + } } // End of Stream mark |