From 661e4ddb78ce89d5de3ad0824e6abb161044aa06 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 10 Jun 2014 06:10:08 +0100 Subject: lz4io : reduced memory usage in streaming mode --- lz4.c | 55 +++++++++++++++++++++++++++++------------------------- programs/Makefile | 12 +++++++----- programs/datagen.c | 44 ++++++++++++++++++++++++++++++++++++------- programs/lz4io.c | 16 ++++++++++------ 4 files changed, 84 insertions(+), 43 deletions(-) diff --git a/lz4.c b/lz4.c index 62e1f45..beff5a7 100755 --- a/lz4.c +++ b/lz4.c @@ -472,40 +472,45 @@ static int LZ4_compress_generic( /* Main Loop */ for ( ; ; ) { - int searchMatchNb = (1U << skipStrength) + 3; const BYTE* forwardIp = ip; const BYTE* ref; BYTE* token; + { + int step=1; + int searchMatchNb = (1U << skipStrength) + 3; - /* Find a match */ - do { - int step = searchMatchNb++ >> skipStrength; - U32 h = forwardH; - ip = forwardIp; - forwardIp += step; + /* Find a match */ + do { + U32 h = forwardH; + ip = forwardIp; + forwardIp += step; + step = searchMatchNb++ >> skipStrength; + if (unlikely (step>8)) step=8; // slows down uncompressible data; required for valid forwardIp - ref = LZ4_getPositionOnHash(h, ctx, tableType, base); - if (dict==usingExtDict) - { - if (ref<(const BYTE*)source) - { - refDelta = dictDelta; - lowLimit = dictionary; - } - else + if (unlikely(ip > mflimit)) goto _last_literals; + + ref = LZ4_getPositionOnHash(h, ctx, tableType, base); + if (dict==usingExtDict) { - refDelta = 0; - lowLimit = (const BYTE*)source; + if (ref<(const BYTE*)source) + { + refDelta = dictDelta; + lowLimit = dictionary; + } + else + { + refDelta = 0; + lowLimit = (const BYTE*)source; + } } - } - if (unlikely(ip > mflimit)) goto _last_literals; - forwardH = LZ4_hashPosition(forwardIp, tableType); - LZ4_putPositionOnHash(ip, h, ctx, tableType, base); + forwardH = LZ4_hashPosition(forwardIp, tableType); + LZ4_putPositionOnHash(ip, h, ctx, tableType, base); - } while ( ((tableType==byU16)? 0 : (ref + MAX_DISTANCE < ip)) || (A32(ref+refDelta) != A32(ip)) ); + } while ( ((tableType==byU16)? 0 : (ref + MAX_DISTANCE < ip)) || (A32(ref+refDelta) != A32(ip)) ); + } /* Catch up */ - while ((ip>anchor) && (ref+refDelta > lowLimit) && (unlikely(ip[-1]==ref[refDelta-1]))) { ip--; ref--; } // refDelta costs some performance + while ((ip>anchor) && (ref+refDelta > lowLimit) && (unlikely(ip[-1]==ref[refDelta-1]))) { ip--; ref--; } { /* Encode Literal length */ @@ -558,7 +563,7 @@ _next_match: if ((outputLimited) && (unlikely(op + (1 + LASTLITERALS) + (matchLength>>8) > olimit))) return 0; /* Check output limit */ *token += ML_MASK; matchLength -= ML_MASK; - for (; matchLength > 509 ; matchLength-=510) { *op++ = 255; *op++ = 255; } + for (; matchLength >= 510 ; matchLength-=510) { *op++ = 255; *op++ = 255; } if (matchLength >= 255) { matchLength-=255; *op++ = 255; } *op++ = (BYTE)matchLength; } diff --git a/programs/Makefile b/programs/Makefile index 53e4eb2..9fb6cd9 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -133,15 +133,17 @@ test-32: test-lz4 test-lz4c32 test-fullbench32 test-fuzzer32 test-mem32 test-lz4: lz4 datagen ./datagen | ./lz4 | ./lz4 -vdq > $(VOID) - ./datagen -g256MB | ./lz4 -B4D | ./lz4 -vdq > $(VOID) - ./datagen -g6GB | ./lz4 -vqB5D | ./lz4 -vdq > $(VOID) + ./datagen -g256MB | ./lz4 -vqB4D | ./lz4 -vdq > $(VOID) + ./datagen -g6GB | ./lz4 -vqB5D | ./lz4 -vdq > $(VOID) test-lz4c: lz4c datagen -test-lz4c32: lz4c32 datagen +test-lz4c32: lz4c32 lz4 datagen ./datagen | ./lz4c32 | ./lz4c32 -vdq > $(VOID) - ./datagen -g256MB | ./lz4c32 -B4D | ./lz4c32 -vdq > $(VOID) - ./datagen -g6GB | ./lz4c32 -vqB5D | ./lz4c32 -vdq > $(VOID) + ./datagen | ./lz4c32 | ./lz4 -vdq > $(VOID) + ./datagen -g256MB | ./lz4c32 -vqB4D | ./lz4c32 -vdq > $(VOID) + ./datagen -g256MB | ./lz4c32 -vqB4D | ./lz4 -vdq > $(VOID) + ./datagen -g6GB | ./lz4c32 -vqB5D | ./lz4c32 -vdq > $(VOID) test-fullbench: fullbench ./fullbench --no-prompt $(BENCH_NB) $(TEST_FILES) diff --git a/programs/datagen.c b/programs/datagen.c index 05eb7f0..0109d54 100644 --- a/programs/datagen.c +++ b/programs/datagen.c @@ -109,30 +109,58 @@ static unsigned int CDG_rand(U32* src) #define CDG_RANDCHAR (((CDG_rand(seed) >> 9) & 63) + '0') static void CDG_generate(U64 size, U32* seed, double proba) { - BYTE buff[128 KB + 1]; + BYTE fullbuff[32 KB + 128 KB + 1]; + BYTE* buff = fullbuff + 32 KB; U64 total=0; U32 P32 = (U32)(32768 * proba); U32 pos=0; U32 genBlockSize = 128 KB; + // Build initial prefix + while (pos<32 KB) + { + // Select : Literal (char) or Match (within 32K) + if (CDG_RAND15BITS < P32) + { + // Copy (within 64K) + U32 d; + int ref; + int length = CDG_RANDLENGTH + 4; + U32 offset = CDG_RAND15BITS + 1; + if (offset > pos) offset = pos; + ref = pos - offset; + d = pos + length; + while (pos < d) fullbuff[pos++] = fullbuff[ref++]; + } + else + { + // Literal (noise) + U32 d; + int length = CDG_RANDLENGTH; + d = pos + length; + while (pos < d) fullbuff[pos++] = CDG_RANDCHAR; + } + } + + // Generate compressible data + pos = 0; while (total < size) { if (size-total < 128 KB) genBlockSize = (U32)(size-total); total += genBlockSize; buff[genBlockSize] = 0; - *buff = CDG_RANDCHAR; - pos = 1; + pos = 0; while (pos pos) offset = pos; - if (pos + length > 128 KB ) length = 128 KB - pos; + if (pos + length > genBlockSize ) length = genBlockSize - pos; ref = pos - offset; d = pos + length; while (pos < d) buff[pos++] = buff[ref++]; @@ -142,7 +170,7 @@ static void CDG_generate(U64 size, U32* seed, double proba) // Literal (noise) U32 d; int length = CDG_RANDLENGTH; - if (pos + length > 128 KB) length = 128 KB - pos; + if (pos + length > genBlockSize) length = genBlockSize - pos; d = pos + length; while (pos < d) buff[pos++] = CDG_RANDCHAR; } @@ -150,6 +178,8 @@ static void CDG_generate(U64 size, U32* seed, double proba) pos=0; for (;pos+512<=genBlockSize;pos+=512) printf("%512.512s", buff+pos); for (;pos in_end) in_blockStart = in_buff; inSize = (unsigned int) fread(in_blockStart, (size_t)1, (size_t)blockSize, finput); if( inSize==0 ) break; // No more input : end of compression filesize += inSize; @@ -468,7 +468,11 @@ static int compress_file_blockDependency2(char* input_filename, char* output_fil if (sizeCheck!=(size_t)(4)) EXM_THROW(36, "Write error : cannot write block checksum"); } } - in_blockStart += inSize; + { + size_t sizeToMove = 64 KB; + if (inSize < 64 KB) sizeToMove = inSize; + nextBlockFunction(ctx, in_blockStart - sizeToMove, sizeToMove); + } } // End of Stream mark -- cgit v0.12