diff options
Diffstat (limited to 'lib/lz4opt.h')
-rw-r--r-- | lib/lz4opt.h | 185 |
1 files changed, 94 insertions, 91 deletions
diff --git a/lib/lz4opt.h b/lib/lz4opt.h index d1913fe..e9e54d8 100644 --- a/lib/lz4opt.h +++ b/lib/lz4opt.h @@ -1,6 +1,6 @@ /* lz4opt.h - Optimal Mode of LZ4 - Copyright (C) 2015-2016, Przemyslaw Skibinski <inikep@gmail.com> + Copyright (C) 2015-2017, Przemyslaw Skibinski <inikep@gmail.com> Note : this file is intended to be included within lz4hc.c BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -36,14 +36,12 @@ #define LZ4_OPT_NUM (1<<12) -typedef struct -{ +typedef struct { int off; int len; } LZ4HC_match_t; -typedef struct -{ +typedef struct { int price; int off; int mlen; @@ -51,11 +49,12 @@ typedef struct } LZ4HC_optimal_t; -/* price in bits */ +/* price in bytes */ FORCE_INLINE size_t LZ4HC_literalsPrice(size_t litlen) { - size_t price = 8*litlen; - if (litlen >= (size_t)RUN_MASK) price+=8*(1+(litlen-RUN_MASK)/255); + size_t price = litlen; + if (litlen >= (size_t)RUN_MASK) + price += 1 + (litlen-RUN_MASK)/255; return price; } @@ -63,12 +62,12 @@ FORCE_INLINE size_t LZ4HC_literalsPrice(size_t litlen) /* requires mlen >= MINMATCH */ FORCE_INLINE size_t LZ4HC_sequencePrice(size_t litlen, size_t mlen) { - size_t price = 16 + 8; /* 16-bit offset + token */ + size_t price = 2 + 1; /* 16-bit offset + token */ price += LZ4HC_literalsPrice(litlen); - mlen -= MINMATCH; - if (mlen >= (size_t)ML_MASK) price+=8*(1+(mlen-ML_MASK)/255); + if (mlen >= (size_t)(ML_MASK+MINMATCH)) + price+= 1 + (mlen-(ML_MASK+MINMATCH))/255; return price; } @@ -123,6 +122,8 @@ FORCE_INLINE int LZ4HC_BinTree_InsertAndGetAllMatches ( matchLength = LZ4_count(ip, match, vLimit); if ((ip+matchLength == vLimit) && (vLimit < iHighLimit)) matchLength += LZ4_count(ip+matchLength, base+dictLimit, iHighLimit); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ } if (matchLength > best_mlen) { @@ -141,6 +142,8 @@ FORCE_INLINE int LZ4HC_BinTree_InsertAndGetAllMatches ( if (ip+matchLength >= iHighLimit) /* equal : no way to know if inf or sup */ break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */ + DEBUGLOG(6, "ip :%016llX", (U64)ip); + DEBUGLOG(6, "match:%016llX", (U64)match); if (*(ip+matchLength) < *(match+matchLength)) { *ptr0 = delta0; ptr0 = &DELTANEXTMAXD(matchIndex*2); @@ -192,13 +195,13 @@ FORCE_INLINE int LZ4HC_BinTree_GetAllMatches ( } -#define SET_PRICE(pos, mlen, offset, litlen, price) \ +#define SET_PRICE(pos, ml, offset, ll, cost) \ { \ while (last_pos < pos) { opt[last_pos+1].price = 1<<30; last_pos++; } \ - opt[pos].mlen = (int)mlen; \ + opt[pos].mlen = (int)ml; \ opt[pos].off = (int)offset; \ - opt[pos].litlen = (int)litlen; \ - opt[pos].price = (int)price; \ + opt[pos].litlen = (int)ll; \ + opt[pos].price = (int)cost; \ } @@ -209,15 +212,12 @@ static int LZ4HC_compress_optimal ( int inputSize, int maxOutputSize, limitedOutput_directive limit, - const size_t sufficient_len, + size_t sufficient_len, const int fullUpdate ) { - LZ4HC_optimal_t opt[LZ4_OPT_NUM + 1]; + LZ4HC_optimal_t opt[LZ4_OPT_NUM + 1]; /* this uses a bit too much stack memory to my taste ... */ LZ4HC_match_t matches[LZ4_OPT_NUM + 1]; - const BYTE *inr = NULL; - size_t res, cur, cur2; - size_t i, llen, litlen, mlen, best_mlen, price, offset, best_off, match_num, last_pos; const BYTE* ip = (const BYTE*) source; const BYTE* anchor = ip; @@ -228,18 +228,23 @@ static int LZ4HC_compress_optimal ( BYTE* const oend = op + maxOutputSize; /* init */ + DEBUGLOG(5, "LZ4HC_compress_optimal"); + if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1; ctx->end += inputSize; ip++; /* Main Loop */ while (ip < mflimit) { - memset(opt, 0, sizeof(LZ4HC_optimal_t)); - last_pos = 0; - llen = ip - anchor; + size_t const llen = ip - anchor; + size_t last_pos = 0; + size_t match_num, cur, best_mlen, best_off; + memset(opt, 0, sizeof(LZ4HC_optimal_t)); /* memset only the first one */ + match_num = LZ4HC_BinTree_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches, fullUpdate); if (!match_num) { ip++; continue; } if ((size_t)matches[match_num-1].len > sufficient_len) { + /* good enough solution : immediate encoding */ best_mlen = matches[match_num-1].len; best_off = matches[match_num-1].off; cur = 0; @@ -248,45 +253,46 @@ static int LZ4HC_compress_optimal ( } /* set prices using matches at position = 0 */ - for (i = 0; i < match_num; i++) { - mlen = (i>0) ? (size_t)matches[i-1].len+1 : MINMATCH; - best_mlen = (matches[i].len < LZ4_OPT_NUM) ? matches[i].len : LZ4_OPT_NUM; - while (mlen <= best_mlen) { - litlen = 0; - price = LZ4HC_sequencePrice(llen + litlen, mlen) - LZ4HC_literalsPrice(llen); - SET_PRICE(mlen, mlen, matches[i].off, litlen, price); - mlen++; - } - } + { size_t matchNb; + for (matchNb = 0; matchNb < match_num; matchNb++) { + size_t mlen = (matchNb>0) ? (size_t)matches[matchNb-1].len+1 : MINMATCH; + best_mlen = matches[matchNb].len; /* necessarily < sufficient_len < LZ4_OPT_NUM */ + for ( ; mlen <= best_mlen ; mlen++) { + size_t const cost = LZ4HC_sequencePrice(llen, mlen) - LZ4HC_literalsPrice(llen); + SET_PRICE(mlen, mlen, matches[matchNb].off, 0, cost); /* updates last_pos and opt[pos] */ + } } } - if (last_pos < MINMATCH) { ip++; continue; } + if (last_pos < MINMATCH) { ip++; continue; } /* note : on clang at least, this test improves performance */ /* check further positions */ opt[0].mlen = opt[1].mlen = 1; for (cur = 1; cur <= last_pos; cur++) { - inr = ip + cur; - - if (opt[cur-1].mlen == 1) { - litlen = opt[cur-1].litlen + 1; - if (cur != litlen) { - price = opt[cur - litlen].price + LZ4HC_literalsPrice(litlen); + const BYTE* const curPtr = ip + cur; + + /* establish baseline price if cur is literal */ + { size_t price, litlen; + if (opt[cur-1].mlen == 1) { + /* no match at previous position */ + litlen = opt[cur-1].litlen + 1; + if (cur > litlen) { + price = opt[cur - litlen].price + LZ4HC_literalsPrice(litlen); + } else { + price = LZ4HC_literalsPrice(llen + litlen) - LZ4HC_literalsPrice(llen); + } } else { - price = LZ4HC_literalsPrice(llen + litlen) - LZ4HC_literalsPrice(llen); + litlen = 1; + price = opt[cur - 1].price + LZ4HC_literalsPrice(1); } - } else { - litlen = 1; - price = opt[cur - 1].price + LZ4HC_literalsPrice(litlen); - } - mlen = 1; - best_mlen = 0; - if (cur > last_pos || price < (size_t)opt[cur].price) - SET_PRICE(cur, mlen, best_mlen, litlen, price); + if (price < (size_t)opt[cur].price) + SET_PRICE(cur, 1 /*mlen*/, 0 /*off*/, litlen, price); /* note : increases last_pos */ + } - if (cur == last_pos || inr >= mflimit) break; + if (cur == last_pos || curPtr >= mflimit) break; - match_num = LZ4HC_BinTree_GetAllMatches(ctx, inr, matchlimit, MINMATCH-1, matches, fullUpdate); - if (match_num > 0 && (size_t)matches[match_num-1].len > sufficient_len) { + match_num = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); + if ((match_num > 0) && (size_t)matches[match_num-1].len > sufficient_len) { + /* immediate encoding */ best_mlen = matches[match_num-1].len; best_off = matches[match_num-1].off; last_pos = cur + 1; @@ -294,60 +300,57 @@ static int LZ4HC_compress_optimal ( } /* set prices using matches at position = cur */ - for (i = 0; i < match_num; i++) { - mlen = (i>0) ? (size_t)matches[i-1].len+1 : MINMATCH; - cur2 = cur; - best_mlen = (cur2 + matches[i].len < LZ4_OPT_NUM) ? (size_t)matches[i].len : LZ4_OPT_NUM - cur2; - - while (mlen <= best_mlen) { - if (opt[cur2].mlen == 1) { - litlen = opt[cur2].litlen; - - if (cur2 != litlen) - price = opt[cur2 - litlen].price + LZ4HC_sequencePrice(litlen, mlen); - else - price = LZ4HC_sequencePrice(llen + litlen, mlen) - LZ4HC_literalsPrice(llen); - } else { - litlen = 0; - price = opt[cur2].price + LZ4HC_sequencePrice(litlen, mlen); - } - - if (cur2 + mlen > last_pos || price < (size_t)opt[cur2 + mlen].price) { // || (((int)price == opt[cur2 + mlen].price) && (opt[cur2 + mlen-1].mlen == 1))) { - SET_PRICE(cur2 + mlen, mlen, matches[i].off, litlen, price); - } - mlen++; - } - } + { size_t matchNb; + for (matchNb = 0; matchNb < match_num; matchNb++) { + size_t ml = (matchNb>0) ? (size_t)matches[matchNb-1].len+1 : MINMATCH; + best_mlen = (cur + matches[matchNb].len < LZ4_OPT_NUM) ? + (size_t)matches[matchNb].len : LZ4_OPT_NUM - cur; + + for ( ; ml <= best_mlen ; ml++) { + size_t ll, price; + if (opt[cur].mlen == 1) { + ll = opt[cur].litlen; + if (cur > ll) + price = opt[cur - ll].price + LZ4HC_sequencePrice(ll, ml); + else + price = LZ4HC_sequencePrice(llen + ll, ml) - LZ4HC_literalsPrice(llen); + } else { + ll = 0; + price = opt[cur].price + LZ4HC_sequencePrice(0, ml); + } + + if (cur + ml > last_pos || price < (size_t)opt[cur + ml].price) { + SET_PRICE(cur + ml, ml, matches[matchNb].off, ll, price); + } } } } } /* for (cur = 1; cur <= last_pos; cur++) */ best_mlen = opt[last_pos].mlen; best_off = opt[last_pos].off; cur = last_pos - best_mlen; -encode: /* cur, last_pos, best_mlen, best_off have to be set */ +encode: /* cur, last_pos, best_mlen, best_off must be set */ opt[0].mlen = 1; - while (1) { - mlen = opt[cur].mlen; - offset = opt[cur].off; + while (1) { /* from end to beginning */ + size_t const ml = opt[cur].mlen; + int const offset = opt[cur].off; opt[cur].mlen = (int)best_mlen; opt[cur].off = (int)best_off; - best_mlen = mlen; + best_mlen = ml; best_off = offset; - if (mlen > cur) break; - cur -= mlen; + if (ml > cur) break; /* can this happen ? */ + cur -= ml; } + /* encode all recorded sequences */ cur = 0; while (cur < last_pos) { - mlen = opt[cur].mlen; - if (mlen == 1) { ip++; cur++; continue; } - offset = opt[cur].off; - cur += mlen; - - res = LZ4HC_encodeSequence(&ip, &op, &anchor, (int)mlen, ip - offset, limit, oend); - if (res) return 0; + int const ml = opt[cur].mlen; + int const offset = opt[cur].off; + if (ml == 1) { ip++; cur++; continue; } + cur += ml; + if ( LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ip - offset, limit, oend) ) return 0; } - } + } /* while (ip < mflimit) */ /* Encode Last Literals */ { int lastRun = (int)(iend - anchor); |