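Use a more generic skip formula

Replace the per-position `toSearch` flag with a price comparison: the match search at position `cur` is now skipped when `opt[cur+front].price <= opt[cur].price`, where `front` is 2 when `fullUpdate` is set and 1 otherwise. This improves speed.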
This commit is contained in:
Yann Collet 2017-11-02 18:54:18 -07:00
parent e06cb03c11
commit a1c5343d89
2 changed files with 5 additions and 14 deletions

View File

@ -369,7 +369,7 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0; U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0;
U32 const cost = 1 + llAdd + ll + 2 + mlAdd; U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
if (start==NULL) start = *anchor; /* only works for single segment */ if (start==NULL) start = *anchor; /* only works for single segment */
//g_debuglog_enable = (pos >= 112705) & (pos <= 112760); //g_debuglog_enable = (pos >= 2228) & (pos <= 2262);
DEBUGLOG(2, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u + %u", DEBUGLOG(2, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u + %u",
pos, pos,
(U32)(*ip - *anchor), matchLength, (U32)(*ip-match), (U32)(*ip - *anchor), matchLength, (U32)(*ip-match),

View File

@ -46,7 +46,6 @@ typedef struct {
int off; int off;
int mlen; int mlen;
int litlen; int litlen;
int toSearch;
} LZ4HC_optimal_t; } LZ4HC_optimal_t;
@ -244,6 +243,7 @@ static int LZ4HC_compress_optimal (
const BYTE* const matchlimit = iend - LASTLITERALS; const BYTE* const matchlimit = iend - LASTLITERALS;
BYTE* op = (BYTE*) dst; BYTE* op = (BYTE*) dst;
BYTE* const oend = op + dstCapacity; BYTE* const oend = op + dstCapacity;
int const front = fullUpdate ? 2 : 1;
/* init */ /* init */
DEBUGLOG(5, "LZ4HC_compress_optimal"); DEBUGLOG(5, "LZ4HC_compress_optimal");
@ -279,7 +279,6 @@ static int LZ4HC_compress_optimal (
opt[rPos].off = 0; opt[rPos].off = 0;
opt[rPos].litlen = llen + rPos; opt[rPos].litlen = llen + rPos;
opt[rPos].price = cost; opt[rPos].price = cost;
opt[rPos].toSearch = 1;
DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
rPos, cost, opt[rPos].litlen); rPos, cost, opt[rPos].litlen);
} } } }
@ -296,21 +295,16 @@ static int LZ4HC_compress_optimal (
opt[mlen].off = offset; opt[mlen].off = offset;
opt[mlen].litlen = llen; opt[mlen].litlen = llen;
opt[mlen].price = cost; opt[mlen].price = cost;
opt[mlen].toSearch = (((mlen - 18) % 255) == 0);
DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup", DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup",
mlen, cost, mlen); mlen, cost, mlen);
} } } } } }
last_match_pos = matches[nb_matches_initial-1].len; last_match_pos = matches[nb_matches_initial-1].len;
if (fullUpdate) opt[last_match_pos-2].toSearch = 1; /* 1 byte on calgary */
if (fullUpdate) opt[last_match_pos-1].toSearch = 1; /* 1 byte on calgary */
opt[last_match_pos].toSearch = 1;
{ int addLit; { int addLit;
for (addLit = 1; addLit <= 3; addLit ++) { for (addLit = 1; addLit <= 3; addLit ++) {
opt[last_match_pos+addLit].mlen = 1; /* literal */ opt[last_match_pos+addLit].mlen = 1; /* literal */
opt[last_match_pos+addLit].off = 0; opt[last_match_pos+addLit].off = 0;
opt[last_match_pos+addLit].litlen = addLit; opt[last_match_pos+addLit].litlen = addLit;
opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
opt[last_match_pos+addLit].toSearch = 1;
DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
} } } }
@ -321,7 +315,9 @@ static int LZ4HC_compress_optimal (
int nb_matches; int nb_matches;
if (curPtr >= mflimit) break; if (curPtr >= mflimit) break;
if (opt[cur].toSearch == 0) continue; DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u",
cur, opt[cur].price, opt[cur+1].price, cur+1);
if (opt[cur+front].price <= opt[cur].price) continue;
DEBUGLOG(7, "search at rPos:%u", cur); DEBUGLOG(7, "search at rPos:%u", cur);
//nb_matches = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate); //nb_matches = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate);
@ -388,19 +384,14 @@ static int LZ4HC_compress_optimal (
opt[pos].off = offset; opt[pos].off = offset;
opt[pos].litlen = ll; opt[pos].litlen = ll;
opt[pos].price = price; opt[pos].price = price;
opt[pos].toSearch = (((ml-18) % 255) == 0);
} } } } } } } }
/* complete following positions with literals */ /* complete following positions with literals */
if (fullUpdate) opt[last_match_pos-2].toSearch = 1; /* 2 bytes on enwik7 */
if (fullUpdate) opt[last_match_pos-1].toSearch = 1; /* 53 bytes on enwik7, 13 bytes on calgary */
opt[last_match_pos].toSearch = 1;
{ int addLit; { int addLit;
for (addLit = 1; addLit <= 3; addLit ++) { for (addLit = 1; addLit <= 3; addLit ++) {
opt[last_match_pos+addLit].mlen = 1; /* literal */ opt[last_match_pos+addLit].mlen = 1; /* literal */
opt[last_match_pos+addLit].off = 0; opt[last_match_pos+addLit].off = 0;
opt[last_match_pos+addLit].litlen = addLit; opt[last_match_pos+addLit].litlen = addLit;
opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
opt[last_match_pos+addLit].toSearch = 1;
DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
} } } }
} /* for (cur = 1; cur <= last_match_pos; cur++) */ } /* for (cur = 1; cur <= last_match_pos; cur++) */