turn off: last match must start at a minimum distance of 8 from oend

This commit is contained in:
inikep 2016-02-02 12:36:09 +01:00
parent ef2706ed12
commit 59140c5480
3 changed files with 85 additions and 25 deletions

View File

@ -2302,8 +2302,8 @@ static const ZSTD_parameters ZSTD_defaultParameters[4][ZSTD_MAX_CLEVEL+1] = {
{ 0, 25, 24, 23, 5, 5, ZSTD_btlazy2 }, /* level 18 */ { 0, 25, 24, 23, 5, 5, ZSTD_btlazy2 }, /* level 18 */
{ 0, 25, 26, 23, 5, 5, ZSTD_btlazy2 }, /* level 19 */ { 0, 25, 26, 23, 5, 5, ZSTD_btlazy2 }, /* level 19 */
{ 0, 26, 27, 25, 9, 5, ZSTD_btlazy2 }, /* level 20 */ { 0, 26, 27, 25, 9, 5, ZSTD_btlazy2 }, /* level 20 */
{ 0, 22, 20, 22, 4, 4, ZSTD_lazy2 }, /* level 11 + L=4 */ // 41902762 { 0, 22, 20, 22, 4, 4, ZSTD_lazy2 }, /* level 11 + L=4 */ // 41902762 42087013
{ 0, 23, 21, 22, 5, 4, ZSTD_btlazy2 }, /* level 16 + L=4 */ // 41233150 { 0, 23, 21, 22, 5, 4, ZSTD_btlazy2 }, /* level 16 + L=4 */ // 41233150 41560211
{ 0, 23, 21, 22, 5, 4, ZSTD_opt }, /* level 23 */ { 0, 23, 21, 22, 5, 4, ZSTD_opt }, /* level 23 */
{ 0, 23, 21, 22, 5, 4, ZSTD_opt_bt }, /* level 23 */ { 0, 23, 21, 22, 5, 4, ZSTD_opt_bt }, /* level 23 */
}, },
@ -2402,6 +2402,7 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSizeHint)
int tableID = ((srcSizeHint-1) <= 256 KB) + ((srcSizeHint-1) <= 128 KB) + ((srcSizeHint-1) <= 16 KB); /* intentional underflow for srcSizeHint == 0 */ int tableID = ((srcSizeHint-1) <= 256 KB) + ((srcSizeHint-1) <= 128 KB) + ((srcSizeHint-1) <= 16 KB); /* intentional underflow for srcSizeHint == 0 */
if (compressionLevel<=0) compressionLevel = 1; if (compressionLevel<=0) compressionLevel = 1;
if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL; if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
tableID=0;
result = ZSTD_defaultParameters[tableID][compressionLevel]; result = ZSTD_defaultParameters[tableID][compressionLevel];
result.srcSize = srcSizeHint; result.srcSize = srcSizeHint;
return result; return result;

View File

@ -697,7 +697,7 @@ FORCE_INLINE size_t ZSTD_execSequence(BYTE* op,
const BYTE* match = oLitEnd - sequence.offset; const BYTE* match = oLitEnd - sequence.offset;
/* check */ /* check */
if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */ // if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */
if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */ if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
if (litEnd > litLimit_8) return ERROR(corruption_detected); /* risk read beyond lit buffer */ if (litEnd > litLimit_8) return ERROR(corruption_detected); /* risk read beyond lit buffer */

View File

@ -260,12 +260,31 @@ FORCE_INLINE size_t ZSTD_HcGetAllMatches_selectMLS (
} }
void print_hex_text(uint8_t* buf, int bufsize, int endline)
{
int i, j;
for (i=0; i<bufsize; i+=16)
{
printf("%02d:", i);
for (j=0; j<16; j++)
if (i+j<bufsize)
printf("%02x,",buf[i+j]);
else
printf(" ");
printf(" ");
for (j=0; i+j<bufsize && j<16; j++)
printf("%c",buf[i+j]>32?buf[i+j]:'.');
printf("\n");
}
if (endline) printf("\n");
}
/* ******************************* /* *******************************
* Optimal parser OLD * Optimal parser OLD
*********************************/ *********************************/
FORCE_INLINE FORCE_INLINE
void ZSTD_compressBlock_opt_generic_OLD(ZSTD_CCtx* ctx, void ZSTD_compressBlock_opt2_generic(ZSTD_CCtx* ctx,
const void* src, size_t srcSize, const void* src, size_t srcSize,
const U32 searchMethod, const U32 depth) const U32 searchMethod, const U32 depth)
{ {
@ -301,7 +320,7 @@ void ZSTD_compressBlock_opt_generic_OLD(ZSTD_CCtx* ctx,
size_t offset=0; size_t offset=0;
const BYTE* start=ip+1; const BYTE* start=ip+1;
#if 1 #ifdef ZSTD_USE_REP
/* check repCode */ /* check repCode */
if (MEM_read32(start) == MEM_read32(start - offset_1)) { if (MEM_read32(start) == MEM_read32(start - offset_1)) {
/* repcode : we take it */ /* repcode : we take it */
@ -312,16 +331,16 @@ void ZSTD_compressBlock_opt_generic_OLD(ZSTD_CCtx* ctx,
{ {
/* first search (depth 0) */ /* first search (depth 0) */
#if 0 #if 1
size_t offsetFound = 99999999; size_t offsetFound = 99999999;
size_t ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); size_t ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
if (ml2 > matchLength) if (ml2 > matchLength)
matchLength = ml2, start = ip, offset=offsetFound; start=ip, matchLength = ml2, offset=offsetFound;
#else #else
size_t mnum = getAllMatches(ctx, ip, iend, maxSearches, mls, matches); size_t mnum = getAllMatches(ctx, ip, iend, maxSearches, mls, matches);
if (mnum > 0) { if (mnum > 0) {
if (matches[mnum-1].len > matchLength) if (matches[mnum-1].len > matchLength)
matchLength = matches[mnum-1].len, start = ip, offset=matches[mnum-1].off; start=ip, matchLength = matches[mnum-1].len, offset=matches[mnum-1].off;
} }
#endif #endif
} }
@ -332,11 +351,12 @@ void ZSTD_compressBlock_opt_generic_OLD(ZSTD_CCtx* ctx,
continue; continue;
} }
#if 0 #if 1
/* let's try to find a better solution */ /* let's try to find a better solution */
if (depth>=1) if (depth>=1)
while (ip<ilimit) { while (ip<ilimit) {
ip ++; ip ++;
#ifdef ZSTD_USE_REP
if ((offset) && (MEM_read32(ip) == MEM_read32(ip - offset_1))) { if ((offset) && (MEM_read32(ip) == MEM_read32(ip - offset_1))) {
size_t mlRep = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_1, iend) + MINMATCH; size_t mlRep = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_1, iend) + MINMATCH;
int gain2 = (int)(mlRep * 3); int gain2 = (int)(mlRep * 3);
@ -344,6 +364,7 @@ void ZSTD_compressBlock_opt_generic_OLD(ZSTD_CCtx* ctx,
if ((mlRep >= MINMATCH) && (gain2 > gain1)) if ((mlRep >= MINMATCH) && (gain2 > gain1))
matchLength = mlRep, offset = 0, start = ip; matchLength = mlRep, offset = 0, start = ip;
} }
#endif
{ {
size_t offset2=999999; size_t offset2=999999;
size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
@ -369,11 +390,12 @@ void ZSTD_compressBlock_opt_generic_OLD(ZSTD_CCtx* ctx,
_storeSequence: _storeSequence:
{ {
size_t litLength = start - anchor; size_t litLength = start - anchor;
LZ5_LOG_ENCODE("%d/%d: ENCODE literals=%d off=%d mlen=%d\n", (int)(ip-istart), (int)(iend-istart), (int)(litLength), (int)(offset), (int)matchLength);
ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
anchor = ip = start + matchLength; anchor = ip = start + matchLength;
} }
#if 1 /* check immediate repcode */ #ifdef ZSTD_USE_REP /* check immediate repcode */
while ( (ip <= ilimit) while ( (ip <= ilimit)
&& (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
/* store sequence */ /* store sequence */
@ -392,6 +414,7 @@ _storeSequence:
/* Last Literals */ /* Last Literals */
{ {
size_t lastLLSize = iend - anchor; size_t lastLLSize = iend - anchor;
LZ5_LOG_ENCODE("%d/%d: ENCODE lastLLSize=%d\n", (int)(ip-istart), (int)(iend-istart), (int)(lastLLSize));
memcpy(seqStorePtr->lit, anchor, lastLLSize); memcpy(seqStorePtr->lit, anchor, lastLLSize);
seqStorePtr->lit += lastLLSize; seqStorePtr->lit += lastLLSize;
} }
@ -433,20 +456,25 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
const int sufficient_len = 128; //ctx->params.sufficientLength; const int sufficient_len = 128; //ctx->params.sufficientLength;
const int faster_get_matches = 0;//(ctx->params.strategy == ZSTD_opt); const int faster_get_matches = 0;//(ctx->params.strategy == ZSTD_opt);
// printf("orig_file="); print_hex_text(ip, srcSize, 0);
/* init */ /* init */
ZSTD_resetSeqStore(seqStorePtr); ZSTD_resetSeqStore(seqStorePtr);
if ((ip-base) < REPCODE_STARTVALUE) ip = base + REPCODE_STARTVALUE; if ((ip-base) < REPCODE_STARTVALUE) ip = base + REPCODE_STARTVALUE;
/* Match Loop */ /* Match Loop */
while (ip < ilimit) { while (ip < ilimit) {
size_t mlen=0; int mlen=0;
size_t best_mlen=0; int best_mlen=0;
size_t best_off=0; int best_off=0;
memset(opt, 0, sizeof(LZ5HC_optimal_t)); memset(opt, 0, sizeof(LZ5HC_optimal_t));
last_pos = 0; last_pos = 0;
llen = ip - anchor; llen = ip - anchor;
inr = ip; inr = ip;
#if 0
/* check repCode */ /* check repCode */
if (MEM_read32(ip+1) == MEM_read32(ip+1 - rep_1)) { if (MEM_read32(ip+1) == MEM_read32(ip+1 - rep_1)) {
/* repcode : we take it */ /* repcode : we take it */
@ -468,6 +496,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
} }
while (mlen >= MINMATCH);*/ while (mlen >= MINMATCH);*/
} }
#endif
// best_mlen = (last_pos) ? last_pos : MINMATCH; // best_mlen = (last_pos) ? last_pos : MINMATCH;
@ -514,8 +543,10 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
// printf("%d: last_pos=%d\n", (int)(ip - istart), (int)last_pos); // printf("%d: last_pos=%d\n", (int)(ip - istart), (int)last_pos);
opt[0].rep = opt[1].rep = rep_1; // opt[0].rep = opt[1].rep = rep_1;
opt[0].mlen = opt[1].mlen = 1; // opt[0].mlen = opt[1].mlen = 1;
opt[0].rep = rep_1;
opt[0].mlen = 1;
#if 1 #if 1
// check further positions // check further positions
@ -786,23 +817,46 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set
offset = opt[cur].off; offset = opt[cur].off;
cur += mlen; cur += mlen;
#if 0 #if 1
if (offset) if (offset)
{ {
size_t ml2 = ZSTD_count(start, start-offset, iend); size_t ml2 = ZSTD_count(ip, ip-offset, iend);
printf("%d: mlen=%d offset=%d\n", (int)(ip - istart), (int)mlen, (int)offset); // printf("%d: mlen=%d offset=%d\n", (int)(ip - istart), (int)mlen, (int)offset);
if (ml2 < mlen) if (ml2 < mlen && ml2 < MINMATCH)
{ {
printf("%d: start=%d iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - istart), (int)(start - ip), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); printf("ERROR %d: iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - istart), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2);
exit(0); exit(0);
} }
} }
else else
printf("%d: mlen=%d offset=%d\n", (int)(ip - istart), (int)mlen, (int)offset); printf("ERROR %d: mlen=%d offset=%d\n", (int)(ip - istart), (int)mlen, (int)offset);
if (ip < anchor)
{
printf("ERROR %d: ip < anchor iend=%d mlen=%d offset=%d\n", (int)(ip - istart), (int)(iend - ip), (int)mlen, (int)offset);
exit(0);
}
if (ip - offset < base)
{
printf("ERROR %d: ip - offset < istart iend=%d mlen=%d offset=%d\n", (int)(ip - istart), (int)(iend - ip), (int)mlen, (int)offset);
exit(0);
}
if (mlen < MINMATCH)
{
printf("ERROR %d: mlen < MINMATCH iend=%d mlen=%d offset=%d\n", (int)(ip - istart), (int)(iend - ip), (int)mlen, (int)offset);
exit(0);
}
if (ip + mlen > iend)
{
printf("ERROR %d: ip + mlen >= iend iend=%d mlen=%d offset=%d\n", (int)(ip - istart), (int)(iend - ip), (int)mlen, (int)offset);
exit(0);
}
#endif #endif
LZ5_LOG_ENCODE("%d: ENCODE literals=%d off=%d mlen=%d ", (int)(ip-istart), (int)(ip-anchor), (int)(offset), (int)mlen);
size_t litLength = ip - anchor; size_t litLength = ip - anchor;
LZ5_LOG_ENCODE("%d/%d: ENCODE literals=%d off=%d mlen=%d\n", (int)(ip-istart), (int)(iend-istart), (int)(litLength), (int)(offset), (int)mlen);
// printf("orig="); print_hex_text(ip, mlen, 0);
// printf("match="); print_hex_text(ip-offset, mlen, 0);
ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
anchor = ip = ip + mlen; anchor = ip = ip + mlen;
@ -813,7 +867,8 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set
} }
/* check immediate repcode */ #if 0
// check immediate repcode
while ( (ip <= ilimit) while ( (ip <= ilimit)
&& (MEM_read32(ip) == MEM_read32(ip - rep_2)) ) { && (MEM_read32(ip) == MEM_read32(ip - rep_2)) ) {
/* store sequence */ /* store sequence */
@ -824,12 +879,16 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, best_mlen); ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, best_mlen);
ip += best_mlen+MINMATCH; ip += best_mlen+MINMATCH;
anchor = ip; anchor = ip;
continue; /* faster when present ... (?) */ continue; // faster when present ... (?)
} } }
#endif
}
/* Last Literals */ /* Last Literals */
{ {
size_t lastLLSize = iend - anchor; size_t lastLLSize = iend - anchor;
LZ5_LOG_ENCODE("%d: lastLLSize literals=%d\n", (int)(ip-istart), (int)(lastLLSize));
memcpy(seqStorePtr->lit, anchor, lastLLSize); memcpy(seqStorePtr->lit, anchor, lastLLSize);
seqStorePtr->lit += lastLLSize; seqStorePtr->lit += lastLLSize;
} }