diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 8ba9435d..d378d7ab 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -1555,7 +1555,6 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, const BYTE* const ilimit = iend - 8; const BYTE* const base = ctx->base + ctx->dictLimit; - size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; const U32 maxSearches = 1 << ctx->params.searchLog; const U32 mls = ctx->params.searchLength; @@ -1565,6 +1564,10 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS; /* init */ + U32 rep[ZSTD_REP_NUM]; + for (int i=0; i matchLength) - matchLength = ml2, start = ip, offset=offsetFound; + matchLength = ml2, start = ip, offset=offsetFound + (ZSTD_REP_NUM - 1); } if (matchLength < MINMATCH) { @@ -1598,12 +1605,12 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, if (depth>=1) while (ip= ZSTD_REP_NUM) && (MEM_read32(ip) == MEM_read32(ip - rep[0]))) { + size_t mlRep = ZSTD_count(ip+MINMATCH, ip+MINMATCH-rep[0], iend) + MINMATCH; int gain2 = (int)(mlRep * 3); int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1); if ((mlRep >= MINMATCH) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; + matchLength = mlRep, offset = 0 + (ZSTD_REP_NUM - 1), start = ip; } { size_t offset2=999999; @@ -1611,19 +1618,19 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4); if ((ml2 >= MINMATCH) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; + matchLength = ml2, offset = offset2 + (ZSTD_REP_NUM - 1), start = ip; continue; /* search a better one */ } } /* let's find an even better one */ if ((depth==2) && (ip= ZSTD_REP_NUM) && (MEM_read32(ip) == MEM_read32(ip - rep[0]))) { + size_t ml2 = ZSTD_count(ip+MINMATCH, ip+MINMATCH-rep[0], iend) + MINMATCH; int gain2 = (int)(ml2 * 4); int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1); if ((ml2 >= MINMATCH) && (gain2 > gain1)) - matchLength = ml2, offset = 0, start = ip; + matchLength = ml2, offset = 0 + (ZSTD_REP_NUM - 1), start = ip; } { size_t offset2=999999; @@ -1631,35 +1638,37 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7); if ((ml2 >= MINMATCH) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; + matchLength = ml2, offset = offset2 + (ZSTD_REP_NUM - 1), start = ip; continue; } } } break; /* nothing found : store previous solution */ } /* catch up */ - if (offset) { - while ((start>anchor) && (start>base+offset) && (start[-1] == start[-1-offset])) /* only search for offset within prefix */ + if (offset >= ZSTD_REP_NUM) { + while ((start>anchor) && (start>base+offset-(ZSTD_REP_NUM-1)) && (start[-1] == start[-1-offset+(ZSTD_REP_NUM-1)])) /* only search for offset within prefix */ { start--; matchLength++; } - offset_2 = offset_1; offset_1 = offset; } /* store sequence */ _storeSequence: { + if (offset >= ZSTD_REP_NUM) { + rep[1] = rep[0]; rep[0] = offset - (ZSTD_REP_NUM - 1); + } size_t litLength = start - anchor; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset + ZSTD_REP_NUM - 1, matchLength-MINMATCH); + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); anchor = ip = start + matchLength; } /* check immediate repcode */ while ( (ip <= ilimit) - && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { + && (MEM_read32(ip) == MEM_read32(ip - rep[1])) ) { /* store sequence */ - matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend); - offset = offset_2; - offset_2 = offset_1; - offset_1 = offset; + matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-rep[1], iend); + offset = rep[1]; + rep[1] = rep[0]; + rep[0] = offset; ZSTD_storeSeq(seqStorePtr, 0, anchor, 0 + ZSTD_REP_NUM - 1, matchLength); ip += matchLength+MINMATCH; anchor = ip; @@ -1719,7 +1728,6 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const dictStart = dictBase + ctx->lowLimit; - size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; const U32 maxSearches = 1 << ctx->params.searchLog; const U32 mls = ctx->params.searchLength; @@ -1729,19 +1737,22 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS; /* init */ + U32 rep[ZSTD_REP_NUM]; + for (int i=0; i= 3) /* intentional overflow */ @@ -1757,7 +1768,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, size_t offsetFound = 99999999; size_t ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); if (ml2 > matchLength) - matchLength = ml2, start = ip, offset=offsetFound; + matchLength = ml2, start = ip, offset=offsetFound + (ZSTD_REP_NUM - 1); } if (matchLength < MINMATCH) { @@ -1771,8 +1782,8 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, ip ++; current++; /* check repCode */ - if (offset) { - const U32 repIndex = (U32)(current - offset_1); + if (offset >= ZSTD_REP_NUM) { + const U32 repIndex = (U32)(current - rep[0]); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ @@ -1783,7 +1794,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, int gain2 = (int)(repLength * 3); int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1); if ((repLength >= MINMATCH) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; + matchLength = repLength, offset = 0 + (ZSTD_REP_NUM - 1), start = ip; } } /* search match, depth 1 */ @@ -1793,7 +1804,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4); if ((ml2 >= MINMATCH) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; + matchLength = ml2, offset = offset2 + (ZSTD_REP_NUM - 1), start = ip; continue; /* search a better one */ } } @@ -1802,8 +1813,8 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, ip ++; current++; /* check repCode */ - if (offset) { - const U32 repIndex = (U32)(current - offset_1); + if (offset >= ZSTD_REP_NUM) { + const U32 repIndex = (U32)(current - rep[0]); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ @@ -1814,7 +1825,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, int gain2 = (int)(repLength * 4); int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1); if ((repLength >= MINMATCH) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; + matchLength = repLength, offset = 0 + (ZSTD_REP_NUM - 1), start = ip; } } /* search match, depth 2 */ @@ -1824,32 +1835,32 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7); if ((ml2 >= MINMATCH) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; + matchLength = ml2, offset = offset2 + (ZSTD_REP_NUM - 1), start = ip; continue; } } } break; /* nothing found : store previous solution */ } /* catch up */ - if (offset) { - U32 matchIndex = (U32)((start-base) - offset); + if (offset >= ZSTD_REP_NUM) { + U32 matchIndex = (U32)((start-base) - (offset - (ZSTD_REP_NUM - 1))); const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ - offset_2 = offset_1; offset_1 = offset; + rep[1] = rep[0]; rep[0] = offset - (ZSTD_REP_NUM - 1); } /* store sequence */ _storeSequence: { size_t litLength = start - anchor; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset + ZSTD_REP_NUM - 1, matchLength-MINMATCH); + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); anchor = ip = start + matchLength; } /* check immediate repcode */ while (ip <= ilimit) { - const U32 repIndex = (U32)((ip-base) - offset_2); + const U32 repIndex = (U32)((ip-base) - rep[1]); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ @@ -1857,7 +1868,7 @@ _storeSequence: /* repcode detected we should take it */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; matchLength = ZSTD_count_2segments(ip+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH; - offset = offset_2; offset_2 = offset_1; offset_1 = offset; /* swap offset history */ + offset = rep[1]; rep[1] = rep[0]; rep[0] = offset; /* swap offset history */ ZSTD_storeSeq(seqStorePtr, 0, anchor, 0 + ZSTD_REP_NUM - 1, matchLength-MINMATCH); ip += matchLength; anchor = ip; diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 9bbbd5c1..fbe419de 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -615,7 +615,7 @@ typedef struct { FSE_DState_t stateLL; FSE_DState_t stateOffb; FSE_DState_t stateML; - size_t prevOffset; + size_t prevOffset[ZSTD_REP_NUM]; const BYTE* dumps; const BYTE* dumpsEnd; } seqState_t; @@ -633,7 +633,7 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) /* Literal length */ litLength = FSE_peakSymbol(&(seqState->stateLL)); - prevOffset = litLength ? seq->offset : seqState->prevOffset; + prevOffset = litLength ? seq->offset : seqState->prevOffset[0]; if (litLength == MaxLL) { U32 add = *dumps++; if (add < 255) litLength += add; @@ -656,9 +656,45 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) if (offsetCode==0) nbBits = 0; /* cmove */ offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits); if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); - if (offsetCode==0) offset = prevOffset; /* repcode, cmove */ - else offset -= ZSTD_REP_NUM - 1; - if (offsetCode | !litLength) seqState->prevOffset = seq->offset; /* cmove */ + if (offsetCode==0) { + +/* + if (offset < LZMAX_NUM_REPS) + { + #if ZSTD_REP_NUM > 1 + uint32_t pos = offset; + offset = reps[pos]; + if (pos != 0) + { + if (pos != 1) + { + if (pos == 3) + reps[3] = reps[2]; + reps[2] = reps[1]; + } + reps[1] = reps[0]; + reps[0] = offset; + } + #else + offset = reps[0]; + #endif + } + else + { + #if ZSTD_REP_NUM > 1 + offset -= LZMAX_NUM_REPS - 1; + reps[3] = reps[2]; + reps[2] = reps[1]; + reps[1] = reps[0]; + #endif + reps[0] = offset; + } +*/ + offset = prevOffset; /* repcode, cmove */ + } else { + if (!litLength) seqState->prevOffset[0] = seq->offset; /* cmove */ + offset -= ZSTD_REP_NUM - 1; + } FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* update */ // printf("offsetCode=%d nbBits=%d offset=%d\n", offsetCode, nbBits, (int)offset); fflush(stdout); } @@ -812,7 +848,8 @@ static size_t ZSTD_decompressSequences( sequence.offset = REPCODE_STARTVALUE; seqState.dumps = dumps; seqState.dumpsEnd = dumps + dumpsLength; - seqState.prevOffset = REPCODE_STARTVALUE; + for (int i=0; i