Implemented repOffset "minus 1" on ll==0

This commit is contained in:
Yann Collet 2016-07-31 04:01:57 +02:00
parent 8f29e8e0e4
commit 917fe188f1
3 changed files with 26 additions and 25 deletions

View File

@ -453,7 +453,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
/* check repCode */
{ U32 i;
for (i=0; i<ZSTD_REP_CHECK; i++) {
for (i=(ip == anchor); i<ZSTD_REP_CHECK; i++) {
if ((rep[i]<(U32)(ip-prefixStart))
&& (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - rep[i], minMatch))) {
mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-rep[i], iend) + minMatch;
@ -462,7 +462,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
goto _storeSequence;
}
best_off = (i<=1 && ip == anchor) ? 1-i : i;
best_off = i - (ip == anchor);
do {
price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
if (mlen > last_pos || price < opt[mlen].price)
@ -544,9 +544,9 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
best_mlen = minMatch;
{ U32 i;
for (i=0; i<ZSTD_REP_CHECK; i++) {
for (i=(opt[cur].mlen != 1); i<ZSTD_REP_CHECK; i++) { /* check rep */
if ((opt[cur].rep[i]<(U32)(inr-prefixStart))
&& (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - opt[cur].rep[i], minMatch))) { /* check rep */
&& (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - opt[cur].rep[i], minMatch))) {
mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - opt[cur].rep[i], iend) + minMatch;
ZSTD_LOG_PARSER("%d: Found REP %d/%d mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), i, ZSTD_REP_NUM, mlen, i, opt[cur].rep[i], cur, opt[cur].off);
@ -556,7 +556,9 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
goto _storeSequence;
}
best_off = (i<=1 && opt[cur].mlen != 1) ? 1-i : i;
//best_off = ((i<=1) & (opt[cur].mlen != 1)) ? 1-i : i;
best_off = i - (opt[cur].mlen != 1);
if (opt[cur].mlen == 1) {
litlen = opt[cur].litlen;
if (cur > litlen) {
@ -661,7 +663,8 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
rep[1] = rep[0];
rep[0] = best_off;
}
if (litLength == 0 && offset<=1) offset = 1-offset;
if ((litLength == 0) & (offset==0)) offset = rep[1]; /* protection, but should never happen */
if ((litLength == 0) & (offset<=2)) offset--;
}
ZSTD_LOG_ENCODE("%d/%d: ENCODE literals=%d mlen=%d off=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]);
@ -746,7 +749,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
/* check repCode */
{ U32 i;
for (i=0; i<ZSTD_REP_CHECK; i++) {
for (i = (ip==anchor); i<ZSTD_REP_CHECK; i++) {
const U32 repIndex = (U32)(current - rep[i]);
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
@ -763,7 +766,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
goto _storeSequence;
}
best_off = (i<=1 && ip == anchor) ? 1-i : i;
best_off = i - (ip==anchor);
litlen = opt[0].litlen;
do {
price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
@ -849,7 +852,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
best_mlen = 0;
{ U32 i;
for (i=0; i<ZSTD_REP_CHECK; i++) {
for (i = (opt[cur].mlen != 1); i<ZSTD_REP_CHECK; i++) {
const U32 repIndex = (U32)(current+cur - opt[cur].rep[i]);
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
@ -867,7 +870,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
goto _storeSequence;
}
best_off = (i<=1 && opt[cur].mlen != 1) ? 1-i : i;
best_off = i - (opt[cur].mlen != 1);
if (opt[cur].mlen == 1) {
litlen = opt[cur].litlen;
if (cur > litlen) {
@ -973,8 +976,9 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
if (offset != 1) rep[2] = rep[1];
rep[1] = rep[0];
rep[0] = best_off;
}
if (litLength == 0 && offset<=1) offset = 1-offset;
}
if ((litLength==0) & (offset==0)) offset = rep[1]; /* protection, but should never happen */
if ((litLength==0) & (offset<=2)) offset --;
}
ZSTD_LOG_ENCODE("%d/%d: ENCODE literals=%d mlen=%d off=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]);

View File

@ -627,9 +627,9 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState)
}
if (ofCode <= 1) {
if ((llCode == 0) & (offset <= 1)) offset = 1-offset;
offset += (llCode==0);
if (offset) {
size_t const temp = seqState->prevOffset[offset];
size_t const temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
seqState->prevOffset[1] = seqState->prevOffset[0];
seqState->prevOffset[0] = offset = temp;

View File

@ -1081,11 +1081,11 @@ As seen in [Offset Codes], the first 3 values define a repeated offset.
They are sorted in recency order, with 1 meaning "most recent one".
There is an exception though, when current sequence's literal length is `0`.
In which case, the first 2 values are swapped,
meaning `2` refers to the most recent offset,
while `1` refers to the second most recent offset,
In which case, repeat codes are shifted by one position:
`1` refers to the second most recent offset, `2` refers to the third most recent offset,
and `3` means "most recent offset (offset_1) minus 1 byte".
Repeat offsets start with the following values : 1, 4 and 8 (in order).
On the first block, offset history is populated with the following values: 1, 4 and 8 (in order).
Then each block receives its start values from the previous compressed block.
Note that non-compressed blocks are skipped,
@ -1095,14 +1095,11 @@ they do not contribute to offset history.
###### Offset updates rules
When the new offset is a normal one,
offset history is simply translated by one position,
with the new offset taking first spot.
The new offset takes the lead in offset history,
up to its previous place if it was already present.
- When repeat offset 1 (most recent) is used, history is unmodified.
- When repeat offset 2 is used, it's swapped with offset 1.
- When repeat offset 3 is used, it takes first spot,
pushing the other ones by one position.
It means that when repeat offset 1 (most recent) is used, history is unmodified.
When repeat offset 2 is used, it's swapped with offset 1.
Dictionary format