diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 1890a853..d85c803b 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -775,6 +775,7 @@ typedef struct { size_t litLength; size_t matchLength; size_t offset; + const BYTE* match; } seq_t; typedef struct { @@ -783,6 +784,7 @@ typedef struct { FSE_DState_t stateOffb; FSE_DState_t stateML; size_t prevOffset[ZSTD_REP_NUM]; + const BYTE* ptr; } seqState_t; @@ -851,11 +853,14 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState) if (MEM_32bits() || (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&seqState->DStream); + seq.match = seqState->ptr + seq.litLength - seq.offset; /* only for single memory segment ! */ + /* ANS state update */ FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + seqState->ptr += seq.matchLength + seq.litLength; return seq; } @@ -919,7 +924,7 @@ size_t ZSTD_execSequence(BYTE* op, BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; const BYTE* const iLitEnd = *litPtr + sequence.litLength; - const BYTE* match = oLitEnd - sequence.offset; + const BYTE* match = sequence.match; /* check */ if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ @@ -987,6 +992,8 @@ size_t ZSTD_execSequence(BYTE* op, return sequenceLength; } +#include +#define PREFETCH(ptr) _mm_prefetch(ptr, _MM_HINT_T0); static size_t ZSTD_decompressSequences( ZSTD_DCtx* dctx, @@ -1018,6 +1025,7 @@ static size_t ZSTD_decompressSequences( int seqNb; dctx->fseEntropy = 1; { U32 i; for (i=0; irep[i]; } + seqState.ptr = op; CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); @@ -1025,28 +1033,28 @@ static size_t ZSTD_decompressSequences( /* prepare in advance */ int const seqAdvance = MIN(nbSeq, 3); - for (seqNb=0; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNbrep[i] = (U32)(seqState.prevOffset[i]); }