diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index 8b292e79..71c1d20b 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -493,41 +493,41 @@ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, const HUF_DTable *DTable); #if DYNAMIC_BMI2 -#define X(fn) \ - \ - static size_t fn##_default( \ - void* dst, size_t dstSize, \ - const void* cSrc, size_t cSrcSize, \ - const HUF_DTable* DTable) \ - { \ - return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ - } \ - \ - static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ - void* dst, size_t dstSize, \ - const void* cSrc, size_t cSrcSize, \ - const HUF_DTable* DTable) \ - { \ - return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ - } \ - \ - static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ - size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ - { \ - if (bmi2) { \ - return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ - } \ - return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ +#define X(fn) \ + \ + static size_t fn##_default( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + if (bmi2) { \ + return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ } #else -#define X(fn) \ - static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ - size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ - { \ - (void)bmi2; \ - return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ +#define X(fn) \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + (void)bmi2; \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ } #endif diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 8aa21db7..4cd1f3da 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1229,21 +1229,368 @@ size_t ZSTD_execSequenceLong(BYTE* op, return sequenceLength; } +static void +ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt) +{ + const void* ptr = dt; + const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits", + (U32)DStatePtr->state, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) +{ + ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.nextState + lowBits; +} + +/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum + * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) + * bits before reloading. This value is the maximum number of bytes we read + * after reloading when we are decoding long offets. + */ +#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ + (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ + ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \ + : 0) + typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; -#define FUNCTION(fn) fn##_default -#define TARGET -#include "zstd_decompress_impl.h" -#undef TARGET -#undef FUNCTION +FORCE_INLINE_TEMPLATE seq_t +ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) +{ + seq_t seq; + U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits; + U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits; + U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits; + U32 const totalBits = llBits+mlBits+ofBits; + U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue; + U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue; + U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue; + + /* sequence */ + { size_t offset; + if (!ofBits) + offset = 0; + else { + ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); + assert(ofBits <= MaxOff); + if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { + U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); + offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); + BIT_reloadDStream(&seqState->DStream); + if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); + assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ + } else { + offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); + } + } + + if (ofBits <= 1) { + offset += (llBase==0); + if (offset) { + size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } else { /* offset == 0 */ + offset = seqState->prevOffset[0]; + } + } else { + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } + seq.offset = offset; + } + + seq.matchLength = mlBase + + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */ + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ + ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); + + seq.litLength = llBase + + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */ + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); + + DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + + /* ANS state update */ + ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ + ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + + return seq; +} + +FORCE_INLINE_TEMPLATE size_t +ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE* const)dst; + BYTE* const oend = ostart + maxDstSize; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const base = (const BYTE*) (dctx->base); + const BYTE* const vBase = (const BYTE*) (dctx->vBase); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + DEBUGLOG(5, "ZSTD_decompressSequences"); + + /* Regen sequences */ + if (nbSeq) { + seqState_t seqState; + dctx->fseEntropy = 1; + { U32 i; for (i=0; ientropy.rep[i]; } + CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + + for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { + nbSeq--; + { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); + size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd); + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + op += oneSeqSize; + } } + + /* check if reached exact end */ + DEBUGLOG(5, "ZSTD_decompressSequences: after decode loop, remaining nbSeq : %i", nbSeq); + if (nbSeq) return ERROR(corruption_detected); + /* save reps for next block */ + { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + { size_t const lastLLSize = litEnd - litPtr; + if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall); + memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + + return op-ostart; +} + +static size_t +ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} + + + +FORCE_INLINE_TEMPLATE seq_t +ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets) +{ + seq_t seq; + U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits; + U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits; + U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits; + U32 const totalBits = llBits+mlBits+ofBits; + U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue; + U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue; + U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue; + + /* sequence */ + { size_t offset; + if (!ofBits) + offset = 0; + else { + ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); + assert(ofBits <= MaxOff); + if (MEM_32bits() && longOffsets) { + U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1); + offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); + if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream); + if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); + } else { + offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); + } + } + + if (ofBits <= 1) { + offset += (llBase==0); + if (offset) { + size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } else { + offset = seqState->prevOffset[0]; + } + } else { + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } + seq.offset = offset; + } + + seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */ + ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); + + seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); + + { size_t const pos = seqState->pos + seq.litLength; + const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart; + seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. + * No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */ + seqState->pos = pos + seq.matchLength; + } + + /* ANS state update */ + ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ + ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + + return seq; +} + +FORCE_INLINE_TEMPLATE size_t +ZSTD_decompressSequencesLong_body( + ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE* const)dst; + BYTE* const oend = ostart + maxDstSize; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const prefixStart = (const BYTE*) (dctx->base); + const BYTE* const dictStart = (const BYTE*) (dctx->vBase); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + + /* Regen sequences */ + if (nbSeq) { +#define STORED_SEQS 4 +#define STOSEQ_MASK (STORED_SEQS-1) +#define ADVANCED_SEQS 4 + seq_t sequences[STORED_SEQS]; + int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); + seqState_t seqState; + int seqNb; + dctx->fseEntropy = 1; + { U32 i; for (i=0; ientropy.rep[i]; } + seqState.prefixStart = prefixStart; + seqState.pos = (size_t)(op-prefixStart); + seqState.dictEnd = dictEnd; + CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + + /* prepare in advance */ + for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNbentropy.rep[i] = (U32)(seqState.prevOffset[i]); } +#undef STORED_SEQS +#undef STOSEQ_MASK +#undef ADVANCED_SEQS + } + + /* last literal segment */ + { size_t const lastLLSize = litEnd - litPtr; + if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall); + memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + + return op-ostart; +} + +static size_t +ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} + + #if DYNAMIC_BMI2 -#define FUNCTION(fn) fn##_bmi2 -#define TARGET TARGET_ATTRIBUTE("bmi2") -#include "zstd_decompress_impl.h" -#undef TARGET -#undef FUNCTION +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} + +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} #endif diff --git a/lib/decompress/zstd_decompress_impl.h b/lib/decompress/zstd_decompress_impl.h deleted file mode 100644 index 18b3a296..00000000 --- a/lib/decompress/zstd_decompress_impl.h +++ /dev/null @@ -1,340 +0,0 @@ -/* - * Copyright (c) 2018-present, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef FUNCTION -# error "FUNCTION(name) must be defined" -#endif - -#ifndef TARGET -# error "TARGET must be defined" -#endif - -static TARGET void -FUNCTION(ZSTD_updateFseState)(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) -{ - ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state]; - U32 const nbBits = DInfo.nbBits; - size_t const lowBits = BIT_readBits(bitD, nbBits); - DStatePtr->state = DInfo.nextState + lowBits; -} - -/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum - * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) - * bits before reloading. This value is the maximum number of bytes we read - * after reloading when we are decoding long offets. - */ -#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ - (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ - ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \ - : 0) - -static TARGET seq_t -FUNCTION(ZSTD_decodeSequence)(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) -{ - seq_t seq; - U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits; - U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits; - U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits; - U32 const totalBits = llBits+mlBits+ofBits; - U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue; - U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue; - U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue; - - /* sequence */ - { size_t offset; - if (!ofBits) - offset = 0; - else { - ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); - ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); - assert(ofBits <= MaxOff); - if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { - U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); - offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); - BIT_reloadDStream(&seqState->DStream); - if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); - assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ - } else { - offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); - } - } - - if (ofBits <= 1) { - offset += (llBase==0); - if (offset) { - size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; - temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ - if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset = temp; - } else { /* offset == 0 */ - offset = seqState->prevOffset[0]; - } - } else { - seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; - } - seq.offset = offset; - } - - seq.matchLength = mlBase - + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */ - if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) - BIT_reloadDStream(&seqState->DStream); - if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) - BIT_reloadDStream(&seqState->DStream); - /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ - ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); - - seq.litLength = llBase - + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */ - if (MEM_32bits()) - BIT_reloadDStream(&seqState->DStream); - - DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", - (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); - - /* ANS state update */ - FUNCTION(ZSTD_updateFseState)(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ - FUNCTION(ZSTD_updateFseState)(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - FUNCTION(ZSTD_updateFseState)(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ - - return seq; -} - - - -HINT_INLINE seq_t -FUNCTION(ZSTD_decodeSequenceLong)(seqState_t* seqState, ZSTD_longOffset_e const longOffsets) -{ - seq_t seq; - U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits; - U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits; - U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits; - U32 const totalBits = llBits+mlBits+ofBits; - U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue; - U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue; - U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue; - - /* sequence */ - { size_t offset; - if (!ofBits) - offset = 0; - else { - ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); - ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); - assert(ofBits <= MaxOff); - if (MEM_32bits() && longOffsets) { - U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1); - offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); - if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream); - if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); - } else { - offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); - } - } - - if (ofBits <= 1) { - offset += (llBase==0); - if (offset) { - size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; - temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ - if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset = temp; - } else { - offset = seqState->prevOffset[0]; - } - } else { - seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; - } - seq.offset = offset; - } - - seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ - if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) - BIT_reloadDStream(&seqState->DStream); - if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) - BIT_reloadDStream(&seqState->DStream); - /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */ - ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); - - seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ - if (MEM_32bits()) - BIT_reloadDStream(&seqState->DStream); - - { size_t const pos = seqState->pos + seq.litLength; - const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart; - seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. - * No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */ - seqState->pos = pos + seq.matchLength; - } - - /* ANS state update */ - FUNCTION(ZSTD_updateFseState)(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ - FUNCTION(ZSTD_updateFseState)(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - FUNCTION(ZSTD_updateFseState)(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ - - return seq; -} - - -static TARGET void -FUNCTION(ZSTD_initFseState)(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt) -{ - const void* ptr = dt; - const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr; - DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); - DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits", - (U32)DStatePtr->state, DTableH->tableLog); - BIT_reloadDStream(bitD); - DStatePtr->table = dt + 1; -} - -static TARGET -size_t FUNCTION(ZSTD_decompressSequences)( - ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) -{ - const BYTE* ip = (const BYTE*)seqStart; - const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE* const)dst; - BYTE* const oend = ostart + maxDstSize; - BYTE* op = ostart; - const BYTE* litPtr = dctx->litPtr; - const BYTE* const litEnd = litPtr + dctx->litSize; - const BYTE* const base = (const BYTE*) (dctx->base); - const BYTE* const vBase = (const BYTE*) (dctx->vBase); - const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); - DEBUGLOG(5, "ZSTD_decompressSequences"); - - /* Regen sequences */ - if (nbSeq) { - seqState_t seqState; - dctx->fseEntropy = 1; - { U32 i; for (i=0; ientropy.rep[i]; } - CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); - FUNCTION(ZSTD_initFseState)(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); - FUNCTION(ZSTD_initFseState)(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); - FUNCTION(ZSTD_initFseState)(&seqState.stateML, &seqState.DStream, dctx->MLTptr); - - for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { - nbSeq--; - { seq_t const sequence = FUNCTION(ZSTD_decodeSequence)(&seqState, isLongOffset); - size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd); - DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); - if (ZSTD_isError(oneSeqSize)) return oneSeqSize; - op += oneSeqSize; - } } - - /* check if reached exact end */ - DEBUGLOG(5, "ZSTD_decompressSequences: after decode loop, remaining nbSeq : %i", nbSeq); - if (nbSeq) return ERROR(corruption_detected); - /* save reps for next block */ - { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } - } - - /* last literal segment */ - { size_t const lastLLSize = litEnd - litPtr; - if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall); - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; - } - - return op-ostart; -} - -static TARGET -size_t FUNCTION(ZSTD_decompressSequencesLong)( - ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) -{ - const BYTE* ip = (const BYTE*)seqStart; - const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE* const)dst; - BYTE* const oend = ostart + maxDstSize; - BYTE* op = ostart; - const BYTE* litPtr = dctx->litPtr; - const BYTE* const litEnd = litPtr + dctx->litSize; - const BYTE* const prefixStart = (const BYTE*) (dctx->base); - const BYTE* const dictStart = (const BYTE*) (dctx->vBase); - const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); - - /* Regen sequences */ - if (nbSeq) { -#define STORED_SEQS 4 -#define STOSEQ_MASK (STORED_SEQS-1) -#define ADVANCED_SEQS 4 - seq_t sequences[STORED_SEQS]; - int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); - seqState_t seqState; - int seqNb; - dctx->fseEntropy = 1; - { U32 i; for (i=0; ientropy.rep[i]; } - seqState.prefixStart = prefixStart; - seqState.pos = (size_t)(op-prefixStart); - seqState.dictEnd = dictEnd; - CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); - FUNCTION(ZSTD_initFseState)(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); - FUNCTION(ZSTD_initFseState)(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); - FUNCTION(ZSTD_initFseState)(&seqState.stateML, &seqState.DStream, dctx->MLTptr); - - /* prepare in advance */ - for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNbentropy.rep[i] = (U32)(seqState.prevOffset[i]); } -#undef STORED_SEQS -#undef STOSEQ_MASK -#undef ADVANCED_SEQS - } - - /* last literal segment */ - { size_t const lastLLSize = litEnd - litPtr; - if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall); - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; - } - - return op-ostart; -}