[lib] Reduce zstd stack usage by 1KB
This commit is contained in:
parent
046aca190f
commit
a90779397a
@ -335,8 +335,9 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
|
||||
|
||||
/* FSE_buildCTable_wksp() :
|
||||
* Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
|
||||
* `wkspSize` must be >= `(1<<tableLog)`.
|
||||
* `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog)`.
|
||||
*/
|
||||
/* Workspace size, in bytes, required by FSE_buildCTable_wksp() :
 * (maxSymbolValue + 2) `unsigned` cumulative counters, followed by (1 << tableLog) extra bytes.
 * Both arguments are parenthesized so that arbitrary expressions can be passed safely. */
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * ((maxSymbolValue) + 2) + (1ull << (tableLog)))
|
||||
size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
|
||||
|
||||
/* Workspace size, in bytes, for building a decoding table :
 * (maxSymbolValue + 1) `short` entries, plus (1 << maxTableLog) bytes, plus 8 extra bytes.
 * Both arguments are parenthesized so that arbitrary expressions can be passed safely. */
#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * ((maxSymbolValue) + 1) + (1ULL << (maxTableLog)) + 8)
|
||||
|
@ -74,13 +74,15 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
||||
void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
|
||||
FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
|
||||
U32 const step = FSE_TABLESTEP(tableSize);
|
||||
U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
|
||||
|
||||
FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace;
|
||||
U32* cumul = (U32*)workSpace;
|
||||
FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2));
|
||||
|
||||
U32 highThreshold = tableSize-1;
|
||||
|
||||
if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */
|
||||
if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
|
||||
/* CTable header */
|
||||
if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
|
||||
tableU16[-2] = (U16) tableLog;
|
||||
tableU16[-1] = (U16) maxSymbolValue;
|
||||
assert(tableLog < 16); /* required for threshold strategy to work */
|
||||
|
@ -69,7 +69,7 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
|
||||
U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
|
||||
|
||||
FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
|
||||
BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
|
||||
BYTE scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
|
||||
|
||||
unsigned count[HUF_TABLELOG_MAX+1];
|
||||
S16 norm[HUF_TABLELOG_MAX+1];
|
||||
|
@ -105,10 +105,10 @@ ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize)
|
||||
cctx->staticSize = workspaceSize;
|
||||
|
||||
/* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
|
||||
if (!ZSTD_cwksp_check_available(&cctx->workspace, HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;
|
||||
if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;
|
||||
cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
|
||||
cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
|
||||
cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, HUF_WORKSPACE_SIZE);
|
||||
cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE);
|
||||
cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
|
||||
return cctx;
|
||||
}
|
||||
@ -1139,7 +1139,7 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
||||
size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
|
||||
+ ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
|
||||
+ 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
|
||||
size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE);
|
||||
size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
|
||||
size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
|
||||
size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, /* forCCtx */ 1);
|
||||
|
||||
@ -1502,7 +1502,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
||||
RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
|
||||
zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
|
||||
RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
|
||||
zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, HUF_WORKSPACE_SIZE);
|
||||
zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE);
|
||||
/* Validate the pointer that was just reserved; nextCBlock has already been checked. */
RETURN_ERROR_IF(zc->entropyWorkspace == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
|
||||
} }
|
||||
|
||||
@ -1976,7 +1976,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
|
||||
{
|
||||
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
|
||||
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
|
||||
unsigned count[MaxSeq+1];
|
||||
unsigned* count = (unsigned*)entropyWorkspace;
|
||||
FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
|
||||
FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
|
||||
FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
|
||||
@ -1992,8 +1992,12 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
|
||||
BYTE* seqHead;
|
||||
BYTE* lastNCount = NULL;
|
||||
|
||||
entropyWorkspace = count + (MaxSeq + 1);
|
||||
entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compressSequences_internal (nbSeq=%zu)", nbSeq);
|
||||
ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
|
||||
assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);
|
||||
|
||||
/* Compress literals */
|
||||
{ const BYTE* const literals = seqStorePtr->litStart;
|
||||
@ -2448,7 +2452,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
|
||||
&zc->appliedParams,
|
||||
dst, dstCapacity,
|
||||
srcSize,
|
||||
zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
|
||||
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
|
||||
zc->bmi2);
|
||||
|
||||
if (frame &&
|
||||
|
@ -232,6 +232,9 @@ struct ZSTD_CCtx_params_s {
|
||||
ZSTD_customMem customMem;
|
||||
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
|
||||
|
||||
/* Extra bytes added to the entropy workspace : room for (MaxSeq + 2) `unsigned` counters. */
#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
|
||||
/* Total size, in bytes, of the entropy workspace : HUF_WORKSPACE_SIZE plus COMPRESS_SEQUENCES_WORKSPACE_SIZE. */
#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
|
||||
|
||||
struct ZSTD_CCtx_s {
|
||||
ZSTD_compressionStage_e stage;
|
||||
int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
|
||||
@ -258,7 +261,7 @@ struct ZSTD_CCtx_s {
|
||||
size_t maxNbLdmSequences;
|
||||
rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
|
||||
ZSTD_blockState_t blockState;
|
||||
U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
|
||||
U32* entropyWorkspace; /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
|
||||
|
||||
/* streaming */
|
||||
char* inBuff;
|
||||
|
@ -266,6 +266,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
|
||||
nbSeq_1--;
|
||||
}
|
||||
assert(nbSeq_1 > 1);
|
||||
assert(entropyWorkspaceSize >= FSE_BUILD_CTABLE_WORKSPACE_SIZE(MaxSeq, MaxFSELog));
|
||||
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
|
||||
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
|
||||
FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
|
||||
|
@ -835,7 +835,7 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
|
||||
&zc->blockState.nextCBlock->entropy,
|
||||
&zc->appliedParams,
|
||||
&entropyMetadata,
|
||||
zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
|
||||
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
|
||||
|
||||
return ZSTD_compressSubBlock_multi(&zc->seqStore,
|
||||
zc->blockState.prevCBlock,
|
||||
@ -845,5 +845,5 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
|
||||
dst, dstCapacity,
|
||||
src, srcSize,
|
||||
zc->bmi2, lastBlock,
|
||||
zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
|
||||
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
|
||||
}
|
||||
|
@ -815,7 +815,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
|
||||
BYTE* const oend = (BYTE*)frame->dataEnd;
|
||||
BYTE* op = (BYTE*)frame->data;
|
||||
BYTE* seqHead;
|
||||
BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
|
||||
BYTE scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE(MaxSeq, MaxFSELog)];
|
||||
|
||||
/* literals compressing block removed so that can be done separately */
|
||||
|
||||
@ -852,7 +852,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
|
||||
LLtype = set_rle;
|
||||
} else if (!(RAND(seed) & 3)) {
|
||||
/* maybe use the default distribution */
|
||||
FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
|
||||
CHECKERR(FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)));
|
||||
LLtype = set_basic;
|
||||
} else {
|
||||
/* fall back on a full table */
|
||||
@ -863,7 +863,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
|
||||
{ size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
|
||||
if (FSE_isError(NCountSize)) return ERROR(GENERIC);
|
||||
op += NCountSize; }
|
||||
FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
|
||||
CHECKERR(FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)));
|
||||
LLtype = set_compressed;
|
||||
} }
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user