Reduce stack usage of HUF_readDTableX4 and HUF_readDTableX2

This commit is contained in:
Stella Lau 2017-06-29 11:49:59 -07:00
parent 09a5bbe22e
commit 99e315999c
3 changed files with 170 additions and 24 deletions

View File

@ -111,6 +111,9 @@ HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, const void*
#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) #define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
/* The workspace must have alignment at least 4 and be at least this large */
#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
/* ****************************************************************** /* ******************************************************************
@ -170,8 +173,11 @@ size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cS
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */ size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */ size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
/* **************************************** /* ****************************************
@ -243,7 +249,9 @@ HUF_decompress() does the following:
U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize); size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize);
size_t HUF_readDTableX4_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
@ -266,8 +274,11 @@ size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cS
size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);

View File

@ -67,6 +67,12 @@
#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
/* **************************************************************
* Byte alignment for workSpace management
****************************************************************/
#define ALIGN(x, a) ALIGN_MASK((x), (a) - 1)
#define ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
/*-***************************/ /*-***************************/
/* generic DTableDesc */ /* generic DTableDesc */
/*-***************************/ /*-***************************/
@ -87,16 +93,29 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */ typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */
size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize) size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
{ {
BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
U32 tableLog = 0; U32 tableLog = 0;
U32 nbSymbols = 0; U32 nbSymbols = 0;
size_t iSize; size_t iSize;
void* const dtPtr = DTable + 1; void* const dtPtr = DTable + 1;
HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
U32 *rankVal;
BYTE *huffWeight;
size_t spaceUsed = 0;
rankVal = (U32 *)((BYTE *)workSpace + spaceUsed);
spaceUsed += sizeof(U32) * (HUF_TABLELOG_ABSOLUTEMAX + 1);
huffWeight = (BYTE *)workSpace + spaceUsed;
spaceUsed += HUF_SYMBOLVALUE_MAX + 1;
spaceUsed = ALIGN(spaceUsed, sizeof(U32));
if (spaceUsed > wkspSize)
return ERROR(tableLog_tooLarge);
workSpace = (BYTE *)workSpace + spaceUsed;
wkspSize -= spaceUsed;
HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
/* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
@ -135,6 +154,13 @@ size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize)
return iSize; return iSize;
} }
size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
{
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
return HUF_readDTableX2_wksp(DTable, src, srcSize,
workSpace, sizeof(workSpace));
}
static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog) static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
{ {
@ -212,11 +238,13 @@ size_t HUF_decompress1X2_usingDTable(
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
} }
size_t HUF_decompress1X2_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
void* workSpace, size_t wkspSize)
{ {
const BYTE* ip = (const BYTE*) cSrc; const BYTE* ip = (const BYTE*) cSrc;
size_t const hSize = HUF_readDTableX2 (DCtx, cSrc, cSrcSize); size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
if (HUF_isError(hSize)) return hSize; if (HUF_isError(hSize)) return hSize;
if (hSize >= cSrcSize) return ERROR(srcSize_wrong); if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize; cSrcSize -= hSize; ip += hSize; cSrcSize -= hSize;
@ -224,6 +252,15 @@ size_t HUF_decompress1X2_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, cons
return HUF_decompress1X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx); return HUF_decompress1X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
} }
size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize)
{
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
workSpace, sizeof(workSpace));
}
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{ {
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
@ -335,11 +372,14 @@ size_t HUF_decompress4X2_usingDTable(
} }
size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
void* workSpace, size_t wkspSize)
{ {
const BYTE* ip = (const BYTE*) cSrc; const BYTE* ip = (const BYTE*) cSrc;
size_t const hSize = HUF_readDTableX2 (dctx, cSrc, cSrcSize); size_t const hSize = HUF_readDTableX2_wksp (dctx, cSrc, cSrcSize,
workSpace, wkspSize);
if (HUF_isError(hSize)) return hSize; if (HUF_isError(hSize)) return hSize;
if (hSize >= cSrcSize) return ERROR(srcSize_wrong); if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize; cSrcSize -= hSize; ip += hSize; cSrcSize -= hSize;
@ -347,6 +387,13 @@ size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, cons
return HUF_decompress4X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, dctx); return HUF_decompress4X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, dctx);
} }
size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
workSpace, sizeof(workSpace));
}
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{ {
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
@ -404,6 +451,7 @@ static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 co
} }
typedef U32 rankVal_t[HUF_TABLELOG_MAX][HUF_TABLELOG_MAX + 1]; typedef U32 rankVal_t[HUF_TABLELOG_MAX][HUF_TABLELOG_MAX + 1];
typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog, static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
const sortedSymbol_t* sortedList, const U32 sortedListSize, const sortedSymbol_t* sortedList, const U32 sortedListSize,
@ -447,20 +495,44 @@ static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
} }
} }
size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize) size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
size_t srcSize, void* workSpace,
size_t wkspSize)
{ {
BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
U32 rankStats[HUF_TABLELOG_MAX + 1] = { 0 };
U32 rankStart0[HUF_TABLELOG_MAX + 2] = { 0 };
U32* const rankStart = rankStart0+1;
rankVal_t rankVal;
U32 tableLog, maxW, sizeOfSort, nbSymbols; U32 tableLog, maxW, sizeOfSort, nbSymbols;
DTableDesc dtd = HUF_getDTableDesc(DTable); DTableDesc dtd = HUF_getDTableDesc(DTable);
U32 const maxTableLog = dtd.maxTableLog; U32 const maxTableLog = dtd.maxTableLog;
size_t iSize; size_t iSize;
void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */ void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr; HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr;
U32 *rankStart;
rankValCol_t *rankVal;
U32 *rankStats;
U32 *rankStart0;
sortedSymbol_t *sortedSymbol;
BYTE *weightList;
size_t spaceUsed = 0;
rankVal = (rankValCol_t *)((BYTE *)workSpace + spaceUsed);
spaceUsed += sizeof(rankValCol_t) * HUF_TABLELOG_MAX;
rankStats = (U32 *)((BYTE *)workSpace + spaceUsed);
spaceUsed += sizeof(U32) * (HUF_TABLELOG_MAX + 1);
rankStart0 = (U32 *)((BYTE *)workSpace + spaceUsed);
spaceUsed += sizeof(U32) * (HUF_TABLELOG_MAX + 2);
sortedSymbol = (sortedSymbol_t *)((BYTE *)workSpace + spaceUsed);
spaceUsed += sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1);
weightList = (BYTE *)workSpace + spaceUsed;
spaceUsed += HUF_SYMBOLVALUE_MAX + 1;
spaceUsed = ALIGN(spaceUsed, sizeof(U32));
if (spaceUsed > wkspSize)
return ERROR(tableLog_tooLarge);
workSpace = (BYTE *)workSpace + spaceUsed;
wkspSize -= spaceUsed;
rankStart = rankStart0 + 1;
memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
@ -527,6 +599,12 @@ size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize)
return iSize; return iSize;
} }
size_t HUF_readDTableX4(HUF_DTable* DTable, const void* src, size_t srcSize)
{
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
return HUF_readDTableX4_wksp(DTable, src, srcSize,
workSpace, sizeof(workSpace));
}
static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
{ {
@ -627,11 +705,14 @@ size_t HUF_decompress1X4_usingDTable(
return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
} }
size_t HUF_decompress1X4_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
void* workSpace, size_t wkspSize)
{ {
const BYTE* ip = (const BYTE*) cSrc; const BYTE* ip = (const BYTE*) cSrc;
size_t const hSize = HUF_readDTableX4 (DCtx, cSrc, cSrcSize); size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize,
workSpace, wkspSize);
if (HUF_isError(hSize)) return hSize; if (HUF_isError(hSize)) return hSize;
if (hSize >= cSrcSize) return ERROR(srcSize_wrong); if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize; cSrcSize -= hSize; ip += hSize; cSrcSize -= hSize;
@ -639,6 +720,15 @@ size_t HUF_decompress1X4_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, cons
return HUF_decompress1X4_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx); return HUF_decompress1X4_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
} }
size_t HUF_decompress1X4_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize)
{
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
return HUF_decompress1X4_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
workSpace, sizeof(workSpace));
}
size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{ {
HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX); HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
@ -749,11 +839,14 @@ size_t HUF_decompress4X4_usingDTable(
} }
size_t HUF_decompress4X4_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
void* workSpace, size_t wkspSize)
{ {
const BYTE* ip = (const BYTE*) cSrc; const BYTE* ip = (const BYTE*) cSrc;
size_t hSize = HUF_readDTableX4 (dctx, cSrc, cSrcSize); size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize,
workSpace, wkspSize);
if (HUF_isError(hSize)) return hSize; if (HUF_isError(hSize)) return hSize;
if (hSize >= cSrcSize) return ERROR(srcSize_wrong); if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize; cSrcSize -= hSize; ip += hSize; cSrcSize -= hSize;
@ -761,6 +854,15 @@ size_t HUF_decompress4X4_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, cons
return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx); return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
} }
size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize)
{
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
return HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
workSpace, sizeof(workSpace));
}
size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{ {
HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX); HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
@ -874,7 +976,25 @@ size_t HUF_decompress4X_hufOnly (HUF_DTable* dctx, void* dst, size_t dstSize, co
} }
} }
size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
size_t dstSize, const void* cSrc,
size_t cSrcSize, void* workSpace,
size_t wkspSize)
{
/* validation checks */
if (dstSize == 0) return ERROR(dstSize_tooSmall);
if ((cSrcSize >= dstSize) || (cSrcSize <= 1)) return ERROR(corruption_detected); /* invalid */
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
}
}
size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
void* workSpace, size_t wkspSize)
{ {
/* validation checks */ /* validation checks */
if (dstSize == 0) return ERROR(dstSize_tooSmall); if (dstSize == 0) return ERROR(dstSize_tooSmall);
@ -883,7 +1003,17 @@ size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
return algoNb ? HUF_decompress1X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc,
HUF_decompress1X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; cSrcSize, workSpace, wkspSize):
HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
cSrcSize, workSpace, wkspSize);
} }
} }
size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize)
{
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
workSpace, sizeof(workSpace));
}

View File

@ -93,6 +93,7 @@ typedef struct {
FSE_DTable OFTable[FSE_DTABLE_SIZE_U32(OffFSELog)]; FSE_DTable OFTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)]; FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
U32 rep[ZSTD_REP_NUM]; U32 rep[ZSTD_REP_NUM];
} ZSTD_entropyTables_t; } ZSTD_entropyTables_t;
@ -559,8 +560,10 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
HUF_decompress1X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) : HUF_decompress1X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) :
HUF_decompress4X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) ) : HUF_decompress4X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) ) :
( singleStream ? ( singleStream ?
HUF_decompress1X2_DCtx(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize) : HUF_decompress1X2_DCtx_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
HUF_decompress4X_hufOnly (dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize)) )) dctx->entropy.workspace, sizeof(dctx->entropy.workspace)) :
HUF_decompress4X_hufOnly_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
dctx->entropy.workspace, sizeof(dctx->entropy.workspace)))))
return ERROR(corruption_detected); return ERROR(corruption_detected);
dctx->litPtr = dctx->litBuffer; dctx->litPtr = dctx->litBuffer;
@ -1836,7 +1839,9 @@ static size_t ZSTD_loadEntropy(ZSTD_entropyTables_t* entropy, const void* const
dictPtr += 8; /* skip header = magic + dictID */ dictPtr += 8; /* skip header = magic + dictID */
{ size_t const hSize = HUF_readDTableX4(entropy->hufTable, dictPtr, dictEnd-dictPtr); { size_t const hSize = HUF_readDTableX4_wksp(
entropy->hufTable, dictPtr, dictEnd - dictPtr,
entropy->workspace, sizeof(entropy->workspace));
if (HUF_isError(hSize)) return ERROR(dictionary_corrupted); if (HUF_isError(hSize)) return ERROR(dictionary_corrupted);
dictPtr += hSize; dictPtr += hSize;
} }