added conditional prefetch
depending on amount of work to do.
This commit is contained in:
parent
63a519dbf6
commit
4de344d505
@ -95,19 +95,21 @@
|
||||
#else
|
||||
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
|
||||
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
|
||||
# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0)
|
||||
# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T1)
|
||||
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
|
||||
# define PREFETCH(ptr) __builtin_prefetch(ptr, 0 /* rw==read */, 0 /* locality */)
|
||||
# define PREFETCH(ptr) __builtin_prefetch(ptr, 0 /* rw==read */, 2 /* locality */)
|
||||
# else
|
||||
# define PREFETCH(ptr) /* disabled */
|
||||
# endif
|
||||
#endif /* NO_PREFETCH */
|
||||
|
||||
#define PREFETCH_AREA(ptr, size) { \
|
||||
size_t pos; \
|
||||
for (pos=0; pos<size; pos++) { \
|
||||
PREFETCH( (const char*)(const void*)ptr + pos); \
|
||||
} \
|
||||
#define CACHELINE_SIZE 64
|
||||
|
||||
#define PREFETCH_AREA(ptr, size) { \
|
||||
size_t pos; \
|
||||
for (pos=0; pos<size; pos+=CACHELINE_SIZE) { \
|
||||
PREFETCH( (const char*)ptr + pos); \
|
||||
} \
|
||||
}
|
||||
|
||||
/* disable warnings */
|
||||
|
@ -578,13 +578,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
||||
{
|
||||
case set_repeat:
|
||||
if (dctx->litEntropy==0) return ERROR(dictionary_corrupted);
|
||||
|
||||
/* prefetch huffman table if cold */
|
||||
if (dctx->ddictIsCold) {
|
||||
PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
|
||||
}
|
||||
|
||||
/* fall-through */
|
||||
|
||||
case set_compressed:
|
||||
if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
|
||||
{ size_t lhSize, litSize, litCSize;
|
||||
@ -616,6 +611,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
||||
if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
|
||||
if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
|
||||
|
||||
/* prefetch huffman table if cold */
|
||||
if (dctx->ddictIsCold && (litSize > 256 /* heuristic */)) {
|
||||
PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
|
||||
}
|
||||
|
||||
if (HUF_isError((litEncType==set_repeat) ?
|
||||
( singleStream ?
|
||||
HUF_decompress1X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) :
|
||||
@ -897,7 +897,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
||||
const void* src, size_t srcSize,
|
||||
const U32* baseValue, const U32* nbAdditionalBits,
|
||||
const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
|
||||
int ddictIsCold)
|
||||
int ddictIsCold, int nbSeq)
|
||||
{
|
||||
switch(type)
|
||||
{
|
||||
@ -917,7 +917,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
||||
case set_repeat:
|
||||
if (!flagRepeatTable) return ERROR(corruption_detected);
|
||||
/* prefetch FSE table if used */
|
||||
if (ddictIsCold) {
|
||||
if (ddictIsCold && (nbSeq > 16 /* heuristic */)) {
|
||||
//if (ddictIsCold) {
|
||||
const void* const pStart = *DTablePtr;
|
||||
size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
|
||||
PREFETCH_AREA(pStart, pSize);
|
||||
@ -974,25 +975,27 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
||||
const BYTE* const istart = (const BYTE* const)src;
|
||||
const BYTE* const iend = istart + srcSize;
|
||||
const BYTE* ip = istart;
|
||||
int nbSeq;
|
||||
DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
|
||||
|
||||
/* check */
|
||||
if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
|
||||
|
||||
/* SeqHead */
|
||||
{ int nbSeq = *ip++;
|
||||
if (!nbSeq) { *nbSeqPtr=0; return 1; }
|
||||
if (nbSeq > 0x7F) {
|
||||
if (nbSeq == 0xFF) {
|
||||
if (ip+2 > iend) return ERROR(srcSize_wrong);
|
||||
nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
|
||||
} else {
|
||||
if (ip >= iend) return ERROR(srcSize_wrong);
|
||||
nbSeq = ((nbSeq-0x80)<<8) + *ip++;
|
||||
}
|
||||
nbSeq = *ip++;
|
||||
if (!nbSeq) { *nbSeqPtr=0; return 1; }
|
||||
if (nbSeq > 0x7F) {
|
||||
if (nbSeq == 0xFF) {
|
||||
if (ip+2 > iend) return ERROR(srcSize_wrong);
|
||||
nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
|
||||
} else {
|
||||
if (ip >= iend) return ERROR(srcSize_wrong);
|
||||
nbSeq = ((nbSeq-0x80)<<8) + *ip++;
|
||||
}
|
||||
*nbSeqPtr = nbSeq;
|
||||
}
|
||||
*nbSeqPtr = nbSeq;
|
||||
DEBUGLOG(2, "nbSeqs=%i", nbSeq);
|
||||
|
||||
|
||||
/* FSE table descriptors */
|
||||
if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */
|
||||
@ -1007,7 +1010,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
||||
ip, iend-ip,
|
||||
LL_base, LL_bits,
|
||||
LL_defaultDTable, dctx->fseEntropy,
|
||||
dctx->ddictIsCold);
|
||||
dctx->ddictIsCold, nbSeq);
|
||||
if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
|
||||
ip += llhSize;
|
||||
}
|
||||
@ -1017,7 +1020,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
||||
ip, iend-ip,
|
||||
OF_base, OF_bits,
|
||||
OF_defaultDTable, dctx->fseEntropy,
|
||||
dctx->ddictIsCold);
|
||||
dctx->ddictIsCold, nbSeq);
|
||||
if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
|
||||
ip += ofhSize;
|
||||
}
|
||||
@ -1027,7 +1030,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
||||
ip, iend-ip,
|
||||
ML_base, ML_bits,
|
||||
ML_defaultDTable, dctx->fseEntropy,
|
||||
dctx->ddictIsCold);
|
||||
dctx->ddictIsCold, nbSeq);
|
||||
if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
|
||||
ip += mlhSize;
|
||||
}
|
||||
@ -2395,7 +2398,7 @@ size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
|
||||
/* prefetch dictionary content */
|
||||
if (dctx->ddictIsCold) {
|
||||
size_t const dictSize = ddict->dictSize;
|
||||
size_t const pSize = MIN(dictSize, 32 KB); /* proposed heuristic : 8 x frameContentSize => need to know frameContentSize */
|
||||
size_t const pSize = MIN(dictSize, 2 KB); /* very conservative; would need to know Nb of Copies in dictionary, or frameContentSize as a proxy */
|
||||
const void* const pStart = (const char*)ddict->dictContent + dictSize - pSize;
|
||||
PREFETCH_AREA(pStart, pSize);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user