Disable prefetch-decode for 32-bit targets
This decoder variant is detrimental to performance on 32-bit x86, likely due to register pressure. Note that the variant is disabled for all 32-bit targets, not only x86. It is unclear whether it would help on other architectures, such as ARM, MIPS, or PowerPC.
This commit is contained in:
parent
3a55d8be26
commit
fe5d27062e
@ -1410,13 +1410,18 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|||||||
|
|
||||||
if (srcSize >= ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(srcSize_wrong);
|
if (srcSize >= ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(srcSize_wrong);
|
||||||
|
|
||||||
/* Decode literals sub-block */
|
/* Decode literals section */
|
||||||
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
|
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
|
||||||
if (ZSTD_isError(litCSize)) return litCSize;
|
if (ZSTD_isError(litCSize)) return litCSize;
|
||||||
ip += litCSize;
|
ip += litCSize;
|
||||||
srcSize -= litCSize;
|
srcSize -= litCSize;
|
||||||
}
|
}
|
||||||
if (dctx->fParams.windowSize > (1<<23)) return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize);
|
if (sizeof(size_t) > 4) /* do not enable prefetching on 32-bits x86, as it's performance detrimental */
|
||||||
|
/* likely because of register pressure */
|
||||||
|
/* if that's the correct cause, then 32-bit ARM should be affected differently */
|
||||||
|
/* it would be good to test this on real ARM hardware, to see whether the prefetch version improves speed */
|
||||||
|
if (dctx->fParams.windowSize > (1<<23))
|
||||||
|
return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize);
|
||||||
return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize);
|
return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user