diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h index 37b99c01..2ae8b7c7 100644 --- a/lib/common/bitstream.h +++ b/lib/common/bitstream.h @@ -141,8 +141,12 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val) assert(val != 0); { # if defined(_MSC_VER) /* Visual */ - unsigned long r=0; - return _BitScanReverse ( &r, val ) ? (unsigned)r : 0; +# if STATIC_BMI2 == 1 + return _lzcnt_u32(val) ^ 31; +# else + unsigned long r = 0; + return _BitScanReverse(&r, val) ? (unsigned)r : 0; +# endif # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ return __builtin_clz (val) ^ 31; # elif defined(__ICCARM__) /* IAR Intrinsic */ @@ -324,16 +328,24 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) { +#if STATIC_BMI2 + return _bextr_u64(bitContainer, start, nbBits); +#else U32 const regMask = sizeof(bitContainer)*8 - 1; /* if start > regMask, bitstream is corrupted, and result is undefined */ assert(nbBits < BIT_MASK_SIZE); return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; +#endif } MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) { +#if STATIC_BMI2 + return _bzhi_u64(bitContainer, nbBits); +#else assert(nbBits < BIT_MASK_SIZE); return bitContainer & BIT_mask[nbBits]; +#endif } /*! BIT_lookBits() : diff --git a/lib/common/compiler.h b/lib/common/compiler.h index 37cdc45e..91440b1a 100644 --- a/lib/common/compiler.h +++ b/lib/common/compiler.h @@ -183,4 +183,17 @@ # pragma warning(disable : 4324) /* disable: C4324: padded structure */ #endif +/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/ +#ifndef STATIC_BMI2 +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) +# ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2 +# define STATIC_BMI2 1 +# endif +# endif +#endif + +#ifndef STATIC_BMI2 + #define STATIC_BMI2 0 +#endif + #endif /* ZSTD_COMPILER_H */ diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index ee3cb51b..17c4b9ca 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -394,8 +394,12 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus assert(val != 0); { # if defined(_MSC_VER) /* Visual */ - unsigned long r=0; - return _BitScanReverse(&r, val) ? (unsigned)r : 0; +# if STATIC_BMI2 == 1 + return _lzcnt_u32(val)^31; +# else + unsigned long r=0; + return _BitScanReverse(&r, val) ? (unsigned)r : 0; +# endif # elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ return __builtin_clz (val) ^ 31; # elif defined(__ICCARM__) /* IAR Intrinsic */