qCount{Leading,Trailing}ZeroBits: Use __builtin_clzs for 16-bit

If possible.

The BSF/BSR/TZCNT/LZCNT Intel instructions do not exist for 8-bit operands.
For that case, it's a good idea to use the 32-bit instruction instead of the
16-bit one, to avoid the Length Changing Prefix (LCP).

GCC doesn't allow us to use __builtin_c[lt]zs unless BMI is active,
while ICC generates the same code either way (Clang understands
__has_builtin).

Change-Id: I8de47ed6c7be4847b99bffff141c91603c7024dc
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@theqtcompany.com>
This commit is contained in:
Thiago Macieira 2015-12-03 17:37:02 -08:00
parent 2fb42eb4af
commit f5f47987ce

View File

@ -620,7 +620,11 @@ Q_DECL_RELAXED_CONSTEXPR inline uint qCountTrailingZeroBits(quint8 v) Q_DECL_NOT
Q_DECL_RELAXED_CONSTEXPR inline uint qCountTrailingZeroBits(quint16 v) Q_DECL_NOTHROW
{
#if defined(Q_CC_GNU)
# if QT_HAS_BUILTIN(__builtin_ctzs) || defined(__BMI__)
return v ? __builtin_ctzs(v) : 16U;
# else
return v ? __builtin_ctz(v) : 16U;
# endif
#else
unsigned int c = 16; // c will be the number of zero bits on the right
v &= -signed(v);
@ -679,7 +683,11 @@ Q_DECL_RELAXED_CONSTEXPR inline uint qCountLeadingZeroBits(quint8 v) Q_DECL_NOTH
Q_DECL_RELAXED_CONSTEXPR inline uint qCountLeadingZeroBits(quint16 v) Q_DECL_NOTHROW
{
#if defined(Q_CC_GNU)
# if QT_HAS_BUILTIN(__builtin_clzs) || defined(__BMI__)
return v ? __builtin_clzs(v) : 16U;
# else
return v ? __builtin_clz(v)-16U : 16U;
# endif
#else
v = v | (v >> 1);
v = v | (v >> 2);