Remove _bit_scan_{forward,reverse}

Use qCountTrailingZeroBits and qCountLeadingZeroBits from qalgorithms.h
instead. Also extended these versions for MSVC. The _bit_scan_* versions
stem from a time before the glorious days of qalgorithms.h. A big
advantage is that these functions can be used on all platforms.

Change-Id: I5a1b886371520310a7fe16e617635ea335046beb
Reviewed-by: Simon Hausmann <simon.hausmann@qt.io>
This commit is contained in:
Erik Verbruggen 2015-12-03 12:50:44 +01:00 committed by Erik Verbruggen
parent f7e29f07ff
commit e70324f8dd
5 changed files with 144 additions and 94 deletions

View File

@ -53,6 +53,16 @@ enum { Endian = 0, Data = 1 };
static const uchar utf8bom[] = { 0xef, 0xbb, 0xbf }; static const uchar utf8bom[] = { 0xef, 0xbb, 0xbf };
#if defined(__SSE2__) && defined(QT_COMPILER_SUPPORTS_SSE2) #if defined(__SSE2__) && defined(QT_COMPILER_SUPPORTS_SSE2)
static Q_ALWAYS_INLINE uint qBitScanReverse(unsigned v) Q_DECL_NOTHROW
{
uint result = qCountLeadingZeroBits(v);
// Now Invert the result: clz will count *down* from the msb to the lsb, so the msb index is 31
// and the lsb index is 0. The result for _bit_scan_reverse is expected to be the index when
// counting up: msb index is 0 (because it starts there), and the lsb index is 31.
result ^= sizeof(unsigned) * 8 - 1;
return result;
}
static inline bool simdEncodeAscii(uchar *&dst, const ushort *&nextAscii, const ushort *&src, const ushort *end) static inline bool simdEncodeAscii(uchar *&dst, const ushort *&nextAscii, const ushort *&src, const ushort *end)
{ {
// do sixteen characters at a time // do sixteen characters at a time
@ -81,9 +91,9 @@ static inline bool simdEncodeAscii(uchar *&dst, const ushort *&nextAscii, const
// find the next probable ASCII character // find the next probable ASCII character
// we don't want to load 32 bytes again in this loop if we know there are non-ASCII // we don't want to load 32 bytes again in this loop if we know there are non-ASCII
// characters still coming // characters still coming
nextAscii = src + _bit_scan_reverse(n) + 1; nextAscii = src + qBitScanReverse(n) + 1;
n = _bit_scan_forward(n); n = qCountTrailingZeroBits(n);
dst += n; dst += n;
src += n; src += n;
return false; return false;
@ -132,7 +142,7 @@ static inline bool simdDecodeAscii(ushort *&dst, const uchar *&nextAscii, const
// find the next probable ASCII character // find the next probable ASCII character
// we don't want to load 16 bytes again in this loop if we know there are non-ASCII // we don't want to load 16 bytes again in this loop if we know there are non-ASCII
// characters still coming // characters still coming
n = _bit_scan_reverse(n); n = qBitScanReverse(n);
nextAscii = src + (n / BitSpacing) + 1; nextAscii = src + (n / BitSpacing) + 1;
return false; return false;

View File

@ -45,6 +45,7 @@
#endif #endif
#include <QtCore/qglobal.h> #include <QtCore/qglobal.h>
#include <QtCore/qalgorithms.h>
#ifndef _USE_MATH_DEFINES #ifndef _USE_MATH_DEFINES
# define _USE_MATH_DEFINES # define _USE_MATH_DEFINES
@ -241,20 +242,12 @@ Q_DECL_CONSTEXPR inline double qRadiansToDegrees(double radians)
} }
#if defined(Q_CC_GNU) #if defined(QT_HAS_BUILTIN_CLZ)
// clz instructions exist in at least MIPS, ARM, PowerPC and X86, so we can assume this builtin always maps to an efficient instruction.
inline quint32 qNextPowerOfTwo(quint32 v) inline quint32 qNextPowerOfTwo(quint32 v)
{ {
if (v == 0) if (v == 0)
return 1; return 1;
return 2U << (31 ^ __builtin_clz(v)); return 2U << (31 ^ QAlgorithmsPrivate::qt_builtin_clz(v));
}
inline quint64 qNextPowerOfTwo(quint64 v)
{
if (v == 0)
return 1;
return Q_UINT64_C(2) << (63 ^ __builtin_clzll(v));
} }
#else #else
inline quint32 qNextPowerOfTwo(quint32 v) inline quint32 qNextPowerOfTwo(quint32 v)
@ -267,7 +260,16 @@ inline quint32 qNextPowerOfTwo(quint32 v)
++v; ++v;
return v; return v;
} }
#endif
#if defined(QT_HAS_BUILTIN_CLZLL)
inline quint64 qNextPowerOfTwo(quint64 v)
{
if (v == 0)
return 1;
return Q_UINT64_C(2) << (63 ^ QAlgorithmsPrivate::qt_builtin_clzll(v));
}
#else
inline quint64 qNextPowerOfTwo(quint64 v) inline quint64 qNextPowerOfTwo(quint64 v)
{ {
v |= v >> 1; v |= v >> 1;

View File

@ -516,6 +516,105 @@ QT_DEPRECATED_X("Use std::binary_search") Q_OUTOFLINE_TEMPLATE RandomAccessItera
#endif // QT_DEPRECATED_SINCE(5, 2) #endif // QT_DEPRECATED_SINCE(5, 2)
// Clang had a bug where __builtin_ctz/clz is not marked as constexpr.
#if defined Q_CC_CLANG && defined __apple_build_version__ && __clang_major__ < 7
# undef QT_HAS_CONSTEXPR_BUILTIN_CTZ_CLZ
#else
# define QT_HAS_CONSTEXPR_BUILTIN_CTZ_CLZ
#endif
#if defined QT_HAS_CONSTEXPR_BUILTIN_CTZ_CLZ
#if defined(Q_CC_GNU)
# define QT_HAS_BUILTIN_CTZS
Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_builtin_ctzs(quint16 v) Q_DECL_NOTHROW
{
# if QT_HAS_BUILTIN(__builtin_ctzs) || defined(__BMI__)
return __builtin_ctzs(v);
# else
return __builtin_ctz(v);
# endif
}
#define QT_HAS_BUILTIN_CLZS
Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_builtin_clzs(quint16 v) Q_DECL_NOTHROW
{
# if QT_HAS_BUILTIN(__builtin_clzs) || defined(__BMI__)
return __builtin_clzs(v);
# else
return __builtin_clz(v) - 16U;
# endif
}
#define QT_HAS_BUILTIN_CTZ
Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_builtin_ctz(quint32 v) Q_DECL_NOTHROW
{
return __builtin_ctz(v);
}
#define QT_HAS_BUILTIN_CLZ
Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_builtin_clz(quint32 v) Q_DECL_NOTHROW
{
return __builtin_clz(v);
}
#define QT_HAS_BUILTIN_CTZLL
Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_builtin_ctzll(quint64 v) Q_DECL_NOTHROW
{
return __builtin_ctzll(v);
}
#define QT_HAS_BUILTIN_CLZLL
Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_builtin_clzll(quint64 v) Q_DECL_NOTHROW
{
return __builtin_clzll(v);
}
#elif defined(Q_CC_MSVC) && !defined(Q_OS_WINCE)
#define QT_HAS_BUILTIN_CTZ
Q_DECL_CONSTEXPR Q_ALWAYS_INLINE unsigned long qt_builtin_ctz(quint32 val)
{
unsigned long result;
_BitScanForward(&result, val);
return result;
}
#define QT_HAS_BUILTIN_CLZ
Q_DECL_CONSTEXPR Q_ALWAYS_INLINE unsigned long qt_builtin_clz(quint32 val)
{
unsigned long result;
_BitScanReverse(&result, val);
// Now Invert the result: clz will count *down* from the msb to the lsb, so the msb index is 31
// and the lsb index is 0. The result for the index when counting up: msb index is 0 (because it
// starts there), and the lsb index is 31.
result ^= sizeof(quint32) * 8 - 1;
return result;
}
#if Q_PROCESSOR_WORDSIZE == 8
// These are only defined for 64bit builds.
#define QT_HAS_BUILTIN_CTZLL
Q_DECL_CONSTEXPR Q_ALWAYS_INLINE unsigned long qt_builtin_ctzll(quint64 val)
{
unsigned long result;
_BitScanForward64(&result, val);
return result;
}
// MSVC calls it _BitScanReverse and returns the carry flag, which we don't need
#define QT_HAS_BUILTIN_CLZLL
Q_DECL_CONSTEXPR Q_ALWAYS_INLINE unsigned long qt_builtin_clzll(quint64 val)
{
unsigned long result;
_BitScanReverse64(&result, val);
// see qt_builtin_clz
result ^= sizeof(quint64) * 8 - 1;
return result;
}
#endif
# define QT_HAS_BUILTIN_CTZS
Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_builtin_ctzs(quint16 v) Q_DECL_NOTHROW
{
return qt_builtin_ctz(v);
}
#define QT_HAS_BUILTIN_CLZS
Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_builtin_clzs(quint16 v) Q_DECL_NOTHROW
{
return qt_builtin_clz(v) - 16U;
}
#endif
#endif // QT_HAS_CONSTEXPR_BUILTIN_CTZ_CLZ
} //namespace QAlgorithmsPrivate } //namespace QAlgorithmsPrivate
@ -586,8 +685,8 @@ Q_DECL_CONST_FUNCTION Q_DECL_CONSTEXPR inline uint qPopulationCount(long unsigne
Q_DECL_RELAXED_CONSTEXPR inline uint qCountTrailingZeroBits(quint32 v) Q_DECL_NOTHROW Q_DECL_RELAXED_CONSTEXPR inline uint qCountTrailingZeroBits(quint32 v) Q_DECL_NOTHROW
{ {
#if defined(Q_CC_GNU) #if defined(QT_HAS_BUILTIN_CTZ)
return v ? __builtin_ctz(v) : 32U; return v ? QAlgorithmsPrivate::qt_builtin_ctz(v) : 32U;
#else #else
// see http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightParallel // see http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightParallel
unsigned int c = 32; // c will be the number of zero bits on the right unsigned int c = 32; // c will be the number of zero bits on the right
@ -604,8 +703,8 @@ Q_DECL_RELAXED_CONSTEXPR inline uint qCountTrailingZeroBits(quint32 v) Q_DECL_NO
Q_DECL_RELAXED_CONSTEXPR inline uint qCountTrailingZeroBits(quint8 v) Q_DECL_NOTHROW Q_DECL_RELAXED_CONSTEXPR inline uint qCountTrailingZeroBits(quint8 v) Q_DECL_NOTHROW
{ {
#if defined(Q_CC_GNU) #if defined(QT_HAS_BUILTIN_CTZ)
return v ? __builtin_ctz(v) : 8U; return v ? QAlgorithmsPrivate::qt_builtin_ctz(v) : 8U;
#else #else
unsigned int c = 8; // c will be the number of zero bits on the right unsigned int c = 8; // c will be the number of zero bits on the right
v &= -signed(v); v &= -signed(v);
@ -619,12 +718,8 @@ Q_DECL_RELAXED_CONSTEXPR inline uint qCountTrailingZeroBits(quint8 v) Q_DECL_NOT
Q_DECL_RELAXED_CONSTEXPR inline uint qCountTrailingZeroBits(quint16 v) Q_DECL_NOTHROW Q_DECL_RELAXED_CONSTEXPR inline uint qCountTrailingZeroBits(quint16 v) Q_DECL_NOTHROW
{ {
#if defined(Q_CC_GNU) #if defined(QT_HAS_BUILTIN_CTZS)
# if QT_HAS_BUILTIN(__builtin_ctzs) || defined(__BMI__) return v ? QAlgorithmsPrivate::qt_builtin_ctzs(v) : 16U;
return v ? __builtin_ctzs(v) : 16U;
# else
return v ? __builtin_ctz(v) : 16U;
# endif
#else #else
unsigned int c = 16; // c will be the number of zero bits on the right unsigned int c = 16; // c will be the number of zero bits on the right
v &= -signed(v); v &= -signed(v);
@ -639,8 +734,8 @@ Q_DECL_RELAXED_CONSTEXPR inline uint qCountTrailingZeroBits(quint16 v) Q_DECL_NO
Q_DECL_RELAXED_CONSTEXPR inline uint qCountTrailingZeroBits(quint64 v) Q_DECL_NOTHROW Q_DECL_RELAXED_CONSTEXPR inline uint qCountTrailingZeroBits(quint64 v) Q_DECL_NOTHROW
{ {
#if defined(Q_CC_GNU) #if defined(QT_HAS_BUILTIN_CTZLL)
return v ? __builtin_ctzll(v) : 64; return v ? QAlgorithmsPrivate::qt_builtin_ctzll(v) : 64;
#else #else
quint32 x = static_cast<quint32>(v); quint32 x = static_cast<quint32>(v);
return x ? qCountTrailingZeroBits(x) return x ? qCountTrailingZeroBits(x)
@ -655,8 +750,8 @@ Q_DECL_RELAXED_CONSTEXPR inline uint qCountTrailingZeroBits(unsigned long v) Q_D
Q_DECL_RELAXED_CONSTEXPR inline uint qCountLeadingZeroBits(quint32 v) Q_DECL_NOTHROW Q_DECL_RELAXED_CONSTEXPR inline uint qCountLeadingZeroBits(quint32 v) Q_DECL_NOTHROW
{ {
#if defined(Q_CC_GNU) #if defined(QT_HAS_BUILTIN_CLZ)
return v ? __builtin_clz(v) : 32U; return v ? QAlgorithmsPrivate::qt_builtin_clz(v) : 32U;
#else #else
// Hacker's Delight, 2nd ed. Fig 5-16, p. 102 // Hacker's Delight, 2nd ed. Fig 5-16, p. 102
v = v | (v >> 1); v = v | (v >> 1);
@ -670,8 +765,8 @@ Q_DECL_RELAXED_CONSTEXPR inline uint qCountLeadingZeroBits(quint32 v) Q_DECL_NOT
Q_DECL_RELAXED_CONSTEXPR inline uint qCountLeadingZeroBits(quint8 v) Q_DECL_NOTHROW Q_DECL_RELAXED_CONSTEXPR inline uint qCountLeadingZeroBits(quint8 v) Q_DECL_NOTHROW
{ {
#if defined(Q_CC_GNU) #if defined(QT_HAS_BUILTIN_CLZ)
return v ? __builtin_clz(v)-24U : 8U; return v ? QAlgorithmsPrivate::qt_builtin_clz(v)-24U : 8U;
#else #else
v = v | (v >> 1); v = v | (v >> 1);
v = v | (v >> 2); v = v | (v >> 2);
@ -682,12 +777,8 @@ Q_DECL_RELAXED_CONSTEXPR inline uint qCountLeadingZeroBits(quint8 v) Q_DECL_NOTH
Q_DECL_RELAXED_CONSTEXPR inline uint qCountLeadingZeroBits(quint16 v) Q_DECL_NOTHROW Q_DECL_RELAXED_CONSTEXPR inline uint qCountLeadingZeroBits(quint16 v) Q_DECL_NOTHROW
{ {
#if defined(Q_CC_GNU) #if defined(QT_HAS_BUILTIN_CLZS)
# if QT_HAS_BUILTIN(__builtin_clzs) || defined(__BMI__) return v ? QAlgorithmsPrivate::qt_builtin_clzs(v) : 16U;
return v ? __builtin_clzs(v) : 16U;
# else
return v ? __builtin_clz(v)-16U : 16U;
# endif
#else #else
v = v | (v >> 1); v = v | (v >> 1);
v = v | (v >> 2); v = v | (v >> 2);
@ -699,8 +790,8 @@ Q_DECL_RELAXED_CONSTEXPR inline uint qCountLeadingZeroBits(quint16 v) Q_DECL_NOT
Q_DECL_RELAXED_CONSTEXPR inline uint qCountLeadingZeroBits(quint64 v) Q_DECL_NOTHROW Q_DECL_RELAXED_CONSTEXPR inline uint qCountLeadingZeroBits(quint64 v) Q_DECL_NOTHROW
{ {
#if defined(Q_CC_GNU) #if defined(QT_HAS_BUILTIN_CLZLL)
return v ? __builtin_clzll(v) : 64U; return v ? QAlgorithmsPrivate::qt_builtin_clzll(v) : 64U;
#else #else
v = v | (v >> 1); v = v | (v >> 1);
v = v | (v >> 2); v = v | (v >> 2);

View File

@ -465,59 +465,6 @@ static inline quint64 qCpuFeatures()
#define qCpuHasFeature(feature) ((qCompilerCpuFeatures & (Q_UINT64_C(1) << CpuFeature ## feature)) \ #define qCpuHasFeature(feature) ((qCompilerCpuFeatures & (Q_UINT64_C(1) << CpuFeature ## feature)) \
|| (qCpuFeatures() & (Q_UINT64_C(1) << CpuFeature ## feature))) || (qCpuFeatures() & (Q_UINT64_C(1) << CpuFeature ## feature)))
#if QT_HAS_BUILTIN(__builtin_clz) && QT_HAS_BUILTIN(__builtin_ctz) && defined(Q_CC_CLANG) && !defined(Q_CC_INTEL)
static Q_ALWAYS_INLINE unsigned _bit_scan_reverse(unsigned val)
{
Q_ASSERT(val != 0); // if val==0, the result is undefined.
unsigned result = static_cast<unsigned>(__builtin_clz(val)); // Count Leading Zeros
// Now Invert the result: clz will count *down* from the msb to the lsb, so the msb index is 31
// and the lsb inde is 0. The result for _bit_scan_reverse is expected to be the index when
// counting up: msb index is 0 (because it starts there), and the lsb index is 31.
result ^= sizeof(unsigned) * 8 - 1;
return result;
}
static Q_ALWAYS_INLINE unsigned _bit_scan_forward(unsigned val)
{
Q_ASSERT(val != 0); // if val==0, the result is undefined.
return static_cast<unsigned>(__builtin_ctz(val)); // Count Trailing Zeros
}
#elif defined(Q_PROCESSOR_X86)
// Bit scan functions for x86
# if defined(Q_CC_MSVC)
// MSVC calls it _BitScanReverse and returns the carry flag, which we don't need
static __forceinline unsigned long _bit_scan_reverse(uint val)
{
unsigned long result;
_BitScanReverse(&result, val);
return result;
}
static __forceinline unsigned long _bit_scan_forward(uint val)
{
unsigned long result;
_BitScanForward(&result, val);
return result;
}
# elif (defined(Q_CC_CLANG) || (defined(Q_CC_GNU) && Q_CC_GNU < 405)) \
&& !defined(Q_CC_INTEL)
// Clang is missing the intrinsic for _bit_scan_reverse
// GCC only added it in version 4.5
static inline __attribute__((always_inline))
unsigned _bit_scan_reverse(unsigned val)
{
unsigned result;
asm("bsr %1, %0" : "=r" (result) : "r" (val));
return result;
}
static inline __attribute__((always_inline))
unsigned _bit_scan_forward(unsigned val)
{
unsigned result;
asm("bsf %1, %0" : "=r" (result) : "r" (val));
return result;
}
# endif
#endif // Q_PROCESSOR_X86
#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \ #define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \
for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i) for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i)

View File

@ -468,7 +468,7 @@ static int ucstrncmp(const QChar *a, const QChar *b, int l)
uint mask = ~_mm_movemask_epi8(result); uint mask = ~_mm_movemask_epi8(result);
if (ushort(mask)) { if (ushort(mask)) {
// found a different byte // found a different byte
uint idx = uint(_bit_scan_forward(mask)); uint idx = qCountTrailingZeroBits(mask);
return reinterpret_cast<const QChar *>(ptr + idx)->unicode() return reinterpret_cast<const QChar *>(ptr + idx)->unicode()
- reinterpret_cast<const QChar *>(ptr + distance + idx)->unicode(); - reinterpret_cast<const QChar *>(ptr + distance + idx)->unicode();
} }
@ -571,7 +571,7 @@ static int ucstrncmp(const QChar *a, const uchar *c, int l)
# endif # endif
if (mask) { if (mask) {
// found a different character // found a different character
uint idx = uint(_bit_scan_forward(mask)); uint idx = qCountTrailingZeroBits(mask);
return uc[offset + idx / 2] - c[offset + idx / 2]; return uc[offset + idx / 2] - c[offset + idx / 2];
} }
} }
@ -589,7 +589,7 @@ static int ucstrncmp(const QChar *a, const uchar *c, int l)
uint mask = ~_mm_movemask_epi8(result); uint mask = ~_mm_movemask_epi8(result);
if (ushort(mask)) { if (ushort(mask)) {
// found a different character // found a different character
uint idx = uint(_bit_scan_forward(mask)); uint idx = qCountTrailingZeroBits(mask);
return uc[offset + idx / 2] - c[offset + idx / 2]; return uc[offset + idx / 2] - c[offset + idx / 2];
} }
@ -683,7 +683,7 @@ static int findChar(const QChar *str, int len, QChar ch, int from,
// found a match // found a match
// same as: return n - s + _bit_scan_forward(mask) / 2 // same as: return n - s + _bit_scan_forward(mask) / 2
return (reinterpret_cast<const char *>(n) - reinterpret_cast<const char *>(s) return (reinterpret_cast<const char *>(n) - reinterpret_cast<const char *>(s)
+ _bit_scan_forward(mask)) >> 1; + qCountTrailingZeroBits(mask)) >> 1;
} }
} }