From 540978288ea0f6ed0b166bb9207f427a4c825ab6 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Tue, 15 Mar 2016 10:10:12 -0700 Subject: [PATCH] Replace qUnaligned{Load,Store} with the existing q{To,From}Unaligned Move the Q_ALWAYS_INLINE and forcing of __builtin_memcpy to the existing functions. Change-Id: Icaa7fb2a490246bda156ffff143c137e520eea79 Reviewed-by: Lars Knoll --- src/corelib/global/qendian.h | 30 ++++++++++++------- src/corelib/global/qendian.qdoc | 23 +++++++++++++++ src/corelib/json/qjson_p.h | 2 +- src/corelib/mimetypes/qmimemagicrule.cpp | 3 +- src/corelib/tools/qbitarray.cpp | 26 +++-------------- src/corelib/tools/qhash.cpp | 9 +++--- src/corelib/tools/qsimd.cpp | 22 -------------- src/corelib/tools/qsimd_p.h | 37 ------------------------ src/corelib/tools/qstring.cpp | 2 +- 9 files changed, 55 insertions(+), 99 deletions(-) diff --git a/src/corelib/global/qendian.h b/src/corelib/global/qendian.h index 2ddefaec8b..23dda270e3 100644 --- a/src/corelib/global/qendian.h +++ b/src/corelib/global/qendian.h @@ -42,6 +42,11 @@ QT_BEGIN_NAMESPACE +#ifdef __has_builtin +# define QT_HAS_BUILTIN(x) __has_builtin(x) +#else +# define QT_HAS_BUILTIN(x) 0 +#endif /* * ENDIAN FUNCTIONS @@ -64,18 +69,29 @@ template inline void qbswap(const T src, uchar *dest) // Used to implement a type-safe and alignment-safe copy operation // If you want to avoid the memcpy, you must write specializations for these functions -template inline void qToUnaligned(const T src, uchar *dest) +template Q_ALWAYS_INLINE void qToUnaligned(const T src, uchar *dest) { // Using sizeof(T) inside memcpy function produces internal compiler error with // MSVC2008/ARM in tst_endian -> use extra indirection to resolve size of T. 
const size_t size = sizeof(T); - memcpy(dest, &src, size); +#if QT_HAS_BUILTIN(__builtin_memcpy) + __builtin_memcpy +#else + memcpy +#endif + (dest, &src, size); } -template inline T qFromUnaligned(const uchar *src) + +template Q_ALWAYS_INLINE T qFromUnaligned(const uchar *src) { T dest; const size_t size = sizeof(T); - memcpy(&dest, src, size); +#if QT_HAS_BUILTIN(__builtin_memcpy) + __builtin_memcpy +#else + memcpy +#endif + (&dest, src, size); return dest; } @@ -87,12 +103,6 @@ template inline T qFromUnaligned(const uchar *src) */ template T qbswap(T source); -#ifdef __has_builtin -# define QT_HAS_BUILTIN(x) __has_builtin(x) -#else -# define QT_HAS_BUILTIN(x) 0 -#endif - // GCC 4.3 implemented all the intrinsics, but the 16-bit one only got implemented in 4.8; // Clang 2.6 implemented the 32- and 64-bit but waited until 3.2 to implement the 16-bit one #if (defined(Q_CC_GNU) && Q_CC_GNU >= 403) || QT_HAS_BUILTIN(__builtin_bswap32) diff --git a/src/corelib/global/qendian.qdoc b/src/corelib/global/qendian.qdoc index e110461f8b..b7494c9a21 100644 --- a/src/corelib/global/qendian.qdoc +++ b/src/corelib/global/qendian.qdoc @@ -33,6 +33,29 @@ little and big endian representations of numbers. */ +/*! + \internal + \fn T qFromUnaligned(const uchar *ptr) + \since 5.5 + + Loads a \c{T} from address \a ptr, which may be misaligned. + + Use of this function avoids the undefined behavior that the C++ standard + otherwise attributes to unaligned loads. +*/ + +/*! + \internal + \fn void qToUnaligned(T t, uchar *ptr) + \since 4.5 + + Stores \a t to address \a ptr, which may be misaligned. + + Use of this function avoids the undefined behavior that the C++ standard + otherwise attributes to unaligned stores. +*/ + + /*! 
\fn T qFromBigEndian(const uchar *src) \since 4.3 diff --git a/src/corelib/json/qjson_p.h b/src/corelib/json/qjson_p.h index 59d0c91785..c52a37ba2b 100644 --- a/src/corelib/json/qjson_p.h +++ b/src/corelib/json/qjson_p.h @@ -402,7 +402,7 @@ public: // pack with itself, we'll discard the high part anyway chunk = _mm_packus_epi16(chunk, chunk); // unaligned 64-bit store - qUnalignedStore(l + i, _mm_cvtsi128_si64(chunk)); + qToUnaligned(_mm_cvtsi128_si64(chunk), l + i); i += 8; } # endif diff --git a/src/corelib/mimetypes/qmimemagicrule.cpp b/src/corelib/mimetypes/qmimemagicrule.cpp index 44834420fe..398a670544 100644 --- a/src/corelib/mimetypes/qmimemagicrule.cpp +++ b/src/corelib/mimetypes/qmimemagicrule.cpp @@ -42,7 +42,6 @@ #include #include #include -#include // for qUnalignedLoad QT_BEGIN_NAMESPACE @@ -177,7 +176,7 @@ static bool matchNumber(const QMimeMagicRulePrivate *d, const QByteArray &data) const char *p = data.constData() + d->startPos; const char *e = data.constData() + qMin(data.size() - int(sizeof(T)), d->endPos + 1); for ( ; p <= e; ++p) { - if ((qUnalignedLoad(p) & mask) == (value & mask)) + if ((qFromUnaligned(reinterpret_cast(p)) & mask) == (value & mask)) return true; } diff --git a/src/corelib/tools/qbitarray.cpp b/src/corelib/tools/qbitarray.cpp index a64edea77e..8e6b1203f8 100644 --- a/src/corelib/tools/qbitarray.cpp +++ b/src/corelib/tools/qbitarray.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include QT_BEGIN_NAMESPACE @@ -162,25 +163,6 @@ QBitArray::QBitArray(int size, bool value) Same as size(). */ -template T qUnalignedLoad(const uchar *ptr) -{ - /* - * Testing with different compilers shows that they all optimize the memcpy - * call away and replace with direct loads whenever possible. On x86 and PPC, - * GCC does direct unaligned loads; on MIPS, it generates a pair of load-left - * and load-right instructions. ICC and Clang do the same on x86. This is both - * 32- and 64-bit. 
- * - * On ARM cores without unaligned loads, the compiler leaves a call to - * memcpy. - */ - - T u; - memcpy(&u, ptr, sizeof(u)); - return u; -} - - /*! If \a on is true, this function returns the number of 1-bits stored in the bit array; otherwise the number @@ -196,17 +178,17 @@ int QBitArray::count(bool on) const const quint8 *const end = reinterpret_cast(d.end()); while (bits + 7 <= end) { - quint64 v = qUnalignedLoad(bits); + quint64 v = qFromUnaligned(bits); bits += 8; numBits += int(qPopulationCount(v)); } if (bits + 3 <= end) { - quint32 v = qUnalignedLoad(bits); + quint32 v = qFromUnaligned(bits); bits += 4; numBits += int(qPopulationCount(v)); } if (bits + 1 < end) { - quint16 v = qUnalignedLoad(bits); + quint16 v = qFromUnaligned(bits); bits += 2; numBits += int(qPopulationCount(v)); } diff --git a/src/corelib/tools/qhash.cpp b/src/corelib/tools/qhash.cpp index d40570d347..c5669babd9 100644 --- a/src/corelib/tools/qhash.cpp +++ b/src/corelib/tools/qhash.cpp @@ -51,6 +51,7 @@ #include #include #include +#include #include #ifndef QT_BOOTSTRAPPED @@ -105,24 +106,24 @@ static uint crc32(const Char *ptr, size_t len, uint h) p += 8; for ( ; p <= e; p += 8) - h2 = _mm_crc32_u64(h2, qUnalignedLoad(p - 8)); + h2 = _mm_crc32_u64(h2, qFromUnaligned(p - 8)); h = h2; p -= 8; len = e - p; if (len & 4) { - h = _mm_crc32_u32(h, qUnalignedLoad(p)); + h = _mm_crc32_u32(h, qFromUnaligned(p)); p += 4; } # else p += 4; for ( ; p <= e; p += 4) - h = _mm_crc32_u32(h, qUnalignedLoad(p - 4)); + h = _mm_crc32_u32(h, qFromUnaligned(p - 4)); p -= 4; len = e - p; # endif if (len & 2) { - h = _mm_crc32_u16(h, qUnalignedLoad(p)); + h = _mm_crc32_u16(h, qFromUnaligned(p)); p += 2; } if (sizeof(Char) == 1 && len & 1) diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp index 5ca2ce4c6f..f07eb098f2 100644 --- a/src/corelib/tools/qsimd.cpp +++ b/src/corelib/tools/qsimd.cpp @@ -716,26 +716,4 @@ void qDumpCPUFeatures() puts(""); } -/*! 
- \internal - \fn T qUnalignedLoad(const void *ptr) - \since 5.6.1 - - Loads a \c{T} from address \a ptr, which may be misaligned. - - Use of this function avoid the undefined behavior that the C++ standard - otherwise attributes to unaligned loads. -*/ - -/*! - \internal - \fn void qUnalignedStore(void *ptr, T t) - \since 5.6.1 - - Stores \a t to address \a ptr, which may be misaligned. - - Use of this function avoid the undefined behavior that the C++ standard - otherwise attributes to unaligned stores. -*/ - QT_END_NAMESPACE diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h index ca53908cf5..d689654b29 100644 --- a/src/corelib/tools/qsimd_p.h +++ b/src/corelib/tools/qsimd_p.h @@ -476,43 +476,6 @@ unsigned _bit_scan_forward(unsigned val) #define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \ for (; i < static_cast(qMin(static_cast(length), ((4 - ((reinterpret_cast(ptr) >> 2) & 0x3)) & 0x3))); ++i) -// these defines are copied from qendian.h -// in Qt 5.7, they have been moved to qglobal.h -// drop them when merging this to 5.7 -#ifdef __has_builtin -# define QT_HAS_BUILTIN(x) __has_builtin(x) -#else -# define QT_HAS_BUILTIN(x) 0 -#endif - -template -Q_ALWAYS_INLINE -T qUnalignedLoad(const void *ptr) Q_DECL_NOTHROW -{ - T result; -#if QT_HAS_BUILTIN(__builtin_memcpy) - __builtin_memcpy -#else - memcpy -#endif - /*memcpy*/(&result, ptr, sizeof result); - return result; -} - -template -Q_ALWAYS_INLINE -void qUnalignedStore(void *ptr, T t) Q_DECL_NOTHROW -{ -#if QT_HAS_BUILTIN(__builtin_memcpy) - __builtin_memcpy -#else - memcpy -#endif - /*memcpy*/(ptr, &t, sizeof t); -} - -#undef QT_HAS_BUILTIN - QT_END_NAMESPACE #endif // QSIMD_P_H diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 6bbaf05fef..be1ca8ba95 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -577,7 +577,7 @@ static int ucstrncmp(const QChar *a, const uchar *c, int l) // we'll read uc[offset..offset+7] (16 bytes) and 
c[offset..offset+7] (8 bytes) if (uc + offset + 7 < e) { // same, but we're using an 8-byte load - __m128i chunk = _mm_cvtsi64_si128(qUnalignedLoad<long long>(c + offset)); + __m128i chunk = _mm_cvtsi64_si128(qFromUnaligned<long long>(c + offset)); __m128i secondHalf = _mm_unpacklo_epi8(chunk, nullmask); __m128i ucdata = _mm_loadu_si128((const __m128i*)(uc + offset));