Replace qUnaligned{Load,Store} with the existing q{To,From}Unaligned

Move the Q_ALWAYS_INLINE and forcing of __builtin_memcpy to the existing
functions.

Change-Id: Icaa7fb2a490246bda156ffff143c137e520eea79
Reviewed-by: Lars Knoll <lars.knoll@theqtcompany.com>
This commit is contained in:
Thiago Macieira 2016-03-15 10:10:12 -07:00 committed by Jani Heikkinen
parent 421aa422af
commit 540978288e
9 changed files with 55 additions and 99 deletions

View File

@@ -42,6 +42,11 @@
QT_BEGIN_NAMESPACE QT_BEGIN_NAMESPACE
#ifdef __has_builtin
# define QT_HAS_BUILTIN(x) __has_builtin(x)
#else
# define QT_HAS_BUILTIN(x) 0
#endif
/* /*
* ENDIAN FUNCTIONS * ENDIAN FUNCTIONS
@@ -64,18 +69,29 @@ template <typename T> inline void qbswap(const T src, uchar *dest)
// Used to implement a type-safe and alignment-safe copy operation // Used to implement a type-safe and alignment-safe copy operation
// If you want to avoid the memcpy, you must write specializations for these functions // If you want to avoid the memcpy, you must write specializations for these functions
template <typename T> inline void qToUnaligned(const T src, uchar *dest) template <typename T> Q_ALWAYS_INLINE void qToUnaligned(const T src, uchar *dest)
{ {
// Using sizeof(T) inside memcpy function produces internal compiler error with // Using sizeof(T) inside memcpy function produces internal compiler error with
// MSVC2008/ARM in tst_endian -> use extra indirection to resolve size of T. // MSVC2008/ARM in tst_endian -> use extra indirection to resolve size of T.
const size_t size = sizeof(T); const size_t size = sizeof(T);
memcpy(dest, &src, size); #if QT_HAS_BUILTIN(__builtin_memcpy)
__builtin_memcpy
#else
memcpy
#endif
(dest, &src, size);
} }
template <typename T> inline T qFromUnaligned(const uchar *src)
template <typename T> Q_ALWAYS_INLINE T qFromUnaligned(const uchar *src)
{ {
T dest; T dest;
const size_t size = sizeof(T); const size_t size = sizeof(T);
memcpy(&dest, src, size); #if QT_HAS_BUILTIN(__builtin_memcpy)
__builtin_memcpy
#else
memcpy
#endif
(&dest, src, size);
return dest; return dest;
} }
@@ -87,12 +103,6 @@ template <typename T> inline T qFromUnaligned(const uchar *src)
*/ */
template <typename T> T qbswap(T source); template <typename T> T qbswap(T source);
#ifdef __has_builtin
# define QT_HAS_BUILTIN(x) __has_builtin(x)
#else
# define QT_HAS_BUILTIN(x) 0
#endif
// GCC 4.3 implemented all the intrinsics, but the 16-bit one only got implemented in 4.8; // GCC 4.3 implemented all the intrinsics, but the 16-bit one only got implemented in 4.8;
// Clang 2.6 implemented the 32- and 64-bit but waited until 3.2 to implement the 16-bit one // Clang 2.6 implemented the 32- and 64-bit but waited until 3.2 to implement the 16-bit one
#if (defined(Q_CC_GNU) && Q_CC_GNU >= 403) || QT_HAS_BUILTIN(__builtin_bswap32) #if (defined(Q_CC_GNU) && Q_CC_GNU >= 403) || QT_HAS_BUILTIN(__builtin_bswap32)

View File

@@ -33,6 +33,29 @@
little and big endian representations of numbers. little and big endian representations of numbers.
*/ */
/*!
\internal
\fn T qFromUnaligned(const uchar *ptr)
\since 5.5
Loads a \c{T} from address \a ptr, which may be misaligned.
Use of this function avoids the undefined behavior that the C++ standard
otherwise attributes to unaligned loads.
*/
/*!
\internal
\fn void qToUnaligned(T t, uchar *ptr)
\since 4.5
Stores \a t to address \a ptr, which may be misaligned.
Use of this function avoids the undefined behavior that the C++ standard
otherwise attributes to unaligned stores.
*/
/*! /*!
\fn T qFromBigEndian(const uchar *src) \fn T qFromBigEndian(const uchar *src)
\since 4.3 \since 4.3

View File

@@ -402,7 +402,7 @@ public:
// pack with itself, we'll discard the high part anyway // pack with itself, we'll discard the high part anyway
chunk = _mm_packus_epi16(chunk, chunk); chunk = _mm_packus_epi16(chunk, chunk);
// unaligned 64-bit store // unaligned 64-bit store
qUnalignedStore(l + i, _mm_cvtsi128_si64(chunk)); qToUnaligned(_mm_cvtsi128_si64(chunk), l + i);
i += 8; i += 8;
} }
# endif # endif

View File

@@ -42,7 +42,6 @@
#include <QtCore/QList> #include <QtCore/QList>
#include <QtCore/QDebug> #include <QtCore/QDebug>
#include <qendian.h> #include <qendian.h>
#include <private/qsimd_p.h> // for qUnalignedLoad
QT_BEGIN_NAMESPACE QT_BEGIN_NAMESPACE
@@ -177,7 +176,7 @@ static bool matchNumber(const QMimeMagicRulePrivate *d, const QByteArray &data)
const char *p = data.constData() + d->startPos; const char *p = data.constData() + d->startPos;
const char *e = data.constData() + qMin(data.size() - int(sizeof(T)), d->endPos + 1); const char *e = data.constData() + qMin(data.size() - int(sizeof(T)), d->endPos + 1);
for ( ; p <= e; ++p) { for ( ; p <= e; ++p) {
if ((qUnalignedLoad<T>(p) & mask) == (value & mask)) if ((qFromUnaligned<T>(reinterpret_cast<const uchar *>(p)) & mask) == (value & mask))
return true; return true;
} }

View File

@@ -35,6 +35,7 @@
#include <qalgorithms.h> #include <qalgorithms.h>
#include <qdatastream.h> #include <qdatastream.h>
#include <qdebug.h> #include <qdebug.h>
#include <qendian.h>
#include <string.h> #include <string.h>
QT_BEGIN_NAMESPACE QT_BEGIN_NAMESPACE
@@ -162,25 +163,6 @@ QBitArray::QBitArray(int size, bool value)
Same as size(). Same as size().
*/ */
template <typename T> T qUnalignedLoad(const uchar *ptr)
{
/*
* Testing with different compilers shows that they all optimize the memcpy
* call away and replace with direct loads whenever possible. On x86 and PPC,
* GCC does direct unaligned loads; on MIPS, it generates a pair of load-left
* and load-right instructions. ICC and Clang do the same on x86. This is both
* 32- and 64-bit.
*
* On ARM cores without unaligned loads, the compiler leaves a call to
* memcpy.
*/
T u;
memcpy(&u, ptr, sizeof(u));
return u;
}
/*! /*!
If \a on is true, this function returns the number of If \a on is true, this function returns the number of
1-bits stored in the bit array; otherwise the number 1-bits stored in the bit array; otherwise the number
@@ -196,17 +178,17 @@ int QBitArray::count(bool on) const
const quint8 *const end = reinterpret_cast<const quint8 *>(d.end()); const quint8 *const end = reinterpret_cast<const quint8 *>(d.end());
while (bits + 7 <= end) { while (bits + 7 <= end) {
quint64 v = qUnalignedLoad<quint64>(bits); quint64 v = qFromUnaligned<quint64>(bits);
bits += 8; bits += 8;
numBits += int(qPopulationCount(v)); numBits += int(qPopulationCount(v));
} }
if (bits + 3 <= end) { if (bits + 3 <= end) {
quint32 v = qUnalignedLoad<quint32>(bits); quint32 v = qFromUnaligned<quint32>(bits);
bits += 4; bits += 4;
numBits += int(qPopulationCount(v)); numBits += int(qPopulationCount(v));
} }
if (bits + 1 < end) { if (bits + 1 < end) {
quint16 v = qUnalignedLoad<quint16>(bits); quint16 v = qFromUnaligned<quint16>(bits);
bits += 2; bits += 2;
numBits += int(qPopulationCount(v)); numBits += int(qPopulationCount(v));
} }

View File

@@ -51,6 +51,7 @@
#include <qbytearray.h> #include <qbytearray.h>
#include <qdatetime.h> #include <qdatetime.h>
#include <qbasicatomic.h> #include <qbasicatomic.h>
#include <qendian.h>
#include <private/qsimd_p.h> #include <private/qsimd_p.h>
#ifndef QT_BOOTSTRAPPED #ifndef QT_BOOTSTRAPPED
@@ -105,24 +106,24 @@ static uint crc32(const Char *ptr, size_t len, uint h)
p += 8; p += 8;
for ( ; p <= e; p += 8) for ( ; p <= e; p += 8)
h2 = _mm_crc32_u64(h2, qUnalignedLoad<qlonglong>(p - 8)); h2 = _mm_crc32_u64(h2, qFromUnaligned<qlonglong>(p - 8));
h = h2; h = h2;
p -= 8; p -= 8;
len = e - p; len = e - p;
if (len & 4) { if (len & 4) {
h = _mm_crc32_u32(h, qUnalignedLoad<uint>(p)); h = _mm_crc32_u32(h, qFromUnaligned<uint>(p));
p += 4; p += 4;
} }
# else # else
p += 4; p += 4;
for ( ; p <= e; p += 4) for ( ; p <= e; p += 4)
h = _mm_crc32_u32(h, qUnalignedLoad<uint>(p - 4)); h = _mm_crc32_u32(h, qFromUnaligned<uint>(p - 4));
p -= 4; p -= 4;
len = e - p; len = e - p;
# endif # endif
if (len & 2) { if (len & 2) {
h = _mm_crc32_u16(h, qUnalignedLoad<ushort>(p)); h = _mm_crc32_u16(h, qFromUnaligned<ushort>(p));
p += 2; p += 2;
} }
if (sizeof(Char) == 1 && len & 1) if (sizeof(Char) == 1 && len & 1)

View File

@@ -716,26 +716,4 @@ void qDumpCPUFeatures()
puts(""); puts("");
} }
/*!
\internal
\fn T qUnalignedLoad(const void *ptr)
\since 5.6.1
Loads a \c{T} from address \a ptr, which may be misaligned.
Use of this function avoid the undefined behavior that the C++ standard
otherwise attributes to unaligned loads.
*/
/*!
\internal
\fn void qUnalignedStore(void *ptr, T t)
\since 5.6.1
Stores \a t to address \a ptr, which may be misaligned.
Use of this function avoid the undefined behavior that the C++ standard
otherwise attributes to unaligned stores.
*/
QT_END_NAMESPACE QT_END_NAMESPACE

View File

@@ -476,43 +476,6 @@ unsigned _bit_scan_forward(unsigned val)
#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \ #define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \
for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i) for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i)
// these defines are copied from qendian.h
// in Qt 5.7, they have been moved to qglobal.h
// drop them when merging this to 5.7
#ifdef __has_builtin
# define QT_HAS_BUILTIN(x) __has_builtin(x)
#else
# define QT_HAS_BUILTIN(x) 0
#endif
template <typename T>
Q_ALWAYS_INLINE
T qUnalignedLoad(const void *ptr) Q_DECL_NOTHROW
{
T result;
#if QT_HAS_BUILTIN(__builtin_memcpy)
__builtin_memcpy
#else
memcpy
#endif
/*memcpy*/(&result, ptr, sizeof result);
return result;
}
template <typename T>
Q_ALWAYS_INLINE
void qUnalignedStore(void *ptr, T t) Q_DECL_NOTHROW
{
#if QT_HAS_BUILTIN(__builtin_memcpy)
__builtin_memcpy
#else
memcpy
#endif
/*memcpy*/(ptr, &t, sizeof t);
}
#undef QT_HAS_BUILTIN
QT_END_NAMESPACE QT_END_NAMESPACE
#endif // QSIMD_P_H #endif // QSIMD_P_H

View File

@@ -577,7 +577,7 @@ static int ucstrncmp(const QChar *a, const uchar *c, int l)
// we'll read uc[offset..offset+7] (16 bytes) and c[offset..offset+7] (8 bytes) // we'll read uc[offset..offset+7] (16 bytes) and c[offset..offset+7] (8 bytes)
if (uc + offset + 7 < e) { if (uc + offset + 7 < e) {
// same, but we're using an 8-byte load // same, but we're using an 8-byte load
__m128i chunk = _mm_cvtsi64_si128(qUnalignedLoad<long long>(c + offset)); __m128i chunk = _mm_cvtsi64_si128(qFromUnaligned<long long>(c + offset));
__m128i secondHalf = _mm_unpacklo_epi8(chunk, nullmask); __m128i secondHalf = _mm_unpacklo_epi8(chunk, nullmask);
__m128i ucdata = _mm_loadu_si128((const __m128i*)(uc + offset)); __m128i ucdata = _mm_loadu_si128((const __m128i*)(uc + offset));