Replace qUnaligned{Load,Store} with the existing q{To,From}Unaligned
Move the Q_ALWAYS_INLINE and forcing of __builtin_memcpy to the existing functions.

Change-Id: Icaa7fb2a490246bda156ffff143c137e520eea79
Reviewed-by: Lars Knoll <lars.knoll@theqtcompany.com>
This commit is contained in:
parent
421aa422af
commit
540978288e
@ -42,6 +42,11 @@
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
#ifdef __has_builtin
|
||||
# define QT_HAS_BUILTIN(x) __has_builtin(x)
|
||||
#else
|
||||
# define QT_HAS_BUILTIN(x) 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* ENDIAN FUNCTIONS
|
||||
@ -64,18 +69,29 @@ template <typename T> inline void qbswap(const T src, uchar *dest)
|
||||
|
||||
// Used to implement a type-safe and alignment-safe copy operation
|
||||
// If you want to avoid the memcpy, you must write specializations for these functions
|
||||
template <typename T> inline void qToUnaligned(const T src, uchar *dest)
|
||||
template <typename T> Q_ALWAYS_INLINE void qToUnaligned(const T src, uchar *dest)
|
||||
{
|
||||
// Using sizeof(T) inside memcpy function produces internal compiler error with
|
||||
// MSVC2008/ARM in tst_endian -> use extra indirection to resolve size of T.
|
||||
const size_t size = sizeof(T);
|
||||
memcpy(dest, &src, size);
|
||||
#if QT_HAS_BUILTIN(__builtin_memcpy)
|
||||
__builtin_memcpy
|
||||
#else
|
||||
memcpy
|
||||
#endif
|
||||
(dest, &src, size);
|
||||
}
|
||||
template <typename T> inline T qFromUnaligned(const uchar *src)
|
||||
|
||||
template <typename T> Q_ALWAYS_INLINE T qFromUnaligned(const uchar *src)
|
||||
{
|
||||
T dest;
|
||||
const size_t size = sizeof(T);
|
||||
memcpy(&dest, src, size);
|
||||
#if QT_HAS_BUILTIN(__builtin_memcpy)
|
||||
__builtin_memcpy
|
||||
#else
|
||||
memcpy
|
||||
#endif
|
||||
(&dest, src, size);
|
||||
return dest;
|
||||
}
|
||||
|
||||
@ -87,12 +103,6 @@ template <typename T> inline T qFromUnaligned(const uchar *src)
|
||||
*/
|
||||
template <typename T> T qbswap(T source);
|
||||
|
||||
#ifdef __has_builtin
|
||||
# define QT_HAS_BUILTIN(x) __has_builtin(x)
|
||||
#else
|
||||
# define QT_HAS_BUILTIN(x) 0
|
||||
#endif
|
||||
|
||||
// GCC 4.3 implemented all the intrinsics, but the 16-bit one only got implemented in 4.8;
|
||||
// Clang 2.6 implemented the 32- and 64-bit but waited until 3.2 to implement the 16-bit one
|
||||
#if (defined(Q_CC_GNU) && Q_CC_GNU >= 403) || QT_HAS_BUILTIN(__builtin_bswap32)
|
||||
|
@ -33,6 +33,29 @@
|
||||
little and big endian representations of numbers.
|
||||
*/
|
||||
|
||||
/*!
|
||||
\internal
|
||||
\fn T qFromUnaligned(const uchar *ptr)
|
||||
\since 5.5
|
||||
|
||||
Loads a \c{T} from address \a ptr, which may be misaligned.
|
||||
|
||||
Use of this function avoids the undefined behavior that the C++ standard
|
||||
otherwise attributes to unaligned loads.
|
||||
*/
|
||||
|
||||
/*!
|
||||
\internal
|
||||
\fn void qToUnaligned(T t, uchar *ptr)
|
||||
\since 4.5
|
||||
|
||||
Stores \a t to address \a ptr, which may be misaligned.
|
||||
|
||||
Use of this function avoids the undefined behavior that the C++ standard
|
||||
otherwise attributes to unaligned stores.
|
||||
*/
|
||||
|
||||
|
||||
/*!
|
||||
\fn T qFromBigEndian(const uchar *src)
|
||||
\since 4.3
|
||||
|
@ -402,7 +402,7 @@ public:
|
||||
// pack with itself, we'll discard the high part anyway
|
||||
chunk = _mm_packus_epi16(chunk, chunk);
|
||||
// unaligned 64-bit store
|
||||
qUnalignedStore(l + i, _mm_cvtsi128_si64(chunk));
|
||||
qToUnaligned(_mm_cvtsi128_si64(chunk), l + i);
|
||||
i += 8;
|
||||
}
|
||||
# endif
|
||||
|
@ -42,7 +42,6 @@
|
||||
#include <QtCore/QList>
|
||||
#include <QtCore/QDebug>
|
||||
#include <qendian.h>
|
||||
#include <private/qsimd_p.h> // for qUnalignedLoad
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
@ -177,7 +176,7 @@ static bool matchNumber(const QMimeMagicRulePrivate *d, const QByteArray &data)
|
||||
const char *p = data.constData() + d->startPos;
|
||||
const char *e = data.constData() + qMin(data.size() - int(sizeof(T)), d->endPos + 1);
|
||||
for ( ; p <= e; ++p) {
|
||||
if ((qUnalignedLoad<T>(p) & mask) == (value & mask))
|
||||
if ((qFromUnaligned<T>(reinterpret_cast<const uchar *>(p)) & mask) == (value & mask))
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include <qalgorithms.h>
|
||||
#include <qdatastream.h>
|
||||
#include <qdebug.h>
|
||||
#include <qendian.h>
|
||||
#include <string.h>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
@ -162,25 +163,6 @@ QBitArray::QBitArray(int size, bool value)
|
||||
Same as size().
|
||||
*/
|
||||
|
||||
template <typename T> T qUnalignedLoad(const uchar *ptr)
|
||||
{
|
||||
/*
|
||||
* Testing with different compilers shows that they all optimize the memcpy
|
||||
* call away and replace with direct loads whenever possible. On x86 and PPC,
|
||||
* GCC does direct unaligned loads; on MIPS, it generates a pair of load-left
|
||||
* and load-right instructions. ICC and Clang do the same on x86. This is both
|
||||
* 32- and 64-bit.
|
||||
*
|
||||
* On ARM cores without unaligned loads, the compiler leaves a call to
|
||||
* memcpy.
|
||||
*/
|
||||
|
||||
T u;
|
||||
memcpy(&u, ptr, sizeof(u));
|
||||
return u;
|
||||
}
|
||||
|
||||
|
||||
/*!
|
||||
If \a on is true, this function returns the number of
|
||||
1-bits stored in the bit array; otherwise the number
|
||||
@ -196,17 +178,17 @@ int QBitArray::count(bool on) const
|
||||
const quint8 *const end = reinterpret_cast<const quint8 *>(d.end());
|
||||
|
||||
while (bits + 7 <= end) {
|
||||
quint64 v = qUnalignedLoad<quint64>(bits);
|
||||
quint64 v = qFromUnaligned<quint64>(bits);
|
||||
bits += 8;
|
||||
numBits += int(qPopulationCount(v));
|
||||
}
|
||||
if (bits + 3 <= end) {
|
||||
quint32 v = qUnalignedLoad<quint32>(bits);
|
||||
quint32 v = qFromUnaligned<quint32>(bits);
|
||||
bits += 4;
|
||||
numBits += int(qPopulationCount(v));
|
||||
}
|
||||
if (bits + 1 < end) {
|
||||
quint16 v = qUnalignedLoad<quint16>(bits);
|
||||
quint16 v = qFromUnaligned<quint16>(bits);
|
||||
bits += 2;
|
||||
numBits += int(qPopulationCount(v));
|
||||
}
|
||||
|
@ -51,6 +51,7 @@
|
||||
#include <qbytearray.h>
|
||||
#include <qdatetime.h>
|
||||
#include <qbasicatomic.h>
|
||||
#include <qendian.h>
|
||||
#include <private/qsimd_p.h>
|
||||
|
||||
#ifndef QT_BOOTSTRAPPED
|
||||
@ -105,24 +106,24 @@ static uint crc32(const Char *ptr, size_t len, uint h)
|
||||
|
||||
p += 8;
|
||||
for ( ; p <= e; p += 8)
|
||||
h2 = _mm_crc32_u64(h2, qUnalignedLoad<qlonglong>(p - 8));
|
||||
h2 = _mm_crc32_u64(h2, qFromUnaligned<qlonglong>(p - 8));
|
||||
h = h2;
|
||||
p -= 8;
|
||||
|
||||
len = e - p;
|
||||
if (len & 4) {
|
||||
h = _mm_crc32_u32(h, qUnalignedLoad<uint>(p));
|
||||
h = _mm_crc32_u32(h, qFromUnaligned<uint>(p));
|
||||
p += 4;
|
||||
}
|
||||
# else
|
||||
p += 4;
|
||||
for ( ; p <= e; p += 4)
|
||||
h = _mm_crc32_u32(h, qUnalignedLoad<uint>(p - 4));
|
||||
h = _mm_crc32_u32(h, qFromUnaligned<uint>(p - 4));
|
||||
p -= 4;
|
||||
len = e - p;
|
||||
# endif
|
||||
if (len & 2) {
|
||||
h = _mm_crc32_u16(h, qUnalignedLoad<ushort>(p));
|
||||
h = _mm_crc32_u16(h, qFromUnaligned<ushort>(p));
|
||||
p += 2;
|
||||
}
|
||||
if (sizeof(Char) == 1 && len & 1)
|
||||
|
@ -716,26 +716,4 @@ void qDumpCPUFeatures()
|
||||
puts("");
|
||||
}
|
||||
|
||||
/*!
|
||||
\internal
|
||||
\fn T qUnalignedLoad(const void *ptr)
|
||||
\since 5.6.1
|
||||
|
||||
Loads a \c{T} from address \a ptr, which may be misaligned.
|
||||
|
||||
Use of this function avoids the undefined behavior that the C++ standard
|
||||
otherwise attributes to unaligned loads.
|
||||
*/
|
||||
|
||||
/*!
|
||||
\internal
|
||||
\fn void qUnalignedStore(void *ptr, T t)
|
||||
\since 5.6.1
|
||||
|
||||
Stores \a t to address \a ptr, which may be misaligned.
|
||||
|
||||
Use of this function avoids the undefined behavior that the C++ standard
|
||||
otherwise attributes to unaligned stores.
|
||||
*/
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
@ -476,43 +476,6 @@ unsigned _bit_scan_forward(unsigned val)
|
||||
#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \
|
||||
for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i)
|
||||
|
||||
// these defines are copied from qendian.h
|
||||
// in Qt 5.7, they have been moved to qglobal.h
|
||||
// drop them when merging this to 5.7
|
||||
#ifdef __has_builtin
|
||||
# define QT_HAS_BUILTIN(x) __has_builtin(x)
|
||||
#else
|
||||
# define QT_HAS_BUILTIN(x) 0
|
||||
#endif
|
||||
|
||||
template <typename T>
|
||||
Q_ALWAYS_INLINE
|
||||
T qUnalignedLoad(const void *ptr) Q_DECL_NOTHROW
|
||||
{
|
||||
T result;
|
||||
#if QT_HAS_BUILTIN(__builtin_memcpy)
|
||||
__builtin_memcpy
|
||||
#else
|
||||
memcpy
|
||||
#endif
|
||||
/*memcpy*/(&result, ptr, sizeof result);
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Q_ALWAYS_INLINE
|
||||
void qUnalignedStore(void *ptr, T t) Q_DECL_NOTHROW
|
||||
{
|
||||
#if QT_HAS_BUILTIN(__builtin_memcpy)
|
||||
__builtin_memcpy
|
||||
#else
|
||||
memcpy
|
||||
#endif
|
||||
/*memcpy*/(ptr, &t, sizeof t);
|
||||
}
|
||||
|
||||
#undef QT_HAS_BUILTIN
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif // QSIMD_P_H
|
||||
|
@ -577,7 +577,7 @@ static int ucstrncmp(const QChar *a, const uchar *c, int l)
|
||||
// we'll read uc[offset..offset+7] (16 bytes) and c[offset..offset+7] (8 bytes)
|
||||
if (uc + offset + 7 < e) {
|
||||
// same, but we're using an 8-byte load
|
||||
__m128i chunk = _mm_cvtsi64_si128(qUnalignedLoad<long long>(c + offset));
|
||||
__m128i chunk = _mm_cvtsi64_si128(qFromUnaligned<long long>(c + offset));
|
||||
__m128i secondHalf = _mm_unpacklo_epi8(chunk, nullmask);
|
||||
|
||||
__m128i ucdata = _mm_loadu_si128((const __m128i*)(uc + offset));
|
||||
|
Loading…
Reference in New Issue
Block a user