Replace qUnaligned{Load,Store} with the existing q{To,From}Unaligned

Move the Q_ALWAYS_INLINE and forcing of __builtin_memcpy to the existing functions.

Change-Id: Icaa7fb2a490246bda156ffff143c137e520eea79
Reviewed-by: Lars Knoll <lars.knoll@theqtcompany.com>
This commit is contained in:
parent
421aa422af
commit
540978288e
@ -42,6 +42,11 @@
|
|||||||
|
|
||||||
QT_BEGIN_NAMESPACE
|
QT_BEGIN_NAMESPACE
|
||||||
|
|
||||||
|
#ifdef __has_builtin
|
||||||
|
# define QT_HAS_BUILTIN(x) __has_builtin(x)
|
||||||
|
#else
|
||||||
|
# define QT_HAS_BUILTIN(x) 0
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* ENDIAN FUNCTIONS
|
* ENDIAN FUNCTIONS
|
||||||
@ -64,18 +69,29 @@ template <typename T> inline void qbswap(const T src, uchar *dest)
|
|||||||
|
|
||||||
// Used to implement a type-safe and alignment-safe copy operation
|
// Used to implement a type-safe and alignment-safe copy operation
|
||||||
// If you want to avoid the memcpy, you must write specializations for these functions
|
// If you want to avoid the memcpy, you must write specializations for these functions
|
||||||
template <typename T> inline void qToUnaligned(const T src, uchar *dest)
|
template <typename T> Q_ALWAYS_INLINE void qToUnaligned(const T src, uchar *dest)
|
||||||
{
|
{
|
||||||
// Using sizeof(T) inside memcpy function produces internal compiler error with
|
// Using sizeof(T) inside memcpy function produces internal compiler error with
|
||||||
// MSVC2008/ARM in tst_endian -> use extra indirection to resolve size of T.
|
// MSVC2008/ARM in tst_endian -> use extra indirection to resolve size of T.
|
||||||
const size_t size = sizeof(T);
|
const size_t size = sizeof(T);
|
||||||
memcpy(dest, &src, size);
|
#if QT_HAS_BUILTIN(__builtin_memcpy)
|
||||||
|
__builtin_memcpy
|
||||||
|
#else
|
||||||
|
memcpy
|
||||||
|
#endif
|
||||||
|
(dest, &src, size);
|
||||||
}
|
}
|
||||||
template <typename T> inline T qFromUnaligned(const uchar *src)
|
|
||||||
|
template <typename T> Q_ALWAYS_INLINE T qFromUnaligned(const uchar *src)
|
||||||
{
|
{
|
||||||
T dest;
|
T dest;
|
||||||
const size_t size = sizeof(T);
|
const size_t size = sizeof(T);
|
||||||
memcpy(&dest, src, size);
|
#if QT_HAS_BUILTIN(__builtin_memcpy)
|
||||||
|
__builtin_memcpy
|
||||||
|
#else
|
||||||
|
memcpy
|
||||||
|
#endif
|
||||||
|
(&dest, src, size);
|
||||||
return dest;
|
return dest;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -87,12 +103,6 @@ template <typename T> inline T qFromUnaligned(const uchar *src)
|
|||||||
*/
|
*/
|
||||||
template <typename T> T qbswap(T source);
|
template <typename T> T qbswap(T source);
|
||||||
|
|
||||||
#ifdef __has_builtin
|
|
||||||
# define QT_HAS_BUILTIN(x) __has_builtin(x)
|
|
||||||
#else
|
|
||||||
# define QT_HAS_BUILTIN(x) 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// GCC 4.3 implemented all the intrinsics, but the 16-bit one only got implemented in 4.8;
|
// GCC 4.3 implemented all the intrinsics, but the 16-bit one only got implemented in 4.8;
|
||||||
// Clang 2.6 implemented the 32- and 64-bit but waited until 3.2 to implement the 16-bit one
|
// Clang 2.6 implemented the 32- and 64-bit but waited until 3.2 to implement the 16-bit one
|
||||||
#if (defined(Q_CC_GNU) && Q_CC_GNU >= 403) || QT_HAS_BUILTIN(__builtin_bswap32)
|
#if (defined(Q_CC_GNU) && Q_CC_GNU >= 403) || QT_HAS_BUILTIN(__builtin_bswap32)
|
||||||
|
@ -33,6 +33,29 @@
|
|||||||
little and big endian representations of numbers.
|
little and big endian representations of numbers.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/*!
|
||||||
|
\internal
|
||||||
|
\fn T qFromUnaligned(const uchar *ptr)
|
||||||
|
\since 5.5
|
||||||
|
|
||||||
|
Loads a \c{T} from address \a ptr, which may be misaligned.
|
||||||
|
|
||||||
|
Use of this function avoids the undefined behavior that the C++ standard
|
||||||
|
otherwise attributes to unaligned loads.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*!
|
||||||
|
\internal
|
||||||
|
\fn void qToUnaligned(T t, uchar *ptr)
|
||||||
|
\since 4.5
|
||||||
|
|
||||||
|
Stores \a t to address \a ptr, which may be misaligned.
|
||||||
|
|
||||||
|
Use of this function avoids the undefined behavior that the C++ standard
|
||||||
|
otherwise attributes to unaligned stores.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
\fn T qFromBigEndian(const uchar *src)
|
\fn T qFromBigEndian(const uchar *src)
|
||||||
\since 4.3
|
\since 4.3
|
||||||
|
@ -402,7 +402,7 @@ public:
|
|||||||
// pack with itself, we'll discard the high part anyway
|
// pack with itself, we'll discard the high part anyway
|
||||||
chunk = _mm_packus_epi16(chunk, chunk);
|
chunk = _mm_packus_epi16(chunk, chunk);
|
||||||
// unaligned 64-bit store
|
// unaligned 64-bit store
|
||||||
qUnalignedStore(l + i, _mm_cvtsi128_si64(chunk));
|
qToUnaligned(_mm_cvtsi128_si64(chunk), l + i);
|
||||||
i += 8;
|
i += 8;
|
||||||
}
|
}
|
||||||
# endif
|
# endif
|
||||||
|
@ -42,7 +42,6 @@
|
|||||||
#include <QtCore/QList>
|
#include <QtCore/QList>
|
||||||
#include <QtCore/QDebug>
|
#include <QtCore/QDebug>
|
||||||
#include <qendian.h>
|
#include <qendian.h>
|
||||||
#include <private/qsimd_p.h> // for qUnalignedLoad
|
|
||||||
|
|
||||||
QT_BEGIN_NAMESPACE
|
QT_BEGIN_NAMESPACE
|
||||||
|
|
||||||
@ -177,7 +176,7 @@ static bool matchNumber(const QMimeMagicRulePrivate *d, const QByteArray &data)
|
|||||||
const char *p = data.constData() + d->startPos;
|
const char *p = data.constData() + d->startPos;
|
||||||
const char *e = data.constData() + qMin(data.size() - int(sizeof(T)), d->endPos + 1);
|
const char *e = data.constData() + qMin(data.size() - int(sizeof(T)), d->endPos + 1);
|
||||||
for ( ; p <= e; ++p) {
|
for ( ; p <= e; ++p) {
|
||||||
if ((qUnalignedLoad<T>(p) & mask) == (value & mask))
|
if ((qFromUnaligned<T>(reinterpret_cast<const uchar *>(p)) & mask) == (value & mask))
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -35,6 +35,7 @@
|
|||||||
#include <qalgorithms.h>
|
#include <qalgorithms.h>
|
||||||
#include <qdatastream.h>
|
#include <qdatastream.h>
|
||||||
#include <qdebug.h>
|
#include <qdebug.h>
|
||||||
|
#include <qendian.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
QT_BEGIN_NAMESPACE
|
QT_BEGIN_NAMESPACE
|
||||||
@ -162,25 +163,6 @@ QBitArray::QBitArray(int size, bool value)
|
|||||||
Same as size().
|
Same as size().
|
||||||
*/
|
*/
|
||||||
|
|
||||||
template <typename T> T qUnalignedLoad(const uchar *ptr)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Testing with different compilers shows that they all optimize the memcpy
|
|
||||||
* call away and replace with direct loads whenever possible. On x86 and PPC,
|
|
||||||
* GCC does direct unaligned loads; on MIPS, it generates a pair of load-left
|
|
||||||
* and load-right instructions. ICC and Clang do the same on x86. This is both
|
|
||||||
* 32- and 64-bit.
|
|
||||||
*
|
|
||||||
* On ARM cores without unaligned loads, the compiler leaves a call to
|
|
||||||
* memcpy.
|
|
||||||
*/
|
|
||||||
|
|
||||||
T u;
|
|
||||||
memcpy(&u, ptr, sizeof(u));
|
|
||||||
return u;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
If \a on is true, this function returns the number of
|
If \a on is true, this function returns the number of
|
||||||
1-bits stored in the bit array; otherwise the number
|
1-bits stored in the bit array; otherwise the number
|
||||||
@ -196,17 +178,17 @@ int QBitArray::count(bool on) const
|
|||||||
const quint8 *const end = reinterpret_cast<const quint8 *>(d.end());
|
const quint8 *const end = reinterpret_cast<const quint8 *>(d.end());
|
||||||
|
|
||||||
while (bits + 7 <= end) {
|
while (bits + 7 <= end) {
|
||||||
quint64 v = qUnalignedLoad<quint64>(bits);
|
quint64 v = qFromUnaligned<quint64>(bits);
|
||||||
bits += 8;
|
bits += 8;
|
||||||
numBits += int(qPopulationCount(v));
|
numBits += int(qPopulationCount(v));
|
||||||
}
|
}
|
||||||
if (bits + 3 <= end) {
|
if (bits + 3 <= end) {
|
||||||
quint32 v = qUnalignedLoad<quint32>(bits);
|
quint32 v = qFromUnaligned<quint32>(bits);
|
||||||
bits += 4;
|
bits += 4;
|
||||||
numBits += int(qPopulationCount(v));
|
numBits += int(qPopulationCount(v));
|
||||||
}
|
}
|
||||||
if (bits + 1 < end) {
|
if (bits + 1 < end) {
|
||||||
quint16 v = qUnalignedLoad<quint16>(bits);
|
quint16 v = qFromUnaligned<quint16>(bits);
|
||||||
bits += 2;
|
bits += 2;
|
||||||
numBits += int(qPopulationCount(v));
|
numBits += int(qPopulationCount(v));
|
||||||
}
|
}
|
||||||
|
@ -51,6 +51,7 @@
|
|||||||
#include <qbytearray.h>
|
#include <qbytearray.h>
|
||||||
#include <qdatetime.h>
|
#include <qdatetime.h>
|
||||||
#include <qbasicatomic.h>
|
#include <qbasicatomic.h>
|
||||||
|
#include <qendian.h>
|
||||||
#include <private/qsimd_p.h>
|
#include <private/qsimd_p.h>
|
||||||
|
|
||||||
#ifndef QT_BOOTSTRAPPED
|
#ifndef QT_BOOTSTRAPPED
|
||||||
@ -105,24 +106,24 @@ static uint crc32(const Char *ptr, size_t len, uint h)
|
|||||||
|
|
||||||
p += 8;
|
p += 8;
|
||||||
for ( ; p <= e; p += 8)
|
for ( ; p <= e; p += 8)
|
||||||
h2 = _mm_crc32_u64(h2, qUnalignedLoad<qlonglong>(p - 8));
|
h2 = _mm_crc32_u64(h2, qFromUnaligned<qlonglong>(p - 8));
|
||||||
h = h2;
|
h = h2;
|
||||||
p -= 8;
|
p -= 8;
|
||||||
|
|
||||||
len = e - p;
|
len = e - p;
|
||||||
if (len & 4) {
|
if (len & 4) {
|
||||||
h = _mm_crc32_u32(h, qUnalignedLoad<uint>(p));
|
h = _mm_crc32_u32(h, qFromUnaligned<uint>(p));
|
||||||
p += 4;
|
p += 4;
|
||||||
}
|
}
|
||||||
# else
|
# else
|
||||||
p += 4;
|
p += 4;
|
||||||
for ( ; p <= e; p += 4)
|
for ( ; p <= e; p += 4)
|
||||||
h = _mm_crc32_u32(h, qUnalignedLoad<uint>(p - 4));
|
h = _mm_crc32_u32(h, qFromUnaligned<uint>(p - 4));
|
||||||
p -= 4;
|
p -= 4;
|
||||||
len = e - p;
|
len = e - p;
|
||||||
# endif
|
# endif
|
||||||
if (len & 2) {
|
if (len & 2) {
|
||||||
h = _mm_crc32_u16(h, qUnalignedLoad<ushort>(p));
|
h = _mm_crc32_u16(h, qFromUnaligned<ushort>(p));
|
||||||
p += 2;
|
p += 2;
|
||||||
}
|
}
|
||||||
if (sizeof(Char) == 1 && len & 1)
|
if (sizeof(Char) == 1 && len & 1)
|
||||||
|
@ -716,26 +716,4 @@ void qDumpCPUFeatures()
|
|||||||
puts("");
|
puts("");
|
||||||
}
|
}
|
||||||
|
|
||||||
/*!
|
|
||||||
\internal
|
|
||||||
\fn T qUnalignedLoad(const void *ptr)
|
|
||||||
\since 5.6.1
|
|
||||||
|
|
||||||
Loads a \c{T} from address \a ptr, which may be misaligned.
|
|
||||||
|
|
||||||
Use of this function avoid the undefined behavior that the C++ standard
|
|
||||||
otherwise attributes to unaligned loads.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*!
|
|
||||||
\internal
|
|
||||||
\fn void qUnalignedStore(void *ptr, T t)
|
|
||||||
\since 5.6.1
|
|
||||||
|
|
||||||
Stores \a t to address \a ptr, which may be misaligned.
|
|
||||||
|
|
||||||
Use of this function avoid the undefined behavior that the C++ standard
|
|
||||||
otherwise attributes to unaligned stores.
|
|
||||||
*/
|
|
||||||
|
|
||||||
QT_END_NAMESPACE
|
QT_END_NAMESPACE
|
||||||
|
@ -476,43 +476,6 @@ unsigned _bit_scan_forward(unsigned val)
|
|||||||
#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \
|
#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \
|
||||||
for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i)
|
for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i)
|
||||||
|
|
||||||
// these defines are copied from qendian.h
|
|
||||||
// in Qt 5.7, they have been moved to qglobal.h
|
|
||||||
// drop them when merging this to 5.7
|
|
||||||
#ifdef __has_builtin
|
|
||||||
# define QT_HAS_BUILTIN(x) __has_builtin(x)
|
|
||||||
#else
|
|
||||||
# define QT_HAS_BUILTIN(x) 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
Q_ALWAYS_INLINE
|
|
||||||
T qUnalignedLoad(const void *ptr) Q_DECL_NOTHROW
|
|
||||||
{
|
|
||||||
T result;
|
|
||||||
#if QT_HAS_BUILTIN(__builtin_memcpy)
|
|
||||||
__builtin_memcpy
|
|
||||||
#else
|
|
||||||
memcpy
|
|
||||||
#endif
|
|
||||||
/*memcpy*/(&result, ptr, sizeof result);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
Q_ALWAYS_INLINE
|
|
||||||
void qUnalignedStore(void *ptr, T t) Q_DECL_NOTHROW
|
|
||||||
{
|
|
||||||
#if QT_HAS_BUILTIN(__builtin_memcpy)
|
|
||||||
__builtin_memcpy
|
|
||||||
#else
|
|
||||||
memcpy
|
|
||||||
#endif
|
|
||||||
/*memcpy*/(ptr, &t, sizeof t);
|
|
||||||
}
|
|
||||||
|
|
||||||
#undef QT_HAS_BUILTIN
|
|
||||||
|
|
||||||
QT_END_NAMESPACE
|
QT_END_NAMESPACE
|
||||||
|
|
||||||
#endif // QSIMD_P_H
|
#endif // QSIMD_P_H
|
||||||
|
@ -577,7 +577,7 @@ static int ucstrncmp(const QChar *a, const uchar *c, int l)
|
|||||||
// we'll read uc[offset..offset+7] (16 bytes) and c[offset..offset+7] (8 bytes)
|
// we'll read uc[offset..offset+7] (16 bytes) and c[offset..offset+7] (8 bytes)
|
||||||
if (uc + offset + 7 < e) {
|
if (uc + offset + 7 < e) {
|
||||||
// same, but we're using an 8-byte load
|
// same, but we're using an 8-byte load
|
||||||
__m128i chunk = _mm_cvtsi64_si128(qUnalignedLoad<long long>(c + offset));
|
__m128i chunk = _mm_cvtsi64_si128(qFromUnaligned<long long>(c + offset));
|
||||||
__m128i secondHalf = _mm_unpacklo_epi8(chunk, nullmask);
|
__m128i secondHalf = _mm_unpacklo_epi8(chunk, nullmask);
|
||||||
|
|
||||||
__m128i ucdata = _mm_loadu_si128((const __m128i*)(uc + offset));
|
__m128i ucdata = _mm_loadu_si128((const __m128i*)(uc + offset));
|
||||||
|
Loading…
Reference in New Issue
Block a user