QString, QJson, QHash: Fix UBs involving unaligned loads

Found by UBSan:

  src/corelib/tools/qstring.cpp:587:42: runtime error: load of misaligned address 0x2acbf4b7551b for type 'const long long int', which requires 8 byte alignment
  src/corelib/json/qjson_p.h:405:30: runtime error: store to misaligned address 0x0000019b1e52 for type 'quint64', which requires 8 byte alignment
  src/corelib/tools/qhash.cpp:116:27: runtime error: load of misaligned address 0x2b8f9ce80e85 for type 'const qlonglong', which requires 8 byte alignment
  src/corelib/tools/qhash.cpp:133:26: runtime error: load of misaligned address 0x2b8f9ce80e8d for type 'const ushort', which requires 2 byte alignment

Fix by memcpy()ing into a local variable. Wrap this trick in
template functions in qsimd_p.h. These are marked as always-
inline and use __builtin_memcpy() where available in an
attempt to avoid the memcpy() function call overhead in debug
builds.

While this looks prohibitively expensive, from the pov of the
C++ abstract machine, it is 100% equivalent, except for the
absence of undefined behavior. In one case, the cast produces
a local temporary which is then copied into the function, and
in the other case, that local variable comes from return value
of qUnalignedLoad().

Consequently, GCC compiles these two versions into identical
assembler code (only verfied for ucstrncmp, but there's no
reason to believe that it wouldn't hold for the other cases,
too).

Task-number: QTBUG-51651
Change-Id: Ia50d4a1d7580b6f803e0895c9f3d89c7da37840c
Reviewed-by: Olivier Goffart (Woboq GmbH) <ogoffart@woboq.com>
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@theqtcompany.com>
This commit is contained in:
Marc Mutz 2016-03-06 19:34:06 +01:00
parent 1b441c3941
commit 1bfc7f680f
5 changed files with 65 additions and 6 deletions

View File

@ -402,7 +402,7 @@ public:
// pack with itself, we'll discard the high part anyway // pack with itself, we'll discard the high part anyway
chunk = _mm_packus_epi16(chunk, chunk); chunk = _mm_packus_epi16(chunk, chunk);
// unaligned 64-bit store // unaligned 64-bit store
*(quint64*)&l[i] = _mm_cvtsi128_si64(chunk); qUnalignedStore(l + i, _mm_cvtsi128_si64(chunk));
i += 8; i += 8;
} }
# endif # endif

View File

@ -106,24 +106,24 @@ static uint crc32(const Char *ptr, size_t len, uint h)
p += 8; p += 8;
for ( ; p <= e; p += 8) for ( ; p <= e; p += 8)
h2 = _mm_crc32_u64(h2, *reinterpret_cast<const qlonglong *>(p - 8)); h2 = _mm_crc32_u64(h2, qUnalignedLoad<qlonglong>(p - 8));
h = h2; h = h2;
p -= 8; p -= 8;
len = e - p; len = e - p;
if (len & 4) { if (len & 4) {
h = _mm_crc32_u32(h, *reinterpret_cast<const uint *>(p)); h = _mm_crc32_u32(h, qUnalignedLoad<uint>(p));
p += 4; p += 4;
} }
# else # else
p += 4; p += 4;
for ( ; p <= e; p += 4) for ( ; p <= e; p += 4)
h = _mm_crc32_u32(h, *reinterpret_cast<const uint *>(p - 4)); h = _mm_crc32_u32(h, qUnalignedLoad<uint>(p - 4));
p -= 4; p -= 4;
len = e - p; len = e - p;
# endif # endif
if (len & 2) { if (len & 2) {
h = _mm_crc32_u16(h, *reinterpret_cast<const ushort *>(p)); h = _mm_crc32_u16(h, qUnalignedLoad<ushort>(p));
p += 2; p += 2;
} }
if (sizeof(Char) == 1 && len & 1) if (sizeof(Char) == 1 && len & 1)

View File

@ -716,4 +716,26 @@ void qDumpCPUFeatures()
puts(""); puts("");
} }
/*!
\internal
\fn T qUnalignedLoad(const void *ptr)
\since 5.6.1
Loads a \c{T} from address \a ptr, which may be misaligned.
Use of this function avoid the undefined behavior that the C++ standard
otherwise attributes to unaligned loads.
*/
/*!
\internal
\fn void qUnalignedStore(void *ptr, T t)
\since 5.6.1
Stores \a t to address \a ptr, which may be misaligned.
Use of this function avoid the undefined behavior that the C++ standard
otherwise attributes to unaligned stores.
*/
QT_END_NAMESPACE QT_END_NAMESPACE

View File

@ -470,6 +470,43 @@ unsigned _bit_scan_forward(unsigned val)
#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \ #define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \
for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i) for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i)
// these defines are copied from qendian.h
// in Qt 5.7, they have been moved to qglobal.h
// drop them when merging this to 5.7
#ifdef __has_builtin
# define QT_HAS_BUILTIN(x) __has_builtin(x)
#else
# define QT_HAS_BUILTIN(x) 0
#endif
template <typename T>
Q_ALWAYS_INLINE
T qUnalignedLoad(const void *ptr) Q_DECL_NOTHROW
{
T result;
#if QT_HAS_BUILTIN(__builtin_memcpy)
__builtin_memcpy
#else
memcpy
#endif
/*memcpy*/(&result, ptr, sizeof result);
return result;
}
template <typename T>
Q_ALWAYS_INLINE
void qUnalignedStore(void *ptr, T t) Q_DECL_NOTHROW
{
#if QT_HAS_BUILTIN(__builtin_memcpy)
__builtin_memcpy
#else
memcpy
#endif
/*memcpy*/(ptr, &t, sizeof t);
}
#undef QT_HAS_BUILTIN
QT_END_NAMESPACE QT_END_NAMESPACE
#endif // QSIMD_P_H #endif // QSIMD_P_H

View File

@ -577,7 +577,7 @@ static int ucstrncmp(const QChar *a, const uchar *c, int l)
// we'll read uc[offset..offset+7] (16 bytes) and c[offset..offset+7] (8 bytes) // we'll read uc[offset..offset+7] (16 bytes) and c[offset..offset+7] (8 bytes)
if (uc + offset + 7 < e) { if (uc + offset + 7 < e) {
// same, but we're using an 8-byte load // same, but we're using an 8-byte load
__m128i chunk = _mm_cvtsi64_si128(*(const long long *)(c + offset)); __m128i chunk = _mm_cvtsi64_si128(qUnalignedLoad<long long>(c + offset));
__m128i secondHalf = _mm_unpacklo_epi8(chunk, nullmask); __m128i secondHalf = _mm_unpacklo_epi8(chunk, nullmask);
__m128i ucdata = _mm_loadu_si128((const __m128i*)(uc + offset)); __m128i ucdata = _mm_loadu_si128((const __m128i*)(uc + offset));