From 58f2aa907f63bd0be61b2b6e55511c0867b42683 Mon Sep 17 00:00:00 2001
From: Thiago Macieira
Date: Sun, 11 Nov 2018 18:57:12 -0800
Subject: [PATCH] Work around GCC bug in generating 64-bit population of SSE register

We know what code we want it to generate, so I just replaced the
_mm256_set1_epi64x() with the code we want it to generate. Except that
GCC sees through and tries to "optimize" my code... so that asm()
statement makes it separate the two operations.

This generates optimal code for both 32- and 64-bit.

64-bit:
        vmovq   %rdi, %xmm0
        vpbroadcastq    %xmm0, %ymm0

32-bit:
        vmovq   8(%esp), %xmm0
        vpbroadcastq    %xmm0, %ymm0

See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80820
and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87976

Change-Id: I42a48bd64ccc41aebf84fffd15664109b97fe42b
Reviewed-by: Allan Sandfeld Jensen
---
 src/gui/painting/qdrawhelper_avx2.cpp | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/gui/painting/qdrawhelper_avx2.cpp b/src/gui/painting/qdrawhelper_avx2.cpp
index 3a37b85366..2e36f538bd 100644
--- a/src/gui/painting/qdrawhelper_avx2.cpp
+++ b/src/gui/painting/qdrawhelper_avx2.cpp
@@ -359,7 +359,18 @@ void Q_DECL_VECTORCALL qt_memfillXX_avx2(uchar *dest, __m256i value256, qsizetyp
 
 void qt_memfill64_avx2(quint64 *dest, quint64 value, qsizetype count)
 {
-    qt_memfillXX_avx2(reinterpret_cast<uchar *>(dest), _mm256_set1_epi64x(value), count * sizeof(quint64));
+#if defined(Q_CC_GNU) && !defined(Q_CC_CLANG) && !defined(Q_CC_INTEL)
+    // work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80820
+    __m128i value64 = _mm_set_epi64x(0, value); // _mm_cvtsi64_si128(value);
+#  ifdef Q_PROCESSOR_X86_64
+    asm ("" : "+x" (value64));
+#  endif
+    __m256i value256 = _mm256_broadcastq_epi64(value64);
+#else
+    __m256i value256 = _mm256_set1_epi64x(value);
+#endif
+
+    qt_memfillXX_avx2(reinterpret_cast<uchar *>(dest), value256, count * sizeof(quint64));
 }
 
 void qt_memfill32_avx2(quint32 *dest, quint32 value, qsizetype count)