qsimd_p.h: add a hack to allow AVX to work with MinGW

GCC is unable to emit the SEH metadata about the stack aligning that is
required to execute AVX aligned instructions (VMOVDQA, VMOVAPS, etc.),
so it just doesn't align the stack. That causes crashes on a 50/50
chance every time the compiler attempts to address a stack-aligned
variable. In a debug-mode build, because it always loads & saves
everything on the stack, the chance of a crash happening is a near
certainty.

So we hack around it by going behind the compiler's back and instructing
the assembler to emit the unaligned counterparts of the instructions
every time the compiler wished to emit the aligned one. There's no
performance penalty: if the variable is actually aligned, the unaligned
instruction executes in the exact same time.

Change-Id: Ib42b3adc93bf4d43bd55fffd16c29cac0da18972
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
This commit is contained in:
Thiago Macieira 2021-12-20 21:11:17 -03:00
parent 970b249140
commit c062fed427
2 changed files with 23 additions and 3 deletions

View File

@ -161,9 +161,6 @@ attribute_target("sha") void test_shani()
#endif
#if T(AVX)
# if defined(__WIN64__) && defined(__GNUC__) && !defined(__clang__)
# error "AVX support is broken in 64-bit MinGW - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=49001"
# endif
attribute_target("avx") void test_avx()
{
__m256d a = _mm256_setzero_pd();

View File

@ -190,6 +190,29 @@
# define __SSE__ 1
# endif
# if defined(Q_OS_WIN) && defined(Q_CC_GNU) && !defined(Q_CC_INTEL) && !defined(Q_CC_CLANG)
// 64-bit GCC on Windows does not support AVX, so we hack around it by forcing
// it to emit unaligned loads & stores
// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=49001
asm(
".macro vmovapd args:vararg\n"
" vmovupd \\args\n"
".endm\n"
".macro vmovaps args:vararg\n"
" vmovups \\args\n"
".endm\n"
".macro vmovdqa args:vararg\n"
" vmovdqu \\args\n"
".endm\n"
".macro vmovdqa32 args:vararg\n"
" vmovdqu32 \\args\n"
".endm\n"
".macro vmovdqa64 args:vararg\n"
" vmovdqu64 \\args\n"
".endm\n"
);
# endif
# if defined(Q_CC_GNU) && !defined(Q_CC_INTEL) && !defined(Q_OS_WASM)
// GCC 4.4 and Clang 2.8 added a few more intrinsics there
# include <x86intrin.h>