Use regular SSE2 stores instead of non-temporal ones in qt_memfill32

There's no good reason to use non-temporal stores. Images may be 100x
bigger than strings, but they're not so big that the non-temporal stores
make sense.

Patch-By: Allan Sandfeld Jensen <allan.jensen@qt.io>
Change-Id: Iba4b5c183776497d8ee1fffd156380ec9103ef1a
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
This commit is contained in:
Thiago Macieira 2018-11-05 12:57:48 -08:00
parent f519cd501e
commit db3777dd50

View File

@@ -266,20 +266,20 @@ void qt_memfill32(quint32 *dest, quint32 value, int count)
int count128 = count / 4;
__m128i *dst128 = reinterpret_cast<__m128i*>(dest);
__m128i *end128 = dst128 + count128;
const __m128i value128 = _mm_set_epi32(value, value, value, value);
const __m128i value128 = _mm_set1_epi32(value);
while (dst128 + 3 < end128) {
_mm_stream_si128(dst128 + 0, value128);
_mm_stream_si128(dst128 + 1, value128);
_mm_stream_si128(dst128 + 2, value128);
_mm_stream_si128(dst128 + 3, value128);
_mm_store_si128(dst128 + 0, value128);
_mm_store_si128(dst128 + 1, value128);
_mm_store_si128(dst128 + 2, value128);
_mm_store_si128(dst128 + 3, value128);
dst128 += 4;
}
switch (count128 & 0x3) {
case 3: _mm_stream_si128(dst128++, value128); Q_FALLTHROUGH();
case 2: _mm_stream_si128(dst128++, value128); Q_FALLTHROUGH();
case 1: _mm_stream_si128(dst128++, value128);
case 3: _mm_store_si128(dst128++, value128); Q_FALLTHROUGH();
case 2: _mm_store_si128(dst128++, value128); Q_FALLTHROUGH();
case 1: _mm_store_si128(dst128++, value128);
}
}