Reapply SSE4 acceleration to ARGB32->ARGB32PM conversion

After the merger of fetch and convert, we were missing the hook
to the accelerated merged version of ARGB32->ARGB32PM conversion,
causing a minor performance regression.

Change-Id: I3965d1a95f2305306005db09640f2775aa645d2e
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
Allan Sandfeld Jensen 2018-05-16 17:50:16 +02:00
parent ec97be5585
commit 8f6f9cbaa9
2 changed files with 22 additions and 2 deletions

View File

@ -6283,13 +6283,19 @@ static void qInitDrawhelperFunctions()
if (qCpuHasFeature(SSE4_1)) {
extern void QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, int count, const QVector<QRgb> *);
extern void QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, int count, const QVector<QRgb> *);
extern const uint *QT_FASTCALL fetchARGB32ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count,
const QVector<QRgb> *, QDitherInfo *);
extern const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count,
const QVector<QRgb> *, QDitherInfo *);
extern void QT_FASTCALL storeARGB32FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
const QVector<QRgb> *, QDitherInfo *);
extern void QT_FASTCALL storeRGBA8888FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
const QVector<QRgb> *, QDitherInfo *);
extern void QT_FASTCALL storeRGBXFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
const QVector<QRgb> *, QDitherInfo *);
qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_sse4;
qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4;
qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_sse4;
qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4;
qPixelLayouts[QImage::Format_ARGB32].storeFromARGB32PM = storeARGB32FromARGB32PM_sse4;
qPixelLayouts[QImage::Format_RGBA8888].storeFromARGB32PM = storeRGBA8888FromARGB32PM_sse4;

View File

@ -45,7 +45,7 @@
QT_BEGIN_NAMESPACE
template<bool RGBA>
static inline void convertARGBToARGB32PM_sse4(uint *buffer, const uint *src, int count)
static void convertARGBToARGB32PM_sse4(uint *buffer, const uint *src, int count)
{
int i = 0;
const __m128i alphaMask = _mm_set1_epi32(0xff000000);
@ -83,7 +83,7 @@ static inline void convertARGBToARGB32PM_sse4(uint *buffer, const uint *src, int
_mm_storeu_si128((__m128i *)&buffer[i], srcVector);
}
} else {
_mm_storeu_si128((__m128i *)&buffer[i], _mm_setzero_si128());
_mm_storeu_si128((__m128i *)&buffer[i], zero);
}
}
@ -103,6 +103,20 @@ void QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, int count, const Q
convertARGBToARGB32PM_sse4<true>(buffer, buffer, count);
}
// Fetch routine installed as qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM
// when SSE4.1 is available. Runs the SSE4 converter (RGBA = false) over `count`
// 32-bit pixels of `src`, starting at pixel `index`, and writes the result to
// `buffer`. The QVector<QRgb> and QDitherInfo arguments are ignored.
// Always returns `buffer`.
const uint *QT_FASTCALL fetchARGB32ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count,
                                                   const QVector<QRgb> *, QDitherInfo *)
{
    // `src` is a byte pointer while `index` counts 32-bit pixels, so the
    // offset is applied after reinterpreting the pointer as uint.
    const uint *const firstPixel = reinterpret_cast<const uint *>(src) + index;
    convertARGBToARGB32PM_sse4<false>(buffer, firstPixel, count);
    return buffer;
}
// Fetch routine installed as qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM
// when SSE4.1 is available. Runs the SSE4 converter (RGBA = true) over `count`
// 32-bit pixels of `src`, starting at pixel `index`, and writes the result to
// `buffer`. The QVector<QRgb> and QDitherInfo arguments are ignored.
// Always returns `buffer`.
const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count,
                                                     const QVector<QRgb> *, QDitherInfo *)
{
    // `src` is a byte pointer while `index` counts 32-bit pixels, so the
    // offset is applied after reinterpreting the pointer as uint.
    const uint *const firstPixel = reinterpret_cast<const uint *>(src) + index;
    convertARGBToARGB32PM_sse4<true>(buffer, firstPixel, count);
    return buffer;
}
void QT_FASTCALL storeRGB32FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
const QVector<QRgb> *, QDitherInfo *)
{