Only use 32bit version of qPremultiply

With auto-vectorization enabled in QtGui, the 32bit version of
qPremultiply is faster than the 64bit version since it can be vectorized
wider (4x on 128bit as opposed to 2x). Since all our important 64bit
targets have SIMD, that makes the 64bit version pointless.

Change-Id: I4e9070a3a3c8e2b54f17a95ba0aee0405cbb8ec9
Reviewed-by: Marc Mutz <marc.mutz@kdab.com>
This commit is contained in:
Allan Sandfeld Jensen 2015-01-27 14:08:17 +01:00
parent 1749c1dd42
commit 9be87085d8
2 changed files with 3 additions and 19 deletions

View File

@ -72,21 +72,7 @@ inline Q_DECL_CONSTEXPR int qGray(QRgb rgb) // convert RGB to gra
inline Q_DECL_CONSTEXPR bool qIsGray(QRgb rgb)
{ return qRed(rgb) == qGreen(rgb) && qRed(rgb) == qBlue(rgb); }
template <int ProcessorWordSize>
inline QRgb qPremultiply_impl(QRgb x);
template <> // 64-bit version
inline QRgb qPremultiply_impl<8>(QRgb x)
{
const uint a = qAlpha(x);
quint64 t = (((quint64(x)) | ((quint64(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
t = (t + ((t >> 8) & 0xff00ff00ff00ff) + 0x80008000800080) >> 8;
t &= 0x000000ff00ff00ff;
return (uint(t)) | (uint(t >> 24)) | (a << 24);
}
template <> // 32-bit version
inline QRgb qPremultiply_impl<4>(QRgb x)
inline Q_DECL_RELAXED_CONSTEXPR QRgb qPremultiply(QRgb x)
{
const uint a = qAlpha(x);
uint t = (x & 0xff00ff) * a;
@ -96,12 +82,9 @@ inline QRgb qPremultiply_impl<4>(QRgb x)
x = ((x >> 8) & 0xff) * a;
x = (x + ((x >> 8) & 0xff) + 0x80);
x &= 0xff00;
x |= t | (a << 24);
return x;
return x | t | (a << 24);
}
inline QRgb qPremultiply(QRgb x) { return qPremultiply_impl<Q_PROCESSOR_WORDSIZE>(x); }
Q_GUI_EXPORT extern const uint qt_inv_premul_factor[];
inline QRgb qUnpremultiply(QRgb p)

View File

@ -147,6 +147,7 @@ void tst_QImageConversion::convertGeneric_data()
QTest::newRow("argb32 -> rgb666") << argb32 << QImage::Format_RGB666;
QTest::newRow("argb32 -> argb8565pm") << argb32 << QImage::Format_ARGB8565_Premultiplied;
QTest::newRow("argb32 -> argb4444pm") << argb32 << QImage::Format_ARGB4444_Premultiplied;
QTest::newRow("argb32 -> argb32pm") << argb32 << QImage::Format_ARGB32_Premultiplied;
}
void tst_QImageConversion::convertGeneric()