Only use the 32-bit version of qPremultiply

With auto-vectorization enabled in QtGui, the 32-bit version of qPremultiply is faster than the 64-bit version, since it can be vectorized wider (4x on a 128-bit vector as opposed to 2x). Since all our important 64-bit targets have SIMD, the 64-bit version is pointless.

Change-Id: I4e9070a3a3c8e2b54f17a95ba0aee0405cbb8ec9
Reviewed-by: Marc Mutz <marc.mutz@kdab.com>
2015-01-27 14:08:17 +01:00 · 2015-01-27 14:08:17 +01:00 · 9be87085d8
commit 9be87085d8
parent 1749c1dd42
2 changed files with 3 additions and 19 deletions
--- a/src/gui/painting/qrgb.h
+++ b/src/gui/painting/qrgb.h
@ -72,21 +72,7 @@ inline Q_DECL_CONSTEXPR int qGray(QRgb rgb)                // convert RGB to gra
 inline Q_DECL_CONSTEXPR bool qIsGray(QRgb rgb)
 { return qRed(rgb) == qGreen(rgb) && qRed(rgb) == qBlue(rgb); }

-template <int ProcessorWordSize>
-inline QRgb qPremultiply_impl(QRgb x);
-
-template <> // 64-bit version
-inline QRgb qPremultiply_impl<8>(QRgb x)
-{
-    const uint a = qAlpha(x);
-    quint64 t = (((quint64(x)) | ((quint64(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
-    t = (t + ((t >> 8) & 0xff00ff00ff00ff) + 0x80008000800080) >> 8;
-    t &= 0x000000ff00ff00ff;
-    return (uint(t)) | (uint(t >> 24)) | (a << 24);
-}
-
-template <> // 32-bit version
-inline QRgb qPremultiply_impl<4>(QRgb x)
+inline Q_DECL_RELAXED_CONSTEXPR QRgb qPremultiply(QRgb x)
 {
    const uint a = qAlpha(x);
    uint t = (x & 0xff00ff) * a;
@ -96,12 +82,9 @@ inline QRgb qPremultiply_impl<4>(QRgb x)
    x = ((x >> 8) & 0xff) * a;
    x = (x + ((x >> 8) & 0xff) + 0x80);
    x &= 0xff00;
-    x |= t | (a << 24);
-    return x;
+    return x | t | (a << 24);
 }

-inline QRgb qPremultiply(QRgb x) { return qPremultiply_impl<Q_PROCESSOR_WORDSIZE>(x); }
-
 Q_GUI_EXPORT extern const uint qt_inv_premul_factor[];

 inline QRgb qUnpremultiply(QRgb p)
--- a/tests/benchmarks/gui/image/qimageconversion/tst_qimageconversion.cpp
+++ b/tests/benchmarks/gui/image/qimageconversion/tst_qimageconversion.cpp
@ -147,6 +147,7 @@ void tst_QImageConversion::convertGeneric_data()
    QTest::newRow("argb32 -> rgb666") << argb32 << QImage::Format_RGB666;
    QTest::newRow("argb32 -> argb8565pm") << argb32 << QImage::Format_ARGB8565_Premultiplied;
    QTest::newRow("argb32 -> argb4444pm") << argb32 << QImage::Format_ARGB4444_Premultiplied;
+    QTest::newRow("argb32 -> argb32pm") << argb32 << QImage::Format_ARGB32_Premultiplied;
 }

 void tst_QImageConversion::convertGeneric()