Unalias some core drawhelper loops
Some compilers assume that src and buffer are different: they vectorize only the unaliased case and fall back to a slow path when the two pointers are equal. In our case the pointers are equal just as often as they differ, so we need to unalias the variables manually to make sure both cases are fully optimized. Change-Id: I6ec86171dd179844facdf45376253c55980d9e36 Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
parent
076087717e
commit
fd80b8aaeb
@ -174,29 +174,31 @@ template<QImage::Format Format>
|
|||||||
static const uint *QT_FASTCALL convertToRGB32(uint *buffer, const uint *src, int count,
                                              const QVector<QRgb> *, QDitherInfo *)
{
    // Converts count pixels of Format from src into opaque 0xffRRGGBB values
    // written to buffer, and returns buffer. src may be the same pointer as
    // buffer. The colour-table and dither arguments are not used.
    auto expandPixel = [](uint packed) {
        // MSVC needs these constexpr defined in here otherwise it will create a capture.
        Q_CONSTEXPR uint rMask = ((1 << redWidth<Format>()) - 1);
        Q_CONSTEXPR uint gMask = ((1 << greenWidth<Format>()) - 1);
        Q_CONSTEXPR uint bMask = ((1 << blueWidth<Format>()) - 1);

        Q_CONSTEXPR uchar rUp = 8 - redWidth<Format>();
        Q_CONSTEXPR uchar gUp = 8 - greenWidth<Format>();
        Q_CONSTEXPR uchar bUp = 8 - blueWidth<Format>();

        Q_CONSTEXPR uchar rDown = 2 * redWidth<Format>() - 8;
        Q_CONSTEXPR uchar gDown = 2 * greenWidth<Format>() - 8;
        Q_CONSTEXPR uchar bDown = 2 * blueWidth<Format>() - 8;

        // Isolate each channel at its native width.
        const uint r = (packed >> redShift<Format>()) & rMask;
        const uint g = (packed >> greenShift<Format>()) & gMask;
        const uint b = (packed >> blueShift<Format>()) & bMask;

        // Scale each channel up to 8 bits: shift left, then OR in a
        // right-shifted copy to fill the vacated low bits.
        const uint r8 = (r << rUp) | (r >> rDown);
        const uint g8 = (g << gUp) | (g >> gDown);
        const uint b8 = (b << bUp) | (b >> bDown);

        return 0xff000000 | (r8 << 16) | (g8 << 8) | b8;
    };

    UNALIASED_CONVERSION_LOOP(buffer, src, count, expandPixel);
    return buffer;
}
|
||||||
|
|
||||||
@ -348,21 +350,21 @@ static const uint *QT_FASTCALL convertRGBFromARGB32PM(uint *buffer, const uint *
|
|||||||
|
|
||||||
// RGB32 -> RGB888 is not a precision loss.
|
// RGB32 -> RGB888 is not a precision loss.
|
||||||
if (!dither || (rWidth == 8 && gWidth == 8 && bWidth == 8)) {
|
if (!dither || (rWidth == 8 && gWidth == 8 && bWidth == 8)) {
|
||||||
Q_CONSTEXPR uint rMask = (1 << rWidth) - 1;
|
auto conversion = [](uint s) {
|
||||||
Q_CONSTEXPR uint gMask = (1 << gWidth) - 1;
|
const uint c = fromRGB ? s : qUnpremultiply(s);
|
||||||
Q_CONSTEXPR uint bMask = (1 << bWidth) - 1;
|
Q_CONSTEXPR uint rMask = (1 << redWidth<Format>()) - 1;
|
||||||
|
Q_CONSTEXPR uint gMask = (1 << greenWidth<Format>()) - 1;
|
||||||
|
Q_CONSTEXPR uint bMask = (1 << blueWidth<Format>()) - 1;
|
||||||
|
Q_CONSTEXPR uchar rRightShift = 24 - redWidth<Format>();
|
||||||
|
Q_CONSTEXPR uchar gRightShift = 16 - greenWidth<Format>();
|
||||||
|
Q_CONSTEXPR uchar bRightShift = 8 - blueWidth<Format>();
|
||||||
|
|
||||||
Q_CONSTEXPR uchar rRightShift = 24 - rWidth;
|
|
||||||
Q_CONSTEXPR uchar gRightShift = 16 - gWidth;
|
|
||||||
Q_CONSTEXPR uchar bRightShift = 8 - bWidth;
|
|
||||||
|
|
||||||
for (int i = 0; i < count; ++i) {
|
|
||||||
const uint c = fromRGB ? src[i] : qUnpremultiply(src[i]);
|
|
||||||
const uint r = ((c >> rRightShift) & rMask) << redShift<Format>();
|
const uint r = ((c >> rRightShift) & rMask) << redShift<Format>();
|
||||||
const uint g = ((c >> gRightShift) & gMask) << greenShift<Format>();
|
const uint g = ((c >> gRightShift) & gMask) << greenShift<Format>();
|
||||||
const uint b = ((c >> bRightShift) & bMask) << blueShift<Format>();
|
const uint b = ((c >> bRightShift) & bMask) << blueShift<Format>();
|
||||||
buffer[i] = r | g | b;
|
return r | g | b;
|
||||||
}
|
};
|
||||||
|
UNALIASED_CONVERSION_LOOP(buffer, src, count, conversion);
|
||||||
} else {
|
} else {
|
||||||
// We do ordered dither by using a rounding conversion, but instead of
|
// We do ordered dither by using a rounding conversion, but instead of
|
||||||
// adding half of input precision, we add the adjusted result from the
|
// adding half of input precision, we add the adjusted result from the
|
||||||
@ -394,32 +396,32 @@ template<QImage::Format Format, bool fromRGB>
|
|||||||
static const uint *QT_FASTCALL convertARGBPMFromARGB32PM(uint *buffer, const uint *src, int count,
|
static const uint *QT_FASTCALL convertARGBPMFromARGB32PM(uint *buffer, const uint *src, int count,
|
||||||
const QVector<QRgb> *, QDitherInfo *dither)
|
const QVector<QRgb> *, QDitherInfo *dither)
|
||||||
{
|
{
|
||||||
Q_CONSTEXPR uchar aWidth = alphaWidth<Format>();
|
|
||||||
Q_CONSTEXPR uchar rWidth = redWidth<Format>();
|
|
||||||
Q_CONSTEXPR uchar gWidth = greenWidth<Format>();
|
|
||||||
Q_CONSTEXPR uchar bWidth = blueWidth<Format>();
|
|
||||||
|
|
||||||
if (!dither) {
|
if (!dither) {
|
||||||
Q_CONSTEXPR uint aMask = (1 << aWidth) - 1;
|
auto conversion = [](uint c) {
|
||||||
Q_CONSTEXPR uint rMask = (1 << rWidth) - 1;
|
Q_CONSTEXPR uint aMask = (1 << alphaWidth<Format>()) - 1;
|
||||||
Q_CONSTEXPR uint gMask = (1 << gWidth) - 1;
|
Q_CONSTEXPR uint rMask = (1 << redWidth<Format>()) - 1;
|
||||||
Q_CONSTEXPR uint bMask = (1 << bWidth) - 1;
|
Q_CONSTEXPR uint gMask = (1 << greenWidth<Format>()) - 1;
|
||||||
|
Q_CONSTEXPR uint bMask = (1 << blueWidth<Format>()) - 1;
|
||||||
|
|
||||||
Q_CONSTEXPR uchar aRightShift = 32 - aWidth;
|
Q_CONSTEXPR uchar aRightShift = 32 - alphaWidth<Format>();
|
||||||
Q_CONSTEXPR uchar rRightShift = 24 - rWidth;
|
Q_CONSTEXPR uchar rRightShift = 24 - redWidth<Format>();
|
||||||
Q_CONSTEXPR uchar gRightShift = 16 - gWidth;
|
Q_CONSTEXPR uchar gRightShift = 16 - greenWidth<Format>();
|
||||||
Q_CONSTEXPR uchar bRightShift = 8 - bWidth;
|
Q_CONSTEXPR uchar bRightShift = 8 - blueWidth<Format>();
|
||||||
|
|
||||||
Q_CONSTEXPR uint aOpaque = aMask << alphaShift<Format>();
|
Q_CONSTEXPR uint aOpaque = aMask << alphaShift<Format>();
|
||||||
for (int i = 0; i < count; ++i) {
|
|
||||||
const uint c = src[i];
|
|
||||||
const uint a = fromRGB ? aOpaque : (((c >> aRightShift) & aMask) << alphaShift<Format>());
|
const uint a = fromRGB ? aOpaque : (((c >> aRightShift) & aMask) << alphaShift<Format>());
|
||||||
const uint r = ((c >> rRightShift) & rMask) << redShift<Format>();
|
const uint r = ((c >> rRightShift) & rMask) << redShift<Format>();
|
||||||
const uint g = ((c >> gRightShift) & gMask) << greenShift<Format>();
|
const uint g = ((c >> gRightShift) & gMask) << greenShift<Format>();
|
||||||
const uint b = ((c >> bRightShift) & bMask) << blueShift<Format>();
|
const uint b = ((c >> bRightShift) & bMask) << blueShift<Format>();
|
||||||
buffer[i] = a | r | g | b;
|
return a | r | g | b;
|
||||||
}
|
};
|
||||||
|
UNALIASED_CONVERSION_LOOP(buffer, src, count, conversion);
|
||||||
} else {
|
} else {
|
||||||
|
Q_CONSTEXPR uchar aWidth = alphaWidth<Format>();
|
||||||
|
Q_CONSTEXPR uchar rWidth = redWidth<Format>();
|
||||||
|
Q_CONSTEXPR uchar gWidth = greenWidth<Format>();
|
||||||
|
Q_CONSTEXPR uchar bWidth = blueWidth<Format>();
|
||||||
|
|
||||||
const uint *bayer_line = qt_bayer_matrix[dither->y & 15];
|
const uint *bayer_line = qt_bayer_matrix[dither->y & 15];
|
||||||
for (int i = 0; i < count; ++i) {
|
for (int i = 0; i < count; ++i) {
|
||||||
const uint c = src[i];
|
const uint c = src[i];
|
||||||
@ -514,8 +516,7 @@ static const uint *QT_FASTCALL convertARGB32ToARGB32PM(uint *buffer, const uint
|
|||||||
static const uint *QT_FASTCALL convertRGBA8888PMToARGB32PM(uint *buffer, const uint *src, int count,
                                                          const QVector<QRgb> *, QDitherInfo *)
{
    // Applies RGBA2ARGB to each of the count pixels in src, stores the
    // results in buffer and returns buffer. src may be the same pointer as
    // buffer. The colour-table and dither arguments are not used.
    const auto toArgb = [](uint rgba) { return RGBA2ARGB(rgba); };
    UNALIASED_CONVERSION_LOOP(buffer, src, count, toArgb);
    return buffer;
}
|
||||||
|
|
||||||
@ -568,8 +569,7 @@ static const uint *QT_FASTCALL convertARGB32FromARGB32PM(uint *buffer, const uin
|
|||||||
static const uint *QT_FASTCALL convertRGBA8888PMFromARGB32PM(uint *buffer, const uint *src, int count,
                                                            const QVector<QRgb> *, QDitherInfo *)
{
    // Applies ARGB2RGBA to each of the count pixels in src, stores the
    // results in buffer and returns buffer. src may be the same pointer as
    // buffer. The colour-table and dither arguments are not used.
    const auto toRgba = [](uint argb) { return ARGB2RGBA(argb); };
    UNALIASED_CONVERSION_LOOP(buffer, src, count, toRgba);
    return buffer;
}
|
||||||
|
|
||||||
@ -695,8 +695,7 @@ static const uint *QT_FASTCALL convertRGBA8888FromARGB32PM(uint *buffer, const u
|
|||||||
static const uint *QT_FASTCALL convertRGBXFromRGB32(uint *buffer, const uint *src, int count,
                                                   const QVector<QRgb> *, QDitherInfo *)
{
    // Forces the top byte of every source pixel to 0xff, passes the result
    // through ARGB2RGBA, stores it in buffer and returns buffer. src may be
    // the same pointer as buffer. The colour-table and dither arguments are
    // not used.
    const auto opaqueToRgba = [](uint rgb) {
        const uint opaque = 0xff000000 | rgb;
        return ARGB2RGBA(opaque);
    };
    UNALIASED_CONVERSION_LOOP(buffer, src, count, opaqueToRgba);
    return buffer;
}
|
||||||
|
|
||||||
@ -713,8 +712,7 @@ static const uint *QT_FASTCALL convertA2RGB30PMToARGB32PM(uint *buffer, const ui
|
|||||||
const QVector<QRgb> *, QDitherInfo *dither)
|
const QVector<QRgb> *, QDitherInfo *dither)
|
||||||
{
|
{
|
||||||
if (!dither) {
|
if (!dither) {
|
||||||
for (int i = 0; i < count; ++i)
|
UNALIASED_CONVERSION_LOOP(buffer, src, count, qConvertA2rgb30ToArgb32<PixelOrder>);
|
||||||
buffer[i] = qConvertA2rgb30ToArgb32<PixelOrder>(src[i]);
|
|
||||||
} else {
|
} else {
|
||||||
for (int i = 0; i < count; ++i) {
|
for (int i = 0; i < count; ++i) {
|
||||||
const uint c = src[i];
|
const uint c = src[i];
|
||||||
@ -796,8 +794,7 @@ template<QtPixelOrder PixelOrder>
|
|||||||
static const uint *QT_FASTCALL convertA2RGB30PMFromARGB32PM(uint *buffer, const uint *src, int count,
                                                           const QVector<QRgb> *, QDitherInfo *)
{
    // Runs qConvertArgb32ToA2rgb30<PixelOrder> over each of the count pixels
    // in src, stores the results in buffer and returns buffer. src may be the
    // same pointer as buffer. The colour-table and dither arguments are not
    // used.
    const auto pack = [](uint argb) { return qConvertArgb32ToA2rgb30<PixelOrder>(argb); };
    UNALIASED_CONVERSION_LOOP(buffer, src, count, pack);
    return buffer;
}
|
||||||
|
|
||||||
@ -814,8 +811,7 @@ template<QtPixelOrder PixelOrder>
|
|||||||
static const uint *QT_FASTCALL convertRGB30FromARGB32PM(uint *buffer, const uint *src, int count,
                                                        const QVector<QRgb> *, QDitherInfo *)
{
    // Converts count premultiplied ARGB32 pixels from src with
    // qConvertRgb32ToRgb30<PixelOrder>, stores the results in buffer and
    // returns buffer. src may be the same pointer as buffer. The colour-table
    // and dither arguments are not used.
    //
    // Each pixel has to go through qUnpremultiply() before it is packed.
    // Handing qConvertRgb32ToRgb30<PixelOrder> to the loop macro on its own
    // would skip that step and pack the still-premultiplied colour values,
    // so the two calls are combined in one capture-less lambda.
    auto conversion = [](uint s) {
        return qConvertRgb32ToRgb30<PixelOrder>(qUnpremultiply(s));
    };
    UNALIASED_CONVERSION_LOOP(buffer, src, count, conversion);
    return buffer;
}
|
||||||
|
|
||||||
|
@ -1104,22 +1104,30 @@ inline int qBlue565(quint16 rgb) {
|
|||||||
return (b << 3) | (b >> 2);
|
return (b << 3) | (b >> 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We manually unalias the variables to make sure the compiler
// fully optimizes both aliased and unaliased cases.
//
// Stores conversion(src[i]) in buffer[i] for every i in [0, count).
// When src and buffer are the same pointer the loop reads and writes through
// buffer only, so the compiler sees a plain in-place loop; otherwise it sees
// two distinct arrays.
//
// buffer, src and count are evaluated more than once, so pass expressions
// without side effects. conversion is anything callable with one pixel:
// a function name, a function template specialization or a lambda.
//
// The body is wrapped in do { } while (0) so that the macro expands to a
// single statement and can be used safely as the unbraced body of an if/else.
#define UNALIASED_CONVERSION_LOOP(buffer, src, count, conversion) \
    do { \
        if ((src) == (buffer)) { \
            for (int i = 0; i < (count); ++i) \
                (buffer)[i] = conversion((buffer)[i]); \
        } else { \
            for (int i = 0; i < (count); ++i) \
                (buffer)[i] = conversion((src)[i]); \
        } \
    } while (0)
|
||||||
|
|
||||||
|
|
||||||
static Q_ALWAYS_INLINE const uint *qt_convertARGB32ToARGB32PM(uint *buffer, const uint *src, int count)
{
    // Runs qPremultiply over each of the count pixels in src, stores the
    // results in buffer and returns buffer. src may be the same pointer as
    // buffer.
    const auto premultiply = [](uint argb) { return qPremultiply(argb); };
    UNALIASED_CONVERSION_LOOP(buffer, src, count, premultiply);
    return buffer;
}
|
||||||
|
|
||||||
static Q_ALWAYS_INLINE const uint *qt_convertRGBA8888ToARGB32PM(uint *buffer, const uint *src, int count)
{
    // For each of the count pixels in src: reorder with RGBA2ARGB, then
    // premultiply with qPremultiply. Results are stored in buffer, which is
    // also the return value. src may be the same pointer as buffer.
    const auto swizzleAndPremultiply = [](uint rgba) {
        const uint argb = RGBA2ARGB(rgba);
        return qPremultiply(argb);
    };
    UNALIASED_CONVERSION_LOOP(buffer, src, count, swizzleAndPremultiply);
    return buffer;
}
|
||||||
|
|
||||||
|
|
||||||
const uint qt_bayer_matrix[16][16] = {
|
const uint qt_bayer_matrix[16][16] = {
|
||||||
{ 0x1, 0xc0, 0x30, 0xf0, 0xc, 0xcc, 0x3c, 0xfc,
|
{ 0x1, 0xc0, 0x30, 0xf0, 0xc, 0xcc, 0x3c, 0xfc,
|
||||||
0x3, 0xc3, 0x33, 0xf3, 0xf, 0xcf, 0x3f, 0xff},
|
0x3, 0xc3, 0x33, 0xf3, 0xf, 0xcf, 0x3f, 0xff},
|
||||||
|
Loading…
Reference in New Issue
Block a user