Avoid using QRgba64 for buffers

Benchmarking showed most time rendering in rgb64 mode was spent on
memsetting the buffers because they were not declared with a primitive
type.

This patch changes the buffers to quint64, but leaves refactoring
function arguments to a later patch in the dev branch.

Change-Id: Iacc81b0d8e9570b1975dffb85c955b0aabb096a7
Reviewed-by: Eirik Aavitsland <eirik.aavitsland@qt.io>
This commit is contained in:
Allan Sandfeld Jensen 2016-12-12 14:38:34 +01:00
parent 20fd99d863
commit bee9a78e13

View File

@ -2870,8 +2870,8 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
FetchPixelFunc fetch = qFetchPixel[layout->bpp];
uint sbuf1[buffer_size];
uint sbuf2[buffer_size];
QRgba64 buf1[buffer_size];
QRgba64 buf2[buffer_size];
quint64 buf1[buffer_size];
quint64 buf2[buffer_size];
QRgba64 *b = buffer;
while (length) {
int len = qMin(length, buffer_size / 2);
@ -2947,9 +2947,9 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
fx += fdx;
}
layout->convertToARGB64PM(buf1, sbuf1, len * 2, clut, 0);
layout->convertToARGB64PM((QRgba64 *)buf1, sbuf1, len * 2, clut, 0);
if (disty)
layout->convertToARGB64PM(buf2, sbuf2, len * 2, clut, 0);
layout->convertToARGB64PM((QRgba64 *)buf2, sbuf2, len * 2, clut, 0);
for (int i = 0; i < len; ++i) {
int distx = (fracX & 0x0000ffff);
@ -2967,7 +2967,7 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8));
_mm_storel_epi64((__m128i*)(b+i), vt);
#else
b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
b[i] = interpolate_4_pixels_rgb64((QRgba64 *)buf1 + i*2, (QRgba64 *)buf2 + i*2, distx, disty);
#endif
fracX += fdx;
}
@ -2978,8 +2978,8 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
FetchPixelFunc fetch = qFetchPixel[layout->bpp];
uint sbuf1[buffer_size];
uint sbuf2[buffer_size];
QRgba64 buf1[buffer_size];
QRgba64 buf2[buffer_size];
quint64 buf1[buffer_size];
quint64 buf2[buffer_size];
QRgba64 *end = buffer + length;
QRgba64 *b = buffer;
@ -3087,13 +3087,13 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
fx += fdx;
fy += fdy;
}
layout->convertToARGB64PM(buf1, sbuf1, len * 2, clut, 0);
layout->convertToARGB64PM(buf2, sbuf2, len * 2, clut, 0);
layout->convertToARGB64PM((QRgba64 *)buf1, sbuf1, len * 2, clut, 0);
layout->convertToARGB64PM((QRgba64 *)buf2, sbuf2, len * 2, clut, 0);
for (int i = 0; i < len; ++i) {
int distx = (fracX & 0x0000ffff);
int disty = (fracY & 0x0000ffff);
b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
b[i] = interpolate_4_pixels_rgb64((QRgba64 *)buf1 + i*2, (QRgba64 *)buf2 + i*2, distx, disty);
fracX += fdx;
fracY += fdy;
}
@ -3110,8 +3110,8 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
FetchPixelFunc fetch = qFetchPixel[layout->bpp];
uint sbuf1[buffer_size];
uint sbuf2[buffer_size];
QRgba64 buf1[buffer_size];
QRgba64 buf2[buffer_size];
quint64 buf1[buffer_size];
quint64 buf2[buffer_size];
QRgba64 *b = buffer;
int distxs[buffer_size / 2];
@ -3159,13 +3159,13 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
fw += fdw;
}
layout->convertToARGB64PM(buf1, sbuf1, len * 2, clut, 0);
layout->convertToARGB64PM(buf2, sbuf2, len * 2, clut, 0);
layout->convertToARGB64PM((QRgba64 *)buf1, sbuf1, len * 2, clut, 0);
layout->convertToARGB64PM((QRgba64 *)buf2, sbuf2, len * 2, clut, 0);
for (int i = 0; i < len; ++i) {
int distx = distxs[i];
int disty = distys[i];
b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
b[i] = interpolate_4_pixels_rgb64((QRgba64 *)buf1 + i*2, (QRgba64 *)buf2 + i*2, distx, disty);
}
length -= len;
@ -3712,7 +3712,7 @@ void blend_color_generic_rgb64(int count, const QSpan *spans, void *userData)
return blend_color_generic(count, spans, userData);
}
QRgba64 buffer[buffer_size];
quint64 buffer[buffer_size];
const QRgba64 color = data->solid.color;
while (count--) {
@ -3720,7 +3720,7 @@ void blend_color_generic_rgb64(int count, const QSpan *spans, void *userData)
int length = spans->len;
while (length) {
int l = qMin(buffer_size, length);
QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l);
QRgba64 *dest = op.destFetch64((QRgba64 *)buffer, data->rasterBuffer, x, spans->y, l);
op.funcSolid64(dest, l, color, spans->coverage);
op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
length -= l;
@ -3901,11 +3901,11 @@ public:
}
};
class BlendSrcGenericRGB64 : public QBlendBase<QRgba64>
class BlendSrcGenericRGB64 : public QBlendBase<quint64>
{
public:
BlendSrcGenericRGB64(QSpanData *d, const Operator &o)
: QBlendBase<QRgba64>(d, o)
: QBlendBase<quint64>(d, o)
{
}
@ -3914,20 +3914,20 @@ public:
return op.func64 && op.destFetch64 && op.destStore64;
}
const QRgba64 *fetch(int x, int y, int len)
const quint64 *fetch(int x, int y, int len)
{
dest = op.destFetch64(buffer, data->rasterBuffer, x, y, len);
return op.srcFetch64(src_buffer, &op, data, y, x, len);
dest = (quint64 *)op.destFetch64((QRgba64 *)buffer, data->rasterBuffer, x, y, len);
return (const quint64 *)op.srcFetch64((QRgba64 *)src_buffer, &op, data, y, x, len);
}
void process(int, int, int len, int coverage, const QRgba64 *src, int offset)
void process(int, int, int len, int coverage, const quint64 *src, int offset)
{
op.func64(dest + offset, src + offset, len, coverage);
op.func64((QRgba64 *)dest + offset, (const QRgba64 *)src + offset, len, coverage);
}
void store(int x, int y, int len)
{
op.destStore64(data->rasterBuffer, x, y, dest, len);
op.destStore64(data->rasterBuffer, x, y, (QRgba64 *)dest, len);
}
};
@ -4006,8 +4006,8 @@ static void blend_untransformed_generic_rgb64(int count, const QSpan *spans, voi
qWarning("Unsupported blend");
return blend_untransformed_generic(count, spans, userData);
}
QRgba64 buffer[buffer_size];
QRgba64 src_buffer[buffer_size];
quint64 buffer[buffer_size];
quint64 src_buffer[buffer_size];
const int image_width = data->texture.width;
const int image_height = data->texture.height;
@ -4031,8 +4031,8 @@ static void blend_untransformed_generic_rgb64(int count, const QSpan *spans, voi
const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
while (length) {
int l = qMin(buffer_size, length);
const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l);
const QRgba64 *src = op.srcFetch64((QRgba64 *)src_buffer, &op, data, sy, sx, l);
QRgba64 *dest = op.destFetch64((QRgba64 *)buffer, data->rasterBuffer, x, spans->y, l);
op.func64(dest, src, l, coverage);
op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
x += l;
@ -4247,8 +4247,8 @@ static void blend_tiled_generic_rgb64(int count, const QSpan *spans, void *userD
qDebug("unsupported rgb64 blend");
return blend_tiled_generic(count, spans, userData);
}
QRgba64 buffer[buffer_size];
QRgba64 src_buffer[buffer_size];
quint64 buffer[buffer_size];
quint64 src_buffer[buffer_size];
const int image_width = data->texture.width;
const int image_height = data->texture.height;
@ -4275,8 +4275,8 @@ static void blend_tiled_generic_rgb64(int count, const QSpan *spans, void *userD
int l = qMin(image_width - sx, length);
if (buffer_size < l)
l = buffer_size;
const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l);
const QRgba64 *src = op.srcFetch64((QRgba64 *)src_buffer, &op, data, sy, sx, l);
QRgba64 *dest = op.destFetch64((QRgba64 *)buffer, data->rasterBuffer, x, spans->y, l);
op.func64(dest, src, l, coverage);
op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
x += l;