Multithread qdrawhelper routines

Adds multi-threading to span handling routines in
qdrawhelper, effectively multi-threading almost all fills.

Also increases the number of spans processed at a time to
make the multithreading wider.

Change-Id: Iad04e187f880666855bebcd87d668f6c6fb5db7b
Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org>
Reviewed-by: Eirik Aavitsland <eirik.aavitsland@qt.io>
This commit is contained in:
Allan Sandfeld Jensen 2019-11-26 17:50:25 +01:00
parent 3346611a47
commit b18959ad7e
2 changed files with 548 additions and 478 deletions

View File

@ -61,6 +61,15 @@
#include <qloggingcategory.h>
#include <qmath.h>
#if QT_CONFIG(thread) && !defined(Q_OS_WASM)
#define QT_USE_THREAD_PARALLEL_FILLS
#endif
#if defined(QT_USE_THREAD_PARALLEL_FILLS)
#include <qsemaphore.h>
#include <qthreadpool.h>
#endif
QT_BEGIN_NAMESPACE
Q_LOGGING_CATEGORY(lcQtGuiDrawHelper, "qt.gui.drawhelper")
@ -3802,36 +3811,60 @@ static void spanfill_from_first(QRasterBuffer *rasterBuffer, QPixelLayout::BPP b
// -------------------- blend methods ---------------------
// QT_THREAD_PARALLEL_FILLS(function)
//
// Invokes function(cStart, cEnd) so that the span index range [0, count) is
// covered exactly once, either in a single call on the calling thread or as
// several disjoint sub-ranges dispatched to QThreadPool::globalInstance().
//
// Requirements at the expansion site (picked up implicitly from the
// enclosing scope):
//  - `count`: int, the number of spans to process.
//  - `data`:  pointer to the QSpanData being filled (only read by the
//             threaded variant, to look up the destination pixel size).
//  - `function`: callable as function(int cStart, int cEnd); it must be safe
//             to run concurrently on disjoint index ranges.
//
// The threaded variant splits the work into segments of roughly 64 spans and
// blocks on a semaphore until every segment has finished, so anything the
// callable captures by reference stays alive for the duration of the fill.
//
// The work stays on the calling thread when:
//  - there would be fewer than two segments (not worth the dispatch cost);
//  - the destination format is narrower than one byte per pixel, because
//    spans handled by different threads could then read-modify-write the
//    same destination byte concurrently;
//  - no global pool is available, or the caller already is one of the pool's
//    threads (waiting for the pool from inside the pool could stall it).
//
// The threaded expansion is wrapped in do { } while (false) so that it acts
// as a single statement and does not leak its locals into the caller's scope.
#if defined(QT_USE_THREAD_PARALLEL_FILLS)
#define QT_THREAD_PARALLEL_FILLS(function) \
    do { \
        const int segments = (count + 32) / 64; \
        QThreadPool *threadPool = QThreadPool::globalInstance(); \
        if (segments > 1 \
                && qPixelLayouts[data->rasterBuffer->format].bpp >= QPixelLayout::BPP8 \
                && threadPool && !threadPool->contains(QThread::currentThread())) { \
            QSemaphore semaphore; \
            int c = 0; \
            for (int i = 0; i < segments; ++i) { \
                int cn = (count - c) / (segments - i); \
                threadPool->start([&, c, cn]() { \
                    function(c, c + cn); \
                    semaphore.release(1); \
                }); \
                c += cn; \
            } \
            semaphore.acquire(segments); \
        } else { \
            function(0, count); \
        } \
    } while (false)
#else
#define QT_THREAD_PARALLEL_FILLS(function) function(0, count)
#endif
static void blend_color_generic(int count, const QSpan *spans, void *userData)
{
QSpanData *data = reinterpret_cast<QSpanData *>(userData);
uint buffer[BufferSize];
Operator op = getOperator(data, nullptr, 0);
const Operator op = getOperator(data, nullptr, 0);
const uint color = data->solidColor.rgba();
const bool solidFill = op.mode == QPainter::CompositionMode_Source;
const QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp;
while (count--) {
int x = spans->x;
int length = spans->len;
if (solidFill && bpp >= QPixelLayout::BPP8 && spans->coverage == 255 && length) {
auto function = [=] (int cStart, int cEnd) {
alignas(16) uint buffer[BufferSize];
for (int c = cStart; c < cEnd; ++c) {
int x = spans[c].x;
int length = spans[c].len;
if (solidFill && bpp >= QPixelLayout::BPP8 && spans[c].coverage == 255 && length && op.destStore) {
// If dest doesn't matter we don't need to bother with blending or converting all the identical pixels
op.destStore(data->rasterBuffer, x, spans->y, &color, 1);
spanfill_from_first(data->rasterBuffer, bpp, x, spans->y, length);
op.destStore(data->rasterBuffer, x, spans[c].y, &color, 1);
spanfill_from_first(data->rasterBuffer, bpp, x, spans[c].y, length);
length = 0;
}
while (length) {
int l = qMin(BufferSize, length);
uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l);
op.funcSolid(dest, l, color, spans->coverage);
uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans[c].y, l);
op.funcSolid(dest, l, color, spans[c].coverage);
if (op.destStore)
op.destStore(data->rasterBuffer, x, spans->y, dest, l);
op.destStore(data->rasterBuffer, x, spans[c].y, dest, l);
length -= l;
x += l;
}
++spans;
}
};
QT_THREAD_PARALLEL_FILLS(function);
}
static void blend_color_argb(int count, const QSpan *spans, void *userData)
@ -3861,50 +3894,55 @@ static void blend_color_argb(int count, const QSpan *spans, void *userData)
}
return;
}
while (count--) {
uint *target = ((uint *)data->rasterBuffer->scanLine(spans->y)) + spans->x;
op.funcSolid(target, spans->len, color, spans->coverage);
++spans;
const auto funcSolid = op.funcSolid;
auto function = [=] (int cStart, int cEnd) {
for (int c = cStart; c < cEnd; ++c) {
uint *target = ((uint *)data->rasterBuffer->scanLine(spans[c].y)) + spans[c].x;
funcSolid(target, spans[c].len, color, spans[c].coverage);
}
};
QT_THREAD_PARALLEL_FILLS(function);
}
static void blend_color_generic_rgb64(int count, const QSpan *spans, void *userData)
{
#if QT_CONFIG(raster_64bit)
QSpanData *data = reinterpret_cast<QSpanData *>(userData);
Operator op = getOperator(data, nullptr, 0);
const Operator op = getOperator(data, nullptr, 0);
if (!op.funcSolid64) {
qCDebug(lcQtGuiDrawHelper, "blend_color_generic_rgb64: unsupported 64bit blend attempted, falling back to 32-bit");
return blend_color_generic(count, spans, userData);
}
alignas(8) QRgba64 buffer[BufferSize];
const QRgba64 color = data->solidColor.rgba64();
const bool solidFill = op.mode == QPainter::CompositionMode_Source;
const QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp;
while (count--) {
int x = spans->x;
int length = spans->len;
if (solidFill && bpp >= QPixelLayout::BPP8 && spans->coverage == 255 && length && op.destStore64) {
auto function = [=, &op] (int cStart, int cEnd)
{
alignas(16) QRgba64 buffer[BufferSize];
for (int c = cStart; c < cEnd; ++c) {
int x = spans[c].x;
int length = spans[c].len;
if (solidFill && bpp >= QPixelLayout::BPP8 && spans[c].coverage == 255 && length && op.destStore64) {
// If dest doesn't matter we don't need to bother with blending or converting all the identical pixels
op.destStore64(data->rasterBuffer, x, spans->y, &color, 1);
spanfill_from_first(data->rasterBuffer, bpp, x, spans->y, length);
op.destStore64(data->rasterBuffer, x, spans[c].y, &color, 1);
spanfill_from_first(data->rasterBuffer, bpp, x, spans[c].y, length);
length = 0;
}
while (length) {
int l = qMin(BufferSize, length);
QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l);
op.funcSolid64(dest, l, color, spans->coverage);
QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans[c].y, l);
op.funcSolid64(dest, l, color, spans[c].coverage);
if (op.destStore64)
op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
op.destStore64(data->rasterBuffer, x, spans[c].y, dest, l);
length -= l;
x += l;
}
++spans;
}
};
QT_THREAD_PARALLEL_FILLS(function);
#else
blend_color_generic(count, spans, userData);
#endif
@ -3914,67 +3952,70 @@ static void blend_color_generic_fp(int count, const QSpan *spans, void *userData
{
#if QT_CONFIG(raster_fp)
QSpanData *data = reinterpret_cast<QSpanData *>(userData);
Operator op = getOperator(data, nullptr, 0);
const Operator op = getOperator(data, nullptr, 0);
if (!op.funcSolidFP || !op.destFetchFP) {
qCDebug(lcQtGuiDrawHelper, "blend_color_generic_fp: unsupported 4xF16 blend attempted, falling back to 32-bit");
return blend_color_generic(count, spans, userData);
}
QRgbaFloat32 buffer[BufferSize];
float r, g, b, a;
data->solidColor.getRgbF(&r, &g, &b, &a);
const QRgbaFloat32 color{r, g, b, a};
const bool solidFill = op.mode == QPainter::CompositionMode_Source;
QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp;
while (count--) {
int x = spans->x;
int length = spans->len;
if (solidFill && bpp >= QPixelLayout::BPP8 && spans->coverage == 255 && length && op.destStoreFP) {
auto function = [=, &op] (int cStart, int cEnd)
{
alignas(16) QRgbaFloat32 buffer[BufferSize];
for (int c = cStart; c < cEnd; ++c) {
int x = spans[c].x;
int length = spans[c].len;
if (solidFill && bpp >= QPixelLayout::BPP8 && spans[c].coverage == 255 && length && op.destStoreFP) {
// If dest doesn't matter we don't need to bother with blending or converting all the identical pixels
op.destStoreFP(data->rasterBuffer, x, spans->y, &color, 1);
spanfill_from_first(data->rasterBuffer, bpp, x, spans->y, length);
op.destStoreFP(data->rasterBuffer, x, spans[c].y, &color, 1);
spanfill_from_first(data->rasterBuffer, bpp, x, spans[c].y, length);
length = 0;
}
while (length) {
int l = qMin(BufferSize, length);
QRgbaFloat32 *dest = op.destFetchFP(buffer, data->rasterBuffer, x, spans->y, l);
op.funcSolidFP(dest, l, color, spans->coverage);
QRgbaFloat32 *dest = op.destFetchFP(buffer, data->rasterBuffer, x, spans[c].y, l);
op.funcSolidFP(dest, l, color, spans[c].coverage);
if (op.destStoreFP)
op.destStoreFP(data->rasterBuffer, x, spans->y, dest, l);
op.destStoreFP(data->rasterBuffer, x, spans[c].y, dest, l);
length -= l;
x += l;
}
++spans;
}
};
QT_THREAD_PARALLEL_FILLS(function);
#else
blend_color_generic(count, spans, userData);
#endif
}
template <typename T>
void handleSpans(int count, const QSpan *spans, const QSpanData *data, T &handler)
void handleSpans(int count, const QSpan *spans, const QSpanData *data, const Operator &op)
{
uint const_alpha = 256;
if (data->type == QSpanData::Texture)
const_alpha = data->texture.const_alpha;
const bool solidSource = handler.op.mode == QPainter::CompositionMode_Source && const_alpha == 256;
const int const_alpha = (data->type == QSpanData::Texture) ? data->texture.const_alpha : 256;
const bool solidSource = op.mode == QPainter::CompositionMode_Source && const_alpha == 256;
auto function = [=, &op] (int cStart, int cEnd)
{
T handler(data, op);
int coverage = 0;
while (count) {
if (!spans->len) {
++spans;
--count;
for (int c = cStart; c < cEnd;) {
if (!spans[c].len) {
++c;
continue;
}
int x = spans->x;
const int y = spans->y;
int right = x + spans->len;
const bool fetchDest = !solidSource || spans->coverage < 255;
int x = spans[c].x;
const int y = spans[c].y;
int right = x + spans[c].len;
const bool fetchDest = !solidSource || spans[c].coverage < 255;
// compute length of adjacent spans
for (int i = 1; i < count && spans[i].y == y && spans[i].x == right && fetchDest == (!solidSource || spans[i].coverage < 255); ++i)
for (int i = c + 1; i < cEnd && spans[i].y == y && spans[i].x == right && fetchDest == (!solidSource || spans[i].coverage < 255); ++i)
right += spans[i].len;
int length = right - x;
@ -3988,10 +4029,10 @@ void handleSpans(int count, const QSpan *spans, const QSpanData *data, T &handle
const auto *src = handler.fetch(process_x, y, process_length, fetchDest);
int offset = 0;
while (l > 0) {
if (x == spans->x) // new span?
coverage = (spans->coverage * const_alpha) >> 8;
if (x == spans[c].x) // new span?
coverage = (spans[c].coverage * const_alpha) >> 8;
int right = spans->x + spans->len;
int right = spans[c].x + spans[c].len;
int len = qMin(l, right - x);
handler.process(x, y, len, coverage, src, offset);
@ -4000,20 +4041,20 @@ void handleSpans(int count, const QSpan *spans, const QSpanData *data, T &handle
x += len;
offset += len;
if (x == right) { // done with current span?
++spans;
--count;
}
if (x == right) // done with current span?
++c;
}
handler.store(process_x, y, process_length);
}
}
};
QT_THREAD_PARALLEL_FILLS(function);
}
struct QBlendBase
{
const QSpanData *data;
const Operator op;
const Operator &op;
};
class BlendSrcGeneric : public QBlendBase
@ -4094,7 +4135,7 @@ public:
QRgbaFloat32 *dest = nullptr;
alignas(16) QRgbaFloat32 buffer[BufferSize];
alignas(16) QRgbaFloat32 src_buffer[BufferSize];
BlendSrcGenericRGBFP(QSpanData *d, const Operator &o)
BlendSrcGenericRGBFP(const QSpanData *d, const Operator &o)
: QBlendBase{d, o}
{
}
@ -4129,22 +4170,20 @@ public:
static void blend_src_generic(int count, const QSpan *spans, void *userData)
{
QSpanData *data = reinterpret_cast<QSpanData *>(userData);
BlendSrcGeneric blend(data, getOperator(data, spans, count));
handleSpans(count, spans, data, blend);
const Operator op = getOperator(data, nullptr, 0);
handleSpans<BlendSrcGeneric>(count, spans, data, op);
}
#if QT_CONFIG(raster_64bit)
static void blend_src_generic_rgb64(int count, const QSpan *spans, void *userData)
{
QSpanData *data = reinterpret_cast<QSpanData *>(userData);
Operator op = getOperator(data, spans, count);
BlendSrcGenericRGB64 blend64(data, op);
if (blend64.isSupported())
handleSpans(count, spans, data, blend64);
else {
const Operator op = getOperator(data, nullptr, 0);
if (op.func64 && op.destFetch64) {
handleSpans<BlendSrcGenericRGB64>(count, spans, data, op);
} else {
qCDebug(lcQtGuiDrawHelper, "blend_src_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
BlendSrcGeneric blend32(data, op);
handleSpans(count, spans, data, blend32);
handleSpans<BlendSrcGeneric>(count, spans, data, op);
}
}
#endif
@ -4153,14 +4192,12 @@ static void blend_src_generic_rgb64(int count, const QSpan *spans, void *userDat
static void blend_src_generic_fp(int count, const QSpan *spans, void *userData)
{
QSpanData *data = reinterpret_cast<QSpanData *>(userData);
Operator op = getOperator(data, spans, count);
BlendSrcGenericRGBFP blendFP(data, op);
if (blendFP.isSupported())
handleSpans(count, spans, data, blendFP);
else {
const Operator op = getOperator(data, spans, count);
if (op.funcFP && op.destFetchFP && op.srcFetchFP) {
handleSpans<BlendSrcGenericRGBFP>(count, spans, data, op);
} else {
qCDebug(lcQtGuiDrawHelper, "blend_src_generic_fp: unsupported 4xFP blend attempted, falling back to 32-bit");
BlendSrcGeneric blend32(data, op);
handleSpans(count, spans, data, blend32);
handleSpans<BlendSrcGeneric>(count, spans, data, op);
}
}
#endif
@ -4169,24 +4206,27 @@ static void blend_untransformed_generic(int count, const QSpan *spans, void *use
{
QSpanData *data = reinterpret_cast<QSpanData *>(userData);
uint buffer[BufferSize];
uint src_buffer[BufferSize];
Operator op = getOperator(data, spans, count);
const Operator op = getOperator(data, spans, count);
const int image_width = data->texture.width;
const int image_height = data->texture.height;
int xoff = -qRound(-data->dx);
int yoff = -qRound(-data->dy);
const bool solidSource = op.mode == QPainter::CompositionMode_Source && data->texture.const_alpha == 256 && op.destFetch != destFetchARGB32P;
const int const_alpha = data->texture.const_alpha;
const int xoff = -qRound(-data->dx);
const int yoff = -qRound(-data->dy);
const bool solidSource = op.mode == QPainter::CompositionMode_Source && const_alpha == 256 && op.destFetch != destFetchARGB32P;
for (; count--; spans++) {
if (!spans->len)
auto function = [=, &op] (int cStart, int cEnd)
{
alignas(16) uint buffer[BufferSize];
alignas(16) uint src_buffer[BufferSize];
for (int c = cStart; c < cEnd; ++c) {
if (!spans[c].len)
continue;
int x = spans->x;
int length = spans->len;
int x = spans[c].x;
int length = spans[c].len;
int sx = xoff + x;
int sy = yoff + spans->y;
const bool fetchDest = !solidSource || spans->coverage < 255;
int sy = yoff + spans[c].y;
const bool fetchDest = !solidSource || spans[c].coverage < 255;
if (sy >= 0 && sy < image_height && sx < image_width) {
if (sx < 0) {
x -= sx;
@ -4196,14 +4236,14 @@ static void blend_untransformed_generic(int count, const QSpan *spans, void *use
if (sx + length > image_width)
length = image_width - sx;
if (length > 0) {
const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
const int coverage = (spans[c].coverage * const_alpha) >> 8;
while (length) {
int l = qMin(BufferSize, length);
const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l);
uint *dest = fetchDest ? op.destFetch(buffer, data->rasterBuffer, x, spans->y, l) : buffer;
uint *dest = fetchDest ? op.destFetch(buffer, data->rasterBuffer, x, spans[c].y, l) : buffer;
op.func(dest, src, l, coverage);
if (op.destStore)
op.destStore(data->rasterBuffer, x, spans->y, dest, l);
op.destStore(data->rasterBuffer, x, spans[c].y, dest, l);
x += l;
sx += l;
length -= l;
@ -4211,6 +4251,8 @@ static void blend_untransformed_generic(int count, const QSpan *spans, void *use
}
}
}
};
QT_THREAD_PARALLEL_FILLS(function);
}
#if QT_CONFIG(raster_64bit)
@ -4218,28 +4260,31 @@ static void blend_untransformed_generic_rgb64(int count, const QSpan *spans, voi
{
QSpanData *data = reinterpret_cast<QSpanData *>(userData);
Operator op = getOperator(data, spans, count);
const Operator op = getOperator(data, spans, count);
if (!op.func64) {
qCDebug(lcQtGuiDrawHelper, "blend_untransformed_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
return blend_untransformed_generic(count, spans, userData);
}
alignas(8) QRgba64 buffer[BufferSize];
alignas(8) QRgba64 src_buffer[BufferSize];
const int image_width = data->texture.width;
const int image_height = data->texture.height;
int xoff = -qRound(-data->dx);
int yoff = -qRound(-data->dy);
const bool solidSource = op.mode == QPainter::CompositionMode_Source && data->texture.const_alpha == 256 && op.destFetch64 != destFetchRGB64;
const int const_alpha = data->texture.const_alpha;
const int xoff = -qRound(-data->dx);
const int yoff = -qRound(-data->dy);
const bool solidSource = op.mode == QPainter::CompositionMode_Source && const_alpha == 256 && op.destFetch64 != destFetchRGB64;
for (; count--; spans++) {
if (!spans->len)
auto function = [=, &op] (int cStart, int cEnd)
{
alignas(16) QRgba64 buffer[BufferSize];
alignas(16) QRgba64 src_buffer[BufferSize];
for (int c = cStart; c < cEnd; ++c) {
if (!spans[c].len)
continue;
int x = spans->x;
int length = spans->len;
int x = spans[c].x;
int length = spans[c].len;
int sx = xoff + x;
int sy = yoff + spans->y;
const bool fetchDest = !solidSource || spans->coverage < 255;
int sy = yoff + spans[c].y;
const bool fetchDest = !solidSource || spans[c].coverage < 255;
if (sy >= 0 && sy < image_height && sx < image_width) {
if (sx < 0) {
x -= sx;
@ -4249,14 +4294,14 @@ static void blend_untransformed_generic_rgb64(int count, const QSpan *spans, voi
if (sx + length > image_width)
length = image_width - sx;
if (length > 0) {
const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
const int coverage = (spans[c].coverage * const_alpha) >> 8;
while (length) {
int l = qMin(BufferSize, length);
const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
QRgba64 *dest = fetchDest ? op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l) : buffer;
QRgba64 *dest = fetchDest ? op.destFetch64(buffer, data->rasterBuffer, x, spans[c].y, l) : buffer;
op.func64(dest, src, l, coverage);
if (op.destStore64)
op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
op.destStore64(data->rasterBuffer, x, spans[c].y, dest, l);
x += l;
sx += l;
length -= l;
@ -4264,6 +4309,8 @@ static void blend_untransformed_generic_rgb64(int count, const QSpan *spans, voi
}
}
}
};
QT_THREAD_PARALLEL_FILLS(function);
}
#endif
@ -4272,28 +4319,30 @@ static void blend_untransformed_generic_fp(int count, const QSpan *spans, void *
{
QSpanData *data = reinterpret_cast<QSpanData *>(userData);
Operator op = getOperator(data, spans, count);
const Operator op = getOperator(data, spans, count);
if (!op.funcFP) {
qCDebug(lcQtGuiDrawHelper, "blend_untransformed_generic_rgbaf16: unsupported 4xFP16 blend attempted, falling back to 32-bit");
return blend_untransformed_generic(count, spans, userData);
}
QRgbaFloat32 buffer[BufferSize];
QRgbaFloat32 src_buffer[BufferSize];
const int image_width = data->texture.width;
const int image_height = data->texture.height;
int xoff = -qRound(-data->dx);
int yoff = -qRound(-data->dy);
const int xoff = -qRound(-data->dx);
const int yoff = -qRound(-data->dy);
const bool solidSource = op.mode == QPainter::CompositionMode_Source && data->texture.const_alpha == 256 && op.destFetchFP != destFetchRGBFP;
for (; count--; spans++) {
if (!spans->len)
auto function = [=, &op] (int cStart, int cEnd)
{
alignas(16) QRgbaFloat32 buffer[BufferSize];
alignas(16) QRgbaFloat32 src_buffer[BufferSize];
for (int c = cStart; c < cEnd; ++c) {
if (!spans[c].len)
continue;
int x = spans->x;
int length = spans->len;
int x = spans[c].x;
int length = spans[c].len;
int sx = xoff + x;
int sy = yoff + spans->y;
const bool fetchDest = !solidSource || spans->coverage < 255;
int sy = yoff + spans[c].y;
const bool fetchDest = !solidSource || spans[c].coverage < 255;
if (sy >= 0 && sy < image_height && sx < image_width) {
if (sx < 0) {
x -= sx;
@ -4303,14 +4352,14 @@ static void blend_untransformed_generic_fp(int count, const QSpan *spans, void *
if (sx + length > image_width)
length = image_width - sx;
if (length > 0) {
const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
const int coverage = (spans[c].coverage * data->texture.const_alpha) >> 8;
while (length) {
int l = qMin(BufferSize, length);
const QRgbaFloat32 *src = op.srcFetchFP(src_buffer, &op, data, sy, sx, l);
QRgbaFloat32 *dest = fetchDest ? op.destFetchFP(buffer, data->rasterBuffer, x, spans->y, l) : buffer;
QRgbaFloat32 *dest = fetchDest ? op.destFetchFP(buffer, data->rasterBuffer, x, spans[c].y, l) : buffer;
op.funcFP(dest, src, l, coverage);
if (op.destStoreFP)
op.destStoreFP(data->rasterBuffer, x, spans->y, dest, l);
op.destStoreFP(data->rasterBuffer, x, spans[c].y, dest, l);
x += l;
sx += l;
length -= l;
@ -4318,6 +4367,8 @@ static void blend_untransformed_generic_fp(int count, const QSpan *spans, void *
}
}
}
};
QT_THREAD_PARALLEL_FILLS(function);
}
#endif
@ -4330,20 +4381,23 @@ static void blend_untransformed_argb(int count, const QSpan *spans, void *userDa
return;
}
Operator op = getOperator(data, spans, count);
const Operator op = getOperator(data, spans, count);
const int image_width = data->texture.width;
const int image_height = data->texture.height;
int xoff = -qRound(-data->dx);
int yoff = -qRound(-data->dy);
const int const_alpha = data->texture.const_alpha;
const int xoff = -qRound(-data->dx);
const int yoff = -qRound(-data->dy);
for (; count--; spans++) {
if (!spans->len)
auto function = [=, &op] (int cStart, int cEnd)
{
for (int c = cStart; c < cEnd; ++c) {
if (!spans[c].len)
continue;
int x = spans->x;
int length = spans->len;
int x = spans[c].x;
int length = spans[c].len;
int sx = xoff + x;
int sy = yoff + spans->y;
int sy = yoff + spans[c].y;
if (sy >= 0 && sy < image_height && sx < image_width) {
if (sx < 0) {
x -= sx;
@ -4353,13 +4407,15 @@ static void blend_untransformed_argb(int count, const QSpan *spans, void *userDa
if (sx + length > image_width)
length = image_width - sx;
if (length > 0) {
const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
const int coverage = (spans[c].coverage * const_alpha) >> 8;
const uint *src = (const uint *)data->texture.scanLine(sy) + sx;
uint *dest = ((uint *)data->rasterBuffer->scanLine(spans->y)) + x;
uint *dest = ((uint *)data->rasterBuffer->scanLine(spans[c].y)) + x;
op.func(dest, src, length, coverage);
}
}
}
};
QT_THREAD_PARALLEL_FILLS(function);
}
static inline quint16 interpolate_pixel_rgb16_255(quint16 x, quint8 a,
@ -4431,22 +4487,19 @@ static void blend_untransformed_rgb565(int count, const QSpan *spans, void *user
int xoff = -qRound(-data->dx);
int yoff = -qRound(-data->dy);
const QSpan *end = spans + count;
while (spans < end) {
if (!spans->len) {
++spans;
auto function = [=](int cStart, int cEnd)
{
for (int c = cStart; c < cEnd; ++c) {
if (!spans[c].len)
continue;
}
const quint8 coverage = (data->texture.const_alpha * spans->coverage) >> 8;
if (coverage == 0) {
++spans;
const quint8 coverage = (data->texture.const_alpha * spans[c].coverage) >> 8;
if (coverage == 0)
continue;
}
int x = spans->x;
int length = spans->len;
int x = spans[c].x;
int length = spans[c].len;
int sx = xoff + x;
int sy = yoff + spans->y;
int sy = yoff + spans[c].y;
if (sy >= 0 && sy < image_height && sx < image_width) {
if (sx < 0) {
x -= sx;
@ -4456,7 +4509,7 @@ static void blend_untransformed_rgb565(int count, const QSpan *spans, void *user
if (sx + length > image_width)
length = image_width - sx;
if (length > 0) {
quint16 *dest = (quint16 *)data->rasterBuffer->scanLine(spans->y) + x;
quint16 *dest = (quint16 *)data->rasterBuffer->scanLine(spans[c].y) + x;
const quint16 *src = (const quint16 *)data->texture.scanLine(sy) + sx;
if (coverage == 255) {
memcpy(dest, src, length * sizeof(quint16));
@ -4468,20 +4521,20 @@ static void blend_untransformed_rgb565(int count, const QSpan *spans, void *user
}
}
}
++spans;
}
};
QT_THREAD_PARALLEL_FILLS(function);
}
static void blend_tiled_generic(int count, const QSpan *spans, void *userData)
{
QSpanData *data = reinterpret_cast<QSpanData *>(userData);
uint buffer[BufferSize];
uint src_buffer[BufferSize];
Operator op = getOperator(data, spans, count);
const Operator op = getOperator(data, spans, count);
const int image_width = data->texture.width;
const int image_height = data->texture.height;
const int const_alpha = data->texture.const_alpha;
int xoff = -qRound(-data->dx) % image_width;
int yoff = -qRound(-data->dy) % image_height;
@ -4490,34 +4543,39 @@ static void blend_tiled_generic(int count, const QSpan *spans, void *userData)
if (yoff < 0)
yoff += image_height;
while (count--) {
int x = spans->x;
int length = spans->len;
int sx = (xoff + spans->x) % image_width;
int sy = (spans->y + yoff) % image_height;
auto function = [=, &op](int cStart, int cEnd)
{
alignas(16) uint buffer[BufferSize];
alignas(16) uint src_buffer[BufferSize];
for (int c = cStart; c < cEnd; ++c) {
int x = spans[c].x;
int length = spans[c].len;
int sx = (xoff + spans[c].x) % image_width;
int sy = (spans[c].y + yoff) % image_height;
if (sx < 0)
sx += image_width;
if (sy < 0)
sy += image_height;
const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
const int coverage = (spans[c].coverage * const_alpha) >> 8;
while (length) {
int l = qMin(image_width - sx, length);
if (BufferSize < l)
l = BufferSize;
const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l);
uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l);
uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans[c].y, l);
op.func(dest, src, l, coverage);
if (op.destStore)
op.destStore(data->rasterBuffer, x, spans->y, dest, l);
op.destStore(data->rasterBuffer, x, spans[c].y, dest, l);
x += l;
sx += l;
length -= l;
if (sx >= image_width)
sx = 0;
}
++spans;
}
};
QT_THREAD_PARALLEL_FILLS(function);
}
#if QT_CONFIG(raster_64bit)
@ -4525,13 +4583,11 @@ static void blend_tiled_generic_rgb64(int count, const QSpan *spans, void *userD
{
QSpanData *data = reinterpret_cast<QSpanData *>(userData);
Operator op = getOperator(data, spans, count);
const Operator op = getOperator(data, spans, count);
if (!op.func64) {
qCDebug(lcQtGuiDrawHelper, "blend_tiled_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
return blend_tiled_generic(count, spans, userData);
}
alignas(8) QRgba64 buffer[BufferSize];
alignas(8) QRgba64 src_buffer[BufferSize];
const int image_width = data->texture.width;
const int image_height = data->texture.height;
@ -4546,6 +4602,7 @@ static void blend_tiled_generic_rgb64(int count, const QSpan *spans, void *userD
bool isBpp32 = qPixelLayouts[data->rasterBuffer->format].bpp == QPixelLayout::BPP32;
bool isBpp64 = qPixelLayouts[data->rasterBuffer->format].bpp == QPixelLayout::BPP64;
if (op.destFetch64 == destFetch64Undefined && image_width <= BufferSize && (isBpp32 || isBpp64)) {
alignas(16) QRgba64 src_buffer[BufferSize];
// If destination isn't blended into the result, we can do the tiling directly on destination pixels.
while (count--) {
int x = spans->x;
@ -4593,34 +4650,39 @@ static void blend_tiled_generic_rgb64(int count, const QSpan *spans, void *userD
return;
}
while (count--) {
int x = spans->x;
int length = spans->len;
int sx = (xoff + spans->x) % image_width;
int sy = (spans->y + yoff) % image_height;
auto function = [=, &op](int cStart, int cEnd)
{
alignas(16) QRgba64 buffer[BufferSize];
alignas(16) QRgba64 src_buffer[BufferSize];
for (int c = cStart; c < cEnd; ++c) {
int x = spans[c].x;
int length = spans[c].len;
int sx = (xoff + spans[c].x) % image_width;
int sy = (spans[c].y + yoff) % image_height;
if (sx < 0)
sx += image_width;
if (sy < 0)
sy += image_height;
const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
const int coverage = (spans[c].coverage * data->texture.const_alpha) >> 8;
while (length) {
int l = qMin(image_width - sx, length);
if (BufferSize < l)
l = BufferSize;
const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l);
QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans[c].y, l);
op.func64(dest, src, l, coverage);
if (op.destStore64)
op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
op.destStore64(data->rasterBuffer, x, spans[c].y, dest, l);
x += l;
sx += l;
length -= l;
if (sx >= image_width)
sx = 0;
}
++spans;
}
};
QT_THREAD_PARALLEL_FILLS(function);
}
#endif
@ -4629,13 +4691,11 @@ static void blend_tiled_generic_fp(int count, const QSpan *spans, void *userData
{
QSpanData *data = reinterpret_cast<QSpanData *>(userData);
Operator op = getOperator(data, spans, count);
const Operator op = getOperator(data, spans, count);
if (!op.funcFP) {
qCDebug(lcQtGuiDrawHelper, "blend_tiled_generic_fp: unsupported 4xFP blend attempted, falling back to 32-bit");
return blend_tiled_generic(count, spans, userData);
}
QRgbaFloat32 buffer[BufferSize];
QRgbaFloat32 src_buffer[BufferSize];
const int image_width = data->texture.width;
const int image_height = data->texture.height;
@ -4649,34 +4709,39 @@ static void blend_tiled_generic_fp(int count, const QSpan *spans, void *userData
// Consider tiling optimizing like the other versions.
while (count--) {
int x = spans->x;
int length = spans->len;
int sx = (xoff + spans->x) % image_width;
int sy = (spans->y + yoff) % image_height;
auto function = [=, &op](int cStart, int cEnd)
{
alignas(16) QRgbaFloat32 buffer[BufferSize];
alignas(16) QRgbaFloat32 src_buffer[BufferSize];
for (int c = cStart; c < cEnd; ++c) {
int x = spans[c].x;
int length = spans[c].len;
int sx = (xoff + spans[c].x) % image_width;
int sy = (spans[c].y + yoff) % image_height;
if (sx < 0)
sx += image_width;
if (sy < 0)
sy += image_height;
const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
const int coverage = (spans[c].coverage * data->texture.const_alpha) >> 8;
while (length) {
int l = qMin(image_width - sx, length);
if (BufferSize < l)
l = BufferSize;
const QRgbaFloat32 *src = op.srcFetchFP(src_buffer, &op, data, sy, sx, l);
QRgbaFloat32 *dest = op.destFetchFP(buffer, data->rasterBuffer, x, spans->y, l);
QRgbaFloat32 *dest = op.destFetchFP(buffer, data->rasterBuffer, x, spans[c].y, l);
op.funcFP(dest, src, l, coverage);
if (op.destStoreFP)
op.destStoreFP(data->rasterBuffer, x, spans->y, dest, l);
op.destStoreFP(data->rasterBuffer, x, spans[c].y, dest, l);
x += l;
sx += l;
length -= l;
if (sx >= image_width)
sx = 0;
}
++spans;
}
};
QT_THREAD_PARALLEL_FILLS(function);
}
#endif
@ -4689,10 +4754,10 @@ static void blend_tiled_argb(int count, const QSpan *spans, void *userData)
return;
}
Operator op = getOperator(data, spans, count);
const Operator op = getOperator(data, spans, count);
int image_width = data->texture.width;
int image_height = data->texture.height;
const int image_width = data->texture.width;
const int image_height = data->texture.height;
int xoff = -qRound(-data->dx) % image_width;
int yoff = -qRound(-data->dy) % image_height;
@ -4700,33 +4765,37 @@ static void blend_tiled_argb(int count, const QSpan *spans, void *userData)
xoff += image_width;
if (yoff < 0)
yoff += image_height;
const auto func = op.func;
const int const_alpha = data->texture.const_alpha;
while (count--) {
int x = spans->x;
int length = spans->len;
int sx = (xoff + spans->x) % image_width;
int sy = (spans->y + yoff) % image_height;
auto function = [=] (int cStart, int cEnd) {
for (int c = cStart; c < cEnd; ++c) {
int x = spans[c].x;
int length = spans[c].len;
int sx = (xoff + spans[c].x) % image_width;
int sy = (spans[c].y + yoff) % image_height;
if (sx < 0)
sx += image_width;
if (sy < 0)
sy += image_height;
const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
const int coverage = (spans[c].coverage * const_alpha) >> 8;
while (length) {
int l = qMin(image_width - sx, length);
if (BufferSize < l)
l = BufferSize;
const uint *src = (const uint *)data->texture.scanLine(sy) + sx;
uint *dest = ((uint *)data->rasterBuffer->scanLine(spans->y)) + x;
op.func(dest, src, l, coverage);
uint *dest = ((uint *)data->rasterBuffer->scanLine(spans[c].y)) + x;
func(dest, src, l, coverage);
x += l;
sx += l;
length -= l;
if (sx >= image_width)
sx = 0;
}
++spans;
}
};
QT_THREAD_PARALLEL_FILLS(function);
}
static void blend_tiled_rgb565(int count, const QSpan *spans, void *userData)
@ -4752,17 +4821,17 @@ static void blend_tiled_rgb565(int count, const QSpan *spans, void *userData)
if (yoff < 0)
yoff += image_height;
while (count--) {
const quint8 coverage = (data->texture.const_alpha * spans->coverage) >> 8;
if (coverage == 0) {
++spans;
const int const_alpha = data->texture.const_alpha;
auto function = [=] (int cStart, int cEnd) {
for (int c = cStart; c < cEnd; ++c) {
const quint8 coverage = (const_alpha * spans[c].coverage) >> 8;
if (coverage == 0)
continue;
}
int x = spans->x;
int length = spans->len;
int sx = (xoff + spans->x) % image_width;
int sy = (spans->y + yoff) % image_height;
int x = spans[c].x;
int length = spans[c].len;
int sx = (xoff + spans[c].x) % image_width;
int sy = (spans[c].y + yoff) % image_height;
if (sx < 0)
sx += image_width;
if (sy < 0)
@ -4776,7 +4845,7 @@ static void blend_tiled_rgb565(int count, const QSpan *spans, void *userData)
int l = qMin(image_width - sx, length);
if (BufferSize < l)
l = BufferSize;
quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(spans->y)) + tx;
quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(spans[c].y)) + tx;
const quint16 *src = (const quint16 *)data->texture.scanLine(sy) + sx;
memcpy(dest, src, l * sizeof(quint16));
length -= l;
@ -4792,9 +4861,9 @@ static void blend_tiled_rgb565(int count, const QSpan *spans, void *userData)
// We are dealing with one block of data
// - More likely to fit in the cache
// - can use memcpy
int copy_image_width = qMin(image_width, int(spans->len));
length = spans->len - copy_image_width;
quint16 *src = ((quint16 *)data->rasterBuffer->scanLine(spans->y)) + x;
int copy_image_width = qMin(image_width, int(spans[c].len));
length = spans[c].len - copy_image_width;
quint16 *src = ((quint16 *)data->rasterBuffer->scanLine(spans[c].y)) + x;
quint16 *dest = src + copy_image_width;
while (copy_image_width < length) {
memcpy(dest, src, copy_image_width * sizeof(quint16));
@ -4812,7 +4881,7 @@ static void blend_tiled_rgb565(int count, const QSpan *spans, void *userData)
int l = qMin(image_width - sx, length);
if (BufferSize < l)
l = BufferSize;
quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(spans->y)) + x;
quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(spans[c].y)) + x;
const quint16 *src = (const quint16 *)data->texture.scanLine(sy) + sx;
blend_sourceOver_rgb16_rgb16(dest, src, l, alpha, ialpha);
x += l;
@ -4823,8 +4892,9 @@ static void blend_tiled_rgb565(int count, const QSpan *spans, void *userData)
}
}
}
++spans;
}
};
QT_THREAD_PARALLEL_FILLS(function);
}
/* Image formats here are target formats */

View File

@ -1457,7 +1457,7 @@ static void fillRect_normalized(const QRect &r, QSpanData *data,
ProcessSpans blend = isUnclipped ? data->unclipped_blend : data->blend;
const int nspans = 256;
const int nspans = 512;
QT_FT_Span spans[nspans];
Q_ASSERT(data->blend);
@ -2700,7 +2700,7 @@ void QRasterPaintEngine::alphaPenBlt(const void* src, int bpl, int depth, int rx
if (w <= 0 || h <= 0)
return;
const int NSPANS = 256;
const int NSPANS = 512;
QSpan spans[NSPANS];
int current = 0;
@ -3424,7 +3424,7 @@ void QRasterPaintEngine::drawBitmap(const QPointF &pos, const QImage &image, QSp
Q_ASSERT(image.depth() == 1);
const int spanCount = 256;
const int spanCount = 512;
QT_FT_Span spans[spanCount];
int n = 0;
@ -4124,7 +4124,7 @@ static void qt_span_fill_clipped(int spanCount, const QSpan *spans, void *userDa
Q_ASSERT(fillData->blend && fillData->unclipped_blend);
const int NSPANS = 256;
const int NSPANS = 512;
QSpan cspans[NSPANS];
int currentClip = 0;
const QSpan *end = spans + spanCount;