Multithread some QImage routines

Use QThreadPool to run QImage smooth scaling, format conversions,
and color space transforms on multiple threads.

Change-Id: Ic142b1fa899f56e7e5099d36ca713701a47b681b
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
This commit is contained in:
Allan Sandfeld Jensen 2019-11-22 13:02:31 +01:00
parent cc59f0de55
commit 332816779c
6 changed files with 957 additions and 480 deletions

View File

@ -69,6 +69,11 @@
#include <private/qimage_p.h>
#include <private/qfont_p.h>
#if QT_CONFIG(thread)
#include "qsemaphore.h"
#include "qthreadpool.h"
#endif
QT_BEGIN_NAMESPACE
static inline bool isLocked(QImageData *data)
@ -5024,18 +5029,43 @@ void QImage::applyColorTransform(const QColorTransform &transform)
Q_UNREACHABLE();
}
std::function<void(int,int)> transformSegment;
if (depth() > 32) {
for (int i = 0; i < height(); ++i) {
QRgba64 *scanline = reinterpret_cast<QRgba64 *>(scanLine(i));
transformSegment = [&](int yStart, int yEnd) {
for (int y = yStart; y < yEnd; ++y) {
QRgba64 *scanline = reinterpret_cast<QRgba64 *>(scanLine(y));
transform.d->apply(scanline, scanline, width(), flags);
}
};
} else {
for (int i = 0; i < height(); ++i) {
QRgb *scanline = reinterpret_cast<QRgb *>(scanLine(i));
transformSegment = [&](int yStart, int yEnd) {
for (int y = yStart; y < yEnd; ++y) {
QRgb *scanline = reinterpret_cast<QRgb *>(scanLine(y));
transform.d->apply(scanline, scanline, width(), flags);
}
};
}
#if QT_CONFIG(thread)
// Plan one segment per 1<<16 (65536) bytes of image data, but never more
// segments than there are scanlines.
int segments = sizeInBytes() / (1<<16);
segments = std::min(segments, height());
if (segments > 1) {
// Every task releases the semaphore once when it is done; acquire(segments)
// below waits for all of them, so the objects captured by reference
// (semaphore, transformSegment) are still alive while the tasks run.
// NOTE(review): the wait happens on the calling thread. If the caller is itself
// a worker of the global pool and the pool is saturated, this could stall -- confirm.
QSemaphore semaphore;
int y = 0;
for (int i = 0; i < segments; ++i) {
// Rows not yet assigned divided by segments not yet started; when
// segments - i == 1 the last segment receives every remaining row.
int yn = (height() - y) / (segments - i);
QThreadPool::globalInstance()->start([&, y, yn]() {
transformSegment(y, y + yn);
semaphore.release(1);
});
y += yn;
}
semaphore.acquire(segments);
} else
#endif
// Single call over all rows: used when segments <= 1 or threads are not configured.
transformSegment(0, height());
if (oldFormat != format())
*this = std::move(*this).convertToFormat(oldFormat);
}

View File

@ -43,7 +43,12 @@
#include <private/qendian_p.h>
#include <private/qsimd_p.h>
#include <private/qimage_p.h>
#include <qendian.h>
#if QT_CONFIG(thread)
#include <qsemaphore.h>
#include <qthreadpool.h>
#endif
QT_BEGIN_NAMESPACE
@ -159,12 +164,8 @@ void convert_generic(QImageData *dest, const QImageData *src, Qt::ImageConversio
// Cannot be used with indexed formats.
Q_ASSERT(dest->format > QImage::Format_Indexed8);
Q_ASSERT(src->format > QImage::Format_Indexed8);
uint buf[BufferSize];
uint *buffer = buf;
const QPixelLayout *srcLayout = &qPixelLayouts[src->format];
const QPixelLayout *destLayout = &qPixelLayouts[dest->format];
const uchar *srcData = src->data;
uchar *destData = dest->data;
FetchAndConvertPixelsFunc fetch = srcLayout->fetchToARGB32PM;
ConvertAndStorePixelsFunc store = destLayout->storeFromARGB32PM;
@ -197,12 +198,17 @@ void convert_generic(QImageData *dest, const QImageData *src, Qt::ImageConversio
else
store = destLayout->storeFromRGB32;
}
auto convertSegment = [=](int yStart, int yEnd) {
uint buf[BufferSize];
uint *buffer = buf;
const uchar *srcData = src->data + src->bytes_per_line * yStart;
uchar *destData = dest->data + dest->bytes_per_line * yStart;
QDitherInfo dither;
QDitherInfo *ditherPtr = nullptr;
if ((flags & Qt::PreferDither) && (flags & Qt::Dither_Mask) != Qt::ThresholdDither)
ditherPtr = &dither;
for (int y = 0; y < src->height; ++y) {
for (int y = yStart; y < yEnd; ++y) {
dither.y = y;
int x = 0;
while (x < src->width) {
@ -212,30 +218,54 @@ void convert_generic(QImageData *dest, const QImageData *src, Qt::ImageConversio
buffer = reinterpret_cast<uint *>(destData) + x;
else
l = qMin(l, BufferSize);
const uint *ptr = fetch(buffer, srcData, x, l, nullptr, ditherPtr);
store(destData, ptr, x, l, nullptr, ditherPtr);
const uint *ptr = fetch(buffer, srcData, x, l, 0, ditherPtr);
store(destData, ptr, x, l, 0, ditherPtr);
x += l;
}
srcData += src->bytes_per_line;
destData += dest->bytes_per_line;
}
};
#if QT_CONFIG(thread)
// Segment count is derived from the source size: one per 1<<16 (65536) bytes,
// capped at the number of source rows.
int segments = src->nbytes / (1<<16);
segments = std::min(segments, src->height);
// Not enough data to split: convert everything on the calling thread.
if (segments <= 1)
return convertSegment(0, src->height);
// Completion counter: each pool task releases once, the caller acquires
// `segments` at the end and therefore returns only after all tasks finished.
QSemaphore semaphore;
int y = 0;
for (int i = 0; i < segments; ++i) {
// Even split of the rows that are left over the segments that are left;
// the final iteration (segments - i == 1) takes the remainder.
int yn = (src->height - y) / (segments - i);
// y and yn are copied into the task; semaphore and convertSegment are
// captured by reference and outlive the tasks because of the acquire() below.
QThreadPool::globalInstance()->start([&, y, yn]() {
convertSegment(y, y + yn);
semaphore.release(1);
});
y += yn;
}
semaphore.acquire(segments);
#else
// Build without thread support: one pass over all rows.
convertSegment(0, src->height);
#endif
}
void convert_generic_to_rgb64(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags)
{
Q_ASSERT(dest->format > QImage::Format_Indexed8);
Q_ASSERT(src->format > QImage::Format_Indexed8);
QRgba64 buf[BufferSize];
QRgba64 *buffer = buf;
const QPixelLayout *srcLayout = &qPixelLayouts[src->format];
const QPixelLayout *destLayout = &qPixelLayouts[dest->format];
const uchar *srcData = src->data;
uchar *destData = dest->data;
const FetchAndConvertPixelsFunc64 fetch = srcLayout->fetchToRGBA64PM;
const ConvertAndStorePixelsFunc64 store = qStoreFromRGBA64PM[dest->format];
for (int y = 0; y < src->height; ++y) {
auto convertSegment = [=](int yStart, int yEnd) {
QRgba64 buf[BufferSize];
QRgba64 *buffer = buf;
const uchar *srcData = src->data + yStart * src->bytes_per_line;
uchar *destData = dest->data + yStart * dest->bytes_per_line;
for (int y = yStart; y < yEnd; ++y) {
int x = 0;
while (x < src->width) {
int l = src->width - x;
@ -250,6 +280,28 @@ void convert_generic_to_rgb64(QImageData *dest, const QImageData *src, Qt::Image
srcData += src->bytes_per_line;
destData += dest->bytes_per_line;
}
};
#if QT_CONFIG(thread)
// One segment per 1<<16 (65536) bytes of source data, at most one per row.
int segments = src->nbytes / (1<<16);
segments = std::min(segments, src->height);
// Small image: run the whole conversion inline and return.
if (segments <= 1)
return convertSegment(0, src->height);
// Used as a completion barrier: one release per task, one acquire(segments) by the caller.
QSemaphore semaphore;
int y = 0;
for (int i = 0; i < segments; ++i) {
// Remaining rows / remaining segments, so the row ranges are contiguous and
// the last segment ends exactly at src->height.
int yn = (src->height - y) / (segments - i);
QThreadPool::globalInstance()->start([&, y, yn]() {
convertSegment(y, y + yn);
semaphore.release(1);
});
y += yn;
}
// Block until every segment has been converted; this also keeps the
// by-reference captures of the task lambda valid.
semaphore.acquire(segments);
#else
// No thread support configured: convert all rows in one call.
convertSegment(0, src->height);
#endif
}
bool convert_generic_inplace(QImageData *data, QImage::Format dst_format, Qt::ImageConversionFlags flags)
@ -270,11 +322,6 @@ bool convert_generic_inplace(QImageData *data, QImage::Format dst_format, Qt::Im
&& qt_highColorPrecision(dst_format, !srcLayout->hasAlphaChannel))
return false;
uint buf[BufferSize];
uint *buffer = buf;
uchar *srcData = data->data;
uchar *destData = data->data;
QImageData::ImageSizeParameters params = { data->bytes_per_line, data->nbytes };
if (data->depth != destDepth) {
params = QImageData::calculateImageParameters(data->width, data->height, destDepth);
@ -313,12 +360,17 @@ bool convert_generic_inplace(QImageData *data, QImage::Format dst_format, Qt::Im
else
store = destLayout->storeFromRGB32;
}
auto convertSegment = [=](int yStart, int yEnd) {
uint buf[BufferSize];
uint *buffer = buf;
uchar *srcData = data->data + data->bytes_per_line * yStart;
uchar *destData = srcData;
QDitherInfo dither;
QDitherInfo *ditherPtr = nullptr;
if ((flags & Qt::PreferDither) && (flags & Qt::Dither_Mask) != Qt::ThresholdDither)
ditherPtr = &dither;
for (int y = 0; y < data->height; ++y) {
for (int y = yStart; y < yEnd; ++y) {
dither.y = y;
int x = 0;
while (x < data->width) {
@ -335,6 +387,25 @@ bool convert_generic_inplace(QImageData *data, QImage::Format dst_format, Qt::Im
srcData += data->bytes_per_line;
destData += params.bytesPerLine;
}
};
#if QT_CONFIG(thread)
// One segment per 1<<16 (65536) bytes of the image buffer, capped at the row count.
int segments = data->nbytes / (1<<16);
segments = std::min(segments, data->height);
if (segments > 1) {
// NOTE(review): convertSegment starts destData at srcData (row yStart at the source
// stride) but advances it by params.bytesPerLine. When the two strides differ, rows
// written by a segment with yStart > 0 do not land where a single pass over all rows
// would place them, and all segments share one buffer -- verify before relying on the
// threaded path for depth-changing in-place conversions.
// Completion barrier: each task releases once, acquire(segments) waits for all.
QSemaphore semaphore;
int y = 0;
for (int i = 0; i < segments; ++i) {
// Rows left divided by segments left; the last segment takes the remainder.
int yn = (data->height - y) / (segments - i);
QThreadPool::globalInstance()->start([&, y, yn]() {
convertSegment(y, y + yn);
semaphore.release(1);
});
y += yn;
}
semaphore.acquire(segments);
} else
#endif
// Single pass over all rows: segments <= 1, or threads are not configured.
convertSegment(0, data->height);
if (params.totalSize != data->nbytes) {
Q_ASSERT(params.totalSize < data->nbytes);
void *newData = realloc(data->data, params.totalSize);

View File

@ -43,6 +43,11 @@
#include "qcolor.h"
#include "qrgba64_p.h"
#if QT_CONFIG(thread)
#include "qsemaphore.h"
#include "qthreadpool.h"
#endif
QT_BEGIN_NAMESPACE
/*
@ -239,6 +244,8 @@ static QImageScaleInfo* QImageScale::qimageCalcScaleInfo(const QImage &img,
isi = new QImageScaleInfo;
if (!isi)
return nullptr;
isi->sh = sh;
isi->sw = sw;
isi->xup_yup = (qAbs(dw) >= sw) + ((qAbs(dh) >= sh) << 1);
@ -303,7 +310,8 @@ static void qt_qimageScaleAARGBA_up_xy(QImageScaleInfo *isi, unsigned int *dest,
int *yapoints = isi->yapoints;
/* go through every scanline in the output buffer */
for (int y = 0; y < dh; y++) {
auto scaleSection = [&] (int yStart, int yEnd) {
for (int y = yStart; y < yEnd; ++y) {
/* calculate the source line we'll scan from */
const unsigned int *sptr = ypoints[y];
unsigned int *dptr = dest + (y * dow);
@ -330,6 +338,26 @@ static void qt_qimageScaleAARGBA_up_xy(QImageScaleInfo *isi, unsigned int *dest,
}
}
}
};
#if QT_CONFIG(thread)
// One segment per 1<<16 (65536) of isi->sh * isi->sw (presumably the source pixel
// count -- confirm), capped at the number of output rows. The product is formed
// as qsizetype before the division.
int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
segments = std::min(segments, dh);
if (segments > 1) {
// Tasks report completion through this semaphore; acquire(segments) below waits
// for all of them, so the references captured by the task lambda remain valid.
QSemaphore semaphore;
int y = 0;
for (int i = 0; i < segments; ++i) {
// Unassigned rows divided by unstarted segments; the last segment gets the rest.
int yn = (dh - y) / (segments - i);
QThreadPool::globalInstance()->start([&, y, yn]() {
scaleSection(y, y + yn);
semaphore.release(1);
});
y += yn;
}
semaphore.acquire(segments);
return;
}
#endif
// Not split (segments <= 1) or threads not configured: scale all rows here.
scaleSection(0, dh);
}
/* scale by area sampling - with alpha */
@ -411,7 +439,8 @@ static void qt_qimageScaleAARGBA_up_x_down_y(QImageScaleInfo *isi, unsigned int
int *yapoints = isi->yapoints;
/* go through every scanline in the output buffer */
for (int y = 0; y < dh; y++) {
auto scaleSection = [&] (int yStart, int yEnd) {
for (int y = yStart; y < yEnd; ++y) {
int Cy = yapoints[y] >> 16;
int yap = yapoints[y] & 0xffff;
@ -438,6 +467,26 @@ static void qt_qimageScaleAARGBA_up_x_down_y(QImageScaleInfo *isi, unsigned int
*dptr++ = qRgba(r >> 14, g >> 14, b >> 14, a >> 14);
}
}
};
#if QT_CONFIG(thread)
// Segment count: isi->sh * isi->sw divided by 1<<16 (65536), limited to dh so that
// every segment covers at least one output row.
int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
segments = std::min(segments, dh);
if (segments > 1) {
// Acts as a join point: one release per finished task, one acquire(segments) below.
QSemaphore semaphore;
int y = 0;
for (int i = 0; i < segments; ++i) {
// Distribute the remaining rows evenly over the remaining segments.
int yn = (dh - y) / (segments - i);
// y/yn are captured by value; scaleSection and semaphore by reference.
QThreadPool::globalInstance()->start([&, y, yn]() {
scaleSection(y, y + yn);
semaphore.release(1);
});
y += yn;
}
// Wait for all row ranges before returning to the caller.
semaphore.acquire(segments);
return;
}
#endif
// Sequential path for small inputs or builds without thread support.
scaleSection(0, dh);
}
static void qt_qimageScaleAARGBA_down_x_up_y(QImageScaleInfo *isi, unsigned int *dest,
@ -449,7 +498,8 @@ static void qt_qimageScaleAARGBA_down_x_up_y(QImageScaleInfo *isi, unsigned int
int *yapoints = isi->yapoints;
/* go through every scanline in the output buffer */
for (int y = 0; y < dh; y++) {
auto scaleSection = [&] (int yStart, int yEnd) {
for (int y = yStart; y < yEnd; ++y) {
unsigned int *dptr = dest + (y * dow);
for (int x = 0; x < dw; x++) {
int Cx = xapoints[x] >> 16;
@ -477,6 +527,26 @@ static void qt_qimageScaleAARGBA_down_x_up_y(QImageScaleInfo *isi, unsigned int
dptr++;
}
}
};
#if QT_CONFIG(thread)
// Work estimate is isi->sh * isi->sw; one segment is planned per 1<<16 (65536)
// of it, and the count never exceeds the dh output rows.
int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
segments = std::min(segments, dh);
if (segments > 1) {
// Released once by each task and acquired `segments` times by the caller,
// which makes the caller wait until all tasks are done.
QSemaphore semaphore;
int y = 0;
for (int i = 0; i < segments; ++i) {
// (rows left) / (segments left): ranges are contiguous and end at dh.
int yn = (dh - y) / (segments - i);
QThreadPool::globalInstance()->start([&, y, yn]() {
scaleSection(y, y + yn);
semaphore.release(1);
});
y += yn;
}
semaphore.acquire(segments);
return;
}
#endif
// Reached when the work was not split or threads are not configured.
scaleSection(0, dh);
}
static void qt_qimageScaleAARGBA_down_xy(QImageScaleInfo *isi, unsigned int *dest,
@ -487,7 +557,8 @@ static void qt_qimageScaleAARGBA_down_xy(QImageScaleInfo *isi, unsigned int *des
int *xapoints = isi->xapoints;
int *yapoints = isi->yapoints;
for (int y = 0; y < dh; y++) {
auto scaleSection = [&] (int yStart, int yEnd) {
for (int y = yStart; y < yEnd; ++y) {
int Cy = (yapoints[y]) >> 16;
int yap = (yapoints[y]) & 0xffff;
@ -526,6 +597,26 @@ static void qt_qimageScaleAARGBA_down_xy(QImageScaleInfo *isi, unsigned int *des
dptr++;
}
}
};
#if QT_CONFIG(thread)
// Plan one segment for every 1<<16 (65536) units of isi->sh * isi->sw, but no
// more than dh segments.
int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
segments = std::min(segments, dh);
if (segments > 1) {
// Completion barrier shared by all tasks started in the loop below.
QSemaphore semaphore;
int y = 0;
for (int i = 0; i < segments; ++i) {
// Size of this segment in rows; the final segment absorbs any remainder.
int yn = (dh - y) / (segments - i);
QThreadPool::globalInstance()->start([&, y, yn]() {
scaleSection(y, y + yn);
semaphore.release(1);
});
y += yn;
}
// Returns only after every task has released, so the by-reference captures
// above do not outlive this scope.
semaphore.acquire(segments);
return;
}
#endif
// Unsplit path: process rows [0, dh) on the calling thread.
scaleSection(0, dh);
}
#if QT_CONFIG(raster_64bit)
@ -546,7 +637,8 @@ static void qt_qimageScaleRgba64_up_xy(QImageScaleInfo *isi, QRgba64 *dest,
int *xapoints = isi->xapoints;
int *yapoints = isi->yapoints;
for (int y = 0; y < dh; y++) {
auto scaleSection = [&] (int yStart, int yEnd) {
for (int y = yStart; y < yEnd; ++y) {
const QRgba64 *sptr = ypoints[y];
QRgba64 *dptr = dest + (y * dow);
const int yap = yapoints[y];
@ -572,6 +664,26 @@ static void qt_qimageScaleRgba64_up_xy(QImageScaleInfo *isi, QRgba64 *dest,
}
}
}
};
#if QT_CONFIG(thread)
// Same threshold as the 32-bit scalers: isi->sh * isi->sw divided by 1<<16
// (65536), i.e. counted in units of sh * sw rather than in bytes; capped at dh.
int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
segments = std::min(segments, dh);
if (segments > 1) {
// One release per task; the acquire(segments) below is the join.
QSemaphore semaphore;
int y = 0;
for (int i = 0; i < segments; ++i) {
// Rows remaining divided by segments remaining.
int yn = (dh - y) / (segments - i);
QThreadPool::globalInstance()->start([&, y, yn]() {
scaleSection(y, y + yn);
semaphore.release(1);
});
y += yn;
}
semaphore.acquire(segments);
return;
}
#endif
// Too little work to split, or no thread support: run all rows inline.
scaleSection(0, dh);
}
void qt_qimageScaleRgba64(QImageScaleInfo *isi, QRgba64 *dest,
@ -616,7 +728,8 @@ static void qt_qimageScaleRgba64_up_x_down_y(QImageScaleInfo *isi, QRgba64 *dest
int *xapoints = isi->xapoints;
int *yapoints = isi->yapoints;
for (int y = 0; y < dh; y++) {
auto scaleSection = [&] (int yStart, int yEnd) {
for (int y = yStart; y < yEnd; ++y) {
int Cy = (yapoints[y]) >> 16;
int yap = (yapoints[y]) & 0xffff;
@ -643,6 +756,26 @@ static void qt_qimageScaleRgba64_up_x_down_y(QImageScaleInfo *isi, QRgba64 *dest
*dptr++ = qRgba64(r >> 14, g >> 14, b >> 14, a >> 14);
}
}
};
#if QT_CONFIG(thread)
// Number of parallel row ranges: isi->sh * isi->sw / 65536, at most dh.
int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
segments = std::min(segments, dh);
if (segments > 1) {
// The caller blocks on this semaphore until each started task has released it.
QSemaphore semaphore;
int y = 0;
for (int i = 0; i < segments; ++i) {
// Even share of what is left; with one segment left this is all remaining rows.
int yn = (dh - y) / (segments - i);
// Start row and row count are copied so each task keeps its own range.
QThreadPool::globalInstance()->start([&, y, yn]() {
scaleSection(y, y + yn);
semaphore.release(1);
});
y += yn;
}
semaphore.acquire(segments);
return;
}
#endif
// Sequential fallback over the full row range.
scaleSection(0, dh);
}
static void qt_qimageScaleRgba64_down_x_up_y(QImageScaleInfo *isi, QRgba64 *dest,
@ -653,7 +786,8 @@ static void qt_qimageScaleRgba64_down_x_up_y(QImageScaleInfo *isi, QRgba64 *dest
int *xapoints = isi->xapoints;
int *yapoints = isi->yapoints;
for (int y = 0; y < dh; y++) {
auto scaleSection = [&] (int yStart, int yEnd) {
for (int y = yStart; y < yEnd; ++y) {
QRgba64 *dptr = dest + (y * dow);
for (int x = 0; x < dw; x++) {
int Cx = xapoints[x] >> 16;
@ -681,6 +815,26 @@ static void qt_qimageScaleRgba64_down_x_up_y(QImageScaleInfo *isi, QRgba64 *dest
dptr++;
}
}
};
#if QT_CONFIG(thread)
// One segment per 1<<16 (65536) of isi->sh * isi->sw; the widening to qsizetype
// happens before the multiplication. Never more segments than output rows.
int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
segments = std::min(segments, dh);
if (segments > 1) {
// Join point for the tasks queued on the global thread pool below.
QSemaphore semaphore;
int y = 0;
for (int i = 0; i < segments; ++i) {
// Rows for this segment: remaining rows split over remaining segments.
int yn = (dh - y) / (segments - i);
QThreadPool::globalInstance()->start([&, y, yn]() {
scaleSection(y, y + yn);
semaphore.release(1);
});
y += yn;
}
semaphore.acquire(segments);
return;
}
#endif
// Runs when segments <= 1 or when threads are not configured.
scaleSection(0, dh);
}
static void qt_qimageScaleRgba64_down_xy(QImageScaleInfo *isi, QRgba64 *dest,
@ -691,7 +845,8 @@ static void qt_qimageScaleRgba64_down_xy(QImageScaleInfo *isi, QRgba64 *dest,
int *xapoints = isi->xapoints;
int *yapoints = isi->yapoints;
for (int y = 0; y < dh; y++) {
auto scaleSection = [&] (int yStart, int yEnd) {
for (int y = yStart; y < yEnd; ++y) {
int Cy = (yapoints[y]) >> 16;
int yap = (yapoints[y]) & 0xffff;
@ -728,6 +883,26 @@ static void qt_qimageScaleRgba64_down_xy(QImageScaleInfo *isi, QRgba64 *dest,
dptr++;
}
}
};
#if QT_CONFIG(thread)
// Split heuristic: isi->sh * isi->sw / 65536 segments, clamped to dh.
int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
segments = std::min(segments, dh);
if (segments > 1) {
// Each task does release(1) after scaling its rows; acquire(segments) waits for all.
QSemaphore semaphore;
int y = 0;
for (int i = 0; i < segments; ++i) {
// The last segment (segments - i == 1) receives whatever rows remain.
int yn = (dh - y) / (segments - i);
QThreadPool::globalInstance()->start([&, y, yn]() {
scaleSection(y, y + yn);
semaphore.release(1);
});
y += yn;
}
// Keeps this scope (and the objects the tasks reference) alive until they finish.
semaphore.acquire(segments);
return;
}
#endif
// Single-threaded path over rows [0, dh).
scaleSection(0, dh);
}
#endif
@ -817,7 +992,8 @@ static void qt_qimageScaleAARGB_up_x_down_y(QImageScaleInfo *isi, unsigned int *
int *yapoints = isi->yapoints;
/* go through every scanline in the output buffer */
for (int y = 0; y < dh; y++) {
auto scaleSection = [&] (int yStart, int yEnd) {
for (int y = yStart; y < yEnd; ++y) {
int Cy = yapoints[y] >> 16;
int yap = yapoints[y] & 0xffff;
@ -842,6 +1018,26 @@ static void qt_qimageScaleAARGB_up_x_down_y(QImageScaleInfo *isi, unsigned int *
*dptr++ = qRgb(r >> 14, g >> 14, b >> 14);
}
}
};
#if QT_CONFIG(thread)
// One segment for each 1<<16 (65536) of isi->sh * isi->sw, limited to dh rows.
int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
segments = std::min(segments, dh);
if (segments > 1) {
// Counts finished tasks; the caller waits for `segments` releases below.
QSemaphore semaphore;
int y = 0;
for (int i = 0; i < segments; ++i) {
// Divide the rows that are left by the segments that are left.
int yn = (dh - y) / (segments - i);
QThreadPool::globalInstance()->start([&, y, yn]() {
scaleSection(y, y + yn);
semaphore.release(1);
});
y += yn;
}
semaphore.acquire(segments);
return;
}
#endif
// No split was made (or no thread support): scale every row on this thread.
scaleSection(0, dh);
}
static void qt_qimageScaleAARGB_down_x_up_y(QImageScaleInfo *isi, unsigned int *dest,
@ -853,7 +1049,8 @@ static void qt_qimageScaleAARGB_down_x_up_y(QImageScaleInfo *isi, unsigned int *
int *yapoints = isi->yapoints;
/* go through every scanline in the output buffer */
for (int y = 0; y < dh; y++) {
auto scaleSection = [&] (int yStart, int yEnd) {
for (int y = yStart; y < yEnd; ++y) {
unsigned int *dptr = dest + (y * dow);
for (int x = 0; x < dw; x++) {
int Cx = xapoints[x] >> 16;
@ -878,6 +1075,26 @@ static void qt_qimageScaleAARGB_down_x_up_y(QImageScaleInfo *isi, unsigned int *
*dptr++ = qRgb(r >> 14, g >> 14, b >> 14);
}
}
};
#if QT_CONFIG(thread)
// Segment count from isi->sh * isi->sw in units of 1<<16 (65536); at most dh.
int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
segments = std::min(segments, dh);
if (segments > 1) {
// Barrier: release(1) in each task, acquire(segments) in the caller.
QSemaphore semaphore;
int y = 0;
for (int i = 0; i < segments; ++i) {
// Contiguous row ranges; the remainder of the integer division ends up
// in the later segments because the quotient is recomputed every iteration.
int yn = (dh - y) / (segments - i);
QThreadPool::globalInstance()->start([&, y, yn]() {
scaleSection(y, y + yn);
semaphore.release(1);
});
y += yn;
}
semaphore.acquire(segments);
return;
}
#endif
// Sequential path: segments <= 1 or threads not configured.
scaleSection(0, dh);
}
static void qt_qimageScaleAARGB_down_xy(QImageScaleInfo *isi, unsigned int *dest,
@ -888,7 +1105,8 @@ static void qt_qimageScaleAARGB_down_xy(QImageScaleInfo *isi, unsigned int *dest
int *xapoints = isi->xapoints;
int *yapoints = isi->yapoints;
for (int y = 0; y < dh; y++) {
auto scaleSection = [&] (int yStart, int yEnd) {
for (int y = yStart; y < yEnd; ++y) {
int Cy = yapoints[y] >> 16;
int yap = yapoints[y] & 0xffff;
@ -925,6 +1143,26 @@ static void qt_qimageScaleAARGB_down_xy(QImageScaleInfo *isi, unsigned int *dest
dptr++;
}
}
};
#if QT_CONFIG(thread)
// isi->sh * isi->sw / 65536 parallel segments, but not more than dh.
int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
segments = std::min(segments, dh);
if (segments > 1) {
// Waited on below so that this function returns only after all tasks ran.
QSemaphore semaphore;
int y = 0;
for (int i = 0; i < segments; ++i) {
// Share of the remaining rows for this segment.
int yn = (dh - y) / (segments - i);
// The task owns copies of y and yn; everything else is referenced.
QThreadPool::globalInstance()->start([&, y, yn]() {
scaleSection(y, y + yn);
semaphore.release(1);
});
y += yn;
}
semaphore.acquire(segments);
return;
}
#endif
// Whole image in one call when it was not split.
scaleSection(0, dh);
}
QImage qSmoothScaleImage(const QImage &src, int dw, int dh)

View File

@ -41,6 +41,11 @@
#include "qimage.h"
#include <private/qsimd_p.h>
#if QT_CONFIG(thread)
#include "qsemaphore.h"
#include "qthreadpool.h"
#endif
#if defined(__ARM_NEON__)
QT_BEGIN_NAMESPACE
@ -76,7 +81,8 @@ void qt_qimageScaleAARGBA_up_x_down_y_neon(QImageScaleInfo *isi, unsigned int *d
int *yapoints = isi->yapoints;
/* go through every scanline in the output buffer */
for (int y = 0; y < dh; y++) {
auto scaleSection = [&] (int yStart, int yEnd) {
for (int y = yStart; y < yEnd; ++y) {
int Cy = yapoints[y] >> 16;
int yap = yapoints[y] & 0xffff;
@ -103,6 +109,26 @@ void qt_qimageScaleAARGBA_up_x_down_y_neon(QImageScaleInfo *isi, unsigned int *d
dptr++;
}
}
};
#if QT_CONFIG(thread)
// One segment per 1<<16 (65536) of isi->sh * isi->sw (presumably the source
// dimensions -- confirm against QImageScaleInfo), capped at dh output rows.
int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
segments = std::min(segments, dh);
if (segments > 1) {
// Each pool task releases once; the acquire(segments) below joins them all.
QSemaphore semaphore;
int y = 0;
for (int i = 0; i < segments; ++i) {
// Remaining rows over remaining segments; the last one takes the rest.
int yn = (dh - y) / (segments - i);
QThreadPool::globalInstance()->start([&, y, yn]() {
scaleSection(y, y + yn);
semaphore.release(1);
});
y += yn;
}
semaphore.acquire(segments);
return;
}
#endif
// Not split, or threads not configured: process all rows here.
scaleSection(0, dh);
}
template<bool RGB>
@ -115,7 +141,8 @@ void qt_qimageScaleAARGBA_down_x_up_y_neon(QImageScaleInfo *isi, unsigned int *d
int *yapoints = isi->yapoints;
/* go through every scanline in the output buffer */
for (int y = 0; y < dh; y++) {
auto scaleSection = [&] (int yStart, int yEnd) {
for (int y = yStart; y < yEnd; ++y) {
unsigned int *dptr = dest + (y * dow);
for (int x = 0; x < dw; x++) {
int Cx = xapoints[x] >> 16;
@ -142,6 +169,26 @@ void qt_qimageScaleAARGBA_down_x_up_y_neon(QImageScaleInfo *isi, unsigned int *d
dptr++;
}
}
};
#if QT_CONFIG(thread)
// Segment count: isi->sh * isi->sw divided by 1<<16 (65536), clamped to dh.
int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
segments = std::min(segments, dh);
if (segments > 1) {
// Completion counter for the tasks queued below.
QSemaphore semaphore;
int y = 0;
for (int i = 0; i < segments; ++i) {
// Rows assigned to this segment.
int yn = (dh - y) / (segments - i);
QThreadPool::globalInstance()->start([&, y, yn]() {
scaleSection(y, y + yn);
semaphore.release(1);
});
y += yn;
}
// Wait for all segments; only then is it safe to leave the scope whose
// locals the tasks reference.
semaphore.acquire(segments);
return;
}
#endif
// Sequential fallback over rows [0, dh).
scaleSection(0, dh);
}
template<bool RGB>
@ -153,7 +200,8 @@ void qt_qimageScaleAARGBA_down_xy_neon(QImageScaleInfo *isi, unsigned int *dest,
int *xapoints = isi->xapoints;
int *yapoints = isi->yapoints;
for (int y = 0; y < dh; y++) {
auto scaleSection = [&] (int yStart, int yEnd) {
for (int y = yStart; y < yEnd; ++y) {
int Cy = yapoints[y] >> 16;
int yap = yapoints[y] & 0xffff;
@ -190,6 +238,26 @@ void qt_qimageScaleAARGBA_down_xy_neon(QImageScaleInfo *isi, unsigned int *dest,
dptr++;
}
}
};
#if QT_CONFIG(thread)
// Plan isi->sh * isi->sw / 65536 segments, never more than dh.
int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
segments = std::min(segments, dh);
if (segments > 1) {
// Released once per finished task, acquired `segments` times by the caller.
QSemaphore semaphore;
int y = 0;
for (int i = 0; i < segments; ++i) {
// Even split of the rows still to be scaled.
int yn = (dh - y) / (segments - i);
QThreadPool::globalInstance()->start([&, y, yn]() {
scaleSection(y, y + yn);
semaphore.release(1);
});
y += yn;
}
semaphore.acquire(segments);
return;
}
#endif
// Runs when there is a single segment or no thread support.
scaleSection(0, dh);
}
template void qt_qimageScaleAARGBA_up_x_down_y_neon<false>(QImageScaleInfo *isi, unsigned int *dest,

View File

@ -66,6 +66,8 @@ namespace QImageScale {
int *xapoints{nullptr};
int *yapoints{nullptr};
int xup_yup{0};
int sh = 0;
int sw = 0;
};
}

View File

@ -42,6 +42,11 @@
#include <private/qdrawhelper_x86_p.h>
#include <private/qsimd_p.h>
#if QT_CONFIG(thread)
#include "qsemaphore.h"
#include "qthreadpool.h"
#endif
#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
QT_BEGIN_NAMESPACE
@ -70,16 +75,17 @@ void qt_qimageScaleAARGBA_up_x_down_y_sse4(QImageScaleInfo *isi, unsigned int *d
int dw, int dh, int dow, int sow)
{
const unsigned int **ypoints = isi->ypoints;
int *xpoints = isi->xpoints;
int *xapoints = isi->xapoints;
int *yapoints = isi->yapoints;
const int *xpoints = isi->xpoints;
const int *xapoints = isi->xapoints;
const int *yapoints = isi->yapoints;
const __m128i v256 = _mm_set1_epi32(256);
/* go through every scanline in the output buffer */
for (int y = 0; y < dh; y++) {
int Cy = yapoints[y] >> 16;
int yap = yapoints[y] & 0xffff;
auto scaleSection = [&] (int yStart, int yEnd) {
for (int y = yStart; y < yEnd; ++y) {
const int Cy = yapoints[y] >> 16;
const int yap = yapoints[y] & 0xffff;
const __m128i vCy = _mm_set1_epi32(Cy);
const __m128i vyap = _mm_set1_epi32(yap);
@ -88,7 +94,7 @@ void qt_qimageScaleAARGBA_up_x_down_y_sse4(QImageScaleInfo *isi, unsigned int *d
const unsigned int *sptr = ypoints[y] + xpoints[x];
__m128i vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, vyap, vCy);
int xap = xapoints[x];
const int xap = xapoints[x];
if (xap > 0) {
const __m128i vxap = _mm_set1_epi32(xap);
const __m128i vinvxap = _mm_sub_epi32(v256, vxap);
@ -100,14 +106,34 @@ void qt_qimageScaleAARGBA_up_x_down_y_sse4(QImageScaleInfo *isi, unsigned int *d
vx = _mm_srli_epi32(vx, 8);
}
vx = _mm_srli_epi32(vx, 14);
vx = _mm_packus_epi32(vx, _mm_setzero_si128());
vx = _mm_packus_epi16(vx, _mm_setzero_si128());
vx = _mm_packus_epi32(vx, vx);
vx = _mm_packus_epi16(vx, vx);
*dptr = _mm_cvtsi128_si32(vx);
if (RGB)
*dptr |= 0xff000000;
dptr++;
}
}
};
#if QT_CONFIG(thread)
// One segment per 1<<16 (65536) of isi->sh * isi->sw (presumably the source
// dimensions -- confirm against QImageScaleInfo), capped at dh output rows.
int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
segments = std::min(segments, dh);
if (segments > 1) {
// Join point: every task releases once, the caller acquires `segments`.
QSemaphore semaphore;
int y = 0;
for (int i = 0; i < segments; ++i) {
// Rows left divided by segments left; the final segment ends at dh.
int yn = (dh - y) / (segments - i);
// scaleSection is referenced, not copied; the acquire() below keeps it alive.
QThreadPool::globalInstance()->start([&, y, yn]() {
scaleSection(y, y + yn);
semaphore.release(1);
});
y += yn;
}
semaphore.acquire(segments);
return;
}
#endif
// Unsplit path, also used when threads are not configured.
scaleSection(0, dh);
}
template<bool RGB>
@ -122,7 +148,8 @@ void qt_qimageScaleAARGBA_down_x_up_y_sse4(QImageScaleInfo *isi, unsigned int *d
const __m128i v256 = _mm_set1_epi32(256);
/* go through every scanline in the output buffer */
for (int y = 0; y < dh; y++) {
auto scaleSection = [&] (int yStart, int yEnd) {
for (int y = yStart; y < yEnd; ++y) {
unsigned int *dptr = dest + (y * dow);
for (int x = 0; x < dw; x++) {
int Cx = xapoints[x] >> 16;
@ -145,14 +172,34 @@ void qt_qimageScaleAARGBA_down_x_up_y_sse4(QImageScaleInfo *isi, unsigned int *d
vx = _mm_srli_epi32(vx, 8);
}
vx = _mm_srli_epi32(vx, 14);
vx = _mm_packus_epi32(vx, _mm_setzero_si128());
vx = _mm_packus_epi16(vx, _mm_setzero_si128());
vx = _mm_packus_epi32(vx, vx);
vx = _mm_packus_epi16(vx, vx);
*dptr = _mm_cvtsi128_si32(vx);
if (RGB)
*dptr |= 0xff000000;
dptr++;
}
}
};
#if QT_CONFIG(thread)
// Segment count from isi->sh * isi->sw in units of 1<<16 (65536); at most dh.
int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
segments = std::min(segments, dh);
if (segments > 1) {
// Used to wait for the tasks started on the global thread pool.
QSemaphore semaphore;
int y = 0;
for (int i = 0; i < segments; ++i) {
// Number of rows handed to this task.
int yn = (dh - y) / (segments - i);
QThreadPool::globalInstance()->start([&, y, yn]() {
scaleSection(y, y + yn);
semaphore.release(1);
});
y += yn;
}
// Blocks until each of the `segments` tasks has called release(1).
semaphore.acquire(segments);
return;
}
#endif
// Sequential path: all rows on the calling thread.
scaleSection(0, dh);
}
template<bool RGB>
@ -164,7 +211,8 @@ void qt_qimageScaleAARGBA_down_xy_sse4(QImageScaleInfo *isi, unsigned int *dest,
int *xapoints = isi->xapoints;
int *yapoints = isi->yapoints;
for (int y = 0; y < dh; y++) {
auto scaleSection = [&] (int yStart, int yEnd) {
for (int y = yStart; y < yEnd; ++y) {
int Cy = yapoints[y] >> 16;
int yap = yapoints[y] & 0xffff;
const __m128i vCy = _mm_set1_epi32(Cy);
@ -200,6 +248,26 @@ void qt_qimageScaleAARGBA_down_xy_sse4(QImageScaleInfo *isi, unsigned int *dest,
dptr++;
}
}
};
#if QT_CONFIG(thread)
// isi->sh * isi->sw / 65536 segments, limited to the dh output rows.
int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
segments = std::min(segments, dh);
if (segments > 1) {
// Barrier between the pool tasks and the caller.
QSemaphore semaphore;
int y = 0;
for (int i = 0; i < segments; ++i) {
// Recomputed each iteration so that rounding leftovers are spread over
// the later segments and the ranges cover exactly [0, dh).
int yn = (dh - y) / (segments - i);
QThreadPool::globalInstance()->start([&, y, yn]() {
scaleSection(y, y + yn);
semaphore.release(1);
});
y += yn;
}
semaphore.acquire(segments);
return;
}
#endif
// Fallback when the work is not split or threads are not configured.
scaleSection(0, dh);
}
template void qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,