Improve accuracy in fast path bilinear sampling
Adds rounding before using the optimized low-accuracy interpolation; this reduces the magnitude of error in the scaled result from ~4 bits to just 2 bits. Change-Id: Ie4e618bf5b1f4a74367aa419ebbd534cc6a846b3 Reviewed-by: Eirik Aavitsland <eirik.aavitsland@qt.io>
This commit is contained in:
parent
529b1c9e2a
commit
b91f86a212
@ -1827,9 +1827,9 @@ static const QRgba64 *QT_FASTCALL fetchTransformed64(QRgba64 *buffer, const Oper
|
||||
|
||||
/** \internal
|
||||
interpolate 4 argb pixels with the distx and disty factor.
|
||||
distx and disty bust be between 0 and 16
|
||||
distx and disty must be between 0 and 16
|
||||
*/
|
||||
static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, int distx, int disty)
|
||||
static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
|
||||
{
|
||||
uint distxy = distx * disty;
|
||||
//idistx * disty = (16-distx) * disty = 16*disty - distxy
|
||||
@ -2176,7 +2176,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
|
||||
const uint *s1 = (const uint *)data->texture.scanLine(y1);
|
||||
const uint *s2 = (const uint *)data->texture.scanLine(y2);
|
||||
int disty = (fy & 0x0000ffff) >> 12;
|
||||
int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
|
||||
|
||||
if (blendType != BlendTransformedBilinearTiled) {
|
||||
#define BILINEAR_DOWNSCALE_BOUNDS_PROLOG \
|
||||
@ -2190,7 +2190,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
|
||||
uint tr = s1[x2]; \
|
||||
uint bl = s2[x1]; \
|
||||
uint br = s2[x2]; \
|
||||
int distx = (fx & 0x0000ffff) >> 12; \
|
||||
int distx = ((fx & 0x0000ffff) + 0x0800) >> 12; \
|
||||
*b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); \
|
||||
fx += fdx; \
|
||||
++b; \
|
||||
@ -2209,6 +2209,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
|
||||
const __m128i v_256 = _mm_set1_epi16(256);
|
||||
const __m128i v_disty = _mm_set1_epi16(disty);
|
||||
const __m128i v_fdx = _mm_set1_epi32(fdx*4);
|
||||
const __m128i v_fx_r = _mm_set1_epi32(0x8);
|
||||
__m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
|
||||
|
||||
while (b < boundedEnd) {
|
||||
@ -2222,7 +2223,8 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
|
||||
const __m128i bl = _mm_setr_epi32(s2[offset0], s2[offset1], s2[offset2], s2[offset3]);
|
||||
const __m128i br = _mm_setr_epi32(s2[offset0 + 1], s2[offset1 + 1], s2[offset2 + 1], s2[offset3 + 1]);
|
||||
|
||||
__m128i v_distx = _mm_srli_epi16(v_fx, 12);
|
||||
__m128i v_distx = _mm_srli_epi16(v_fx, 8);
|
||||
v_distx = _mm_srli_epi16(_mm_add_epi32(v_distx, v_fx_r), 4);
|
||||
v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
|
||||
v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
|
||||
|
||||
@ -2252,6 +2254,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
|
||||
}
|
||||
|
||||
const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
|
||||
const int32x4_t v_fx_r = vdupq_n_s32(0x0800);
|
||||
|
||||
while (b < boundedEnd) {
|
||||
|
||||
@ -2260,7 +2263,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
|
||||
Vect_buffer v_fx_shifted;
|
||||
v_fx_shifted.vect = vshrq_n_s32(v_fx.vect, 16);
|
||||
|
||||
int32x4_t v_distx = vshrq_n_s32(vandq_s32(v_fx.vect, v_ffff_mask), 12);
|
||||
int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx.vect, v_ffff_mask), v_fx_r), 12);
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
int x1 = v_fx_shifted.i[i];
|
||||
@ -2290,7 +2293,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
|
||||
uint tr = s1[x2];
|
||||
uint bl = s2[x1];
|
||||
uint br = s2[x2];
|
||||
int distx = (fx & 0x0000ffff) >> 12;
|
||||
int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
|
||||
*b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
|
||||
fx += fdx;
|
||||
++b;
|
||||
@ -2362,6 +2365,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
|
||||
const __m128i v_256 = _mm_set1_epi16(256);
|
||||
const __m128i v_fdx = _mm_set1_epi32(fdx*4);
|
||||
const __m128i v_fdy = _mm_set1_epi32(fdy*4);
|
||||
const __m128i v_fxy_r = _mm_set1_epi32(0x8);
|
||||
__m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
|
||||
__m128i v_fy = _mm_setr_epi32(fy, fy + fdy, fy + fdy + fdy, fy + fdy + fdy + fdy);
|
||||
|
||||
@ -2396,6 +2400,8 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
|
||||
|
||||
__m128i v_distx = _mm_srli_epi16(v_fx, 12);
|
||||
__m128i v_disty = _mm_srli_epi16(v_fy, 12);
|
||||
v_distx = _mm_srli_epi16(_mm_add_epi32(v_fx, v_fxy_r), 4);
|
||||
v_disty = _mm_srli_epi16(_mm_add_epi32(v_fy, v_fxy_r), 4);
|
||||
v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
|
||||
v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
|
||||
v_disty = _mm_shufflehi_epi16(v_disty, _MM_SHUFFLE(2,2,0,0));
|
||||
@ -2434,8 +2440,8 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
|
||||
int disty = (fy & 0x0000ffff) >> 8;
|
||||
*b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
|
||||
#else
|
||||
int distx = (fx & 0x0000ffff) >> 12;
|
||||
int disty = (fy & 0x0000ffff) >> 12;
|
||||
int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
|
||||
int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
|
||||
*b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
|
||||
#endif
|
||||
|
||||
@ -2664,13 +2670,13 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
|
||||
fracX += fdx;
|
||||
}
|
||||
} else { //scale down
|
||||
int disty = (fy & 0x0000ffff) >> 12;
|
||||
int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
uint tl = buf1[i * 2 + 0];
|
||||
uint tr = buf1[i * 2 + 1];
|
||||
uint bl = buf2[i * 2 + 0];
|
||||
uint br = buf2[i * 2 + 1];
|
||||
int distx = (fracX & 0x0000ffff) >> 12;
|
||||
int distx = ((fracX & 0x0000ffff) + 0x0800) >> 12;
|
||||
b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
|
||||
fracX += fdx;
|
||||
}
|
||||
@ -2736,8 +2742,8 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
|
||||
uint bl = buf2[i * 2 + 0];
|
||||
uint br = buf2[i * 2 + 1];
|
||||
|
||||
int distx = (fracX & 0x0000ffff) >> 12;
|
||||
int disty = (fracY & 0x0000ffff) >> 12;
|
||||
int distx = ((fracX & 0x0000ffff) + 0x0800) >> 12;
|
||||
int disty = ((fracY & 0x0000ffff) + 0x0800) >> 12;
|
||||
|
||||
b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
|
||||
fracX += fdx;
|
||||
|
@ -106,6 +106,7 @@ private slots:
|
||||
void smoothScale();
|
||||
void smoothScale2_data();
|
||||
void smoothScale2();
|
||||
void smoothScale3_data();
|
||||
void smoothScale3();
|
||||
void smoothScale4();
|
||||
|
||||
@ -1715,9 +1716,12 @@ static inline int rand8()
|
||||
return int(256. * (qrand() / (RAND_MAX + 1.0)));
|
||||
}
|
||||
|
||||
// compares img.scale against the bilinear filtering used by QPainter
|
||||
void tst_QImage::smoothScale3()
|
||||
void tst_QImage::smoothScale3_data()
|
||||
{
|
||||
QTest::addColumn<QImage>("img");
|
||||
QTest::addColumn<qreal>("scale_x");
|
||||
QTest::addColumn<qreal>("scale_y");
|
||||
|
||||
QImage img(128, 128, QImage::Format_RGB32);
|
||||
for (int y = 0; y < img.height(); ++y) {
|
||||
for (int x = 0; x < img.width(); ++x) {
|
||||
@ -1730,36 +1734,49 @@ void tst_QImage::smoothScale3()
|
||||
}
|
||||
}
|
||||
|
||||
qreal scales[2] = { .5, 2 };
|
||||
QTest::newRow("(0.5, 0.5)") << img << qreal(0.5) << qreal(0.5);
|
||||
QTest::newRow("(0.5, 1.0)") << img << qreal(0.5) << qreal(1.0);
|
||||
QTest::newRow("(1.0, 0.5)") << img << qreal(1.0) << qreal(0.5);
|
||||
QTest::newRow("(0.5, 2.0)") << img << qreal(0.5) << qreal(2.0);
|
||||
QTest::newRow("(1.0, 2.0)") << img << qreal(1.0) << qreal(2.0);
|
||||
QTest::newRow("(2.0, 0.5)") << img << qreal(2.0) << qreal(0.5);
|
||||
QTest::newRow("(2.0, 1.0)") << img << qreal(2.0) << qreal(1.0);
|
||||
QTest::newRow("(2.0, 2.0)") << img << qreal(2) << qreal(2);
|
||||
}
|
||||
// compares img.scale against the bilinear filtering used by QPainter
|
||||
void tst_QImage::smoothScale3()
|
||||
{
|
||||
QFETCH(QImage, img);
|
||||
QFETCH(qreal, scale_x);
|
||||
QFETCH(qreal, scale_y);
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
QImage a = img.scaled(img.size() * scales[i], Qt::IgnoreAspectRatio, Qt::SmoothTransformation);
|
||||
QImage b(a.size(), a.format());
|
||||
b.fill(0x0);
|
||||
QImage a = img.scaled(img.width() * scale_x, img.height() * scale_y, Qt::IgnoreAspectRatio, Qt::SmoothTransformation);
|
||||
QImage b(a.size(), a.format());
|
||||
b.fill(0x0);
|
||||
|
||||
QPainter p(&b);
|
||||
p.setRenderHint(QPainter::SmoothPixmapTransform);
|
||||
p.scale(scales[i], scales[i]);
|
||||
p.drawImage(0, 0, img);
|
||||
p.end();
|
||||
int err = 0;
|
||||
QPainter p(&b);
|
||||
p.setRenderHint(QPainter::SmoothPixmapTransform);
|
||||
p.scale(scale_x, scale_y);
|
||||
p.drawImage(0, 0, img);
|
||||
p.end();
|
||||
int err = 0;
|
||||
|
||||
for (int y = 0; y < a.height(); ++y) {
|
||||
for (int x = 0; x < a.width(); ++x) {
|
||||
QRgb ca = a.pixel(x, y);
|
||||
QRgb cb = b.pixel(x, y);
|
||||
for (int y = 0; y < a.height(); ++y) {
|
||||
for (int x = 0; x < a.width(); ++x) {
|
||||
QRgb ca = a.pixel(x, y);
|
||||
QRgb cb = b.pixel(x, y);
|
||||
|
||||
// tolerate a little bit of rounding errors
|
||||
bool r = true;
|
||||
r &= qAbs(qRed(ca) - qRed(cb)) <= 18;
|
||||
r &= qAbs(qGreen(ca) - qGreen(cb)) <= 18;
|
||||
r &= qAbs(qBlue(ca) - qBlue(cb)) <= 18;
|
||||
if (!r)
|
||||
err++;
|
||||
}
|
||||
// tolerate a little bit of rounding errors
|
||||
int tolerance = 3;
|
||||
bool r = true;
|
||||
r &= qAbs(qRed(ca) - qRed(cb)) <= tolerance;
|
||||
r &= qAbs(qGreen(ca) - qGreen(cb)) <= tolerance;
|
||||
r &= qAbs(qBlue(ca) - qBlue(cb)) <= tolerance;
|
||||
if (!r)
|
||||
err++;
|
||||
}
|
||||
QCOMPARE(err, 0);
|
||||
}
|
||||
QCOMPARE(err, 0);
|
||||
}
|
||||
|
||||
// Tests smooth upscale is smooth
|
||||
|
Loading…
Reference in New Issue
Block a user