From b91f86a2128093ad7c65fa30b63ef87a9e55a4e0 Mon Sep 17 00:00:00 2001 From: Allan Sandfeld Jensen Date: Wed, 6 Jul 2016 17:11:59 +0200 Subject: [PATCH] Improve accuracy in fast path bilinear sampling Adds rounding before using the optimized low accuracy interpolation, this reduces the magnitude of error in the scaled result from ~4 bits to just 2 bits. Change-Id: Ie4e618bf5b1f4a74367aa419ebbd534cc6a846b3 Reviewed-by: Eirik Aavitsland --- Note: the rotated SSE2 hunk is corrected relative to the commit as originally recorded: the rounding term is added to the 8-bit fractions (v_fx >> 8, v_fy >> 8), as in the scaled SSE2 hunk, so that distx/disty stay within 0..16 as interpolate_4_pixels_16 requires. src/gui/painting/qdrawhelper.cpp | 36 ++++++----- tests/auto/gui/image/qimage/tst_qimage.cpp | 69 ++++++++++++++-------- 2 files changed, 64 insertions(+), 41 deletions(-) diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index b452019251..f7b81944c5 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -1827,9 +1827,9 @@ static const QRgba64 *QT_FASTCALL fetchTransformed64(QRgba64 *buffer, const Oper /** \internal interpolate 4 argb pixels with the distx and disty factor. - distx and disty bust be between 0 and 16 + distx and disty must be between 0 and 16 */ -static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, int distx, int disty) +static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, uint distx, uint disty) { uint distxy = distx * disty; //idistx * disty = (16-distx) * disty = 16*disty - distxy @@ -2176,7 +2176,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c fetchTransformedBilinear_pixelBounds(image_height, image_y1, image_y2, y1, y2); const uint *s1 = (const uint *)data->texture.scanLine(y1); const uint *s2 = (const uint *)data->texture.scanLine(y2); - int disty = (fy & 0x0000ffff) >> 12; + int disty = ((fy & 0x0000ffff) + 0x0800) >> 12; if (blendType != BlendTransformedBilinearTiled) { #define BILINEAR_DOWNSCALE_BOUNDS_PROLOG \ @@ -2190,7 +2190,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c uint tr = s1[x2]; \ uint bl = s2[x1]; \ uint 
br = s2[x2]; \ - int distx = (fx & 0x0000ffff) >> 12; \ + int distx = ((fx & 0x0000ffff) + 0x0800) >> 12; \ *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); \ fx += fdx; \ ++b; \ @@ -2209,6 +2209,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c const __m128i v_256 = _mm_set1_epi16(256); const __m128i v_disty = _mm_set1_epi16(disty); const __m128i v_fdx = _mm_set1_epi32(fdx*4); + const __m128i v_fx_r = _mm_set1_epi32(0x8); __m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx); while (b < boundedEnd) { @@ -2222,7 +2223,8 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c const __m128i bl = _mm_setr_epi32(s2[offset0], s2[offset1], s2[offset2], s2[offset3]); const __m128i br = _mm_setr_epi32(s2[offset0 + 1], s2[offset1 + 1], s2[offset2 + 1], s2[offset3 + 1]); - __m128i v_distx = _mm_srli_epi16(v_fx, 12); + __m128i v_distx = _mm_srli_epi16(v_fx, 8); + v_distx = _mm_srli_epi16(_mm_add_epi32(v_distx, v_fx_r), 4); v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); @@ -2252,6 +2254,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c } const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff); + const int32x4_t v_fx_r = vdupq_n_s32(0x0800); while (b < boundedEnd) { @@ -2260,7 +2263,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c Vect_buffer v_fx_shifted; v_fx_shifted.vect = vshrq_n_s32(v_fx.vect, 16); - int32x4_t v_distx = vshrq_n_s32(vandq_s32(v_fx.vect, v_ffff_mask), 12); + int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx.vect, v_ffff_mask), v_fx_r), 12); for (int i = 0; i < 4; i++) { int x1 = v_fx_shifted.i[i]; @@ -2290,7 +2293,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c uint tr = s1[x2]; uint bl = s2[x1]; uint br = s2[x2]; - int distx = (fx & 0x0000ffff) >> 12; + int distx = 
((fx & 0x0000ffff) + 0x0800) >> 12; *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); fx += fdx; ++b; @@ -2362,6 +2365,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c const __m128i v_256 = _mm_set1_epi16(256); const __m128i v_fdx = _mm_set1_epi32(fdx*4); const __m128i v_fdy = _mm_set1_epi32(fdy*4); + const __m128i v_fxy_r = _mm_set1_epi32(0x8); __m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx); __m128i v_fy = _mm_setr_epi32(fy, fy + fdy, fy + fdy + fdy, fy + fdy + fdy + fdy); @@ -2396,6 +2400,8 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c - __m128i v_distx = _mm_srli_epi16(v_fx, 12); - __m128i v_disty = _mm_srli_epi16(v_fy, 12); + __m128i v_distx = _mm_srli_epi16(v_fx, 8); + __m128i v_disty = _mm_srli_epi16(v_fy, 8); + v_distx = _mm_srli_epi16(_mm_add_epi32(v_distx, v_fxy_r), 4); + v_disty = _mm_srli_epi16(_mm_add_epi32(v_disty, v_fxy_r), 4); v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); v_disty = _mm_shufflehi_epi16(v_disty, _MM_SHUFFLE(2,2,0,0)); @@ -2434,8 +2440,8 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c int disty = (fy & 0x0000ffff) >> 8; *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty); #else - int distx = (fx & 0x0000ffff) >> 12; - int disty = (fy & 0x0000ffff) >> 12; + int distx = ((fx & 0x0000ffff) + 0x0800) >> 12; + int disty = ((fy & 0x0000ffff) + 0x0800) >> 12; *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); #endif @@ -2664,13 +2670,13 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper fracX += fdx; } } else { //scale down - int disty = (fy & 0x0000ffff) >> 12; + int disty = ((fy & 0x0000ffff) + 0x0800) >> 12; for (int i = 0; i < len; ++i) { uint tl = buf1[i * 2 + 0]; uint tr = buf1[i * 2 + 1]; uint bl = buf2[i * 2 + 0]; uint br = buf2[i * 2 + 1]; - int distx = (fracX & 0x0000ffff) >> 12; + int distx = ((fracX & 0x0000ffff) + 0x0800) >> 12; b[i] = 
interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); fracX += fdx; } @@ -2736,8 +2742,8 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper uint bl = buf2[i * 2 + 0]; uint br = buf2[i * 2 + 1]; - int distx = (fracX & 0x0000ffff) >> 12; - int disty = (fracY & 0x0000ffff) >> 12; + int distx = ((fracX & 0x0000ffff) + 0x0800) >> 12; + int disty = ((fracY & 0x0000ffff) + 0x0800) >> 12; b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); fracX += fdx; diff --git a/tests/auto/gui/image/qimage/tst_qimage.cpp b/tests/auto/gui/image/qimage/tst_qimage.cpp index 1bc4ec2ae7..7e1a02d716 100644 --- a/tests/auto/gui/image/qimage/tst_qimage.cpp +++ b/tests/auto/gui/image/qimage/tst_qimage.cpp @@ -106,6 +106,7 @@ private slots: void smoothScale(); void smoothScale2_data(); void smoothScale2(); + void smoothScale3_data(); void smoothScale3(); void smoothScale4(); @@ -1715,9 +1716,12 @@ static inline int rand8() return int(256. * (qrand() / (RAND_MAX + 1.0))); } -// compares img.scale against the bilinear filtering used by QPainter -void tst_QImage::smoothScale3() +void tst_QImage::smoothScale3_data() { + QTest::addColumn<QImage>("img"); + QTest::addColumn<qreal>("scale_x"); + QTest::addColumn<qreal>("scale_y"); + QImage img(128, 128, QImage::Format_RGB32); for (int y = 0; y < img.height(); ++y) { for (int x = 0; x < img.width(); ++x) { @@ -1730,36 +1734,49 @@ void tst_QImage::smoothScale3() } } - qreal scales[2] = { .5, 2 }; + QTest::newRow("(0.5, 0.5)") << img << qreal(0.5) << qreal(0.5); + QTest::newRow("(0.5, 1.0)") << img << qreal(0.5) << qreal(1.0); + QTest::newRow("(1.0, 0.5)") << img << qreal(1.0) << qreal(0.5); + QTest::newRow("(0.5, 2.0)") << img << qreal(0.5) << qreal(2.0); + QTest::newRow("(1.0, 2.0)") << img << qreal(1.0) << qreal(2.0); + QTest::newRow("(2.0, 0.5)") << img << qreal(2.0) << qreal(0.5); + QTest::newRow("(2.0, 1.0)") << img << qreal(2.0) << qreal(1.0); + QTest::newRow("(2.0, 2.0)") << img << qreal(2) << qreal(2); +} +// compares 
img.scale against the bilinear filtering used by QPainter +void tst_QImage::smoothScale3() +{ + QFETCH(QImage, img); + QFETCH(qreal, scale_x); + QFETCH(qreal, scale_y); - for (int i = 0; i < 2; ++i) { - QImage a = img.scaled(img.size() * scales[i], Qt::IgnoreAspectRatio, Qt::SmoothTransformation); - QImage b(a.size(), a.format()); - b.fill(0x0); + QImage a = img.scaled(img.width() * scale_x, img.height() * scale_y, Qt::IgnoreAspectRatio, Qt::SmoothTransformation); + QImage b(a.size(), a.format()); + b.fill(0x0); - QPainter p(&b); - p.setRenderHint(QPainter::SmoothPixmapTransform); - p.scale(scales[i], scales[i]); - p.drawImage(0, 0, img); - p.end(); - int err = 0; + QPainter p(&b); + p.setRenderHint(QPainter::SmoothPixmapTransform); + p.scale(scale_x, scale_y); + p.drawImage(0, 0, img); + p.end(); + int err = 0; - for (int y = 0; y < a.height(); ++y) { - for (int x = 0; x < a.width(); ++x) { - QRgb ca = a.pixel(x, y); - QRgb cb = b.pixel(x, y); + for (int y = 0; y < a.height(); ++y) { + for (int x = 0; x < a.width(); ++x) { + QRgb ca = a.pixel(x, y); + QRgb cb = b.pixel(x, y); - // tolerate a little bit of rounding errors - bool r = true; - r &= qAbs(qRed(ca) - qRed(cb)) <= 18; - r &= qAbs(qGreen(ca) - qGreen(cb)) <= 18; - r &= qAbs(qBlue(ca) - qBlue(cb)) <= 18; - if (!r) - err++; - } + // tolerate a little bit of rounding errors + int tolerance = 3; + bool r = true; + r &= qAbs(qRed(ca) - qRed(cb)) <= tolerance; + r &= qAbs(qGreen(ca) - qGreen(cb)) <= tolerance; + r &= qAbs(qBlue(ca) - qBlue(cb)) <= tolerance; + if (!r) + err++; } - QCOMPARE(err, 0); } + QCOMPARE(err, 0); } // Tests smooth upscale is smooth