Improve accuracy in fast path bilinear sampling
Adds rounding before using the optimized low-accuracy interpolation. This reduces the magnitude of error in the scaled result from ~4 bits to just 2 bits. Change-Id: Ie4e618bf5b1f4a74367aa419ebbd534cc6a846b3 Reviewed-by: Eirik Aavitsland <eirik.aavitsland@qt.io>
This commit is contained in:
parent
529b1c9e2a
commit
b91f86a212
@ -1827,9 +1827,9 @@ static const QRgba64 *QT_FASTCALL fetchTransformed64(QRgba64 *buffer, const Oper
|
|||||||
|
|
||||||
/** \internal
|
/** \internal
|
||||||
interpolate 4 argb pixels with the distx and disty factor.
|
interpolate 4 argb pixels with the distx and disty factor.
|
||||||
distx and disty bust be between 0 and 16
|
distx and disty must be between 0 and 16
|
||||||
*/
|
*/
|
||||||
static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, int distx, int disty)
|
static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
|
||||||
{
|
{
|
||||||
uint distxy = distx * disty;
|
uint distxy = distx * disty;
|
||||||
//idistx * disty = (16-distx) * disty = 16*disty - distxy
|
//idistx * disty = (16-distx) * disty = 16*disty - distxy
|
||||||
@ -2176,7 +2176,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
|
|||||||
fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
|
fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
|
||||||
const uint *s1 = (const uint *)data->texture.scanLine(y1);
|
const uint *s1 = (const uint *)data->texture.scanLine(y1);
|
||||||
const uint *s2 = (const uint *)data->texture.scanLine(y2);
|
const uint *s2 = (const uint *)data->texture.scanLine(y2);
|
||||||
int disty = (fy & 0x0000ffff) >> 12;
|
int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
|
||||||
|
|
||||||
if (blendType != BlendTransformedBilinearTiled) {
|
if (blendType != BlendTransformedBilinearTiled) {
|
||||||
#define BILINEAR_DOWNSCALE_BOUNDS_PROLOG \
|
#define BILINEAR_DOWNSCALE_BOUNDS_PROLOG \
|
||||||
@ -2190,7 +2190,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
|
|||||||
uint tr = s1[x2]; \
|
uint tr = s1[x2]; \
|
||||||
uint bl = s2[x1]; \
|
uint bl = s2[x1]; \
|
||||||
uint br = s2[x2]; \
|
uint br = s2[x2]; \
|
||||||
int distx = (fx & 0x0000ffff) >> 12; \
|
int distx = ((fx & 0x0000ffff) + 0x0800) >> 12; \
|
||||||
*b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); \
|
*b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); \
|
||||||
fx += fdx; \
|
fx += fdx; \
|
||||||
++b; \
|
++b; \
|
||||||
@ -2209,6 +2209,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
|
|||||||
const __m128i v_256 = _mm_set1_epi16(256);
|
const __m128i v_256 = _mm_set1_epi16(256);
|
||||||
const __m128i v_disty = _mm_set1_epi16(disty);
|
const __m128i v_disty = _mm_set1_epi16(disty);
|
||||||
const __m128i v_fdx = _mm_set1_epi32(fdx*4);
|
const __m128i v_fdx = _mm_set1_epi32(fdx*4);
|
||||||
|
const __m128i v_fx_r = _mm_set1_epi32(0x8);
|
||||||
__m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
|
__m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
|
||||||
|
|
||||||
while (b < boundedEnd) {
|
while (b < boundedEnd) {
|
||||||
@ -2222,7 +2223,8 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
|
|||||||
const __m128i bl = _mm_setr_epi32(s2[offset0], s2[offset1], s2[offset2], s2[offset3]);
|
const __m128i bl = _mm_setr_epi32(s2[offset0], s2[offset1], s2[offset2], s2[offset3]);
|
||||||
const __m128i br = _mm_setr_epi32(s2[offset0 + 1], s2[offset1 + 1], s2[offset2 + 1], s2[offset3 + 1]);
|
const __m128i br = _mm_setr_epi32(s2[offset0 + 1], s2[offset1 + 1], s2[offset2 + 1], s2[offset3 + 1]);
|
||||||
|
|
||||||
__m128i v_distx = _mm_srli_epi16(v_fx, 12);
|
__m128i v_distx = _mm_srli_epi16(v_fx, 8);
|
||||||
|
v_distx = _mm_srli_epi16(_mm_add_epi32(v_distx, v_fx_r), 4);
|
||||||
v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
|
v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
|
||||||
v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
|
v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
|
||||||
|
|
||||||
@ -2252,6 +2254,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
|
|||||||
}
|
}
|
||||||
|
|
||||||
const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
|
const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
|
||||||
|
const int32x4_t v_fx_r = vdupq_n_s32(0x0800);
|
||||||
|
|
||||||
while (b < boundedEnd) {
|
while (b < boundedEnd) {
|
||||||
|
|
||||||
@ -2260,7 +2263,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
|
|||||||
Vect_buffer v_fx_shifted;
|
Vect_buffer v_fx_shifted;
|
||||||
v_fx_shifted.vect = vshrq_n_s32(v_fx.vect, 16);
|
v_fx_shifted.vect = vshrq_n_s32(v_fx.vect, 16);
|
||||||
|
|
||||||
int32x4_t v_distx = vshrq_n_s32(vandq_s32(v_fx.vect, v_ffff_mask), 12);
|
int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx.vect, v_ffff_mask), v_fx_r), 12);
|
||||||
|
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
int x1 = v_fx_shifted.i[i];
|
int x1 = v_fx_shifted.i[i];
|
||||||
@ -2290,7 +2293,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
|
|||||||
uint tr = s1[x2];
|
uint tr = s1[x2];
|
||||||
uint bl = s2[x1];
|
uint bl = s2[x1];
|
||||||
uint br = s2[x2];
|
uint br = s2[x2];
|
||||||
int distx = (fx & 0x0000ffff) >> 12;
|
int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
|
||||||
*b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
|
*b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
|
||||||
fx += fdx;
|
fx += fdx;
|
||||||
++b;
|
++b;
|
||||||
@ -2362,6 +2365,7 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
|
|||||||
const __m128i v_256 = _mm_set1_epi16(256);
|
const __m128i v_256 = _mm_set1_epi16(256);
|
||||||
const __m128i v_fdx = _mm_set1_epi32(fdx*4);
|
const __m128i v_fdx = _mm_set1_epi32(fdx*4);
|
||||||
const __m128i v_fdy = _mm_set1_epi32(fdy*4);
|
const __m128i v_fdy = _mm_set1_epi32(fdy*4);
|
||||||
|
const __m128i v_fxy_r = _mm_set1_epi32(0x8);
|
||||||
__m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
|
__m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
|
||||||
__m128i v_fy = _mm_setr_epi32(fy, fy + fdy, fy + fdy + fdy, fy + fdy + fdy + fdy);
|
__m128i v_fy = _mm_setr_epi32(fy, fy + fdy, fy + fdy + fdy, fy + fdy + fdy + fdy);
|
||||||
|
|
||||||
@ -2396,6 +2400,8 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
|
|||||||
|
|
||||||
__m128i v_distx = _mm_srli_epi16(v_fx, 12);
|
__m128i v_distx = _mm_srli_epi16(v_fx, 12);
|
||||||
__m128i v_disty = _mm_srli_epi16(v_fy, 12);
|
__m128i v_disty = _mm_srli_epi16(v_fy, 12);
|
||||||
|
v_distx = _mm_srli_epi16(_mm_add_epi32(v_fx, v_fxy_r), 4);
|
||||||
|
v_disty = _mm_srli_epi16(_mm_add_epi32(v_fy, v_fxy_r), 4);
|
||||||
v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
|
v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
|
||||||
v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
|
v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
|
||||||
v_disty = _mm_shufflehi_epi16(v_disty, _MM_SHUFFLE(2,2,0,0));
|
v_disty = _mm_shufflehi_epi16(v_disty, _MM_SHUFFLE(2,2,0,0));
|
||||||
@ -2434,8 +2440,8 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
|
|||||||
int disty = (fy & 0x0000ffff) >> 8;
|
int disty = (fy & 0x0000ffff) >> 8;
|
||||||
*b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
|
*b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
|
||||||
#else
|
#else
|
||||||
int distx = (fx & 0x0000ffff) >> 12;
|
int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
|
||||||
int disty = (fy & 0x0000ffff) >> 12;
|
int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
|
||||||
*b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
|
*b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -2664,13 +2670,13 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
|
|||||||
fracX += fdx;
|
fracX += fdx;
|
||||||
}
|
}
|
||||||
} else { //scale down
|
} else { //scale down
|
||||||
int disty = (fy & 0x0000ffff) >> 12;
|
int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
|
||||||
for (int i = 0; i < len; ++i) {
|
for (int i = 0; i < len; ++i) {
|
||||||
uint tl = buf1[i * 2 + 0];
|
uint tl = buf1[i * 2 + 0];
|
||||||
uint tr = buf1[i * 2 + 1];
|
uint tr = buf1[i * 2 + 1];
|
||||||
uint bl = buf2[i * 2 + 0];
|
uint bl = buf2[i * 2 + 0];
|
||||||
uint br = buf2[i * 2 + 1];
|
uint br = buf2[i * 2 + 1];
|
||||||
int distx = (fracX & 0x0000ffff) >> 12;
|
int distx = ((fracX & 0x0000ffff) + 0x0800) >> 12;
|
||||||
b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
|
b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
|
||||||
fracX += fdx;
|
fracX += fdx;
|
||||||
}
|
}
|
||||||
@ -2736,8 +2742,8 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
|
|||||||
uint bl = buf2[i * 2 + 0];
|
uint bl = buf2[i * 2 + 0];
|
||||||
uint br = buf2[i * 2 + 1];
|
uint br = buf2[i * 2 + 1];
|
||||||
|
|
||||||
int distx = (fracX & 0x0000ffff) >> 12;
|
int distx = ((fracX & 0x0000ffff) + 0x0800) >> 12;
|
||||||
int disty = (fracY & 0x0000ffff) >> 12;
|
int disty = ((fracY & 0x0000ffff) + 0x0800) >> 12;
|
||||||
|
|
||||||
b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
|
b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
|
||||||
fracX += fdx;
|
fracX += fdx;
|
||||||
|
@ -106,6 +106,7 @@ private slots:
|
|||||||
void smoothScale();
|
void smoothScale();
|
||||||
void smoothScale2_data();
|
void smoothScale2_data();
|
||||||
void smoothScale2();
|
void smoothScale2();
|
||||||
|
void smoothScale3_data();
|
||||||
void smoothScale3();
|
void smoothScale3();
|
||||||
void smoothScale4();
|
void smoothScale4();
|
||||||
|
|
||||||
@ -1715,9 +1716,12 @@ static inline int rand8()
|
|||||||
return int(256. * (qrand() / (RAND_MAX + 1.0)));
|
return int(256. * (qrand() / (RAND_MAX + 1.0)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// compares img.scale against the bilinear filtering used by QPainter
|
void tst_QImage::smoothScale3_data()
|
||||||
void tst_QImage::smoothScale3()
|
|
||||||
{
|
{
|
||||||
|
QTest::addColumn<QImage>("img");
|
||||||
|
QTest::addColumn<qreal>("scale_x");
|
||||||
|
QTest::addColumn<qreal>("scale_y");
|
||||||
|
|
||||||
QImage img(128, 128, QImage::Format_RGB32);
|
QImage img(128, 128, QImage::Format_RGB32);
|
||||||
for (int y = 0; y < img.height(); ++y) {
|
for (int y = 0; y < img.height(); ++y) {
|
||||||
for (int x = 0; x < img.width(); ++x) {
|
for (int x = 0; x < img.width(); ++x) {
|
||||||
@ -1730,16 +1734,29 @@ void tst_QImage::smoothScale3()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
qreal scales[2] = { .5, 2 };
|
QTest::newRow("(0.5, 0.5)") << img << qreal(0.5) << qreal(0.5);
|
||||||
|
QTest::newRow("(0.5, 1.0)") << img << qreal(0.5) << qreal(1.0);
|
||||||
|
QTest::newRow("(1.0, 0.5)") << img << qreal(1.0) << qreal(0.5);
|
||||||
|
QTest::newRow("(0.5, 2.0)") << img << qreal(0.5) << qreal(2.0);
|
||||||
|
QTest::newRow("(1.0, 2.0)") << img << qreal(1.0) << qreal(2.0);
|
||||||
|
QTest::newRow("(2.0, 0.5)") << img << qreal(2.0) << qreal(0.5);
|
||||||
|
QTest::newRow("(2.0, 1.0)") << img << qreal(2.0) << qreal(1.0);
|
||||||
|
QTest::newRow("(2.0, 2.0)") << img << qreal(2) << qreal(2);
|
||||||
|
}
|
||||||
|
// compares img.scale against the bilinear filtering used by QPainter
|
||||||
|
void tst_QImage::smoothScale3()
|
||||||
|
{
|
||||||
|
QFETCH(QImage, img);
|
||||||
|
QFETCH(qreal, scale_x);
|
||||||
|
QFETCH(qreal, scale_y);
|
||||||
|
|
||||||
for (int i = 0; i < 2; ++i) {
|
QImage a = img.scaled(img.width() * scale_x, img.height() * scale_y, Qt::IgnoreAspectRatio, Qt::SmoothTransformation);
|
||||||
QImage a = img.scaled(img.size() * scales[i], Qt::IgnoreAspectRatio, Qt::SmoothTransformation);
|
|
||||||
QImage b(a.size(), a.format());
|
QImage b(a.size(), a.format());
|
||||||
b.fill(0x0);
|
b.fill(0x0);
|
||||||
|
|
||||||
QPainter p(&b);
|
QPainter p(&b);
|
||||||
p.setRenderHint(QPainter::SmoothPixmapTransform);
|
p.setRenderHint(QPainter::SmoothPixmapTransform);
|
||||||
p.scale(scales[i], scales[i]);
|
p.scale(scale_x, scale_y);
|
||||||
p.drawImage(0, 0, img);
|
p.drawImage(0, 0, img);
|
||||||
p.end();
|
p.end();
|
||||||
int err = 0;
|
int err = 0;
|
||||||
@ -1750,17 +1767,17 @@ void tst_QImage::smoothScale3()
|
|||||||
QRgb cb = b.pixel(x, y);
|
QRgb cb = b.pixel(x, y);
|
||||||
|
|
||||||
// tolerate a little bit of rounding errors
|
// tolerate a little bit of rounding errors
|
||||||
|
int tolerance = 3;
|
||||||
bool r = true;
|
bool r = true;
|
||||||
r &= qAbs(qRed(ca) - qRed(cb)) <= 18;
|
r &= qAbs(qRed(ca) - qRed(cb)) <= tolerance;
|
||||||
r &= qAbs(qGreen(ca) - qGreen(cb)) <= 18;
|
r &= qAbs(qGreen(ca) - qGreen(cb)) <= tolerance;
|
||||||
r &= qAbs(qBlue(ca) - qBlue(cb)) <= 18;
|
r &= qAbs(qBlue(ca) - qBlue(cb)) <= tolerance;
|
||||||
if (!r)
|
if (!r)
|
||||||
err++;
|
err++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
QCOMPARE(err, 0);
|
QCOMPARE(err, 0);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Tests smooth upscale is smooth
|
// Tests smooth upscale is smooth
|
||||||
void tst_QImage::smoothScale4()
|
void tst_QImage::smoothScale4()
|
||||||
|
Loading…
Reference in New Issue
Block a user