Split fetchTransformedBilinear and fetchTransformedBilinear64
Split out the basic pixel fetching into helper functions so that it can be shared between the two.

Change-Id: I6c27a7cea3a5c10b511232edc68bd32490514a27
Reviewed-by: Eirik Aavitsland <eirik.aavitsland@qt.io>
This commit is contained in:
parent
f1a23a5467
commit
715fd425ef
@ -2657,6 +2657,241 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
|
||||
return buffer;
|
||||
}
|
||||
|
||||
template<TextureBlendType blendType, QPixelLayout::BPP bpp>
|
||||
static void QT_FASTCALL fetchTransformedBilinear_simple_upscale_helper(uint *b, uint *end, const QTextureData &image,
|
||||
int &fx, int &fy, int fdx, int /*fdy*/)
|
||||
{
|
||||
const QPixelLayout *layout = &qPixelLayouts[image.format];
|
||||
const QVector<QRgb> *clut = image.colorTable;
|
||||
Q_ASSERT(bpp == QPixelLayout::BPPNone || bpp == layout->bpp);
|
||||
// When templated 'fetch' should be inlined at compile time:
|
||||
const FetchPixelsFunc fetch = (bpp == QPixelLayout::BPPNone) ? qFetchPixels[layout->bpp] : fetchPixels<bpp>;
|
||||
const ConvertFunc convertToARGB32PM = layout->convertToARGB32PM;
|
||||
|
||||
int y1 = (fy >> 16);
|
||||
int y2;
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
|
||||
const uchar *s1 = image.scanLine(y1);
|
||||
const uchar *s2 = image.scanLine(y2);
|
||||
|
||||
int disty = (fy & 0x0000ffff) >> 8;
|
||||
int idisty = 256 - disty;
|
||||
int x = fx >> 16;
|
||||
int length = end - b;
|
||||
|
||||
// The idea is first to do the interpolation between the row s1 and the row s2
|
||||
// into an intermediate buffer, then we interpolate between two pixel of this buffer.
|
||||
// +1 for the last pixel to interpolate with, and +1 for rounding errors.
|
||||
uint buf1[buffer_size + 2];
|
||||
uint buf2[buffer_size + 2];
|
||||
const uint *ptr1;
|
||||
const uint *ptr2;
|
||||
|
||||
int count = (qint64(length) * fdx + fixed_scale - 1) / fixed_scale + 2;
|
||||
Q_ASSERT(count <= buffer_size + 2); //length is supposed to be <= buffer_size and data->m11 < 1 in this case
|
||||
|
||||
if (blendType == BlendTransformedBilinearTiled) {
|
||||
x %= image.width;
|
||||
if (x < 0)
|
||||
x += image.width;
|
||||
int len1 = qMin(count, image.width - x);
|
||||
int len2 = qMin(x, count - len1);
|
||||
|
||||
ptr1 = fetch(buf1, s1, x, len1);
|
||||
ptr1 = convertToARGB32PM(buf1, ptr1, len1, clut, 0);
|
||||
ptr2 = fetch(buf2, s2, x, len1);
|
||||
ptr2 = convertToARGB32PM(buf2, ptr2, len1, clut, 0);
|
||||
for (int i = 0; i < len1; ++i) {
|
||||
uint t = ptr1[i];
|
||||
uint b = ptr2[i];
|
||||
buf1[i] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
|
||||
buf2[i] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
|
||||
}
|
||||
|
||||
if (len2) {
|
||||
ptr1 = fetch(buf1 + len1, s1, 0, len2);
|
||||
ptr1 = convertToARGB32PM(buf1 + len1, ptr1, len2, clut, 0);
|
||||
ptr2 = fetch(buf2 + len1, s2, 0, len2);
|
||||
ptr2 = convertToARGB32PM(buf2 + len1, ptr2, len2, clut, 0);
|
||||
for (int i = 0; i < len2; ++i) {
|
||||
uint t = ptr1[i];
|
||||
uint b = ptr2[i];
|
||||
buf1[i + len1] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
|
||||
buf2[i + len1] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
|
||||
}
|
||||
}
|
||||
for (int i = image.width; i < count; ++i) {
|
||||
buf1[i] = buf1[i - image.width];
|
||||
buf2[i] = buf2[i - image.width];
|
||||
}
|
||||
} else {
|
||||
int start = qMax(x, image.x1);
|
||||
int end = qMin(x + count, image.x2);
|
||||
int len = qMax(1, end - start);
|
||||
int leading = start - x;
|
||||
|
||||
ptr1 = fetch(buf1 + leading, s1, start, len);
|
||||
ptr1 = convertToARGB32PM(buf1 + leading, ptr1, len, clut, 0);
|
||||
ptr2 = fetch(buf2 + leading, s2, start, len);
|
||||
ptr2 = convertToARGB32PM(buf2 + leading, ptr2, len, clut, 0);
|
||||
|
||||
for (int i = 0; i < len; ++i) {
|
||||
uint t = ptr1[i];
|
||||
uint b = ptr2[i];
|
||||
buf1[i + leading] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
|
||||
buf2[i + leading] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
|
||||
}
|
||||
|
||||
for (int i = 0; i < leading; ++i) {
|
||||
buf1[i] = buf1[leading];
|
||||
buf2[i] = buf2[leading];
|
||||
}
|
||||
for (int i = leading + len; i < count; ++i) {
|
||||
buf1[i] = buf1[i - 1];
|
||||
buf2[i] = buf2[i - 1];
|
||||
}
|
||||
}
|
||||
|
||||
// Now interpolate the values from the intermediate_buffer to get the final result.
|
||||
fx &= fixed_scale - 1;
|
||||
Q_ASSERT((fx >> 16) == 0);
|
||||
while (b < end) {
|
||||
int x1 = (fx >> 16);
|
||||
int x2 = x1 + 1;
|
||||
Q_ASSERT(x1 >= 0);
|
||||
Q_ASSERT(x2 < count);
|
||||
|
||||
int distx = (fx & 0x0000ffff) >> 8;
|
||||
int idistx = 256 - distx;
|
||||
int rb = ((buf1[x1] * idistx + buf1[x2] * distx) >> 8) & 0xff00ff;
|
||||
int ag = (buf2[x1] * idistx + buf2[x2] * distx) & 0xff00ff00;
|
||||
*b++ = rb | ag;
|
||||
fx += fdx;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Pointer type matching the fetchTransformedBilinear_fetcher<> instantiations, so a caller can
// pick one instantiation and call it through a variable. The pointed-to function fills buf1 and
// buf2 with len pairs of unconverted source pixels from image, starting at the 16.16 fixed-point
// position (fx, fy) and stepping by (fdx, fdy) after each pair.
typedef void (QT_FASTCALL *BilinearFastTransformFetcher)(uint *buf1, uint *buf2, const int len, const QTextureData &image,
|
||||
int fx, int fy, const int fdx, const int fdy);
|
||||
|
||||
template<TextureBlendType blendType, QPixelLayout::BPP bpp>
|
||||
static void QT_FASTCALL fetchTransformedBilinear_fetcher(uint *buf1, uint *buf2, const int len, const QTextureData &image,
|
||||
int fx, int fy, const int fdx, const int fdy)
|
||||
{
|
||||
const QPixelLayout &layout = qPixelLayouts[image.format];
|
||||
Q_ASSERT(bpp == QPixelLayout::BPPNone || bpp == layout.bpp);
|
||||
// When templated 'fetch1' should be inlined at compile time:
|
||||
const FetchPixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout.bpp] : fetchPixel<bpp>;
|
||||
if (fdy == 0) {
|
||||
int y1 = (fy >> 16);
|
||||
int y2;
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
|
||||
const uchar *s1 = image.scanLine(y1);
|
||||
const uchar *s2 = image.scanLine(y2);
|
||||
|
||||
int i = 0;
|
||||
if (blendType == BlendTransformedBilinear) {
|
||||
for (; i < len; ++i) {
|
||||
int x1 = (fx >> 16);
|
||||
int x2;
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
|
||||
if (x1 != x2)
|
||||
break;
|
||||
buf1[i * 2 + 0] = buf1[i * 2 + 1] = fetch1(s1, x1);
|
||||
buf2[i * 2 + 0] = buf2[i * 2 + 1] = fetch1(s2, x1);
|
||||
fx += fdx;
|
||||
}
|
||||
int fastLen = len;
|
||||
if (fdx > 0)
|
||||
fastLen = qMin(fastLen, int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
|
||||
else if (fdx < 0)
|
||||
fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx));
|
||||
|
||||
for (; i < fastLen; ++i) {
|
||||
int x = (fx >> 16);
|
||||
buf1[i * 2 + 0] = fetch1(s1, x);
|
||||
buf1[i * 2 + 1] = fetch1(s1, x + 1);
|
||||
buf2[i * 2 + 0] = fetch1(s2, x);
|
||||
buf2[i * 2 + 1] = fetch1(s2, x + 1);
|
||||
fx += fdx;
|
||||
}
|
||||
}
|
||||
|
||||
for (; i < len; ++i) {
|
||||
int x1 = (fx >> 16);
|
||||
int x2;
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
|
||||
buf1[i * 2 + 0] = fetch1(s1, x1);
|
||||
buf1[i * 2 + 1] = fetch1(s1, x2);
|
||||
buf2[i * 2 + 0] = fetch1(s2, x1);
|
||||
buf2[i * 2 + 1] = fetch1(s2, x2);
|
||||
fx += fdx;
|
||||
}
|
||||
} else {
|
||||
int i = 0;
|
||||
if (blendType == BlendTransformedBilinear) {
|
||||
for (; i < len; ++i) {
|
||||
int x1 = (fx >> 16);
|
||||
int x2;
|
||||
int y1 = (fy >> 16);
|
||||
int y2;
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
|
||||
if (x1 != x2 && y1 != y2)
|
||||
break;
|
||||
const uchar *s1 = image.scanLine(y1);
|
||||
const uchar *s2 = image.scanLine(y2);
|
||||
buf1[i * 2 + 0] = fetch1(s1, x1);
|
||||
buf1[i * 2 + 1] = fetch1(s1, x2);
|
||||
buf2[i * 2 + 0] = fetch1(s2, x1);
|
||||
buf2[i * 2 + 1] = fetch1(s2, x2);
|
||||
fx += fdx;
|
||||
fy += fdy;
|
||||
}
|
||||
int fastLen = len;
|
||||
if (fdx > 0)
|
||||
fastLen = qMin(fastLen, int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
|
||||
else if (fdx < 0)
|
||||
fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx));
|
||||
if (fdy > 0)
|
||||
fastLen = qMin(fastLen, int((qint64(image.y2 - 1) * fixed_scale - fy) / fdy));
|
||||
else if (fdy < 0)
|
||||
fastLen = qMin(fastLen, int((qint64(image.y1) * fixed_scale - fy) / fdy));
|
||||
|
||||
for (; i < fastLen; ++i) {
|
||||
int x = (fx >> 16);
|
||||
int y = (fy >> 16);
|
||||
const uchar *s1 = image.scanLine(y);
|
||||
const uchar *s2 = s1 + image.bytesPerLine;
|
||||
buf1[i * 2 + 0] = fetch1(s1, x);
|
||||
buf1[i * 2 + 1] = fetch1(s1, x + 1);
|
||||
buf2[i * 2 + 0] = fetch1(s2, x);
|
||||
buf2[i * 2 + 1] = fetch1(s2, x + 1);
|
||||
fx += fdx;
|
||||
fy += fdy;
|
||||
}
|
||||
}
|
||||
|
||||
for (; i < len; ++i) {
|
||||
int x1 = (fx >> 16);
|
||||
int x2;
|
||||
int y1 = (fy >> 16);
|
||||
int y2;
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
|
||||
|
||||
const uchar *s1 = image.scanLine(y1);
|
||||
const uchar *s2 = image.scanLine(y2);
|
||||
buf1[i * 2 + 0] = fetch1(s1, x1);
|
||||
buf1[i * 2 + 1] = fetch1(s1, x2);
|
||||
buf2[i * 2 + 0] = fetch1(s2, x1);
|
||||
buf2[i * 2 + 1] = fetch1(s2, x2);
|
||||
fx += fdx;
|
||||
fy += fdy;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// blendType = BlendTransformedBilinear or BlendTransformedBilinearTiled
|
||||
template<TextureBlendType blendType, QPixelLayout::BPP bpp>
|
||||
static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator *,
|
||||
@ -2664,19 +2899,7 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
|
||||
{
|
||||
const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
|
||||
const QVector<QRgb> *clut = data->texture.colorTable;
|
||||
if (bpp != QPixelLayout::BPPNone) // Like this to not ICE on GCC 5.3.1
|
||||
Q_ASSERT(layout->bpp == bpp);
|
||||
// When templated 'fetch' should be inlined at compile time:
|
||||
const FetchPixelsFunc fetch = (bpp == QPixelLayout::BPPNone) ? qFetchPixels[layout->bpp] : FetchPixelsFunc(fetchPixels<bpp>);
|
||||
const FetchPixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout->bpp] : FetchPixelFunc(fetchPixel<bpp>);
|
||||
|
||||
int image_width = data->texture.width;
|
||||
int image_height = data->texture.height;
|
||||
|
||||
int image_x1 = data->texture.x1;
|
||||
int image_y1 = data->texture.y1;
|
||||
int image_x2 = data->texture.x2 - 1;
|
||||
int image_y2 = data->texture.y2 - 1;
|
||||
Q_ASSERT(bpp == QPixelLayout::BPPNone || layout->bpp == bpp);
|
||||
|
||||
const qreal cx = x + qreal(0.5);
|
||||
const qreal cy = y + qreal(0.5);
|
||||
@ -2692,203 +2915,80 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
|
||||
fx -= half_point;
|
||||
fy -= half_point;
|
||||
|
||||
if (fdy == 0) { //simple scale, no rotation
|
||||
int y1 = (fy >> 16);
|
||||
int y2;
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
|
||||
const uchar *s1 = data->texture.scanLine(y1);
|
||||
const uchar *s2 = data->texture.scanLine(y2);
|
||||
|
||||
if (fdy == 0) { // simple scale, no rotation or shear
|
||||
if (fdx <= fixed_scale && fdx > 0) { // scale up on X
|
||||
int disty = (fy & 0x0000ffff) >> 8;
|
||||
int idisty = 256 - disty;
|
||||
int x = fx >> 16;
|
||||
|
||||
// The idea is first to do the interpolation between the row s1 and the row s2
|
||||
// into an intermediate buffer, then we interpolate between two pixel of this buffer.
|
||||
// +1 for the last pixel to interpolate with, and +1 for rounding errors.
|
||||
uint buf1[buffer_size + 2];
|
||||
uint buf2[buffer_size + 2];
|
||||
const uint *ptr1;
|
||||
const uint *ptr2;
|
||||
|
||||
int count = (qint64(length) * fdx + fixed_scale - 1) / fixed_scale + 2;
|
||||
Q_ASSERT(count <= buffer_size + 2); //length is supposed to be <= buffer_size and data->m11 < 1 in this case
|
||||
|
||||
if (blendType == BlendTransformedBilinearTiled) {
|
||||
x %= image_width;
|
||||
if (x < 0)
|
||||
x += image_width;
|
||||
int len1 = qMin(count, image_width - x);
|
||||
int len2 = qMin(x, count - len1);
|
||||
|
||||
ptr1 = fetch(buf1, s1, x, len1);
|
||||
ptr1 = layout->convertToARGB32PM(buf1, ptr1, len1, clut, 0);
|
||||
ptr2 = fetch(buf2, s2, x, len1);
|
||||
ptr2 = layout->convertToARGB32PM(buf2, ptr2, len1, clut, 0);
|
||||
for (int i = 0; i < len1; ++i) {
|
||||
uint t = ptr1[i];
|
||||
uint b = ptr2[i];
|
||||
buf1[i] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
|
||||
buf2[i] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
|
||||
}
|
||||
|
||||
if (len2) {
|
||||
ptr1 = fetch(buf1 + len1, s1, 0, len2);
|
||||
ptr1 = layout->convertToARGB32PM(buf1 + len1, ptr1, len2, clut, 0);
|
||||
ptr2 = fetch(buf2 + len1, s2, 0, len2);
|
||||
ptr2 = layout->convertToARGB32PM(buf2 + len1, ptr2, len2, clut, 0);
|
||||
for (int i = 0; i < len2; ++i) {
|
||||
uint t = ptr1[i];
|
||||
uint b = ptr2[i];
|
||||
buf1[i + len1] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
|
||||
buf2[i + len1] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
|
||||
}
|
||||
}
|
||||
for (int i = image_width; i < count; ++i) {
|
||||
buf1[i] = buf1[i - image_width];
|
||||
buf2[i] = buf2[i - image_width];
|
||||
}
|
||||
} else {
|
||||
int start = qMax(x, image_x1);
|
||||
int end = qMin(x + count, image_x2 + 1);
|
||||
int len = qMax(1, end - start);
|
||||
int leading = start - x;
|
||||
|
||||
ptr1 = fetch(buf1 + leading, s1, start, len);
|
||||
ptr1 = layout->convertToARGB32PM(buf1 + leading, ptr1, len, clut, 0);
|
||||
ptr2 = fetch(buf2 + leading, s2, start, len);
|
||||
ptr2 = layout->convertToARGB32PM(buf2 + leading, ptr2, len, clut, 0);
|
||||
|
||||
for (int i = 0; i < len; ++i) {
|
||||
uint t = ptr1[i];
|
||||
uint b = ptr2[i];
|
||||
buf1[i + leading] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
|
||||
buf2[i + leading] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
|
||||
}
|
||||
|
||||
for (int i = 0; i < leading; ++i) {
|
||||
buf1[i] = buf1[leading];
|
||||
buf2[i] = buf2[leading];
|
||||
}
|
||||
for (int i = leading + len; i < count; ++i) {
|
||||
buf1[i] = buf1[i - 1];
|
||||
buf2[i] = buf2[i - 1];
|
||||
}
|
||||
}
|
||||
|
||||
// Now interpolate the values from the intermediate_buffer to get the final result.
|
||||
fx &= fixed_scale - 1;
|
||||
Q_ASSERT((fx >> 16) == 0);
|
||||
for (int i = 0; i < length; ++i) {
|
||||
int x1 = (fx >> 16);
|
||||
int x2 = x1 + 1;
|
||||
Q_ASSERT(x1 >= 0);
|
||||
Q_ASSERT(x2 < count);
|
||||
|
||||
int distx = (fx & 0x0000ffff) >> 8;
|
||||
int idistx = 256 - distx;
|
||||
int rb = ((buf1[x1] * idistx + buf1[x2] * distx) >> 8) & 0xff00ff;
|
||||
int ag = (buf2[x1] * idistx + buf2[x2] * distx) & 0xff00ff00;
|
||||
buffer[i] = rb | ag;
|
||||
fx += fdx;
|
||||
}
|
||||
fetchTransformedBilinear_simple_upscale_helper<blendType, bpp>(buffer, buffer + length, data->texture, fx, fy, fdx, fdy);
|
||||
} else {
|
||||
const BilinearFastTransformFetcher fetcher = fetchTransformedBilinear_fetcher<blendType,bpp>;
|
||||
|
||||
uint buf1[buffer_size];
|
||||
uint buf2[buffer_size];
|
||||
uint *b = buffer;
|
||||
while (length) {
|
||||
int len = qMin(length, buffer_size / 2);
|
||||
int fracX = fx;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
int x1 = (fx >> 16);
|
||||
int x2;
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
|
||||
buf1[i * 2 + 0] = fetch1(s1, x1);
|
||||
buf1[i * 2 + 1] = fetch1(s1, x2);
|
||||
buf2[i * 2 + 0] = fetch1(s2, x1);
|
||||
buf2[i * 2 + 1] = fetch1(s2, x2);
|
||||
fx += fdx;
|
||||
}
|
||||
fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, 0);
|
||||
layout->convertToARGB32PM(buf1, buf1, len * 2, clut, 0);
|
||||
layout->convertToARGB32PM(buf2, buf2, len * 2, clut, 0);
|
||||
|
||||
if ((fdx < 0 && fdx > -(fixed_scale / 8)) || qAbs(data->m22) < qreal(1./8.)) { // scale up more than 8x
|
||||
int disty = (fy & 0x0000ffff) >> 8;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
int distx = (fracX & 0x0000ffff) >> 8;
|
||||
int distx = (fx & 0x0000ffff) >> 8;
|
||||
b[i] = interpolate_4_pixels(buf1 + i * 2, buf2 + i * 2, distx, disty);
|
||||
fracX += fdx;
|
||||
fx += fdx;
|
||||
}
|
||||
} else { //scale down
|
||||
} else {
|
||||
int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
uint tl = buf1[i * 2 + 0];
|
||||
uint tr = buf1[i * 2 + 1];
|
||||
uint bl = buf2[i * 2 + 0];
|
||||
uint br = buf2[i * 2 + 1];
|
||||
int distx = ((fracX & 0x0000ffff) + 0x0800) >> 12;
|
||||
int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
|
||||
b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
|
||||
fracX += fdx;
|
||||
fx += fdx;
|
||||
}
|
||||
}
|
||||
length -= len;
|
||||
b += len;
|
||||
}
|
||||
}
|
||||
} else { //rotation
|
||||
} else { // rotation or shear
|
||||
const BilinearFastTransformFetcher fetcher = fetchTransformedBilinear_fetcher<blendType,bpp>;
|
||||
|
||||
uint buf1[buffer_size];
|
||||
uint buf2[buffer_size];
|
||||
uint *b = buffer;
|
||||
|
||||
while (length) {
|
||||
int len = qMin(length, buffer_size / 2);
|
||||
int fracX = fx;
|
||||
int fracY = fy;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
int x1 = (fx >> 16);
|
||||
int x2;
|
||||
int y1 = (fy >> 16);
|
||||
int y2;
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
|
||||
|
||||
const uchar *s1 = data->texture.scanLine(y1);
|
||||
const uchar *s2 = data->texture.scanLine(y2);
|
||||
buf1[i * 2 + 0] = fetch1(s1, x1);
|
||||
buf1[i * 2 + 1] = fetch1(s1, x2);
|
||||
buf2[i * 2 + 0] = fetch1(s2, x1);
|
||||
buf2[i * 2 + 1] = fetch1(s2, x2);
|
||||
fx += fdx;
|
||||
fy += fdy;
|
||||
}
|
||||
fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
|
||||
layout->convertToARGB32PM(buf1, buf1, len * 2, clut, 0);
|
||||
layout->convertToARGB32PM(buf2, buf2, len * 2, clut, 0);
|
||||
|
||||
if (qAbs(data->m11) < qreal(1./8.) || qAbs(data->m22) < qreal(1./8.) ) {
|
||||
//if we are zooming more than 8 times, we use 8bit precision for the position.
|
||||
if (qAbs(data->m11) < qreal(1./8.)|| qAbs(data->m22) < qreal(1./8.)) {
|
||||
// If we are zooming more than 8 times, we use 8bit precision for the position.
|
||||
for (int i = 0; i < len; ++i) {
|
||||
int distx = (fracX & 0x0000ffff) >> 8;
|
||||
int disty = (fracY & 0x0000ffff) >> 8;
|
||||
int distx = (fx & 0x0000ffff) >> 8;
|
||||
int disty = (fy & 0x0000ffff) >> 8;
|
||||
|
||||
b[i] = interpolate_4_pixels(buf1 + i * 2, buf2 + i * 2, distx, disty);
|
||||
fracX += fdx;
|
||||
fracY += fdy;
|
||||
fx += fdx;
|
||||
fy += fdy;
|
||||
}
|
||||
} else {
|
||||
//we are zooming less than 8x, use 4bit precision
|
||||
// We are zooming less than 8x, use 4bit precision
|
||||
for (int i = 0; i < len; ++i) {
|
||||
uint tl = buf1[i * 2 + 0];
|
||||
uint tr = buf1[i * 2 + 1];
|
||||
uint bl = buf2[i * 2 + 0];
|
||||
uint br = buf2[i * 2 + 1];
|
||||
|
||||
int distx = ((fracX & 0x0000ffff) + 0x0800) >> 12;
|
||||
int disty = ((fracY & 0x0000ffff) + 0x0800) >> 12;
|
||||
int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
|
||||
int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
|
||||
|
||||
b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
|
||||
fracX += fdx;
|
||||
fracY += fdy;
|
||||
fx += fdx;
|
||||
fy += fdy;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2897,6 +2997,11 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// When templated 'fetch' should be inlined at compile time:
|
||||
const FetchPixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout->bpp] : fetchPixel<bpp>;
|
||||
|
||||
const QTextureData &image = data->texture;
|
||||
|
||||
const qreal fdx = data->m11;
|
||||
const qreal fdy = data->m12;
|
||||
const qreal fdw = data->m13;
|
||||
@ -2927,8 +3032,8 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
|
||||
distxs[i] = int((px - x1) * 256);
|
||||
distys[i] = int((py - y1) * 256);
|
||||
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
|
||||
|
||||
const uchar *s1 = data->texture.scanLine(y1);
|
||||
const uchar *s2 = data->texture.scanLine(y2);
|
||||
@ -2969,21 +3074,9 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
|
||||
const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
|
||||
const QVector<QRgb> *clut = data->texture.colorTable;
|
||||
|
||||
int image_width = data->texture.width;
|
||||
int image_height = data->texture.height;
|
||||
|
||||
int image_x1 = data->texture.x1;
|
||||
int image_y1 = data->texture.y1;
|
||||
int image_x2 = data->texture.x2 - 1;
|
||||
int image_y2 = data->texture.y2 - 1;
|
||||
|
||||
const qreal cx = x + qreal(0.5);
|
||||
const qreal cy = y + qreal(0.5);
|
||||
|
||||
const qreal fdx = data->m11;
|
||||
const qreal fdy = data->m12;
|
||||
const qreal fdw = data->m13;
|
||||
|
||||
if (data->fast_matrix) {
|
||||
// The increment pr x in the scanline
|
||||
int fdx = (int)(data->m11 * fixed_scale);
|
||||
@ -2995,14 +3088,13 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
|
||||
fx -= half_point;
|
||||
fy -= half_point;
|
||||
|
||||
if (fdy == 0) { //simple scale, no rotation
|
||||
int y1 = (fy >> 16);
|
||||
int y2;
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
|
||||
const uchar *s1 = data->texture.scanLine(y1);
|
||||
const uchar *s2 = data->texture.scanLine(y2);
|
||||
const BilinearFastTransformFetcher fetcher =
|
||||
(layout->bpp == QPixelLayout::BPP32)
|
||||
? fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP32>
|
||||
: fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPPNone>;
|
||||
|
||||
if (fdy == 0) { //simple scale, no rotation
|
||||
|
||||
FetchPixelFunc fetch = qFetchPixel[layout->bpp];
|
||||
uint sbuf1[buffer_size];
|
||||
uint sbuf2[buffer_size];
|
||||
quint64 buf1[buffer_size];
|
||||
@ -3010,84 +3102,19 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
|
||||
QRgba64 *b = buffer;
|
||||
while (length) {
|
||||
int len = qMin(length, buffer_size / 2);
|
||||
int fracX = fx;
|
||||
int i = 0;
|
||||
int disty = (fy & 0x0000ffff);
|
||||
#if defined(__SSE2__)
|
||||
const __m128i vdy = _mm_set1_epi16(disty);
|
||||
const __m128i vidy = _mm_set1_epi16(0x10000 - disty);
|
||||
if (blendType != BlendTransformedBilinearTiled && layout->bpp == QPixelLayout::BPP32) {
|
||||
for (; i < len; ++i) {
|
||||
int x1 = (fx >> 16);
|
||||
int x2;
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
|
||||
if (x1 != x2)
|
||||
break;
|
||||
sbuf1[i * 2 + 0] = sbuf1[i * 2 + 1] = ((const uint*)s1)[x1];
|
||||
sbuf2[i * 2 + 0] = sbuf2[i * 2 + 1] = ((const uint*)s2)[x1];
|
||||
fx += fdx;
|
||||
}
|
||||
int fastLen;
|
||||
if (fdx > 0)
|
||||
fastLen = qMin(len, int((image_x2 - (fx >> 16)) / data->m11));
|
||||
else
|
||||
fastLen = qMin(len, int((image_x1 - (fx >> 16)) / data->m11));
|
||||
fastLen -= 3;
|
||||
|
||||
const __m128i v_fdx = _mm_set1_epi32(fdx*4);
|
||||
__m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
|
||||
for (; i < fastLen; i += 4) {
|
||||
int offset = _mm_extract_epi16(v_fx, 1);
|
||||
sbuf1[i * 2 + 0] = ((const uint*)s1)[offset];
|
||||
sbuf1[i * 2 + 1] = ((const uint*)s1)[offset + 1];
|
||||
sbuf2[i * 2 + 0] = ((const uint*)s2)[offset];
|
||||
sbuf2[i * 2 + 1] = ((const uint*)s2)[offset + 1];
|
||||
offset = _mm_extract_epi16(v_fx, 3);
|
||||
sbuf1[i * 2 + 2] = ((const uint*)s1)[offset];
|
||||
sbuf1[i * 2 + 3] = ((const uint*)s1)[offset + 1];
|
||||
sbuf2[i * 2 + 2] = ((const uint*)s2)[offset];
|
||||
sbuf2[i * 2 + 3] = ((const uint*)s2)[offset + 1];
|
||||
offset = _mm_extract_epi16(v_fx, 5);
|
||||
sbuf1[i * 2 + 4] = ((const uint*)s1)[offset];
|
||||
sbuf1[i * 2 + 5] = ((const uint*)s1)[offset + 1];
|
||||
sbuf2[i * 2 + 4] = ((const uint*)s2)[offset];
|
||||
sbuf2[i * 2 + 5] = ((const uint*)s2)[offset + 1];
|
||||
offset = _mm_extract_epi16(v_fx, 7);
|
||||
sbuf1[i * 2 + 6] = ((const uint*)s1)[offset];
|
||||
sbuf1[i * 2 + 7] = ((const uint*)s1)[offset + 1];
|
||||
sbuf2[i * 2 + 6] = ((const uint*)s2)[offset];
|
||||
sbuf2[i * 2 + 7] = ((const uint*)s2)[offset + 1];
|
||||
v_fx = _mm_add_epi32(v_fx, v_fdx);
|
||||
}
|
||||
fx = _mm_cvtsi128_si32(v_fx);
|
||||
}
|
||||
#endif
|
||||
for (; i < len; ++i) {
|
||||
int x1 = (fx >> 16);
|
||||
int x2;
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
|
||||
fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
|
||||
|
||||
if (layout->bpp == QPixelLayout::BPP32) {
|
||||
sbuf1[i * 2 + 0] = ((const uint*)s1)[x1];
|
||||
sbuf1[i * 2 + 1] = ((const uint*)s1)[x2];
|
||||
sbuf2[i * 2 + 0] = ((const uint*)s2)[x1];
|
||||
sbuf2[i * 2 + 1] = ((const uint*)s2)[x2];
|
||||
|
||||
} else {
|
||||
sbuf1[i * 2 + 0] = fetch(s1, x1);
|
||||
sbuf1[i * 2 + 1] = fetch(s1, x2);
|
||||
sbuf2[i * 2 + 0] = fetch(s2, x1);
|
||||
sbuf2[i * 2 + 1] = fetch(s2, x2);
|
||||
}
|
||||
|
||||
fx += fdx;
|
||||
}
|
||||
layout->convertToARGB64PM((QRgba64 *)buf1, sbuf1, len * 2, clut, 0);
|
||||
if (disty)
|
||||
layout->convertToARGB64PM((QRgba64 *)buf2, sbuf2, len * 2, clut, 0);
|
||||
|
||||
for (int i = 0; i < len; ++i) {
|
||||
int distx = (fracX & 0x0000ffff);
|
||||
int distx = (fx & 0x0000ffff);
|
||||
#if defined(__SSE2__)
|
||||
const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
|
||||
const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0));
|
||||
@ -3104,13 +3131,12 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
|
||||
#else
|
||||
b[i] = interpolate_4_pixels_rgb64((QRgba64 *)buf1 + i*2, (QRgba64 *)buf2 + i*2, distx, disty);
|
||||
#endif
|
||||
fracX += fdx;
|
||||
fx += fdx;
|
||||
}
|
||||
length -= len;
|
||||
b += len;
|
||||
}
|
||||
} else { //rotation
|
||||
FetchPixelFunc fetch = qFetchPixel[layout->bpp];
|
||||
uint sbuf1[buffer_size];
|
||||
uint sbuf2[buffer_size];
|
||||
quint64 buf1[buffer_size];
|
||||
@ -3120,117 +3146,18 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
|
||||
|
||||
while (b < end) {
|
||||
int len = qMin(length, buffer_size / 2);
|
||||
int fracX = fx;
|
||||
int fracY = fy;
|
||||
int i = 0;
|
||||
#if defined(__SSE2__)
|
||||
if (blendType != BlendTransformedBilinearTiled && layout->bpp == QPixelLayout::BPP32) {
|
||||
for (; i < len; ++i) {
|
||||
int x1 = (fx >> 16);
|
||||
int x2;
|
||||
int y1 = (fy >> 16);
|
||||
int y2;
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
|
||||
if (x1 != x2 && y1 != y2)
|
||||
break;
|
||||
const uchar *s1 = data->texture.scanLine(y1);
|
||||
const uchar *s2 = data->texture.scanLine(y2);
|
||||
sbuf1[i * 2 + 0] = ((const uint*)s1)[x1];
|
||||
sbuf1[i * 2 + 1] = ((const uint*)s1)[x2];
|
||||
sbuf2[i * 2 + 0] = ((const uint*)s2)[x1];
|
||||
sbuf2[i * 2 + 1] = ((const uint*)s2)[x2];
|
||||
fx += fdx;
|
||||
fy += fdy;
|
||||
}
|
||||
int fastLen = len;
|
||||
if (fdx > 0)
|
||||
fastLen = qMin(fastLen, int((qint64(image_x2) * fixed_scale - fx) / fdx));
|
||||
else if (fdx < 0)
|
||||
fastLen = qMin(fastLen, int((qint64(image_x1) * fixed_scale - fx) / fdx));
|
||||
if (fdy > 0)
|
||||
fastLen = qMin(fastLen, int((qint64(image_y2) * fixed_scale - fy) / fdy));
|
||||
else if (fdy < 0)
|
||||
fastLen = qMin(fastLen, int((qint64(image_y1) * fixed_scale - fy) / fdy));
|
||||
fastLen -= 3;
|
||||
|
||||
const __m128i v_fdx = _mm_set1_epi32(fdx*4);
|
||||
const __m128i v_fdy = _mm_set1_epi32(fdy*4);
|
||||
__m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
|
||||
__m128i v_fy = _mm_setr_epi32(fy, fy + fdy, fy + fdy + fdy, fy + fdy + fdy + fdy);
|
||||
const int bytesPerLine = data->texture.bytesPerLine;
|
||||
const uchar *s1 = data->texture.imageData;
|
||||
const uchar *s2 = s1 + bytesPerLine;
|
||||
const __m128i vbpl = _mm_shufflelo_epi16(_mm_cvtsi32_si128(bytesPerLine/4), _MM_SHUFFLE(0, 0, 0, 0));
|
||||
for (; i < fastLen; i += 4) {
|
||||
const __m128i vy = _mm_packs_epi32(_mm_srai_epi32(v_fy, 16), _mm_setzero_si128());
|
||||
__m128i voffset = _mm_unpacklo_epi16(_mm_mullo_epi16(vy, vbpl), _mm_mulhi_epu16(vy, vbpl));
|
||||
voffset = _mm_add_epi32(voffset, _mm_srli_epi32(v_fx, 16));
|
||||
fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
|
||||
|
||||
int offset = _mm_cvtsi128_si32(voffset); voffset = _mm_srli_si128(voffset, 4);
|
||||
sbuf1[i * 2 + 0] = ((const uint*)s1)[offset];
|
||||
sbuf1[i * 2 + 1] = ((const uint*)s1)[offset + 1];
|
||||
sbuf2[i * 2 + 0] = ((const uint*)s2)[offset];
|
||||
sbuf2[i * 2 + 1] = ((const uint*)s2)[offset + 1];
|
||||
offset = _mm_cvtsi128_si32(voffset); voffset = _mm_srli_si128(voffset, 4);
|
||||
sbuf1[i * 2 + 2] = ((const uint*)s1)[offset];
|
||||
sbuf1[i * 2 + 3] = ((const uint*)s1)[offset + 1];
|
||||
sbuf2[i * 2 + 2] = ((const uint*)s2)[offset];
|
||||
sbuf2[i * 2 + 3] = ((const uint*)s2)[offset + 1];
|
||||
offset = _mm_cvtsi128_si32(voffset); voffset = _mm_srli_si128(voffset, 4);
|
||||
sbuf1[i * 2 + 4] = ((const uint*)s1)[offset];
|
||||
sbuf1[i * 2 + 5] = ((const uint*)s1)[offset + 1];
|
||||
sbuf2[i * 2 + 4] = ((const uint*)s2)[offset];
|
||||
sbuf2[i * 2 + 5] = ((const uint*)s2)[offset + 1];
|
||||
offset = _mm_cvtsi128_si32(voffset);
|
||||
sbuf1[i * 2 + 6] = ((const uint*)s1)[offset];
|
||||
sbuf1[i * 2 + 7] = ((const uint*)s1)[offset + 1];
|
||||
sbuf2[i * 2 + 6] = ((const uint*)s2)[offset];
|
||||
sbuf2[i * 2 + 7] = ((const uint*)s2)[offset + 1];
|
||||
|
||||
v_fx = _mm_add_epi32(v_fx, v_fdx);
|
||||
v_fy = _mm_add_epi32(v_fy, v_fdy);
|
||||
}
|
||||
fx = _mm_cvtsi128_si32(v_fx);
|
||||
fy = _mm_cvtsi128_si32(v_fy);
|
||||
}
|
||||
#endif
|
||||
for (; i < len; ++i) {
|
||||
int x1 = (fx >> 16);
|
||||
int x2;
|
||||
int y1 = (fy >> 16);
|
||||
int y2;
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
|
||||
|
||||
const uchar *s1 = data->texture.scanLine(y1);
|
||||
const uchar *s2 = data->texture.scanLine(y2);
|
||||
|
||||
if (layout->bpp == QPixelLayout::BPP32) {
|
||||
sbuf1[i * 2 + 0] = ((const uint*)s1)[x1];
|
||||
sbuf1[i * 2 + 1] = ((const uint*)s1)[x2];
|
||||
sbuf2[i * 2 + 0] = ((const uint*)s2)[x1];
|
||||
sbuf2[i * 2 + 1] = ((const uint*)s2)[x2];
|
||||
|
||||
} else {
|
||||
sbuf1[i * 2 + 0] = fetch(s1, x1);
|
||||
sbuf1[i * 2 + 1] = fetch(s1, x2);
|
||||
sbuf2[i * 2 + 0] = fetch(s2, x1);
|
||||
sbuf2[i * 2 + 1] = fetch(s2, x2);
|
||||
}
|
||||
|
||||
fx += fdx;
|
||||
fy += fdy;
|
||||
}
|
||||
layout->convertToARGB64PM((QRgba64 *)buf1, sbuf1, len * 2, clut, 0);
|
||||
layout->convertToARGB64PM((QRgba64 *)buf2, sbuf2, len * 2, clut, 0);
|
||||
|
||||
for (int i = 0; i < len; ++i) {
|
||||
int distx = (fracX & 0x0000ffff);
|
||||
int disty = (fracY & 0x0000ffff);
|
||||
int distx = (fx & 0x0000ffff);
|
||||
int disty = (fy & 0x0000ffff);
|
||||
b[i] = interpolate_4_pixels_rgb64((QRgba64 *)buf1 + i*2, (QRgba64 *)buf2 + i*2, distx, disty);
|
||||
fracX += fdx;
|
||||
fracY += fdy;
|
||||
fx += fdx;
|
||||
fy += fdy;
|
||||
}
|
||||
|
||||
length -= len;
|
||||
@ -3238,6 +3165,12 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const QTextureData &image = data->texture;
|
||||
|
||||
const qreal fdx = data->m11;
|
||||
const qreal fdy = data->m12;
|
||||
const qreal fdw = data->m13;
|
||||
|
||||
qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
|
||||
qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
|
||||
qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
|
||||
@ -3267,8 +3200,8 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
|
||||
distxs[i] = int((px - x1) * (1<<16));
|
||||
distys[i] = int((py - y1) * (1<<16));
|
||||
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
|
||||
|
||||
const uchar *s1 = data->texture.scanLine(y1);
|
||||
const uchar *s2 = data->texture.scanLine(y2);
|
||||
|
Loading…
Reference in New Issue
Block a user