Prepared for SIMD implementation of radial gradients.

Made the radial gradient fetch func into a template to be able to
optimize the inner loop using SIMD instructions.

Reviewed-by: Benjamin Poulain
(cherry picked from commit f16c261348193b4c03f796db4e1e3a5db09267a2)
This commit is contained in:
Samuel Rødal 2011-04-13 09:47:56 +02:00 committed by Olivier Goffart
parent b2c43d5a84
commit 79d238abdc
2 changed files with 174 additions and 177 deletions

View File

@ -75,44 +75,10 @@ enum {
fixed_scale = 1 << 16,
half_point = 1 << 15
};
// must be multiple of 4 for easier SIMD implementations
static const int buffer_size = 2048;
struct LinearGradientValues
{
qreal dx;
qreal dy;
qreal l;
qreal off;
};
struct RadialGradientValues
{
qreal dx;
qreal dy;
qreal a;
};
struct Operator;
typedef uint* (QT_FASTCALL *DestFetchProc)(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length);
typedef void (QT_FASTCALL *DestStoreProc)(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length);
typedef const uint* (QT_FASTCALL *SourceFetchProc)(uint *buffer, const Operator *o, const QSpanData *data, int y, int x, int length);
struct Operator
{
QPainter::CompositionMode mode;
DestFetchProc dest_fetch;
DestStoreProc dest_store;
SourceFetchProc src_fetch;
CompositionFunctionSolid funcSolid;
CompositionFunction func;
union {
LinearGradientValues linear;
RadialGradientValues radial;
// TextureValues texture;
};
};
/*
Destination fetch. This is simple as we don't have to do bounds checks or
transformations
@ -1346,64 +1312,13 @@ static const SourceFetchProc sourceFetch[NBlendTypes][QImage::NImageFormats] = {
},
};
static inline uint qt_gradient_pixel(const QGradientData *data, qreal pos)
{
int ipos = int(pos * (GRADIENT_STOPTABLE_SIZE - 1) + qreal(0.5));
// calculate the actual offset.
if (ipos < 0 || ipos >= GRADIENT_STOPTABLE_SIZE) {
if (data->spread == QGradient::RepeatSpread) {
ipos = ipos % GRADIENT_STOPTABLE_SIZE;
ipos = ipos < 0 ? GRADIENT_STOPTABLE_SIZE + ipos : ipos;
} else if (data->spread == QGradient::ReflectSpread) {
const int limit = GRADIENT_STOPTABLE_SIZE * 2 - 1;
ipos = ipos % limit;
ipos = ipos < 0 ? limit + ipos : ipos;
ipos = ipos >= GRADIENT_STOPTABLE_SIZE ? limit - ipos : ipos;
} else {
if (ipos < 0) ipos = 0;
else if (ipos >= GRADIENT_STOPTABLE_SIZE) ipos = GRADIENT_STOPTABLE_SIZE-1;
}
}
Q_ASSERT(ipos >= 0);
Q_ASSERT(ipos < GRADIENT_STOPTABLE_SIZE);
return data->colorTable[ipos];
}
#define FIXPT_BITS 8
#define FIXPT_SIZE (1<<FIXPT_BITS)
static uint qt_gradient_pixel_fixed(const QGradientData *data, int fixed_pos)
{
int ipos = (fixed_pos + (FIXPT_SIZE / 2)) >> FIXPT_BITS;
// calculate the actual offset.
if (ipos < 0 || ipos >= GRADIENT_STOPTABLE_SIZE) {
if (data->spread == QGradient::RepeatSpread) {
ipos = ipos % GRADIENT_STOPTABLE_SIZE;
ipos = ipos < 0 ? GRADIENT_STOPTABLE_SIZE + ipos : ipos;
} else if (data->spread == QGradient::ReflectSpread) {
const int limit = GRADIENT_STOPTABLE_SIZE * 2 - 1;
ipos = ipos % limit;
ipos = ipos < 0 ? limit + ipos : ipos;
ipos = ipos >= GRADIENT_STOPTABLE_SIZE ? limit - ipos : ipos;
} else {
if (ipos < 0) ipos = 0;
else if (ipos >= GRADIENT_STOPTABLE_SIZE) ipos = GRADIENT_STOPTABLE_SIZE-1;
}
}
Q_ASSERT(ipos >= 0);
Q_ASSERT(ipos < GRADIENT_STOPTABLE_SIZE);
return data->colorTable[ipos];
return data->colorTable[qt_gradient_clamp(data, ipos)];
}
static void QT_FASTCALL getLinearGradientValues(LinearGradientValues *v, const QSpanData *data)
@ -1419,8 +1334,8 @@ static void QT_FASTCALL getLinearGradientValues(LinearGradientValues *v, const Q
}
}
static const uint * QT_FASTCALL fetchLinearGradient(uint *buffer, const Operator *op, const QSpanData *data,
int y, int x, int length)
static const uint * QT_FASTCALL qt_fetch_linear_gradient(uint *buffer, const Operator *op, const QSpanData *data,
int y, int x, int length)
{
const uint *b = buffer;
qreal t, inc;
@ -1487,22 +1402,6 @@ static const uint * QT_FASTCALL fetchLinearGradient(uint *buffer, const Operator
return b;
}
static inline qreal determinant(qreal a, qreal b, qreal c)
{
return (b * b) - (4 * a * c);
}
// function to evaluate real roots
static inline qreal realRoots(qreal a, qreal b, qreal detSqrt)
{
return (-b + detSqrt)/(2 * a);
}
static inline qreal qSafeSqrt(qreal x)
{
return x > 0 ? qSqrt(x) : 0;
}
static void QT_FASTCALL getRadialGradientValues(RadialGradientValues *v, const QSpanData *data)
{
v->dx = data->gradient.radial.center.x - data->gradient.radial.focal.x;
@ -1510,87 +1409,34 @@ static void QT_FASTCALL getRadialGradientValues(RadialGradientValues *v, const Q
v->a = data->gradient.radial.radius*data->gradient.radial.radius - v->dx*v->dx - v->dy*v->dy;
}
static const uint * QT_FASTCALL fetchRadialGradient(uint *buffer, const Operator *op, const QSpanData *data,
int y, int x, int length)
class RadialFetchPlain
{
const uint *b = buffer;
qreal rx = data->m21 * (y + qreal(0.5))
+ data->dx + data->m11 * (x + qreal(0.5));
qreal ry = data->m22 * (y + qreal(0.5))
+ data->dy + data->m12 * (x + qreal(0.5));
bool affine = !data->m13 && !data->m23;
//qreal r = data->gradient.radial.radius;
const uint *end = buffer + length;
if (affine) {
rx -= data->gradient.radial.focal.x;
ry -= data->gradient.radial.focal.y;
qreal inv_a = 1 / qreal(2 * op->radial.a);
const qreal delta_rx = data->m11;
const qreal delta_ry = data->m12;
qreal b = 2*(rx * op->radial.dx + ry * op->radial.dy);
qreal delta_b = 2*(delta_rx * op->radial.dx + delta_ry * op->radial.dy);
const qreal b_delta_b = 2 * b * delta_b;
const qreal delta_b_delta_b = 2 * delta_b * delta_b;
const qreal bb = b * b;
const qreal delta_bb = delta_b * delta_b;
b *= inv_a;
delta_b *= inv_a;
const qreal rxrxryry = rx * rx + ry * ry;
const qreal delta_rxrxryry = delta_rx * delta_rx + delta_ry * delta_ry;
const qreal rx_plus_ry = 2*(rx * delta_rx + ry * delta_ry);
const qreal delta_rx_plus_ry = 2 * delta_rxrxryry;
inv_a *= inv_a;
qreal det = (bb + 4 * op->radial.a * rxrxryry) * inv_a;
qreal delta_det = (b_delta_b + delta_bb + 4 * op->radial.a * (rx_plus_ry + delta_rxrxryry)) * inv_a;
const qreal delta_delta_det = (delta_b_delta_b + 4 * op->radial.a * delta_rx_plus_ry) * inv_a;
public:
static inline void fetch(uint *buffer, uint *end, const QSpanData *data, qreal det, qreal delta_det,
qreal delta_delta_det, qreal b, qreal delta_b)
{
while (buffer < end) {
*buffer = qt_gradient_pixel(&data->gradient, qSafeSqrt(det) - b);
*buffer = qt_gradient_pixel(&data->gradient, (det > 0 ? qSqrt(det) : 0) - b);
det += delta_det;
delta_det += delta_delta_det;
b += delta_b;
++buffer;
}
} else {
qreal rw = data->m23 * (y + qreal(0.5))
+ data->m33 + data->m13 * (x + qreal(0.5));
if (!rw)
rw = 1;
while (buffer < end) {
qreal gx = rx/rw - data->gradient.radial.focal.x;
qreal gy = ry/rw - data->gradient.radial.focal.y;
qreal b = 2*(gx*op->radial.dx + gy*op->radial.dy);
qreal det = determinant(op->radial.a, b , -(gx*gx + gy*gy));
qreal s = realRoots(op->radial.a, b, qSafeSqrt(det));
*buffer = qt_gradient_pixel(&data->gradient, s);
rx += data->m11;
ry += data->m12;
rw += data->m13;
if (!rw) {
rw += data->m13;
}
++buffer;
}
}
};
return b;
const uint * QT_FASTCALL qt_fetch_radial_gradient_plain(uint *buffer, const Operator *op, const QSpanData *data,
int y, int x, int length)
{
return qt_fetch_radial_gradient_template<RadialFetchPlain>(buffer, op, data, y, x, length);
}
static const uint * QT_FASTCALL fetchConicalGradient(uint *buffer, const Operator *, const QSpanData *data,
int y, int x, int length)
static SourceFetchProc qt_fetch_radial_gradient = qt_fetch_radial_gradient_plain;
static const uint * QT_FASTCALL qt_fetch_conical_gradient(uint *buffer, const Operator *, const QSpanData *data,
int y, int x, int length)
{
const uint *b = buffer;
qreal rx = data->m21 * (y + qreal(0.5))
@ -3347,16 +3193,16 @@ static inline Operator getOperator(const QSpanData *data, const QSpan *spans, in
case QSpanData::LinearGradient:
solidSource = !data->gradient.alphaColor;
getLinearGradientValues(&op.linear, data);
op.src_fetch = fetchLinearGradient;
op.src_fetch = qt_fetch_linear_gradient;
break;
case QSpanData::RadialGradient:
solidSource = !data->gradient.alphaColor;
getRadialGradientValues(&op.radial, data);
op.src_fetch = fetchRadialGradient;
op.src_fetch = qt_fetch_radial_gradient;
break;
case QSpanData::ConicalGradient:
solidSource = !data->gradient.alphaColor;
op.src_fetch = fetchConicalGradient;
op.src_fetch = qt_fetch_conical_gradient;
break;
case QSpanData::Texture:
op.src_fetch = sourceFetch[getBlendType(data)][data->texture.format];

View File

@ -63,6 +63,7 @@
#endif
#include "private/qrasterdefs_p.h"
#include <private/qsimd_p.h>
#include <private/qmath_p.h>
#ifdef Q_WS_QWS
#include "QtGui/qscreen_qws.h"
@ -178,6 +179,40 @@ void qBlendTextureCallback(int count, const QSpan *spans, void *userData);
typedef void (QT_FASTCALL *CompositionFunction)(uint *dest, const uint *src, int length, uint const_alpha);
typedef void (QT_FASTCALL *CompositionFunctionSolid)(uint *dest, int length, uint color, uint const_alpha);
struct LinearGradientValues
{
qreal dx;
qreal dy;
qreal l;
qreal off;
};
struct RadialGradientValues
{
qreal dx;
qreal dy;
qreal a;
};
struct Operator;
typedef uint* (QT_FASTCALL *DestFetchProc)(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length);
typedef void (QT_FASTCALL *DestStoreProc)(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length);
typedef const uint* (QT_FASTCALL *SourceFetchProc)(uint *buffer, const Operator *o, const QSpanData *data, int y, int x, int length);
struct Operator
{
QPainter::CompositionMode mode;
DestFetchProc dest_fetch;
DestStoreProc dest_store;
SourceFetchProc src_fetch;
CompositionFunctionSolid funcSolid;
CompositionFunction func;
union {
LinearGradientValues linear;
RadialGradientValues radial;
};
};
void qInitDrawhelperAsm();
class QRasterPaintEngine;
@ -308,6 +343,122 @@ struct QSpanData
void adjustSpanMethods();
};
static inline uint qt_gradient_clamp(const QGradientData *data, int ipos)
{
if (ipos < 0 || ipos >= GRADIENT_STOPTABLE_SIZE) {
if (data->spread == QGradient::RepeatSpread) {
ipos = ipos % GRADIENT_STOPTABLE_SIZE;
ipos = ipos < 0 ? GRADIENT_STOPTABLE_SIZE + ipos : ipos;
} else if (data->spread == QGradient::ReflectSpread) {
const int limit = GRADIENT_STOPTABLE_SIZE * 2 - 1;
ipos = ipos % limit;
ipos = ipos < 0 ? limit + ipos : ipos;
ipos = ipos >= GRADIENT_STOPTABLE_SIZE ? limit - ipos : ipos;
} else {
if (ipos < 0)
ipos = 0;
else if (ipos >= GRADIENT_STOPTABLE_SIZE)
ipos = GRADIENT_STOPTABLE_SIZE-1;
}
}
Q_ASSERT(ipos >= 0);
Q_ASSERT(ipos < GRADIENT_STOPTABLE_SIZE);
return ipos;
}
static inline uint qt_gradient_pixel(const QGradientData *data, qreal pos)
{
int ipos = int(pos * (GRADIENT_STOPTABLE_SIZE - 1) + qreal(0.5));
return data->colorTable[qt_gradient_clamp(data, ipos)];
}
static inline qreal qRadialDeterminant(qreal a, qreal b, qreal c)
{
return (b * b) - (4 * a * c);
}
// function to evaluate real roots
static inline qreal qRadialRealRoots(qreal a, qreal b, qreal detSqrt)
{
return (-b + detSqrt)/(2 * a);
}
template <class RadialFetchFunc>
const uint * QT_FASTCALL qt_fetch_radial_gradient_template(uint *buffer, const Operator *op, const QSpanData *data,
int y, int x, int length)
{
const uint *b = buffer;
qreal rx = data->m21 * (y + qreal(0.5))
+ data->dx + data->m11 * (x + qreal(0.5));
qreal ry = data->m22 * (y + qreal(0.5))
+ data->dy + data->m12 * (x + qreal(0.5));
bool affine = !data->m13 && !data->m23;
uint *end = buffer + length;
if (affine) {
rx -= data->gradient.radial.focal.x;
ry -= data->gradient.radial.focal.y;
qreal inv_a = 1 / qreal(2 * op->radial.a);
const qreal delta_rx = data->m11;
const qreal delta_ry = data->m12;
qreal b = 2*(rx * op->radial.dx + ry * op->radial.dy);
qreal delta_b = 2*(delta_rx * op->radial.dx + delta_ry * op->radial.dy);
const qreal b_delta_b = 2 * b * delta_b;
const qreal delta_b_delta_b = 2 * delta_b * delta_b;
const qreal bb = b * b;
const qreal delta_bb = delta_b * delta_b;
b *= inv_a;
delta_b *= inv_a;
const qreal rxrxryry = rx * rx + ry * ry;
const qreal delta_rxrxryry = delta_rx * delta_rx + delta_ry * delta_ry;
const qreal rx_plus_ry = 2*(rx * delta_rx + ry * delta_ry);
const qreal delta_rx_plus_ry = 2 * delta_rxrxryry;
inv_a *= inv_a;
qreal det = (bb + 4 * op->radial.a * rxrxryry) * inv_a;
qreal delta_det = (b_delta_b + delta_bb + 4 * op->radial.a * (rx_plus_ry + delta_rxrxryry)) * inv_a;
const qreal delta_delta_det = (delta_b_delta_b + 4 * op->radial.a * delta_rx_plus_ry) * inv_a;
RadialFetchFunc::fetch(buffer, end, data, det, delta_det, delta_delta_det, b, delta_b);
} else {
qreal rw = data->m23 * (y + qreal(0.5))
+ data->m33 + data->m13 * (x + qreal(0.5));
if (!rw)
rw = 1;
while (buffer < end) {
qreal gx = rx/rw - data->gradient.radial.focal.x;
qreal gy = ry/rw - data->gradient.radial.focal.y;
qreal b = 2*(gx*op->radial.dx + gy*op->radial.dy);
qreal det = qRadialDeterminant(op->radial.a, b , -(gx*gx + gy*gy));
qreal s = qRadialRealRoots(op->radial.a, b, (det > 0 ? qSqrt(det) : 0));
*buffer = qt_gradient_pixel(&data->gradient, s);
rx += data->m11;
ry += data->m12;
rw += data->m13;
if (!rw) {
rw += data->m13;
}
++buffer;
}
}
return b;
}
#if defined(Q_CC_RVCT)
# pragma push
# pragma arm