Use integer (fixed-point) math instead of float math per pixel, for about a 2x speedup. This is not well tested, so treat it as experimental.
BUG=
Review URL: https://codereview.chromium.org/15715015
git-svn-id: http://skia.googlecode.com/svn/trunk@9317 2bbb7eff-a529-9590-31e7-b0007b416f81
This commit is contained in:
parent
a2be2cc798
commit
b74706f033
@ -15,25 +15,35 @@
|
|||||||
|
|
||||||
#define DS(x) SkDoubleToScalar(x)
|
#define DS(x) SkDoubleToScalar(x)
|
||||||
|
|
||||||
inline SkPMColor cubicBlend(const SkScalar c[16], SkScalar t, SkPMColor c0, SkPMColor c1, SkPMColor c2, SkPMColor c3) {
|
#define MUL(a, b) ((a) * (b))
|
||||||
SkScalar t2 = t * t, t3 = t2 * t;
|
|
||||||
SkScalar cc[4];
|
static inline SkPMColor cubicBlend(const SkFixed cc[4], SkPMColor c0, SkPMColor c1, SkPMColor c2, SkPMColor c3) {
|
||||||
cc[0] = c[0] + SkScalarMul(c[1], t) + SkScalarMul(c[2], t2) + SkScalarMul(c[3], t3);
|
SkFixed fa = MUL(cc[0], SkGetPackedA32(c0)) + MUL(cc[1], SkGetPackedA32(c1)) + MUL(cc[2], SkGetPackedA32(c2)) + MUL(cc[3], SkGetPackedA32(c3));
|
||||||
cc[1] = c[4] + SkScalarMul(c[5], t) + SkScalarMul(c[6], t2) + SkScalarMul(c[7], t3);
|
SkFixed fr = MUL(cc[0], SkGetPackedR32(c0)) + MUL(cc[1], SkGetPackedR32(c1)) + MUL(cc[2], SkGetPackedR32(c2)) + MUL(cc[3], SkGetPackedR32(c3));
|
||||||
cc[2] = c[8] + SkScalarMul(c[9], t) + SkScalarMul(c[10], t2) + SkScalarMul(c[11], t3);
|
SkFixed fg = MUL(cc[0], SkGetPackedG32(c0)) + MUL(cc[1], SkGetPackedG32(c1)) + MUL(cc[2], SkGetPackedG32(c2)) + MUL(cc[3], SkGetPackedG32(c3));
|
||||||
cc[3] = c[12] + SkScalarMul(c[13], t) + SkScalarMul(c[14], t2) + SkScalarMul(c[15], t3);
|
SkFixed fb = MUL(cc[0], SkGetPackedB32(c0)) + MUL(cc[1], SkGetPackedB32(c1)) + MUL(cc[2], SkGetPackedB32(c2)) + MUL(cc[3], SkGetPackedB32(c3));
|
||||||
SkScalar a = SkScalarClampMax(SkScalarMul(cc[0], SkGetPackedA32(c0)) + SkScalarMul(cc[1], SkGetPackedA32(c1)) + SkScalarMul(cc[2], SkGetPackedA32(c2)) + SkScalarMul(cc[3], SkGetPackedA32(c3)), 255);
|
|
||||||
SkScalar r = SkScalarMul(cc[0], SkGetPackedR32(c0)) + SkScalarMul(cc[1], SkGetPackedR32(c1)) + SkScalarMul(cc[2], SkGetPackedR32(c2)) + SkScalarMul(cc[3], SkGetPackedR32(c3));
|
int a = SkClampMax(SkFixedRoundToInt(fa), 255);
|
||||||
SkScalar g = SkScalarMul(cc[0], SkGetPackedG32(c0)) + SkScalarMul(cc[1], SkGetPackedG32(c1)) + SkScalarMul(cc[2], SkGetPackedG32(c2)) + SkScalarMul(cc[3], SkGetPackedG32(c3));
|
int r = SkClampMax(SkFixedRoundToInt(fr), a);
|
||||||
SkScalar b = SkScalarMul(cc[0], SkGetPackedB32(c0)) + SkScalarMul(cc[1], SkGetPackedB32(c1)) + SkScalarMul(cc[2], SkGetPackedB32(c2)) + SkScalarMul(cc[3], SkGetPackedB32(c3));
|
int g = SkClampMax(SkFixedRoundToInt(fg), a);
|
||||||
return SkPackARGB32(SkScalarRoundToInt(a),
|
int b = SkClampMax(SkFixedRoundToInt(fb), a);
|
||||||
SkScalarRoundToInt(SkScalarClampMax(r, a)),
|
|
||||||
SkScalarRoundToInt(SkScalarClampMax(g, a)),
|
return SkPackARGB32(a, r, g, b);
|
||||||
SkScalarRoundToInt(SkScalarClampMax(b, a)));
|
}
|
||||||
|
|
||||||
|
// Evaluates the cubic polynomial
//     cc[0] + cc[1]*t + cc[2]*t^2 + cc[3]*t^3
// at t and returns the result.
//
// cc holds the four coefficients in ascending order of power. The expression
// is written in nested form, so it needs three multiplies and three adds and
// never forms t^2 or t^3 explicitly.
static float poly_eval(const float cc[4], float t) {
    return cc[0] + t * (cc[1] + t * (cc[2] + t * cc[3]));
}
|
||||||
|
|
||||||
|
static void build_coeff4(SkFixed dst[4], const float src[16], float t) {
|
||||||
|
dst[0] = SkFloatToFixed(poly_eval(&src[ 0], t));
|
||||||
|
dst[1] = SkFloatToFixed(poly_eval(&src[ 4], t));
|
||||||
|
dst[2] = SkFloatToFixed(poly_eval(&src[ 8], t));
|
||||||
|
dst[3] = SkFloatToFixed(poly_eval(&src[12], t));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void bicubicFilter(const SkBitmapProcState& s, int x, int y,
|
static void bicubicFilter(const SkBitmapProcState& s, int x, int y,
|
||||||
SkPMColor colors[], int count) {
|
SkPMColor* SK_RESTRICT colors, int count) {
|
||||||
|
|
||||||
static const SkScalar coefficients[16] = {
|
static const SkScalar coefficients[16] = {
|
||||||
DS( 1.0 / 18.0), DS(-9.0 / 18.0), DS( 15.0 / 18.0), DS( -7.0 / 18.0),
|
DS( 1.0 / 18.0), DS(-9.0 / 18.0), DS( 15.0 / 18.0), DS( -7.0 / 18.0),
|
||||||
@ -42,7 +52,9 @@ static void bicubicFilter(const SkBitmapProcState& s, int x, int y,
|
|||||||
DS( 0.0 / 18.0), DS( 0.0 / 18.0), DS( -6.0 / 18.0), DS( 7.0 / 18.0),
|
DS( 0.0 / 18.0), DS( 0.0 / 18.0), DS( -6.0 / 18.0), DS( 7.0 / 18.0),
|
||||||
};
|
};
|
||||||
|
|
||||||
SkPMColor *dptr = &(colors[0]);
|
const int maxX = s.fBitmap->width() - 1;
|
||||||
|
const int maxY = s.fBitmap->height() - 1;
|
||||||
|
|
||||||
while (count-- > 0) {
|
while (count-- > 0) {
|
||||||
SkPoint srcPt;
|
SkPoint srcPt;
|
||||||
s.fInvProc(*s.fInvMatrix, SkIntToScalar(x),
|
s.fInvProc(*s.fInvMatrix, SkIntToScalar(x),
|
||||||
@ -51,37 +63,45 @@ static void bicubicFilter(const SkBitmapProcState& s, int x, int y,
|
|||||||
srcPt.fY -= SK_ScalarHalf;
|
srcPt.fY -= SK_ScalarHalf;
|
||||||
SkScalar fractx = srcPt.fX - SkScalarFloorToScalar(srcPt.fX);
|
SkScalar fractx = srcPt.fX - SkScalarFloorToScalar(srcPt.fX);
|
||||||
SkScalar fracty = srcPt.fY - SkScalarFloorToScalar(srcPt.fY);
|
SkScalar fracty = srcPt.fY - SkScalarFloorToScalar(srcPt.fY);
|
||||||
|
|
||||||
|
SkFixed coeffX[4], coeffY[4];
|
||||||
|
build_coeff4(coeffX, coefficients, fractx);
|
||||||
|
build_coeff4(coeffY, coefficients, fracty);
|
||||||
|
|
||||||
int sx = SkScalarFloorToInt(srcPt.fX);
|
int sx = SkScalarFloorToInt(srcPt.fX);
|
||||||
int sy = SkScalarFloorToInt(srcPt.fY);
|
int sy = SkScalarFloorToInt(srcPt.fY);
|
||||||
int x0 = SkClampMax(sx - 1, s.fBitmap->width() - 1);
|
|
||||||
int x1 = SkClampMax(sx , s.fBitmap->width() - 1);
|
// Here is where we can support other tile modes (e.g. repeat or mirror)
|
||||||
int x2 = SkClampMax(sx + 1, s.fBitmap->width() - 1);
|
int x0 = SkClampMax(sx - 1, maxX);
|
||||||
int x3 = SkClampMax(sx + 2, s.fBitmap->width() - 1);
|
int x1 = SkClampMax(sx , maxX);
|
||||||
int y0 = SkClampMax(sy - 1, s.fBitmap->height() - 1);
|
int x2 = SkClampMax(sx + 1, maxX);
|
||||||
int y1 = SkClampMax(sy , s.fBitmap->height() - 1);
|
int x3 = SkClampMax(sx + 2, maxX);
|
||||||
int y2 = SkClampMax(sy + 1, s.fBitmap->height() - 1);
|
int y0 = SkClampMax(sy - 1, maxY);
|
||||||
int y3 = SkClampMax(sy + 2, s.fBitmap->height() - 1);
|
int y1 = SkClampMax(sy , maxY);
|
||||||
|
int y2 = SkClampMax(sy + 1, maxY);
|
||||||
|
int y3 = SkClampMax(sy + 2, maxY);
|
||||||
|
|
||||||
SkPMColor s00 = *s.fBitmap->getAddr32(x0, y0);
|
SkPMColor s00 = *s.fBitmap->getAddr32(x0, y0);
|
||||||
SkPMColor s10 = *s.fBitmap->getAddr32(x1, y0);
|
SkPMColor s10 = *s.fBitmap->getAddr32(x1, y0);
|
||||||
SkPMColor s20 = *s.fBitmap->getAddr32(x2, y0);
|
SkPMColor s20 = *s.fBitmap->getAddr32(x2, y0);
|
||||||
SkPMColor s30 = *s.fBitmap->getAddr32(x3, y0);
|
SkPMColor s30 = *s.fBitmap->getAddr32(x3, y0);
|
||||||
SkPMColor s0 = cubicBlend(coefficients, fractx, s00, s10, s20, s30);
|
SkPMColor s0 = cubicBlend(coeffX, s00, s10, s20, s30);
|
||||||
SkPMColor s01 = *s.fBitmap->getAddr32(x0, y1);
|
SkPMColor s01 = *s.fBitmap->getAddr32(x0, y1);
|
||||||
SkPMColor s11 = *s.fBitmap->getAddr32(x1, y1);
|
SkPMColor s11 = *s.fBitmap->getAddr32(x1, y1);
|
||||||
SkPMColor s21 = *s.fBitmap->getAddr32(x2, y1);
|
SkPMColor s21 = *s.fBitmap->getAddr32(x2, y1);
|
||||||
SkPMColor s31 = *s.fBitmap->getAddr32(x3, y1);
|
SkPMColor s31 = *s.fBitmap->getAddr32(x3, y1);
|
||||||
SkPMColor s1 = cubicBlend(coefficients, fractx, s01, s11, s21, s31);
|
SkPMColor s1 = cubicBlend(coeffX, s01, s11, s21, s31);
|
||||||
SkPMColor s02 = *s.fBitmap->getAddr32(x0, y2);
|
SkPMColor s02 = *s.fBitmap->getAddr32(x0, y2);
|
||||||
SkPMColor s12 = *s.fBitmap->getAddr32(x1, y2);
|
SkPMColor s12 = *s.fBitmap->getAddr32(x1, y2);
|
||||||
SkPMColor s22 = *s.fBitmap->getAddr32(x2, y2);
|
SkPMColor s22 = *s.fBitmap->getAddr32(x2, y2);
|
||||||
SkPMColor s32 = *s.fBitmap->getAddr32(x3, y2);
|
SkPMColor s32 = *s.fBitmap->getAddr32(x3, y2);
|
||||||
SkPMColor s2 = cubicBlend(coefficients, fractx, s02, s12, s22, s32);
|
SkPMColor s2 = cubicBlend(coeffX, s02, s12, s22, s32);
|
||||||
SkPMColor s03 = *s.fBitmap->getAddr32(x0, y3);
|
SkPMColor s03 = *s.fBitmap->getAddr32(x0, y3);
|
||||||
SkPMColor s13 = *s.fBitmap->getAddr32(x1, y3);
|
SkPMColor s13 = *s.fBitmap->getAddr32(x1, y3);
|
||||||
SkPMColor s23 = *s.fBitmap->getAddr32(x2, y3);
|
SkPMColor s23 = *s.fBitmap->getAddr32(x2, y3);
|
||||||
SkPMColor s33 = *s.fBitmap->getAddr32(x3, y3);
|
SkPMColor s33 = *s.fBitmap->getAddr32(x3, y3);
|
||||||
SkPMColor s3 = cubicBlend(coefficients, fractx, s03, s13, s23, s33);
|
SkPMColor s3 = cubicBlend(coeffX, s03, s13, s23, s33);
|
||||||
*dptr++ = cubicBlend(coefficients, fracty, s0, s1, s2, s3);
|
*colors++ = cubicBlend(coeffY, s0, s1, s2, s3);
|
||||||
x++;
|
x++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user