ARM Skia NEON patches - 29 - Xfermode: SkFourByteInterp

Xfermode: add a NEON version of SkFourByteInterp

Brings a modest performance improvement on its own in
ProcXfermodes when aa is neither 0 nor 0xFF. Combined
with the 1-pixel NEON modeprocs, it yields up to a 35% speed
improvement in the aa case.

Signed-off-by: Kévin PETIT <kevin.petit@arm.com>

BUG=
R=djsollen@google.com, mtklein@google.com, reed@google.com

Author: kevin.petit.arm@gmail.com

Review URL: https://codereview.chromium.org/23724013

git-svn-id: http://skia.googlecode.com/svn/trunk@12448 2bbb7eff-a529-9590-31e7-b0007b416f81
This commit is contained in:
commit-bot@chromium.org 2013-12-02 22:40:56 +00:00
parent 36026de644
commit 46e266cdbe
2 changed files with 31 additions and 2 deletions

View File

@ -82,4 +82,33 @@ static inline uint8x8_t SkBlend32_neon8(uint8x8_t src, uint8x8_t dst, uint16x8_t
return vmovn_u16(vreinterpretq_u16_s16(dst_wide));
}
/**
 * NEON implementation of a per-byte interpolation between two 32-bit pixels.
 *
 * Every byte of the result is computed as
 *     dst + (((src - dst) * srcScale) >> 8)
 * using 16-bit lanes, then narrowed back to 8 bits.
 *
 * @param src       pixel whose contribution is weighted by srcScale
 * @param dst       pixel the result starts from
 * @param srcScale  weight applied to src; asserted to be <= 256
 * @return          the interpolated pixel, read from lane 0 of the result
 */
static inline SkPMColor SkFourByteInterp256_neon(SkPMColor src, SkPMColor dst,
                                                 unsigned srcScale) {
    SkASSERT(srcScale <= 256);

    // Replicate each pixel across a 64-bit vector and widen its bytes to
    // 16-bit lanes so the subtraction below can go negative.
    const int16x8_t src16 =
            vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(src))));
    const int16x8_t dst16 =
            vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(dst))));
    const int16x8_t scale16 = vdupq_n_s16(srcScale);

    // NOTE(review): the 16-bit product can exceed the int16 range
    // (e.g. 255 * 256). The result appears to rely on wrap-around, since only
    // the low byte of each lane survives the final narrowing -- confirm.
    int16x8_t delta = (src16 - dst16) * scale16;

    // Arithmetic shift: drops the 8 fractional bits while keeping the sign.
    delta = vshrq_n_s16(delta, 8);

    // dst + scaled difference, truncated back to one byte per lane.
    const uint8x8_t packed = vmovn_u16(vreinterpretq_u16_s16(dst16 + delta));

    // Both 32-bit lanes hold the same pixel; return the first one.
    return vget_lane_u32(vreinterpret_u32_u8(packed), 0);
}
/**
 * Interpolates between src and dst with a byte-sized weight.
 *
 * The weight is converted with SkAlpha255To256() and the work is delegated
 * to SkFourByteInterp256_neon(), which asserts that its scale is <= 256.
 *
 * @param src        pixel whose contribution is weighted by srcWeight
 * @param dst        pixel the result starts from
 * @param srcWeight  weight applied to src; asserted to be <= 255
 * @return           the interpolated pixel
 */
static inline SkPMColor SkFourByteInterp_neon(SkPMColor src, SkPMColor dst,
                                              U8CPU srcWeight) {
    SkASSERT(srcWeight <= 255);
    return SkFourByteInterp256_neon(src, dst, SkAlpha255To256(srcWeight));
}
#endif /* #ifndef SkColor_opts_neon_DEFINED */

View File

@ -632,7 +632,7 @@ void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[],
SkPMColor dstC = dst[i];
SkPMColor C = proc(src[i], dstC);
if (a != 0xFF) {
C = SkFourByteInterp(C, dstC, a);
C = SkFourByteInterp_neon(C, dstC, a);
}
dst[i] = C;
}
@ -700,7 +700,7 @@ void SkNEONProcCoeffXfermode::xfer16(uint16_t* SK_RESTRICT dst,
SkPMColor dstC = SkPixel16ToPixel32(dst[i]);
SkPMColor C = proc(src[i], dstC);
if (0xFF != a) {
C = SkFourByteInterp(C, dstC, a);
C = SkFourByteInterp_neon(C, dstC, a);
}
dst[i] = SkPixel32ToPixel16_ToU16(C);
}