ARM Skia NEON patches - 29 - Xfermode: SkFourByteInterp
Xfermode: add a NEON version of SkFourByteInterp. On its own, this brings a modest performance improvement in ProcXfermodes when aa is neither zero nor 0xFF. Combined with the 1-pixel NEON modeprocs, it brings up to a 35% speed improvement in the aa case. Signed-off-by: Kévin PETIT <kevin.petit@arm.com> BUG= R=djsollen@google.com, mtklein@google.com, reed@google.com Author: kevin.petit.arm@gmail.com Review URL: https://codereview.chromium.org/23724013 git-svn-id: http://skia.googlecode.com/svn/trunk@12448 2bbb7eff-a529-9590-31e7-b0007b416f81
This commit is contained in:
parent
36026de644
commit
46e266cdbe
@ -82,4 +82,33 @@ static inline uint8x8_t SkBlend32_neon8(uint8x8_t src, uint8x8_t dst, uint16x8_t
|
||||
return vmovn_u16(vreinterpretq_u16_s16(dst_wide));
|
||||
}
|
||||
|
||||
/**
 * Interpolates between two packed 32-bit colors, one byte lane at a time.
 *
 * For every byte the result is the low 8 bits of
 *     dst + (((src - dst) * srcScale) >> 8)
 * evaluated in signed 16-bit lanes.
 *
 * @param src       packed color that is weighted by srcScale
 * @param dst       packed color that the scaled difference is added to
 * @param srcScale  weight applied to (src - dst); asserted to be <= 256
 * @return          the four interpolated bytes packed into one SkPMColor
 */
static inline SkPMColor SkFourByteInterp256_neon(SkPMColor src, SkPMColor dst,
                                                 unsigned srcScale) {
    SkASSERT(srcScale <= 256);

    // Broadcast each pixel into a 64-bit vector and widen its bytes to
    // signed 16-bit lanes so the per-byte difference can go negative.
    const int16x8_t src16 =
            vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(src))));
    const int16x8_t dst16 =
            vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(dst))));

    // (src - dst) * srcScale. The 16-bit product may wrap, but only the
    // low 8 bits of each lane survive the final narrowing.
    int16x8_t delta = vmulq_s16(vsubq_s16(src16, dst16), vdupq_n_s16(srcScale));

    // Arithmetic shift keeps the sign of the scaled difference.
    delta = vshrq_n_s16(delta, 8);

    // Add the scaled difference back onto dst and narrow to bytes.
    const uint8x8_t packed =
            vmovn_u16(vreinterpretq_u16_s16(vaddq_s16(dst16, delta)));

    // Both 32-bit lanes hold the same pixel; return the first one.
    return vget_lane_u32(vreinterpret_u32_u8(packed), 0);
}
|
||||
|
||||
/**
 * Interpolates between two packed 32-bit colors using a byte-sized weight.
 *
 * The weight is passed through SkAlpha255To256() (presumably rescaling it
 * to the 0..256 range that SkFourByteInterp256_neon() asserts -- confirm
 * against its definition) and the blend itself is delegated to
 * SkFourByteInterp256_neon().
 *
 * @param src        packed color that is weighted by srcWeight
 * @param dst        packed color being blended towards src
 * @param srcWeight  weight of src; asserted to be <= 255
 * @return           the interpolated packed color
 */
static inline SkPMColor SkFourByteInterp_neon(SkPMColor src, SkPMColor dst,
                                              U8CPU srcWeight) {
    SkASSERT(srcWeight <= 255);
    return SkFourByteInterp256_neon(src, dst, SkAlpha255To256(srcWeight));
}
|
||||
|
||||
#endif /* #ifndef SkColor_opts_neon_DEFINED */
|
||||
|
@ -632,7 +632,7 @@ void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[],
|
||||
SkPMColor dstC = dst[i];
|
||||
SkPMColor C = proc(src[i], dstC);
|
||||
if (a != 0xFF) {
|
||||
C = SkFourByteInterp(C, dstC, a);
|
||||
C = SkFourByteInterp_neon(C, dstC, a);
|
||||
}
|
||||
dst[i] = C;
|
||||
}
|
||||
@ -700,7 +700,7 @@ void SkNEONProcCoeffXfermode::xfer16(uint16_t* SK_RESTRICT dst,
|
||||
SkPMColor dstC = SkPixel16ToPixel32(dst[i]);
|
||||
SkPMColor C = proc(src[i], dstC);
|
||||
if (0xFF != a) {
|
||||
C = SkFourByteInterp(C, dstC, a);
|
||||
C = SkFourByteInterp_neon(C, dstC, a);
|
||||
}
|
||||
dst[i] = SkPixel32ToPixel16_ToU16(C);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user