diff --git a/src/opts/SkColor_opts_neon.h b/src/opts/SkColor_opts_neon.h
index 85752f5558..a3430b5c51 100644
--- a/src/opts/SkColor_opts_neon.h
+++ b/src/opts/SkColor_opts_neon.h
@@ -82,4 +82,41 @@ static inline uint8x8_t SkBlend32_neon8(uint8x8_t src, uint8x8_t dst, uint16x8_t
     return vmovn_u16(vreinterpretq_u16_s16(dst_wide));
 }
 
+/**
+ *  Blend two packed 32-bit colors byte by byte:
+ *      result = dst + (((src - dst) * srcScale) >> 8)
+ *  srcScale is asserted to be in [0, 256].
+ */
+static inline SkPMColor SkFourByteInterp256_neon(SkPMColor src, SkPMColor dst,
+                                                 unsigned srcScale) {
+    SkASSERT(srcScale <= 256);
+
+    // Splat each color into a 64-bit vector and widen its bytes to signed
+    // 16-bit lanes so the per-byte difference can go negative.
+    const int16x8_t srcLanes =
+            vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(src))));
+    const int16x8_t dstLanes =
+            vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(dst))));
+
+    // ((src - dst) * srcScale) >> 8, per lane. The 16-bit product may wrap, but
+    // only the low 8 bits of each lane survive the narrowing below, and those
+    // are unaffected by the wrap.
+    const int16x8_t scaleLanes = vdupq_n_s16(srcScale);
+    const int16x8_t delta = vshrq_n_s16((srcLanes - dstLanes) * scaleLanes, 8);
+
+    // Narrow back to bytes; both 32-bit halves hold the same pixel, take lane 0.
+    const uint8x8_t packed = vmovn_u16(vreinterpretq_u16_s16(dstLanes + delta));
+    return vget_lane_u32(vreinterpret_u32_u8(packed), 0);
+}
+
+/**
+ *  Same blend as SkFourByteInterp256_neon, but takes an 8-bit weight
+ *  (asserted <= 255) and converts it with SkAlpha255To256 first.
+ */
+static inline SkPMColor SkFourByteInterp_neon(SkPMColor src, SkPMColor dst,
+                                              U8CPU srcWeight) {
+    SkASSERT(srcWeight <= 255);
+    return SkFourByteInterp256_neon(src, dst, SkAlpha255To256(srcWeight));
+}
+
 #endif /* #ifndef SkColor_opts_neon_DEFINED */
diff --git a/src/opts/SkXfermode_opts_arm_neon.cpp b/src/opts/SkXfermode_opts_arm_neon.cpp
index b8d8ef521b..7435dd44de 100644
--- a/src/opts/SkXfermode_opts_arm_neon.cpp
+++ b/src/opts/SkXfermode_opts_arm_neon.cpp
@@ -632,7 +632,7 @@ void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[],
                 SkPMColor dstC = dst[i];
                 SkPMColor C = proc(src[i], dstC);
                 if (a != 0xFF) {
-                    C = SkFourByteInterp(C, dstC, a);
+                    C = SkFourByteInterp_neon(C, dstC, a);
                 }
                 dst[i] = C;
             }
@@ -700,7 +700,7 @@ void SkNEONProcCoeffXfermode::xfer16(uint16_t* SK_RESTRICT dst,
                 SkPMColor dstC = SkPixel16ToPixel32(dst[i]);
                 SkPMColor C = proc(src[i], dstC);
                 if (0xFF != a) {
-                    C = SkFourByteInterp(C, dstC, a);
+                    C = SkFourByteInterp_neon(C, dstC, a);
                 }
                 dst[i] = SkPixel32ToPixel16_ToU16(C);
             }