ARM Skia NEON patches - 32 - Xfermode: 1-pixel NEON modeprocs
In some cases, it's easy to provide a NEON version of the 1-pixel modeprocs. Combined with https://codereview.chromium.org/23724013/ (merged) it allows up to 35% speed improvement on Xfermodes when aa is non-NULL. Signed-off-by: Kévin PETIT <kevin.petit@arm.com> BUG= R=djsollen@google.com, reed@google.com, mtklein@google.com, luisjoseromeroesclusa@hotmail.com Author: kevin.petit.arm@gmail.com Review URL: https://codereview.chromium.org/104883004 git-svn-id: http://skia.googlecode.com/svn/trunk@12525 2bbb7eff-a529-9590-31e7-b0007b416f81
This commit is contained in:
parent
726b97a3c4
commit
d611864e67
@ -1669,6 +1669,7 @@ void SkXfermode::Term() {
|
||||
|
||||
extern SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
|
||||
SkXfermode::Mode mode);
|
||||
extern SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode);
|
||||
|
||||
SkXfermode* SkXfermode::Create(Mode mode) {
|
||||
SkASSERT(SK_ARRAY_COUNT(gProcCoeffs) == kModeCount);
|
||||
@ -1690,7 +1691,13 @@ SkXfermode* SkXfermode::Create(Mode mode) {
|
||||
|
||||
SkXfermode* xfer = gCachedXfermodes[mode];
|
||||
if (NULL == xfer) {
|
||||
const ProcCoeff& rec = gProcCoeffs[mode];
|
||||
ProcCoeff rec = gProcCoeffs[mode];
|
||||
|
||||
SkXfermodeProc pp = SkPlatformXfermodeProcFactory(mode);
|
||||
|
||||
if (pp != NULL) {
|
||||
rec.fProc = pp;
|
||||
}
|
||||
|
||||
// check if we have a platform optim for that
|
||||
SkProcCoeffXfermode* xfm = SkPlatformXfermodeFactory(rec, mode);
|
||||
|
@ -5,12 +5,22 @@
|
||||
extern SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_neon(const ProcCoeff& rec,
|
||||
SkXfermode::Mode mode);
|
||||
|
||||
extern SkXfermodeProc SkPlatformXfermodeProcFactory_impl_neon(SkXfermode::Mode mode);
|
||||
|
||||
SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl(const ProcCoeff& rec,
|
||||
SkXfermode::Mode mode) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
SkXfermodeProc SkPlatformXfermodeProcFactory_impl(SkXfermode::Mode mode) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
|
||||
SkXfermode::Mode mode) {
|
||||
return SK_ARM_NEON_WRAP(SkPlatformXfermodeFactory_impl)(rec, mode);
|
||||
}
|
||||
|
||||
SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode) {
|
||||
return SK_ARM_NEON_WRAP(SkPlatformXfermodeProcFactory_impl)(mode);
|
||||
}
|
||||
|
@ -92,6 +92,133 @@ static inline uint8x8_t clamp_div255round_simd8_32(int32x4_t val1, int32x4_t val
|
||||
return ret;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// 1 pixel modeprocs
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// kSrcATop_Mode, //!< [Da, Sc * Da + (1 - Sa) * Dc]
|
||||
SkPMColor srcatop_modeproc_neon(SkPMColor src, SkPMColor dst) {
|
||||
unsigned sa = SkGetPackedA32(src);
|
||||
unsigned da = SkGetPackedA32(dst);
|
||||
unsigned isa = 255 - sa;
|
||||
|
||||
uint8x8_t vda, visa, vsrc, vdst;
|
||||
|
||||
vda = vdup_n_u8(da);
|
||||
visa = vdup_n_u8(isa);
|
||||
|
||||
uint16x8_t vsrc_wide, vdst_wide;
|
||||
vsrc_wide = vmull_u8(vda, vreinterpret_u8_u32(vdup_n_u32(src)));
|
||||
vdst_wide = vmull_u8(visa, vreinterpret_u8_u32(vdup_n_u32(dst)));
|
||||
|
||||
vsrc_wide += vdupq_n_u16(128);
|
||||
vsrc_wide += vshrq_n_u16(vsrc_wide, 8);
|
||||
|
||||
vdst_wide += vdupq_n_u16(128);
|
||||
vdst_wide += vshrq_n_u16(vdst_wide, 8);
|
||||
|
||||
vsrc = vshrn_n_u16(vsrc_wide, 8);
|
||||
vdst = vshrn_n_u16(vdst_wide, 8);
|
||||
|
||||
vsrc += vdst;
|
||||
vsrc = vset_lane_u8(da, vsrc, 3);
|
||||
|
||||
return vget_lane_u32(vreinterpret_u32_u8(vsrc), 0);
|
||||
}
|
||||
|
||||
// kDstATop_Mode, //!< [Sa, Sa * Dc + Sc * (1 - Da)]
|
||||
SkPMColor dstatop_modeproc_neon(SkPMColor src, SkPMColor dst) {
|
||||
unsigned sa = SkGetPackedA32(src);
|
||||
unsigned da = SkGetPackedA32(dst);
|
||||
unsigned ida = 255 - da;
|
||||
|
||||
uint8x8_t vsa, vida, vsrc, vdst;
|
||||
|
||||
vsa = vdup_n_u8(sa);
|
||||
vida = vdup_n_u8(ida);
|
||||
|
||||
uint16x8_t vsrc_wide, vdst_wide;
|
||||
vsrc_wide = vmull_u8(vida, vreinterpret_u8_u32(vdup_n_u32(src)));
|
||||
vdst_wide = vmull_u8(vsa, vreinterpret_u8_u32(vdup_n_u32(dst)));
|
||||
|
||||
vsrc_wide += vdupq_n_u16(128);
|
||||
vsrc_wide += vshrq_n_u16(vsrc_wide, 8);
|
||||
|
||||
vdst_wide += vdupq_n_u16(128);
|
||||
vdst_wide += vshrq_n_u16(vdst_wide, 8);
|
||||
|
||||
vsrc = vshrn_n_u16(vsrc_wide, 8);
|
||||
vdst = vshrn_n_u16(vdst_wide, 8);
|
||||
|
||||
vsrc += vdst;
|
||||
vsrc = vset_lane_u8(sa, vsrc, 3);
|
||||
|
||||
return vget_lane_u32(vreinterpret_u32_u8(vsrc), 0);
|
||||
}
|
||||
|
||||
// kXor_Mode [Sa + Da - 2 * Sa * Da, Sc * (1 - Da) + (1 - Sa) * Dc]
|
||||
SkPMColor xor_modeproc_neon(SkPMColor src, SkPMColor dst) {
|
||||
unsigned sa = SkGetPackedA32(src);
|
||||
unsigned da = SkGetPackedA32(dst);
|
||||
unsigned ret_alpha = sa + da - (SkAlphaMulAlpha(sa, da) << 1);
|
||||
unsigned isa = 255 - sa;
|
||||
unsigned ida = 255 - da;
|
||||
|
||||
uint8x8_t vsrc, vdst, visa, vida;
|
||||
uint16x8_t vsrc_wide, vdst_wide;
|
||||
|
||||
visa = vdup_n_u8(isa);
|
||||
vida = vdup_n_u8(ida);
|
||||
vsrc = vreinterpret_u8_u32(vdup_n_u32(src));
|
||||
vdst = vreinterpret_u8_u32(vdup_n_u32(dst));
|
||||
|
||||
vsrc_wide = vmull_u8(vsrc, vida);
|
||||
vdst_wide = vmull_u8(vdst, visa);
|
||||
|
||||
vsrc_wide += vdupq_n_u16(128);
|
||||
vsrc_wide += vshrq_n_u16(vsrc_wide, 8);
|
||||
|
||||
vdst_wide += vdupq_n_u16(128);
|
||||
vdst_wide += vshrq_n_u16(vdst_wide, 8);
|
||||
|
||||
vsrc = vshrn_n_u16(vsrc_wide, 8);
|
||||
vdst = vshrn_n_u16(vdst_wide, 8);
|
||||
|
||||
vsrc += vdst;
|
||||
|
||||
vsrc = vset_lane_u8(ret_alpha, vsrc, 3);
|
||||
|
||||
return vget_lane_u32(vreinterpret_u32_u8(vsrc), 0);
|
||||
}
|
||||
|
||||
// kPlus_Mode
|
||||
SkPMColor plus_modeproc_neon(SkPMColor src, SkPMColor dst) {
|
||||
uint8x8_t vsrc, vdst;
|
||||
vsrc = vreinterpret_u8_u32(vdup_n_u32(src));
|
||||
vdst = vreinterpret_u8_u32(vdup_n_u32(dst));
|
||||
vsrc = vqadd_u8(vsrc, vdst);
|
||||
|
||||
return vget_lane_u32(vreinterpret_u32_u8(vsrc), 0);
|
||||
}
|
||||
|
||||
// kModulate_Mode
|
||||
SkPMColor modulate_modeproc_neon(SkPMColor src, SkPMColor dst) {
|
||||
uint8x8_t vsrc, vdst, vres;
|
||||
uint16x8_t vres_wide;
|
||||
|
||||
vsrc = vreinterpret_u8_u32(vdup_n_u32(src));
|
||||
vdst = vreinterpret_u8_u32(vdup_n_u32(dst));
|
||||
|
||||
vres_wide = vmull_u8(vsrc, vdst);
|
||||
|
||||
vres_wide += vdupq_n_u16(128);
|
||||
vres_wide += vshrq_n_u16(vres_wide, 8);
|
||||
|
||||
vres = vshrn_n_u16(vres_wide, 8);
|
||||
|
||||
return vget_lane_u32(vreinterpret_u32_u8(vres), 0);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// 8 pixels modeprocs
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
@ -755,6 +882,45 @@ SK_COMPILE_ASSERT(
|
||||
mode_count_arm
|
||||
);
|
||||
|
||||
SkXfermodeProc gNEONXfermodeProcs1[] = {
|
||||
NULL, // kClear_Mode
|
||||
NULL, // kSrc_Mode
|
||||
NULL, // kDst_Mode
|
||||
NULL, // kSrcOver_Mode
|
||||
NULL, // kDstOver_Mode
|
||||
NULL, // kSrcIn_Mode
|
||||
NULL, // kDstIn_Mode
|
||||
NULL, // kSrcOut_Mode
|
||||
NULL, // kDstOut_Mode
|
||||
srcatop_modeproc_neon,
|
||||
dstatop_modeproc_neon,
|
||||
xor_modeproc_neon,
|
||||
plus_modeproc_neon,
|
||||
modulate_modeproc_neon,
|
||||
NULL, // kScreen_Mode
|
||||
|
||||
NULL, // kOverlay_Mode
|
||||
NULL, // kDarken_Mode
|
||||
NULL, // kLighten_Mode
|
||||
NULL, // kColorDodge_Mode
|
||||
NULL, // kColorBurn_Mode
|
||||
NULL, // kHardLight_Mode
|
||||
NULL, // kSoftLight_Mode
|
||||
NULL, // kDifference_Mode
|
||||
NULL, // kExclusion_Mode
|
||||
NULL, // kMultiply_Mode
|
||||
|
||||
NULL, // kHue_Mode
|
||||
NULL, // kSaturation_Mode
|
||||
NULL, // kColor_Mode
|
||||
NULL, // kLuminosity_Mode
|
||||
};
|
||||
|
||||
SK_COMPILE_ASSERT(
|
||||
SK_ARRAY_COUNT(gNEONXfermodeProcs1) == SkXfermode::kLastMode + 1,
|
||||
mode1_count_arm
|
||||
);
|
||||
|
||||
SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_neon(const ProcCoeff& rec,
|
||||
SkXfermode::Mode mode) {
|
||||
|
||||
@ -765,3 +931,7 @@ SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_neon(const ProcCoeff& rec,
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
SkXfermodeProc SkPlatformXfermodeProcFactory_impl_neon(SkXfermode::Mode mode) {
|
||||
return gNEONXfermodeProcs1[mode];
|
||||
}
|
||||
|
@ -26,4 +26,10 @@ private:
|
||||
typedef SkProcCoeffXfermode INHERITED;
|
||||
};
|
||||
|
||||
extern SkPMColor srcatop_modeproc_neon(SkPMColor src, SkPMColor dst);
|
||||
extern SkPMColor dstatop_modeproc_neon(SkPMColor src, SkPMColor dst);
|
||||
extern SkPMColor xor_modeproc_neon(SkPMColor src, SkPMColor dst);
|
||||
extern SkPMColor plus_modeproc_neon(SkPMColor src, SkPMColor dst);
|
||||
extern SkPMColor modulate_modeproc_neon(SkPMColor src, SkPMColor dst);
|
||||
|
||||
#endif //#ifdef SkXfermode_opts_arm_neon_DEFINED
|
||||
|
@ -1,11 +1,17 @@
|
||||
#include "SkXfermode.h"
|
||||
#include "SkXfermode_proccoeff.h"
|
||||
|
||||
// The prototype below is for Clang
|
||||
// The prototypes below are for Clang
|
||||
extern SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
|
||||
SkXfermode::Mode mode);
|
||||
|
||||
extern SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode);
|
||||
|
||||
SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
|
||||
SkXfermode::Mode mode) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode) {
|
||||
return NULL;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user