From 1d8e198d75d57ec8f6dae0c1498df69cd6769cb6 Mon Sep 17 00:00:00 2001 From: Mike Klein Date: Fri, 13 Jan 2017 20:10:09 +0000 Subject: [PATCH] Revert "Optimize SkBlend by using NEON intrinsics" This reverts commit 7adde145d3913cfd67b90bf83a9ea54386a285a7. Reason for revert: may be breaking our Android One test bots. Original change's description: > Optimize SkBlend by using NEON intrinsics > > Use NEON intrinsics to check the alpha channel of the pixels. > > In some cases, it's about 14 times faster than the original implementation. > > $ ./bin/droid out/arm64_release/nanobench --samples 300 --nompd --match LinearSrcOver -v > neon_opt.log > $ ./bin/compare neon_opt.log clean.log > LinearSrcOver_yellow_rose.pngVSkOptsDefault 1.8ms -> 24.9ms 13.8x > LinearSrcOver_iconstrip.pngVSkOptsDefault 5.71ms -> 69.8ms 12.2x > LinearSrcOver_plane.pngVSkOptsDefault 1.45ms -> 11ms 7.62x > LinearSrcOver_baby_tux.pngVSkOptsDefault 1.88ms -> 9.96ms 5.29x > LinearSrcOver_mandrill_512.pngVSkOptsDefault 1.41ms -> 4.62ms 3.29x > LinearSrcOver_yellow_rose.pngVSkOptsTrivial 24.9ms -> 24.9ms 1x > LinearSrcOver_yellow_rose.pngVSkOptsNonSimdCore 2.17ms -> 2.18ms 1x > LinearSrcOver_plane.pngVSkOptsTrivial 11.1ms -> 11.1ms 1x > LinearSrcOver_plane.pngVSkOptsNonSimdCore 1.5ms -> 1.5ms 1x > LinearSrcOver_mandrill_512.pngVSkOptsNonSimdCore 2.39ms -> 2.39ms 1x > LinearSrcOver_iconstrip.pngVSkOptsNonSimdCore 6.43ms -> 6.43ms 1x > LinearSrcOver_baby_tux.pngVSkOptsBruteForce 22.3ms -> 22.3ms 1x > LinearSrcOver_yellow_rose.pngVSkOptsBruteForce 45.5ms -> 45.5ms 1x > LinearSrcOver_baby_tux.pngVSkOptsNonSimdCore 2.02ms -> 2.02ms 1x > LinearSrcOver_iconstrip.pngVSkOptsTrivial 69.7ms -> 69.7ms 1x > LinearSrcOver_baby_tux.pngVSkOptsTrivial 9.96ms -> 9.95ms 1x > LinearSrcOver_mandrill_512.pngVSkOptsBruteForce 99.3ms -> 99.2ms 1x > > BUG=skia: > > CQ_INCLUDE_TRYBOTS=skia.primary:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD > > Change-Id: Ia576365578d65b771440da65fdf41f090ccf0541 > Reviewed-on: 
https://skia-review.googlesource.com/6860 > Reviewed-by: Mike Klein > Commit-Queue: Mike Klein > TBR=mtklein@chromium.org,bsalomon@google.com,joel.liang@arm.com,reviews@skia.org BUG=skia: NOPRESUBMIT=true NOTREECHECKS=true NOTRY=true Change-Id: Ie40eb5a7c27807aaf396429a82a1a2dd328c2b5b Reviewed-on: https://skia-review.googlesource.com/7036 Commit-Queue: Mike Klein Reviewed-by: Mike Klein --- src/opts/SkBlend_opts.h | 85 ----------------------------------------- 1 file changed, 85 deletions(-) diff --git a/src/opts/SkBlend_opts.h b/src/opts/SkBlend_opts.h index e5c5b85410..1da4c4fb04 100644 --- a/src/opts/SkBlend_opts.h +++ b/src/opts/SkBlend_opts.h @@ -17,8 +17,6 @@ ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; an #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 #include <immintrin.h> -#elif defined(SK_ARM_HAS_NEON) - #include <arm_neon.h> #endif namespace SK_OPTS_NS { @@ -27,8 +25,6 @@ static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) { if (src >= 0xFF000000) { *dst = src; return; - } else if (src <= 0x00FFFFFF) { - return; } auto d = Sk4f_fromS32(*dst), s = Sk4f_fromS32( src); @@ -170,87 +166,6 @@ static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) { } } #endif -#elif defined(SK_ARM_HAS_NEON) - static inline uint32x4_t load(const uint32_t* p) { - return vld1q_u32(p); - } - - static inline void store(uint32_t* p, uint32x4_t v) { - vst1q_u32(p, v); - } - - static inline bool check_opaque_alphas(uint32x4_t pixels) { - uint64_t mask = - vget_lane_u64( - vreinterpret_u64_u16( - vmovn_u32( - vcltq_u32(pixels, vdupq_n_u32(0xFF000000)))), - 0); - return mask == 0; - } - - static inline bool check_transparent_alphas(uint32x4_t pixels) { - uint64_t mask = - vget_lane_u64( - vreinterpret_u64_u16( - vmovn_u32( - vcgtq_u32(pixels, vdupq_n_u32(0x00FFFFFF)))), - 0); - return mask == 0; - } - - static inline bool check_partial_alphas(uint32x4_t pixels) { - uint32x4_t opaque = vcltq_u32(pixels, vdupq_n_u32(0xFF000000)); - uint32x4_t 
transparent = vcgtq_u32(pixels, vdupq_n_u32(0x00FFFFFF)); - uint64_t mask = - vget_lane_u64( - vreinterpret_u64_u16( - vmovn_u32( - veorq_u32(opaque, transparent))), - 0); - return mask == 0; - } - - static void srcover_srgb_srgb( - uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc) { - while (ndst > 0) { - int count = SkTMin(ndst, nsrc); - ndst -= count; - const uint32_t* src = srcStart; - const uint32_t* end = dst + (count & ~3); - const ptrdiff_t delta = src - dst; - - uint32x4_t pixels = load(src); - do { - if (check_opaque_alphas(pixels)) { - uint32_t* start = dst; - do { - store(dst, pixels); - dst += 4; - } while (dst < end && check_opaque_alphas((pixels = load(dst + delta)))); - src += dst - start; - } else if (check_transparent_alphas(pixels)) { - const uint32_t* start = dst; - do { - dst += 4; - } while (dst < end && check_transparent_alphas(pixels = load(dst + delta))); - src += dst - start; - } else { - const uint32_t* start = dst; - do { - srcover_srgb_srgb_4(dst, dst + delta); - dst += 4; - } while (dst < end && check_partial_alphas(pixels = load(dst + delta))); - src += dst - start; - } - } while (dst < end); - - count = count & 3; - while (count-- > 0) { - srcover_srgb_srgb_1(dst++, *src++); - } - } - } #else static void srcover_srgb_srgb(