From 2eff71c9b5f984b58961e5a6b4e66774c4385224 Mon Sep 17 00:00:00 2001
From: msarett
Date: Tue, 2 Feb 2016 12:59:45 -0800
Subject: [PATCH] NEON optimizations for gray -> RGBA (or BGRA) conversions

Swizzle Bench Runtime
Nexus 6P 0.32x
Nexus 9  0.89x

PNG Decode Time (for test set of gray encoded PNGs)
Nexus 6P 0.88x
Nexus 9  0.91x

BUG=skia:4767
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1656383002
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Review URL: https://codereview.chromium.org/1656383002
---
 bench/SwizzleBench.cpp     |  1 +
 src/codec/SkSwizzler.cpp   | 14 +++++++++
 src/core/SkOpts.cpp        |  1 +
 src/core/SkOpts.h          |  3 +-
 src/opts/SkOpts_neon.cpp   |  1 +
 src/opts/SkOpts_ssse3.cpp  |  1 +
 src/opts/SkSwizzler_opts.h | 59 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/bench/SwizzleBench.cpp b/bench/SwizzleBench.cpp
index c78f2c977d..0f85b59481 100644
--- a/bench/SwizzleBench.cpp
+++ b/bench/SwizzleBench.cpp
@@ -32,3 +32,4 @@ DEF_BENCH(return new SwizzleBench("SkOpts::RGBA_to_bgrA", SkOpts::RGBA_to_bgrA))
 DEF_BENCH(return new SwizzleBench("SkOpts::RGBA_to_BGRA", SkOpts::RGBA_to_BGRA));
 DEF_BENCH(return new SwizzleBench("SkOpts::RGB_to_RGB1", SkOpts::RGB_to_RGB1));
 DEF_BENCH(return new SwizzleBench("SkOpts::RGB_to_BGR1", SkOpts::RGB_to_BGR1));
+DEF_BENCH(return new SwizzleBench("SkOpts::gray_to_RGB1", SkOpts::gray_to_RGB1));
diff --git a/src/codec/SkSwizzler.cpp b/src/codec/SkSwizzler.cpp
index 7865184cce..fa93a6e928 100644
--- a/src/codec/SkSwizzler.cpp
+++ b/src/codec/SkSwizzler.cpp
@@ -270,6 +270,19 @@ static void swizzle_gray_to_n32(
     }
 }
 
+static void fast_swizzle_gray_to_n32(
+        void* dst, const uint8_t* src, int width, int bpp, int deltaSrc, int offset,
+        const SkPMColor ctable[]) {
+
+    // This function must not be called if we are sampling.  If we are not
+    // sampling, deltaSrc should equal bpp.
+    SkASSERT(deltaSrc == bpp);
+
+    // Note that there is no need to distinguish between RGB and BGR.
+    // Each color channel will get the same value.
+    SkOpts::gray_to_RGB1((uint32_t*) dst, src + offset, width);
+}
+
 static void swizzle_gray_to_565(
         void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
         int bytesPerPixel, int deltaSrc, int offset, const SkPMColor ctable[]) {
@@ -639,6 +652,7 @@ SkSwizzler* SkSwizzler::CreateSwizzler(SkSwizzler::SrcConfig sc,
             switch (dstInfo.colorType()) {
                 case kN32_SkColorType:
                     proc = &swizzle_gray_to_n32;
+                    fastProc = &fast_swizzle_gray_to_n32;
                     break;
                 case kGray_8_SkColorType:
                     proc = &sample1;
diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp
index 669401b417..bce6ee1d57 100644
--- a/src/core/SkOpts.cpp
+++ b/src/core/SkOpts.cpp
@@ -84,6 +84,7 @@ namespace SkOpts {
     decltype(RGBA_to_bgrA) RGBA_to_bgrA = sk_default::RGBA_to_bgrA;
     decltype(RGB_to_RGB1) RGB_to_RGB1 = sk_default::RGB_to_RGB1;
     decltype(RGB_to_BGR1) RGB_to_BGR1 = sk_default::RGB_to_BGR1;
+    decltype(gray_to_RGB1) gray_to_RGB1 = sk_default::gray_to_RGB1;
 
     // Each Init_foo() is defined in src/opts/SkOpts_foo.cpp.
     void Init_ssse3();
diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h
index 41ad8ebfe8..b5286e4ec9 100644
--- a/src/core/SkOpts.h
+++ b/src/core/SkOpts.h
@@ -61,7 +61,8 @@ namespace SkOpts {
                          RGBA_to_rgbA,  // i.e. just premultiply
                          RGBA_to_bgrA,  // i.e. swap RB and premultiply
                          RGB_to_RGB1,   // i.e. insert an opaque alpha
-                         RGB_to_BGR1;   // i.e. swap RB and insert an opaque alpha
+                         RGB_to_BGR1,   // i.e. swap RB and insert an opaque alpha
+                         gray_to_RGB1;  // i.e. set color channels to same value + an opaque alpha
 }
 
 #endif//SkOpts_DEFINED
diff --git a/src/opts/SkOpts_neon.cpp b/src/opts/SkOpts_neon.cpp
index dcb057e1fe..79d3140af5 100644
--- a/src/opts/SkOpts_neon.cpp
+++ b/src/opts/SkOpts_neon.cpp
@@ -52,5 +52,6 @@ namespace SkOpts {
         RGBA_to_bgrA = sk_neon::RGBA_to_bgrA;
         RGB_to_RGB1 = sk_neon::RGB_to_RGB1;
         RGB_to_BGR1 = sk_neon::RGB_to_BGR1;
+        gray_to_RGB1 = sk_neon::gray_to_RGB1;
     }
 }
diff --git a/src/opts/SkOpts_ssse3.cpp b/src/opts/SkOpts_ssse3.cpp
index 23fdffbffa..22eda588de 100644
--- a/src/opts/SkOpts_ssse3.cpp
+++ b/src/opts/SkOpts_ssse3.cpp
@@ -23,5 +23,6 @@ namespace SkOpts {
         RGBA_to_bgrA = sk_ssse3::RGBA_to_bgrA;
         RGB_to_RGB1 = sk_ssse3::RGB_to_RGB1;
         RGB_to_BGR1 = sk_ssse3::RGB_to_BGR1;
+        gray_to_RGB1 = sk_ssse3::gray_to_RGB1;
     }
 }
diff --git a/src/opts/SkSwizzler_opts.h b/src/opts/SkSwizzler_opts.h
index 14960f3b8f..612700e837 100644
--- a/src/opts/SkSwizzler_opts.h
+++ b/src/opts/SkSwizzler_opts.h
@@ -88,6 +88,16 @@ static void RGB_to_BGR1_portable(uint32_t dst[], const void* vsrc, int count) {
     }
 }
 
+static void gray_to_RGB1_portable(uint32_t dst[], const void* vsrc, int count) {
+    const uint8_t* src = (const uint8_t*)vsrc;
+    for (int i = 0; i < count; i++) {
+        dst[i] = (uint32_t)0xFF   << 24   // opaque alpha in the top byte
+               | (uint32_t)src[i] << 16   // the same gray value goes into each
+               | (uint32_t)src[i] <<  8   // of the three lower (color) bytes
+               | (uint32_t)src[i] <<  0;
+    }
+}
+
 #if defined(SK_ARM_HAS_NEON)
 
 // Rounded divide by 255, (x + 127) / 255
@@ -260,6 +270,47 @@ static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) {
     insert_alpha_should_swaprb(dst, src, count);
 }
 
+static void gray_to_RGB1(uint32_t dst[], const void* vsrc, int count) {
+    const uint8_t* src = (const uint8_t*) vsrc;
+    while (count >= 16) {
+        // Load 16 pixels (one gray byte per pixel).
+        uint8x16_t gray = vld1q_u8(src);
+
+        // Set each of the color channels to the same gray value.
+        uint8x16x4_t rgba;
+        rgba.val[0] = gray;
+        rgba.val[1] = gray;
+        rgba.val[2] = gray;
+        rgba.val[3] = vdupq_n_u8(0xFF);  // opaque alpha
+
+        // Store 16 pixels; vst4q_u8 interleaves the four vectors into 4-byte pixels.
+        vst4q_u8((uint8_t*) dst, rgba);
+        src += 16;
+        dst += 16;
+        count -= 16;
+    }
+
+    if (count >= 8) {
+        // Load 8 pixels (one gray byte per pixel).
+        uint8x8_t gray = vld1_u8(src);
+
+        // Set each of the color channels to the same gray value.
+        uint8x8x4_t rgba;
+        rgba.val[0] = gray;
+        rgba.val[1] = gray;
+        rgba.val[2] = gray;
+        rgba.val[3] = vdup_n_u8(0xFF);  // opaque alpha
+
+        // Store 8 pixels; vst4_u8 interleaves the four vectors into 4-byte pixels.
+        vst4_u8((uint8_t*) dst, rgba);
+        src += 8;
+        dst += 8;
+        count -= 8;
+    }
+
+    gray_to_RGB1_portable(dst, src, count);  // fewer than 8 pixels remain here
+}
+
 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
 
 template
@@ -401,6 +452,10 @@ static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) {
     insert_alpha_should_swaprb(dst, src, count);
 }
 
+static void gray_to_RGB1(uint32_t dst[], const void* src, int count) {
+    gray_to_RGB1_portable(dst, src, count);  // forwards to the portable loop
+}
+
 #else
 
 static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) {
@@ -423,6 +478,10 @@ static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) {
     RGB_to_BGR1_portable(dst, src, count);
 }
 
+static void gray_to_RGB1(uint32_t dst[], const void* src, int count) {
+    gray_to_RGB1_portable(dst, src, count);
+}
+
 #endif
 
 }