From c5091b5b6c4b8a7aef8c12db9ea2a85e907b01c4 Mon Sep 17 00:00:00 2001
From: mtklein
Date: Mon, 2 May 2016 11:48:42 -0700
Subject: [PATCH] Add a hook for CPU-optimized sRGB-sRGB srcover.

Herb's really starting to get serious about tweaking this, which becomes a lot
easier when you've got SkOpts' runtime CPU detection.

We should be able to optimize this usefully for SSSE3, SSE4.1, AVX, AVX2, or
NEON. (We can of course implement a subset.)

This function takes two counts to give us flexibility to write src patterns:
   nsrc >= ndst -> the usual srcover function
   nsrc <  ndst -> repeat src until it fills dst
   nsrc << ndst -> possibly preprocess src into registers
   nsrc == 1    -> equivalent of blitrow_color32, srcover_1, etc.

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1939783003

Review-Url: https://codereview.chromium.org/1939783003
---
 src/core/SkOpts.cpp     |  3 +++
 src/core/SkOpts.h       |  4 +++
 src/opts/SkBlend_opts.h | 70 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 77 insertions(+)
 create mode 100644 src/opts/SkBlend_opts.h

diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp
index f854033316..54463b250c 100644
--- a/src/core/SkOpts.cpp
+++ b/src/core/SkOpts.cpp
@@ -11,6 +11,7 @@
 #include "SkOpts.h"
 
 #define SK_OPTS_NS sk_default
+#include "SkBlend_opts.h"
 #include "SkBlitMask_opts.h"
 #include "SkBlitRow_opts.h"
 #include "SkBlurImageFilter_opts.h"
@@ -73,6 +74,8 @@ namespace SkOpts {
     decltype(half_to_float) half_to_float = sk_default::half_to_float;
     decltype(float_to_half) float_to_half = sk_default::float_to_half;
 
+    decltype(srcover_srgb_srgb) srcover_srgb_srgb = sk_default::srcover_srgb_srgb;
+
     // Each Init_foo() is defined in src/opts/SkOpts_foo.cpp.
     void Init_ssse3();
     void Init_sse41();
diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h
index 04c9f00de3..b8aea4aa34 100644
--- a/src/core/SkOpts.h
+++ b/src/core/SkOpts.h
@@ -64,6 +64,10 @@ namespace SkOpts {
 
     extern void (*half_to_float)(float[], const uint16_t[], int);
     extern void (*float_to_half)(uint16_t[], const float[], int);
+
+    // Blend ndst src pixels over dst, where both src and dst point to sRGB pixels (RGBA or BGRA).
+    // If nsrc < ndst, we loop over src to create a pattern.
+    extern void (*srcover_srgb_srgb)(uint32_t* dst, const uint32_t* src, int ndst, int nsrc);
 }
 
 #endif//SkOpts_DEFINED
diff --git a/src/opts/SkBlend_opts.h b/src/opts/SkBlend_opts.h
new file mode 100644
index 0000000000..a1067407be
--- /dev/null
+++ b/src/opts/SkBlend_opts.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkBlend_opts_DEFINED
+#define SkBlend_opts_DEFINED
+
+namespace SK_OPTS_NS {
+
+#if 0
+
+#else
+
+    // Blend one src pixel over *dst in place, treating both as sRGB pixels.
+    // The math is plain srcover (src + dst * (1 - src alpha)) with no premultiply step,
+    // so src is presumably expected to be premultiplied -- TODO confirm against callers.
+    static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
+        // src >> 24 is taken as alpha: a fully transparent src leaves dst untouched,
+        // and a fully opaque src simply replaces it.  Neither case needs any math.
+        switch (src >> 24) {
+            case 0x00:             return;
+            case 0xff: *dst = src; return;
+        }
+
+        Sk4f d = SkNx_cast<float>(Sk4b::Load( dst)),
+             s = SkNx_cast<float>(Sk4b::Load(&src));
+
+        // Approximate sRGB gamma as 2.0.
+        Sk4f d_sq = d*d,
+             s_sq = s*s;
+        d = Sk4f{d_sq[0], d_sq[1], d_sq[2], d[3]};
+        s = Sk4f{s_sq[0], s_sq[1], s_sq[2], s[3]};
+
+        // SrcOver.
+        Sk4f invA = 1.0f - s[3]*(1/255.0f);
+        d = s + d * invA;
+
+        // Re-apply approximate sRGB gamma.
+        Sk4f d_sqrt = d.sqrt();
+        d = Sk4f{d_sqrt[0], d_sqrt[1], d_sqrt[2], d[3]};
+
+        SkNx_cast<uint8_t>(d).store(dst);
+    }
+
+    // Blend ndst pixels over dst, reading src[0..nsrc) and restarting from src[0]
+    // whenever src runs out, so a short src acts as a repeating pattern.
+    static inline void srcover_srgb_srgb(uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
+        // With nothing to read, the loop below would never shrink ndst.
+        if (nsrc <= 0) {
+            return;
+        }
+
+        while (ndst > 0) {
+            int n = SkTMin(ndst, nsrc);
+
+            for (int i = 0; i < n; i++) {
+                srcover_srgb_srgb_1(dst++, src[i]);
+            }
+            ndst -= n;
+        }
+    }
+
+#endif
+
+} // namespace SK_OPTS_NS
+
+#endif//SkBlend_opts_DEFINED