Add a hook for CPU-optimized sRGB-sRGB srcover.

Herb's really starting to get serious about tweaking this, which becomes
a lot easier when you've got SkOpts' runtime CPU detection. We should be
able to optimize this usefully for SSSE3, SSE4.1, AVX, AVX2, or NEON.
(We can of course implement a subset.)

This function takes two counts to give us flexibility to write src patterns:
   nsrc >= ndst -> the usual srcover function
   nsrc <  ndst -> repeat src until it fills dst
   nsrc << ndst -> possibly preprocess src into registers
   nsrc == 1    -> equivalent of blitrow_color32, srcover_1, etc.

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1939783003

Review-Url: https://codereview.chromium.org/1939783003
This commit is contained in:
mtklein 2016-05-02 11:48:42 -07:00 committed by Commit bot
parent ffc2aea3cb
commit c5091b5b6c
3 changed files with 65 additions and 0 deletions

View File

@ -11,6 +11,7 @@
#include "SkOpts.h" #include "SkOpts.h"
#define SK_OPTS_NS sk_default #define SK_OPTS_NS sk_default
#include "SkBlend_opts.h"
#include "SkBlitMask_opts.h" #include "SkBlitMask_opts.h"
#include "SkBlitRow_opts.h" #include "SkBlitRow_opts.h"
#include "SkBlurImageFilter_opts.h" #include "SkBlurImageFilter_opts.h"
@ -73,6 +74,8 @@ namespace SkOpts {
decltype(half_to_float) half_to_float = sk_default::half_to_float; decltype(half_to_float) half_to_float = sk_default::half_to_float;
decltype(float_to_half) float_to_half = sk_default::float_to_half; decltype(float_to_half) float_to_half = sk_default::float_to_half;
decltype(srcover_srgb_srgb) srcover_srgb_srgb = sk_default::srcover_srgb_srgb;
// Each Init_foo() is defined in src/opts/SkOpts_foo.cpp. // Each Init_foo() is defined in src/opts/SkOpts_foo.cpp.
void Init_ssse3(); void Init_ssse3();
void Init_sse41(); void Init_sse41();

View File

@ -64,6 +64,10 @@ namespace SkOpts {
extern void (*half_to_float)(float[], const uint16_t[], int); extern void (*half_to_float)(float[], const uint16_t[], int);
extern void (*float_to_half)(uint16_t[], const float[], int); extern void (*float_to_half)(uint16_t[], const float[], int);
// Blend ndst src pixels over dst, where both src and dst point to sRGB pixels (RGBA or BGRA).
// If nsrc < ndst, we loop over src to create a pattern.
extern void (*srcover_srgb_srgb)(uint32_t* dst, const uint32_t* src, int ndst, int nsrc);
} }
#endif//SkOpts_DEFINED #endif//SkOpts_DEFINED

58
src/opts/SkBlend_opts.h Normal file
View File

@ -0,0 +1,58 @@
/*
* Copyright 2016 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SkBlend_opts_DEFINED
#define SkBlend_opts_DEFINED
namespace SK_OPTS_NS {
#if 0
#else
static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
switch (src >> 24) {
case 0x00: return;
case 0xff: *dst = src; return;
}
Sk4f d = SkNx_cast<float>(Sk4b::Load( dst)),
s = SkNx_cast<float>(Sk4b::Load(&src));
// Approximate sRGB gamma as 2.0.
Sk4f d_sq = d*d,
s_sq = s*s;
d = Sk4f{d_sq[0], d_sq[1], d_sq[2], d[3]};
s = Sk4f{s_sq[0], s_sq[1], s_sq[2], s[3]};
// SrcOver.
Sk4f invA = 1.0f - s[3]*(1/255.0f);
d = s + d * invA;
// Re-apply approximate sRGB gamma.
Sk4f d_sqrt = d.sqrt();
d = Sk4f{d_sqrt[0], d_sqrt[1], d_sqrt[2], d[3]};
SkNx_cast<uint8_t>(d).store(dst);
}
// Blends src over dst for ndst destination pixels, one pixel at a time via
// srcover_srgb_srgb_1.
//
//   dst  - destination pixels; ndst of them are read and written.
//   src  - source pixels; at most nsrc of them are read, always starting from src[0].
//   ndst - number of destination pixels to blend into.
//   nsrc - number of source pixels available.
//
// Each pass consumes min(ndst remaining, nsrc) source pixels, so when nsrc < ndst the
// source run is repeated as a pattern until dst is filled, and when nsrc >= ndst only the
// first ndst source pixels are used.
//
// A non-positive nsrc provides nothing to blend, so the call is a no-op. Without this guard
// the pass length below would never be positive and the loop would never terminate.
static inline void srcover_srgb_srgb(uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
    if (nsrc <= 0) {
        return;
    }
    while (ndst > 0) {
        // Length of this pass: one full copy of src, or whatever is left of dst.
        const int n = SkTMin(ndst, nsrc);
        for (int i = 0; i < n; i++) {
            srcover_srgb_srgb_1(dst++, src[i]);
        }
        ndst -= n;
    }
}
#endif
} // namespace SK_OPTS_NS
#endif//SkBlend_opts_DEFINED