Add a hook for CPU-optimized sRGB-sRGB srcover.
Herb's really starting to get serious about tweaking this, which becomes a lot easier when you've got SkOpts' runtime CPU detection. We should be able to optimize this usefully for SSSE3, SSE4.1, AVX, AVX2, or NEON. (We can of course implement a subset.) This function takes two counts to give us flexibility to write src patterns: nsrc >= ndst -> the usual srcover function; nsrc < ndst -> repeat src until it fills dst; nsrc << ndst -> possibly preprocess src into registers; nsrc == 1 -> equivalent of blitrow_color32, srcover_1, etc. BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1939783003 Review-Url: https://codereview.chromium.org/1939783003
This commit is contained in:
parent
ffc2aea3cb
commit
c5091b5b6c
@ -11,6 +11,7 @@
|
||||
#include "SkOpts.h"
|
||||
|
||||
#define SK_OPTS_NS sk_default
|
||||
#include "SkBlend_opts.h"
|
||||
#include "SkBlitMask_opts.h"
|
||||
#include "SkBlitRow_opts.h"
|
||||
#include "SkBlurImageFilter_opts.h"
|
||||
@ -73,6 +74,8 @@ namespace SkOpts {
|
||||
decltype(half_to_float) half_to_float = sk_default::half_to_float;
|
||||
decltype(float_to_half) float_to_half = sk_default::float_to_half;
|
||||
|
||||
decltype(srcover_srgb_srgb) srcover_srgb_srgb = sk_default::srcover_srgb_srgb;
|
||||
|
||||
// Each Init_foo() is defined in src/opts/SkOpts_foo.cpp.
|
||||
void Init_ssse3();
|
||||
void Init_sse41();
|
||||
|
@ -64,6 +64,10 @@ namespace SkOpts {
|
||||
|
||||
extern void (*half_to_float)(float[], const uint16_t[], int);
|
||||
extern void (*float_to_half)(uint16_t[], const float[], int);
|
||||
|
||||
// Blend ndst src pixels over dst, where both src and dst point to sRGB pixels (RGBA or BGRA).
|
||||
// If nsrc < ndst, we loop over src to create a pattern.
|
||||
extern void (*srcover_srgb_srgb)(uint32_t* dst, const uint32_t* src, int ndst, int nsrc);
|
||||
}
|
||||
|
||||
#endif//SkOpts_DEFINED
|
||||
|
58
src/opts/SkBlend_opts.h
Normal file
58
src/opts/SkBlend_opts.h
Normal file
@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright 2016 Google Inc.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#ifndef SkBlend_opts_DEFINED
|
||||
#define SkBlend_opts_DEFINED
|
||||
|
||||
namespace SK_OPTS_NS {
|
||||
|
||||
#if 0
|
||||
|
||||
#else
|
||||
|
||||
static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
|
||||
switch (src >> 24) {
|
||||
case 0x00: return;
|
||||
case 0xff: *dst = src; return;
|
||||
}
|
||||
|
||||
Sk4f d = SkNx_cast<float>(Sk4b::Load( dst)),
|
||||
s = SkNx_cast<float>(Sk4b::Load(&src));
|
||||
|
||||
// Approximate sRGB gamma as 2.0.
|
||||
Sk4f d_sq = d*d,
|
||||
s_sq = s*s;
|
||||
d = Sk4f{d_sq[0], d_sq[1], d_sq[2], d[3]};
|
||||
s = Sk4f{s_sq[0], s_sq[1], s_sq[2], s[3]};
|
||||
|
||||
// SrcOver.
|
||||
Sk4f invA = 1.0f - s[3]*(1/255.0f);
|
||||
d = s + d * invA;
|
||||
|
||||
// Re-apply approximate sRGB gamma.
|
||||
Sk4f d_sqrt = d.sqrt();
|
||||
d = Sk4f{d_sqrt[0], d_sqrt[1], d_sqrt[2], d[3]};
|
||||
|
||||
SkNx_cast<uint8_t>(d).store(dst);
|
||||
}
|
||||
|
||||
static inline void srcover_srgb_srgb(uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
|
||||
while (ndst > 0) {
|
||||
int n = SkTMin(ndst, nsrc);
|
||||
|
||||
for (int i = 0; i < n; i++) {
|
||||
srcover_srgb_srgb_1(dst++, src[i]);
|
||||
}
|
||||
ndst -= n;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace SK_OPTS_NS
|
||||
|
||||
#endif//SkBlend_opts_DEFINED
|
Loading…
Reference in New Issue
Block a user