sk_linear_from_srgb_math
The output looks great (imperceptibly different) but is ~10% slower on both ARMv8 and x86-64. We probably need to hide the table-or-math logic behind Sk4f/Sk8f unless we find faster math. I do like the new look of the pipeline stages, though: they are a lot clearer. BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2880 CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot Change-Id: I44952237d56ba167445b07d4830eb8959c4d47b7 Reviewed-on: https://skia-review.googlesource.com/2880 Commit-Queue: Mike Klein <mtklein@chromium.org> Reviewed-by: Matt Sarett <msarett@google.com>
This commit is contained in:
parent
ab87d7abf1
commit
418aa61fea
@ -62,4 +62,23 @@ static inline Sk4i sk_linear_to_srgb_noclamp(const Sk4f& x) {
|
||||
return SkNx_cast<int>(f);
|
||||
}
|
||||
|
||||
// sRGB -> linear, using math instead of table lookups, scaling better to larger SIMD vectors.
|
||||
static inline Sk4f sk_linear_from_srgb_math(const Sk4i& s) {
|
||||
auto x = SkNx_cast<float>(s);
|
||||
|
||||
const float u = 1/255.0f; // x is [0,255], so x^n needs scaling by u^n.
|
||||
|
||||
// Non-linear segment of sRGB curve approximated by
|
||||
// l = 0.0025 + 0.6975x^2 + 0.3x^3
|
||||
const float k0 = 0.0025f,
|
||||
k2 = 0.6975f * u*u,
|
||||
k3 = 0.3000f * u*u*u;
|
||||
auto hi = k0 + (k2 + k3*x) * (x*x);
|
||||
|
||||
// Linear segment of sRGB curve: the normal slope, extended a little further than normal.
|
||||
auto lo = x * (u/12.92f);
|
||||
|
||||
return (x < 14.025f).thenElse(lo, hi);
|
||||
}
|
||||
|
||||
#endif//SkSRGB_DEFINED
|
||||
|
@ -244,79 +244,21 @@ namespace SK_OPTS_NS {
|
||||
// load_d_srgb: reads packed 32-bit pixels starting at ((const uint32_t*)ctx + x) and writes
// linear-float channel values into dr/dg/db/da. Alpha is only scaled by 1/255; the colour
// channels go through an sRGB -> linear conversion.
//
// NOTE(review): the enclosing hunk header reads "-244,79 +244,21", and this listing carries no
// +/- markers. It therefore appears to show the removed table-lookup body (from `if (tail)`
// through the Sk4u::Load alpha line) immediately followed by the added body (the load_tail /
// sk_linear_from_srgb_math lines). Confirm against the repository before treating all of these
// statements as live code.
// NOTE(review): KERNEL_Sk4f, x, tail, ctx, dr/dg/db/da and load_tail are defined outside this
// view; their exact contracts are assumed from usage here.
KERNEL_Sk4f(load_d_srgb) {
|
||||
auto ptr = (const uint32_t*)ctx + x;
|
||||
|
||||
// Table-lookup path, partial vector: only the first `tail` pixels are read; the remaining
// lanes keep the 0 they were initialised with.
if (tail) {
|
||||
float rs[] = {0,0,0,0},
|
||||
gs[] = {0,0,0,0},
|
||||
bs[] = {0,0,0,0},
|
||||
as[] = {0,0,0,0};
|
||||
for (size_t i = 0; i < tail; i++) {
|
||||
rs[i] = sk_linear_from_srgb[(ptr[i] >> SK_R32_SHIFT) & 0xff];
|
||||
gs[i] = sk_linear_from_srgb[(ptr[i] >> SK_G32_SHIFT) & 0xff];
|
||||
bs[i] = sk_linear_from_srgb[(ptr[i] >> SK_B32_SHIFT) & 0xff];
|
||||
// No 0xff mask on alpha here -- presumably relies on alpha being the top byte; verify.
as[i] = (1/255.0f) * (ptr[i] >> SK_A32_SHIFT) ;
|
||||
}
|
||||
dr = Sk4f::Load(rs);
|
||||
dg = Sk4f::Load(gs);
|
||||
db = Sk4f::Load(bs);
|
||||
da = Sk4f::Load(as);
|
||||
return;
|
||||
}
|
||||
|
||||
// Table-lookup path, full vector: one sk_linear_from_srgb lookup per channel for each of
// the four pixels ptr[0..3].
dr = { sk_linear_from_srgb[(ptr[0] >> SK_R32_SHIFT) & 0xff],
|
||||
sk_linear_from_srgb[(ptr[1] >> SK_R32_SHIFT) & 0xff],
|
||||
sk_linear_from_srgb[(ptr[2] >> SK_R32_SHIFT) & 0xff],
|
||||
sk_linear_from_srgb[(ptr[3] >> SK_R32_SHIFT) & 0xff] };
|
||||
|
||||
dg = { sk_linear_from_srgb[(ptr[0] >> SK_G32_SHIFT) & 0xff],
|
||||
sk_linear_from_srgb[(ptr[1] >> SK_G32_SHIFT) & 0xff],
|
||||
sk_linear_from_srgb[(ptr[2] >> SK_G32_SHIFT) & 0xff],
|
||||
sk_linear_from_srgb[(ptr[3] >> SK_G32_SHIFT) & 0xff] };
|
||||
|
||||
db = { sk_linear_from_srgb[(ptr[0] >> SK_B32_SHIFT) & 0xff],
|
||||
sk_linear_from_srgb[(ptr[1] >> SK_B32_SHIFT) & 0xff],
|
||||
sk_linear_from_srgb[(ptr[2] >> SK_B32_SHIFT) & 0xff],
|
||||
sk_linear_from_srgb[(ptr[3] >> SK_B32_SHIFT) & 0xff] };
|
||||
|
||||
da = SkNx_cast<float>(Sk4u::Load(ptr) >> SK_A32_SHIFT) * (1/255.0f);
|
||||
// Math path: load the pixels as one integer vector via load_tail, then shift and mask each
// channel out of it and convert r/g/b with sk_linear_from_srgb_math.
auto px = load_tail(tail, (const int*)ptr);
|
||||
dr = sk_linear_from_srgb_math((px >> SK_R32_SHIFT) & 0xff);
|
||||
dg = sk_linear_from_srgb_math((px >> SK_G32_SHIFT) & 0xff);
|
||||
db = sk_linear_from_srgb_math((px >> SK_B32_SHIFT) & 0xff);
|
||||
da = (1/255.0f)*SkNx_cast<float>((px >> SK_A32_SHIFT) & 0xff);
|
||||
}
|
||||
|
||||
// load_s_srgb: reads packed 32-bit pixels starting at ((const uint32_t*)ctx + x) and writes
// linear-float channel values into r/g/b/a. Alpha is only scaled by 1/255; the colour
// channels go through an sRGB -> linear conversion. Apart from the output variables, the
// statements mirror load_d_srgb.
//
// NOTE(review): the enclosing hunk header reads "-244,79 +244,21", and this listing carries no
// +/- markers. It therefore appears to show the removed table-lookup body (from `if (tail)`
// through the Sk4u::Load alpha line) immediately followed by the added body (the load_tail /
// sk_linear_from_srgb_math lines). Confirm against the repository before treating all of these
// statements as live code.
// NOTE(review): KERNEL_Sk4f, x, tail, ctx, r/g/b/a and load_tail are defined outside this
// view; their exact contracts are assumed from usage here.
KERNEL_Sk4f(load_s_srgb) {
|
||||
auto ptr = (const uint32_t*)ctx + x;
|
||||
|
||||
// Table-lookup path, partial vector: only the first `tail` pixels are read; the remaining
// lanes keep the 0 they were initialised with.
if (tail) {
|
||||
float rs[] = {0,0,0,0},
|
||||
gs[] = {0,0,0,0},
|
||||
bs[] = {0,0,0,0},
|
||||
as[] = {0,0,0,0};
|
||||
for (size_t i = 0; i < tail; i++) {
|
||||
rs[i] = sk_linear_from_srgb[(ptr[i] >> SK_R32_SHIFT) & 0xff];
|
||||
gs[i] = sk_linear_from_srgb[(ptr[i] >> SK_G32_SHIFT) & 0xff];
|
||||
bs[i] = sk_linear_from_srgb[(ptr[i] >> SK_B32_SHIFT) & 0xff];
|
||||
// No 0xff mask on alpha here -- presumably relies on alpha being the top byte; verify.
as[i] = (1/255.0f) * (ptr[i] >> SK_A32_SHIFT) ;
|
||||
}
|
||||
r = Sk4f::Load(rs);
|
||||
g = Sk4f::Load(gs);
|
||||
b = Sk4f::Load(bs);
|
||||
a = Sk4f::Load(as);
|
||||
return;
|
||||
}
|
||||
|
||||
// Table-lookup path, full vector: one sk_linear_from_srgb lookup per channel for each of
// the four pixels ptr[0..3].
r = { sk_linear_from_srgb[(ptr[0] >> SK_R32_SHIFT) & 0xff],
|
||||
sk_linear_from_srgb[(ptr[1] >> SK_R32_SHIFT) & 0xff],
|
||||
sk_linear_from_srgb[(ptr[2] >> SK_R32_SHIFT) & 0xff],
|
||||
sk_linear_from_srgb[(ptr[3] >> SK_R32_SHIFT) & 0xff] };
|
||||
|
||||
g = { sk_linear_from_srgb[(ptr[0] >> SK_G32_SHIFT) & 0xff],
|
||||
sk_linear_from_srgb[(ptr[1] >> SK_G32_SHIFT) & 0xff],
|
||||
sk_linear_from_srgb[(ptr[2] >> SK_G32_SHIFT) & 0xff],
|
||||
sk_linear_from_srgb[(ptr[3] >> SK_G32_SHIFT) & 0xff] };
|
||||
|
||||
b = { sk_linear_from_srgb[(ptr[0] >> SK_B32_SHIFT) & 0xff],
|
||||
sk_linear_from_srgb[(ptr[1] >> SK_B32_SHIFT) & 0xff],
|
||||
sk_linear_from_srgb[(ptr[2] >> SK_B32_SHIFT) & 0xff],
|
||||
sk_linear_from_srgb[(ptr[3] >> SK_B32_SHIFT) & 0xff] };
|
||||
|
||||
a = SkNx_cast<float>(Sk4u::Load(ptr) >> SK_A32_SHIFT) * (1/255.0f);
|
||||
// Math path: load the pixels as one integer vector via load_tail, then shift and mask each
// channel out of it and convert r/g/b with sk_linear_from_srgb_math.
auto px = load_tail(tail, (const int*)ptr);
|
||||
r = sk_linear_from_srgb_math((px >> SK_R32_SHIFT) & 0xff);
|
||||
g = sk_linear_from_srgb_math((px >> SK_G32_SHIFT) & 0xff);
|
||||
b = sk_linear_from_srgb_math((px >> SK_B32_SHIFT) & 0xff);
|
||||
a = (1/255.0f)*SkNx_cast<float>((px >> SK_A32_SHIFT) & 0xff);
|
||||
}
|
||||
|
||||
KERNEL_Sk4f(store_srgb) {
|
||||
|
Loading…
Reference in New Issue
Block a user