Evenly spaced gradient stage.
This seems like an experiment at this point because I don't know how to do this kind of thing on ARM.

Numbers from Skylake...

Before:
./out/Release/nanobench --config srgb \
    --match gradient_linear_clamp_3color gradient_linear_clamp_hicolor -q 19:48:13
Timer overhead: 36.7ns
! -> high variance, ? -> moderate variance
micros      bench
 439.92 ?   gradient_linear_clamp_3color srgb
2697.60     gradient_linear_clamp_hicolor srgb
 437.28     gradient_linear_clamp_3color_4f srgb
2700.50     gradient_linear_clamp_hicolor_4f srgb

After:
micros      bench
 382.35     gradient_linear_clamp_3color srgb
 593.49     gradient_linear_clamp_hicolor srgb
 382.36     gradient_linear_clamp_3color_4f srgb
 565.60     gradient_linear_clamp_hicolor_4f srgb

Numbers on my Mac Trashcan are about even; there is no speedup or slowdown between master and this change.

Change-Id: I04402452e23c0888512362fd1d6d5436cea61719
Reviewed-on: https://skia-review.googlesource.com/15960
Commit-Queue: Herb Derby <herb@google.com>
Reviewed-by: Mike Klein <mtklein@chromium.org>
This commit is contained in:
parent
d95236dab0
commit
892501d09b
@ -96,6 +96,7 @@
|
||||
M(bicubic_n3x) M(bicubic_n1x) M(bicubic_p1x) M(bicubic_p3x) \
|
||||
M(bicubic_n3y) M(bicubic_n1y) M(bicubic_p1y) M(bicubic_p3y) \
|
||||
M(save_xy) M(accumulate) \
|
||||
M(evenly_spaced_linear_gradient) \
|
||||
M(linear_gradient) \
|
||||
M(linear_gradient_2stops) \
|
||||
M(xy_to_polar_unit) \
|
||||
|
@ -5,6 +5,7 @@
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include "Sk4fLinearGradient.h"
|
||||
#include "SkColorSpace_XYZ.h"
|
||||
#include "SkGradientShaderPriv.h"
|
||||
@ -406,51 +407,74 @@ bool SkGradientShaderBase::onAppendStages(SkRasterPipeline* p,
|
||||
|
||||
p->append(SkRasterPipeline::linear_gradient_2stops, f_and_b);
|
||||
} else {
|
||||
|
||||
struct Stop { float t; SkPM4f f, b; };
|
||||
struct Ctx { size_t n; Stop* stops; SkPM4f start; };
|
||||
|
||||
auto* ctx = alloc->make<Ctx>();
|
||||
ctx->start = prepareColor(0);
|
||||
|
||||
// For each stop we calculate a bias B and a scale factor F, such that
|
||||
// for any t between stops n and n+1, the color we want is B[n] + F[n]*t.
|
||||
auto init_stop = [](float t_l, float t_r, SkPM4f c_l, SkPM4f c_r, Stop *stop) {
|
||||
auto F = SkPM4f::From4f((c_r.to4f() - c_l.to4f()) / (t_r - t_l));
|
||||
auto B = SkPM4f::From4f(c_l.to4f() - (F.to4f() * t_l));
|
||||
*stop = {t_l, F, B};
|
||||
};
|
||||
|
||||
if (fOrigPos == nullptr) {
|
||||
// Handle evenly distributed stops.
|
||||
|
||||
float dt = 1.0f / (fColorCount - 1);
|
||||
struct Ctx {
|
||||
size_t stopCount;
|
||||
float* fs[4];
|
||||
float* bs[4];
|
||||
};
|
||||
|
||||
auto* ctx = alloc->make<Ctx>();
|
||||
int stopCount = fColorCount;
|
||||
float gapCount = stopCount - 1;
|
||||
|
||||
// In the evenly distributed case, fColorCount is the number of stops. There are no
|
||||
// dummy entries.
|
||||
auto* stopsArray = alloc->makeArrayDefault<Stop>(fColorCount);
|
||||
// dummy entries. So, there are fColorCount - 1 FBs.
|
||||
for (int i = 0; i < 4; i++) {
|
||||
|
||||
float t_l = 0;
|
||||
SkPM4f c_l = ctx->start;
|
||||
// Pad up to 8 in case we hit the AVX2 special case.
|
||||
ctx->fs[i] = alloc->makeArray<float>(std::max(stopCount, 8));
|
||||
ctx->bs[i] = alloc->makeArray<float>(std::max(stopCount, 8));
|
||||
}
|
||||
|
||||
auto add_stop = [&](int stop, SkPM4f Fs, SkPM4f Bs) {
|
||||
(ctx->fs[0])[stop] = Fs.r();
|
||||
(ctx->fs[1])[stop] = Fs.g();
|
||||
(ctx->fs[2])[stop] = Fs.b();
|
||||
(ctx->fs[3])[stop] = Fs.a();
|
||||
(ctx->bs[0])[stop] = Bs.r();
|
||||
(ctx->bs[1])[stop] = Bs.g();
|
||||
(ctx->bs[2])[stop] = Bs.b();
|
||||
(ctx->bs[3])[stop] = Bs.a();
|
||||
};
|
||||
auto init_stop = [&](int stop, SkPM4f c_l, SkPM4f c_r) {
|
||||
auto Fs = SkPM4f::From4f((c_r.to4f() - c_l.to4f()) * gapCount);
|
||||
auto Bs = SkPM4f::From4f(c_l.to4f() - (Fs.to4f() * (stop / gapCount)));
|
||||
add_stop(stop, Fs, Bs);
|
||||
};
|
||||
|
||||
SkPM4f c_l = prepareColor(0);
|
||||
for (int i = 0; i < fColorCount - 1; i++) {
|
||||
// Use multiply instead of accumulating error using repeated addition.
|
||||
float t_r = (i + 1) * dt;
|
||||
SkPM4f c_r = prepareColor(i + 1);
|
||||
init_stop(t_l, t_r, c_l, c_r, &stopsArray[i]);
|
||||
|
||||
t_l = t_r;
|
||||
init_stop(i, c_l, c_r);
|
||||
c_l = c_r;
|
||||
}
|
||||
|
||||
// Force the last stop.
|
||||
stopsArray[fColorCount - 1].t = 1;
|
||||
stopsArray[fColorCount - 1].f = SkPM4f::From4f(Sk4f{0});
|
||||
stopsArray[fColorCount - 1].b = prepareColor(fColorCount - 1);
|
||||
// Add the last stop.
|
||||
add_stop(stopCount - 1, SkPM4f::FromPremulRGBA(0,0,0,0), c_l);
|
||||
|
||||
ctx->n = fColorCount;
|
||||
ctx->stops = stopsArray;
|
||||
ctx->stopCount = stopCount;
|
||||
|
||||
p->append(SkRasterPipeline::evenly_spaced_linear_gradient, ctx);
|
||||
} else {
|
||||
// Handle arbitrary stops.
|
||||
|
||||
struct Stop { float t; SkPM4f f, b; };
|
||||
struct Ctx { size_t n; Stop* stops; SkPM4f start; };
|
||||
|
||||
auto* ctx = alloc->make<Ctx>();
|
||||
ctx->start = prepareColor(0);
|
||||
|
||||
// For each stop we calculate a bias B and a scale factor F, such that
|
||||
// for any t between stops n and n+1, the color we want is B[n] + F[n]*t.
|
||||
auto init_stop = [](float t_l, float t_r, SkPM4f c_l, SkPM4f c_r, Stop *stop) {
|
||||
auto F = SkPM4f::From4f((c_r.to4f() - c_l.to4f()) / (t_r - t_l));
|
||||
auto B = SkPM4f::From4f(c_l.to4f() - (F.to4f() * t_l));
|
||||
*stop = {t_l, F, B};
|
||||
};
|
||||
|
||||
// Remove the dummy stops inserted by SkGradientShaderBase::SkGradientShaderBase
|
||||
// because they are naturally handled by the search method.
|
||||
int firstStop;
|
||||
@ -491,9 +515,8 @@ bool SkGradientShaderBase::onAppendStages(SkRasterPipeline* p,
|
||||
|
||||
ctx->n = stopCount;
|
||||
ctx->stops = stopsArray;
|
||||
p->append(SkRasterPipeline::linear_gradient, ctx);
|
||||
}
|
||||
|
||||
p->append(SkRasterPipeline::linear_gradient, ctx);
|
||||
}
|
||||
|
||||
if (!premulGrad && !this->colorsAreOpaque()) {
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1034,6 +1034,52 @@ STAGE(matrix_perspective) {
|
||||
g = G * rcp(Z);
|
||||
}
|
||||
|
||||
// Gradient stage for evenly spaced color stops.
// Takes the gradient parameter t from r, computes a stop index
// i = trunc_(t * (stopCount - 1)), looks up a per-channel factor (fs) and bias (bs)
// at i, and writes mad(t, f, b) into r, g, b and a.
// NOTE(review): mad(t, f, b) presumably evaluates t*f + b -- confirm against its definition.
// NOTE(review): assumes t is already in [0, 1] so that i is a valid table index;
// confirm that an earlier stage clamps it.
STAGE(evenly_spaced_linear_gradient) {
|
||||
// Stage context. Same field layout as the Ctx declared where this stage is appended.
struct Ctx {
|
||||
// Number of entries used in each fs/bs table.
size_t stopCount;
|
||||
// fs[k]: factor table for channel k; below, k = 0,1,2,3 feed r,g,b,a.
float* fs[4];
|
||||
// bs[k]: bias table for channel k, indexed the same way as fs[k].
float* bs[4];
|
||||
};
|
||||
|
||||
auto c = (const Ctx*)ctx;
|
||||
// The incoming r lane carries t; keep a copy because r is overwritten below.
auto t = r;
|
||||
// Scale t by the number of gaps between stops and truncate to get the table index.
auto i = trunc_(t*(c->stopCount - 1));
|
||||
|
||||
#if defined(JUMPER) && defined(__AVX2__)
|
||||
// With at most 8 stops each table fits in one 256-bit load, so the lookup is done
// as an in-register permute indexed by i instead of a gather.
// This always loads 8 floats per table, whatever stopCount is.
// NOTE(review): relies on every fs/bs table holding at least 8 floats -- confirm the
// code that builds Ctx pads its allocations accordingly.
if (c->stopCount <=8) {
|
||||
auto fr = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->fs[0]), i);
|
||||
auto br = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->bs[0]), i);
|
||||
auto fg = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->fs[1]), i);
|
||||
auto bg = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->bs[1]), i);
|
||||
auto fb = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->fs[2]), i);
|
||||
auto bb = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->bs[2]), i);
|
||||
auto fa = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->fs[3]), i);
|
||||
auto ba = _mm256_permutevar8x32_ps(_mm256_loadu_ps(c->bs[3]), i);
|
||||
r = mad(t, fr, br);
|
||||
g = mad(t, fg, bg);
|
||||
b = mad(t, fb, bb);
|
||||
a = mad(t, fa, ba);
|
||||
|
||||
} else
|
||||
#endif
|
||||
// General path. The dangling else above binds to this braced block; when the #if
// section is compiled out, this block simply runs unconditionally.
{
|
||||
auto fr = gather(c->fs[0], i);
|
||||
auto br = gather(c->bs[0], i);
|
||||
auto fg = gather(c->fs[1], i);
|
||||
auto bg = gather(c->bs[1], i);
|
||||
auto fb = gather(c->fs[2], i);
|
||||
auto bb = gather(c->bs[2], i);
|
||||
auto fa = gather(c->fs[3], i);
|
||||
auto ba = gather(c->bs[3], i);
|
||||
|
||||
r = mad(t, fr, br);
|
||||
g = mad(t, fg, bg);
|
||||
b = mad(t, fb, bb);
|
||||
a = mad(t, fa, ba);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
STAGE(linear_gradient) {
|
||||
struct Stop { float pos; float f[4], b[4]; };
|
||||
struct Ctx { size_t n; Stop *stops; float start[4]; };
|
||||
|
Loading…
Reference in New Issue
Block a user