Reland "Create looping binary-search gradient colorizer."
This is a reland of e2fa96ba4a
Original change's description:
> Create looping binary-search gradient colorizer.
>
> This allows us to dramatically increase the number of gradient stops
> before falling back to sampling from a texture (which smears hard stops
> and shows artifacts in extreme edge cases). The analytic colorizer
> doesn't suffer from these artifacts and blurriness effects.
>
> In nanobench, this change comes at a performance penalty for some tests:
> http://go/paste/6302350793768960
>
> The texture path might have a bit of an unfair advantage here, if the
> gradient texture can just be uploaded once and reused from the cache
> repeatedly. Presumably the setup cost of texture generation and upload
> is fairly expensive, but nanobench is testing just the steady-state
> render performance. In comparison, the analytic colorizer doesn't have
> a large setup cost.
>
> Change-Id: I71baa539a2c7f9e311ef8125de4ede2fdbf0c2d0
> Bug: skia:8401
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/457499
> Auto-Submit: John Stiles <johnstiles@google.com>
> Commit-Queue: Michael Ludwig <michaelludwig@google.com>
> Reviewed-by: Michael Ludwig <michaelludwig@google.com>
Bug: skia:8401
Change-Id: I389f79909bc1424909481b06d70db285b55648fe
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/458277
Reviewed-by: Brian Osman <brianosman@google.com>
Auto-Submit: John Stiles <johnstiles@google.com>
Commit-Queue: John Stiles <johnstiles@google.com>
This commit is contained in:
parent
9a2adfec2c
commit
c9f160b8dd
@ -10,6 +10,7 @@
|
|||||||
#include "src/gpu/gradients/GrGradientBitmapCache.h"
|
#include "src/gpu/gradients/GrGradientBitmapCache.h"
|
||||||
|
|
||||||
#include "include/gpu/GrRecordingContext.h"
|
#include "include/gpu/GrRecordingContext.h"
|
||||||
|
#include "src/core/SkMathPriv.h"
|
||||||
#include "src/core/SkRuntimeEffectPriv.h"
|
#include "src/core/SkRuntimeEffectPriv.h"
|
||||||
#include "src/gpu/GrCaps.h"
|
#include "src/gpu/GrCaps.h"
|
||||||
#include "src/gpu/GrColor.h"
|
#include "src/gpu/GrColor.h"
|
||||||
@ -128,7 +129,7 @@ static std::unique_ptr<GrFragmentProcessor> make_dual_interval_colorizer(const S
|
|||||||
// This works on ES2 hardware that doesn't support non-constant array indexes.
|
// This works on ES2 hardware that doesn't support non-constant array indexes.
|
||||||
// However, to keep code size under control, we are limited to a small number of stops.
|
// However, to keep code size under control, we are limited to a small number of stops.
|
||||||
static constexpr int kMaxUnrolledColorCount = 16;
|
static constexpr int kMaxUnrolledColorCount = 16;
|
||||||
static constexpr int kMaxUnrolledIntervalCount = 8;
|
static constexpr int kMaxUnrolledIntervalCount = kMaxUnrolledColorCount / 2;
|
||||||
|
|
||||||
static std::unique_ptr<GrFragmentProcessor> make_unrolled_colorizer(int intervalCount,
|
static std::unique_ptr<GrFragmentProcessor> make_unrolled_colorizer(int intervalCount,
|
||||||
const SkPMColor4f* scale,
|
const SkPMColor4f* scale,
|
||||||
@ -238,6 +239,97 @@ static std::unique_ptr<GrFragmentProcessor> make_unrolled_colorizer(int interval
|
|||||||
"bias", SkMakeSpan(bias, intervalCount));
|
"bias", SkMakeSpan(bias, intervalCount));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The "looping" colorizer uses a real loop to binary-search the array of gradient stops.
|
||||||
|
static constexpr int kMaxLoopingColorCount = 128;
|
||||||
|
static constexpr int kMaxLoopingIntervalCount = kMaxLoopingColorCount / 2;
|
||||||
|
|
||||||
|
static std::unique_ptr<GrFragmentProcessor> make_looping_colorizer(int intervalCount,
|
||||||
|
const SkPMColor4f* scale,
|
||||||
|
const SkPMColor4f* bias,
|
||||||
|
const SkScalar* thresholds) {
|
||||||
|
SkASSERT(intervalCount >= 1 && intervalCount <= kMaxLoopingIntervalCount);
|
||||||
|
SkASSERT((intervalCount & 3) == 0); // intervals are required to come in groups of four
|
||||||
|
int intervalChunks = intervalCount / 4;
|
||||||
|
int cacheIndex = (size_t)intervalChunks - 1;
|
||||||
|
|
||||||
|
struct EffectCacheEntry {
|
||||||
|
SkOnce once;
|
||||||
|
sk_sp<SkRuntimeEffect> effect;
|
||||||
|
};
|
||||||
|
|
||||||
|
static EffectCacheEntry effectCache[kMaxLoopingIntervalCount / 4];
|
||||||
|
SkASSERT(cacheIndex >= 0 && cacheIndex < (int)SK_ARRAY_COUNT(effectCache));
|
||||||
|
EffectCacheEntry* cacheEntry = &effectCache[cacheIndex];
|
||||||
|
|
||||||
|
cacheEntry->once([intervalCount, intervalChunks, cacheEntry] {
|
||||||
|
SkString sksl;
|
||||||
|
|
||||||
|
// Binary search for the interval that `t` falls within. We can precalculate the number of
|
||||||
|
// loop iterations we need, and we know `t` will always be in range, so we can just loop a
|
||||||
|
// fixed number of times and can be guaranteed to have found the proper element.
|
||||||
|
//
|
||||||
|
// Threshold values are stored in half4s to keep them compact, so the last two rounds of
|
||||||
|
// binary search are hand-unrolled to allow them to use swizzles.
|
||||||
|
//
|
||||||
|
// Note that this colorizer is also designed to handle the case of exactly 4 intervals (a
|
||||||
|
// single chunk). In this case, the binary search for-loop will optimize away entirely, as
|
||||||
|
// it can be proven to execute zero times. We also optimize away the calculation of `4 *
|
||||||
|
// chunk` near the end via an @if statement, as the result will always be in chunk 0.
|
||||||
|
int loopCount = SkNextLog2(intervalChunks);
|
||||||
|
sksl.appendf(R"(
|
||||||
|
uniform half4 thresholds[%d];
|
||||||
|
uniform float4 scale[%d];
|
||||||
|
uniform float4 bias[%d];
|
||||||
|
|
||||||
|
half4 main(float2 coord) {
|
||||||
|
half t = half(coord.x);
|
||||||
|
|
||||||
|
// Choose a chunk from thresholds via binary search in a loop.
|
||||||
|
int low = 0;
|
||||||
|
int high = %d;
|
||||||
|
int chunk = %d;
|
||||||
|
for (int loop = 0; loop < %d; ++loop) {
|
||||||
|
if (t < thresholds[chunk].w) {
|
||||||
|
high = chunk;
|
||||||
|
} else {
|
||||||
|
low = chunk + 1;
|
||||||
|
}
|
||||||
|
chunk = (low + high) / 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Choose the final position via explicit 4-way binary search.
|
||||||
|
int pos;
|
||||||
|
if (t < thresholds[chunk].y) {
|
||||||
|
pos = (t < thresholds[chunk].x) ? 0 : 1;
|
||||||
|
} else {
|
||||||
|
pos = (t < thresholds[chunk].z) ? 2 : 3;
|
||||||
|
}
|
||||||
|
@if (%d > 0) {
|
||||||
|
pos += 4 * chunk;
|
||||||
|
}
|
||||||
|
return t * scale[pos] + bias[pos];
|
||||||
|
}
|
||||||
|
)", /* thresholds: */ intervalChunks,
|
||||||
|
/* scale: */ intervalCount,
|
||||||
|
/* bias: */ intervalCount,
|
||||||
|
/* high: */ intervalChunks - 1,
|
||||||
|
/* chunk: */ (intervalChunks - 1) / 2,
|
||||||
|
/* loopCount: */ loopCount,
|
||||||
|
/* @if (loopCount > 0): */ loopCount);
|
||||||
|
|
||||||
|
auto result = SkRuntimeEffect::MakeForShader(std::move(sksl),
|
||||||
|
SkRuntimeEffectPriv::ES3Options());
|
||||||
|
SkASSERTF(result.effect, "%s", result.errorText.c_str());
|
||||||
|
cacheEntry->effect = std::move(result.effect);
|
||||||
|
});
|
||||||
|
|
||||||
|
return GrSkSLFP::Make(cacheEntry->effect, "LoopingBinaryColorizer",
|
||||||
|
/*inputFP=*/nullptr, GrSkSLFP::OptFlags::kNone,
|
||||||
|
"thresholds", SkMakeSpan((const SkV4*)thresholds, intervalChunks),
|
||||||
|
"scale", SkMakeSpan(scale, intervalCount),
|
||||||
|
"bias", SkMakeSpan(bias, intervalCount));
|
||||||
|
}
|
||||||
|
|
||||||
// Converts an input array of {colors, positions} into an array of {scales, biases, thresholds}.
|
// Converts an input array of {colors, positions} into an array of {scales, biases, thresholds}.
|
||||||
// The length of the result array may differ from the input due to hard-stops or empty intervals.
|
// The length of the result array may differ from the input due to hard-stops or empty intervals.
|
||||||
int build_intervals(int inputLength,
|
int build_intervals(int inputLength,
|
||||||
@ -305,6 +397,37 @@ static std::unique_ptr<GrFragmentProcessor> make_unrolled_binary_colorizer(
|
|||||||
return make_unrolled_colorizer(intervalCount, scales, biases, thresholds1_7, thresholds9_13);
|
return make_unrolled_colorizer(intervalCount, scales, biases, thresholds1_7, thresholds9_13);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static std::unique_ptr<GrFragmentProcessor> make_looping_binary_colorizer(const SkPMColor4f* colors,
|
||||||
|
const SkScalar* positions,
|
||||||
|
int count) {
|
||||||
|
if (count > kMaxLoopingColorCount) {
|
||||||
|
// Definitely cannot represent this gradient configuration
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
SkPMColor4f scales[kMaxLoopingIntervalCount];
|
||||||
|
SkPMColor4f biases[kMaxLoopingIntervalCount];
|
||||||
|
SkScalar thresholds[kMaxLoopingIntervalCount] = {};
|
||||||
|
int intervalCount = build_intervals(count, colors, positions,
|
||||||
|
kMaxLoopingIntervalCount, scales, biases, thresholds);
|
||||||
|
if (intervalCount <= 0) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We round up the number of intervals to the next power of two. This reduces the number of
|
||||||
|
// unique shaders and doesn't require any additional GPU processing power, but this does waste a
|
||||||
|
// handful of uniforms.
|
||||||
|
int roundedSize = std::max(4, SkNextPow2(intervalCount));
|
||||||
|
SkASSERT(roundedSize <= kMaxLoopingIntervalCount);
|
||||||
|
for (; intervalCount < roundedSize; ++intervalCount) {
|
||||||
|
thresholds[intervalCount] = thresholds[intervalCount - 1];
|
||||||
|
scales[intervalCount] = scales[intervalCount - 1];
|
||||||
|
biases[intervalCount] = biases[intervalCount - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
return make_looping_colorizer(intervalCount, scales, biases, thresholds);
|
||||||
|
}
|
||||||
|
|
||||||
// Analyzes the shader's color stops and positions and chooses an appropriate colorizer to represent
|
// Analyzes the shader's color stops and positions and chooses an appropriate colorizer to represent
|
||||||
// the gradient.
|
// the gradient.
|
||||||
static std::unique_ptr<GrFragmentProcessor> make_colorizer(const SkPMColor4f* colors,
|
static std::unique_ptr<GrFragmentProcessor> make_colorizer(const SkPMColor4f* colors,
|
||||||
@ -376,15 +499,29 @@ static std::unique_ptr<GrFragmentProcessor> make_colorizer(const SkPMColor4f* co
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Attempt to create an analytic colorizer.
|
if (caps->nonconstantArrayIndexSupport()) {
|
||||||
if ((count <= kMaxUnrolledColorCount) && !intervalsExceedPrecisionLimit()) {
|
// Attempt to create an analytic colorizer that uses a binary-search loop.
|
||||||
std::unique_ptr<GrFragmentProcessor> colorizer = makeDualIntervalColorizer();
|
if ((count <= kMaxLoopingColorCount) && !intervalsExceedPrecisionLimit()) {
|
||||||
if (colorizer) {
|
std::unique_ptr<GrFragmentProcessor> colorizer = makeDualIntervalColorizer();
|
||||||
return colorizer;
|
if (colorizer) {
|
||||||
|
return colorizer;
|
||||||
|
}
|
||||||
|
colorizer = make_looping_binary_colorizer(colors + offset, positions + offset, count);
|
||||||
|
if (colorizer) {
|
||||||
|
return colorizer;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
colorizer = make_unrolled_binary_colorizer(colors + offset, positions + offset, count);
|
} else {
|
||||||
if (colorizer) {
|
// Attempt to create an analytic colorizer that conforms to ES2 loop limitations.
|
||||||
return colorizer;
|
if ((count <= kMaxUnrolledColorCount) && !intervalsExceedPrecisionLimit()) {
|
||||||
|
std::unique_ptr<GrFragmentProcessor> colorizer = makeDualIntervalColorizer();
|
||||||
|
if (colorizer) {
|
||||||
|
return colorizer;
|
||||||
|
}
|
||||||
|
colorizer = make_unrolled_binary_colorizer(colors + offset, positions + offset, count);
|
||||||
|
if (colorizer) {
|
||||||
|
return colorizer;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user