Reland "Revert "Interpreter: Support striped inputs for less overhead""

This reverts commit edc42b9971.

Reason for revert: Sigh

Original change's description:
> Revert "Revert "Interpreter: Support striped inputs for less overhead""
> 
> This reverts commit 645fe10313.
> 
> Change-Id: If74a15479f89f49ac33c0b6241bb0db92bc11083
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/221721
> Reviewed-by: Mike Klein <mtklein@google.com>
> Commit-Queue: Brian Osman <brianosman@google.com>

TBR=mtklein@google.com,brianosman@google.com,reed@google.com

Change-Id: I651d56821bbde8b91887aa885bcf2cb202707388
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/221897
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
This commit is contained in:
Brian Osman 2019-06-19 00:19:23 +00:00 committed by Skia Commit-Bot
parent e5f73281f9
commit 2c59b4e9ea
9 changed files with 27 additions and 125 deletions

View File

@ -12,11 +12,10 @@
// Benchmarks the interpreter with a function that has a color-filter style signature
class SkSLInterpreterCFBench : public Benchmark {
public:
SkSLInterpreterCFBench(SkSL::String name, int pixels, bool striped, const char* src)
: fName(SkStringPrintf("sksl_interp_cf_%d_%d_%s", pixels, striped ? 1 : 0, name.c_str()))
SkSLInterpreterCFBench(SkSL::String name, int pixels, const char* src)
: fName(SkStringPrintf("sksl_interp_cf_%d_%s", pixels, name.c_str()))
, fSrc(src)
, fCount(pixels)
, fStriped(striped) {}
, fCount(pixels) {}
protected:
const char* onGetName() override {
@ -45,18 +44,7 @@ protected:
void onDraw(int loops, SkCanvas*) override {
for (int i = 0; i < loops; i++) {
if (fStriped) {
float* args[] = {
fPixels.data() + 0 * fCount,
fPixels.data() + 1 * fCount,
fPixels.data() + 2 * fCount,
fPixels.data() + 3 * fCount,
};
fByteCode->runStriped(fMain, args, 4, fCount, nullptr, 0);
} else {
fByteCode->run(fMain, fPixels.data(), nullptr, fCount, nullptr, 0);
}
fByteCode->run(fMain, fPixels.data(), nullptr, fCount, nullptr, 0);
}
}
@ -67,7 +55,6 @@ private:
const SkSL::ByteCodeFunction* fMain;
int fCount;
bool fStriped;
std::vector<float> fPixels;
typedef Benchmark INHERITED;
@ -75,16 +62,16 @@ private:
///////////////////////////////////////////////////////////////////////////////
const char* kLumaToAlphaSrc = R"(
DEF_BENCH(return new SkSLInterpreterCFBench("lumaToAlpha", 256, R"(
void main(inout float4 color) {
color.a = color.r*0.3 + color.g*0.6 + color.b*0.1;
color.r = 0;
color.g = 0;
color.b = 0;
}
)";
)"));
const char* kHighContrastFilterSrc = R"(
DEF_BENCH(return new SkSLInterpreterCFBench("hcf", 256, R"(
half ucontrast_Stage2;
half hue2rgb_Stage2(half p, half q, half t) {
if (t < 0) t += 1;
@ -142,13 +129,7 @@ const char* kHighContrastFilterSrc = R"(
color.rgb = sqrt(color.rgb);
color.rgb *= color.a;
}
)";
DEF_BENCH(return new SkSLInterpreterCFBench("lumaToAlpha", 256, false, kLumaToAlphaSrc));
DEF_BENCH(return new SkSLInterpreterCFBench("lumaToAlpha", 256, true, kLumaToAlphaSrc));
DEF_BENCH(return new SkSLInterpreterCFBench("hcf", 256, false, kHighContrastFilterSrc));
DEF_BENCH(return new SkSLInterpreterCFBench("hcf", 256, true, kHighContrastFilterSrc));
)"));
class SkSLInterpreterSortBench : public Benchmark {
public:

View File

@ -404,7 +404,13 @@ public:
};
rec.fPipeline->append(SkRasterPipeline::callback, ctx);
} else {
auto ctx = rec.fAlloc->make<SkRasterPipeline_InterpreterCtx>();
struct InterpreterCtx : public SkRasterPipeline_CallbackCtx {
SkSL::ByteCode* byteCode;
SkSL::ByteCodeFunction* main;
const void* inputs;
int ninputs;
};
auto ctx = rec.fAlloc->make<InterpreterCtx>();
ctx->inputs = fInputs->data();
ctx->ninputs = fInputs->size() / 4;
@ -421,8 +427,13 @@ public:
fByteCode = c.toByteCode(*prog);
}
ctx->byteCode = fByteCode.get();
ctx->fn = ctx->byteCode->fFunctions[0].get();
rec.fPipeline->append(SkRasterPipeline::interpreter, ctx);
ctx->main = ctx->byteCode->fFunctions[0].get();
ctx->fn = [](SkRasterPipeline_CallbackCtx* arg, int active_pixels) {
auto ctx = (InterpreterCtx*)arg;
ctx->byteCode->run(ctx->main, ctx->rgba, nullptr, active_pixels,
(float*)ctx->inputs, ctx->ninputs);
};
rec.fPipeline->append(SkRasterPipeline::callback, ctx);
}
return true;
}

View File

@ -33,7 +33,7 @@
*/
#define SK_RASTER_PIPELINE_STAGES(M) \
M(callback) M(interpreter) \
M(callback) \
M(move_src_dst) M(move_dst_src) \
M(clamp_0) M(clamp_1) M(clamp_a) M(clamp_gamut) \
M(unpremul) M(premul) M(premul_dst) \
@ -150,19 +150,6 @@ struct SkRasterPipeline_CallbackCtx {
float* read_from = rgba;
};
namespace SkSL {
struct ByteCode;
struct ByteCodeFunction;
}
struct SkRasterPipeline_InterpreterCtx {
SkSL::ByteCode* byteCode;
SkSL::ByteCodeFunction* fn;
const void* inputs;
int ninputs;
};
struct SkRasterPipeline_GradientCtx {
size_t stopCount;
float* fs[4];

View File

@ -10,7 +10,6 @@
#include "include/core/SkTypes.h"
#include "src/core/SkUtils.h" // unaligned_{load,store}
#include "src/sksl/SkSLByteCode.h"
// Every function in this file should be marked static and inline using SI.
#if defined(__clang__)
@ -2553,27 +2552,6 @@ STAGE(callback, SkRasterPipeline_CallbackCtx* c) {
load4(c->read_from,0, &r,&g,&b,&a);
}
STAGE(interpreter, SkRasterPipeline_InterpreterCtx* c) {
float rr[N];
float gg[N];
float bb[N];
float aa[N];
sk_unaligned_store(rr, r);
sk_unaligned_store(gg, g);
sk_unaligned_store(bb, b);
sk_unaligned_store(aa, a);
float* args[] = { rr, gg, bb, aa };
c->byteCode->runStriped(c->fn, args, SK_ARRAY_COUNT(args), tail ? tail : N,
(const float*)c->inputs, c->ninputs);
r = sk_unaligned_load<F>(rr);
g = sk_unaligned_load<F>(gg);
b = sk_unaligned_load<F>(bb);
a = sk_unaligned_load<F>(aa);
}
STAGE(gauss_a_to_rgba, Ctx::None) {
// x = 1 - x;
// exp(-x * x * 4) - 0.018f;
@ -3852,7 +3830,6 @@ STAGE_PP(swizzle, void* ctx) {
// If a pipeline uses these stages, it'll boot it out of lowp into highp.
#define NOT_IMPLEMENTED(st) static void (*st)(void) = nullptr;
NOT_IMPLEMENTED(callback)
NOT_IMPLEMENTED(interpreter)
NOT_IMPLEMENTED(unbounded_set_rgb)
NOT_IMPLEMENTED(unbounded_uniform_color)
NOT_IMPLEMENTED(unpremul)

View File

@ -1041,57 +1041,6 @@ void ByteCode::run(const ByteCodeFunction* f, float* args, float* outReturn, int
}
}
void ByteCode::runStriped(const ByteCodeFunction* f, float* args[], int nargs, int N,
const float* uniforms, int uniformCount) const {
#ifdef TRACE
disassemble(f);
#endif
Interpreter::VValue stack[128];
// Needs to be the first N non-negative integers, at least as large as VecWidth
static const Interpreter::I32 gLanes = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
SkASSERT(f->fReturnCount == 0);
SkASSERT(nargs == f->fParameterCount);
SkASSERT(uniformCount == (int)fInputSlots.size());
Interpreter::VValue globals[32];
SkASSERT((int)SK_ARRAY_COUNT(globals) >= fGlobalCount);
for (uint8_t slot : fInputSlots) {
globals[slot].fFloat = *uniforms++;
}
while (N) {
int w = std::min(N, Interpreter::VecWidth);
// Copy args into stack
for (int i = 0; i < nargs; ++i) {
memcpy(stack + i, args[i], w * sizeof(float));
}
auto mask = w > gLanes;
innerRun(this, f, stack, nullptr, mask, globals);
// Copy out parameters back
int slot = 0;
for (const auto& p : f->fParameters) {
if (p.fIsOutParameter) {
for (int i = slot; i < slot + p.fSlotCount; ++i) {
memcpy(args[i], stack + i, w * sizeof(float));
}
}
slot += p.fSlotCount;
}
// Step each argument pointer ahead
for (int i = 0; i < nargs; ++i) {
args[i] += w;
}
N -= w;
}
}
} // namespace SkSL
#endif

View File

@ -195,9 +195,6 @@ struct SK_API ByteCode {
*/
void run(const ByteCodeFunction*, float* args, float* outReturn, int N,
const float* uniforms, int uniformCount) const;
void runStriped(const ByteCodeFunction*, float* args[], int nargs, int N,
const float* uniforms, int uniformCount) const;
};
}

View File

@ -41,7 +41,4 @@
#define NORETURN __attribute__((__noreturn__))
#endif
using SKSL_INT = int32_t;
using SKSL_FLOAT = float;
#endif

View File

@ -8,7 +8,7 @@
#ifndef SKSL_STRING
#define SKSL_STRING
#include "src/sksl/SkSLDefines.h"
#include "src/sksl/SkSLUtil.h"
#include <cstring>
#include <stdarg.h>
#include <string>

View File

@ -23,6 +23,9 @@
#endif // SK_SUPPORT_GPU
#endif // SKSL_STANDALONE
using SKSL_INT = int32_t;
using SKSL_FLOAT = float;
class GrShaderCaps;
namespace SkSL {