From 2c59b4e9ea856231e6c75608b66f202d16201679 Mon Sep 17 00:00:00 2001 From: Brian Osman Date: Wed, 19 Jun 2019 00:19:23 +0000 Subject: [PATCH] Reland "Revert "Interpreter: Support striped inputs for less overhead"" This reverts commit edc42b9971dcdc1dd7eda4fb2e104a7782d3289f. Reason for revert: Sigh Original change's description: > Revert "Revert "Interpreter: Support striped inputs for less overhead"" > > This reverts commit 645fe1031382e9de3aaf152e673ca08703cdc80a. > > Change-Id: If74a15479f89f49ac33c0b6241bb0db92bc11083 > Reviewed-on: https://skia-review.googlesource.com/c/skia/+/221721 > Reviewed-by: Mike Klein > Commit-Queue: Brian Osman TBR=mtklein@google.com,brianosman@google.com,reed@google.com Change-Id: I651d56821bbde8b91887aa885bcf2cb202707388 No-Presubmit: true No-Tree-Checks: true No-Try: true Reviewed-on: https://skia-review.googlesource.com/c/skia/+/221897 Reviewed-by: Brian Osman Commit-Queue: Brian Osman --- bench/SkSLInterpreterBench.cpp | 35 +++++----------------- src/core/SkColorFilter.cpp | 17 +++++++++-- src/core/SkRasterPipeline.h | 15 +--------- src/opts/SkRasterPipeline_opts.h | 23 -------------- src/sksl/SkSLByteCode.cpp | 51 -------------------------------- src/sksl/SkSLByteCode.h | 3 -- src/sksl/SkSLDefines.h | 3 -- src/sksl/SkSLString.h | 2 +- src/sksl/SkSLUtil.h | 3 ++ 9 files changed, 27 insertions(+), 125 deletions(-) diff --git a/bench/SkSLInterpreterBench.cpp b/bench/SkSLInterpreterBench.cpp index 4146f8dee5..646d59f9b3 100644 --- a/bench/SkSLInterpreterBench.cpp +++ b/bench/SkSLInterpreterBench.cpp @@ -12,11 +12,10 @@ // Benchmarks the interpreter with a function that has a color-filter style signature class SkSLInterpreterCFBench : public Benchmark { public: - SkSLInterpreterCFBench(SkSL::String name, int pixels, bool striped, const char* src) - : fName(SkStringPrintf("sksl_interp_cf_%d_%d_%s", pixels, striped ? 1 : 0, name.c_str())) + SkSLInterpreterCFBench(SkSL::String name, int pixels, const char* src) + : fName(SkStringPrintf("sksl_interp_cf_%d_%s", pixels, name.c_str())) , fSrc(src) - , fCount(pixels) - , fStriped(striped) {} + , fCount(pixels) {} protected: const char* onGetName() override { @@ -45,18 +44,7 @@ protected: void onDraw(int loops, SkCanvas*) override { for (int i = 0; i < loops; i++) { - if (fStriped) { - float* args[] = { - fPixels.data() + 0 * fCount, - fPixels.data() + 1 * fCount, - fPixels.data() + 2 * fCount, - fPixels.data() + 3 * fCount, - }; - - fByteCode->runStriped(fMain, args, 4, fCount, nullptr, 0); - } else { - fByteCode->run(fMain, fPixels.data(), nullptr, fCount, nullptr, 0); - } + fByteCode->run(fMain, fPixels.data(), nullptr, fCount, nullptr, 0); } } @@ -67,7 +55,6 @@ private: const SkSL::ByteCodeFunction* fMain; int fCount; - bool fStriped; std::vector fPixels; typedef Benchmark INHERITED; @@ -75,16 +62,16 @@ private: /////////////////////////////////////////////////////////////////////////////// -const char* kLumaToAlphaSrc = R"( +DEF_BENCH(return new SkSLInterpreterCFBench("lumaToAlpha", 256, R"( void main(inout float4 color) { color.a = color.r*0.3 + color.g*0.6 + color.b*0.1; color.r = 0; color.g = 0; color.b = 0; } -)"; +)")); -const char* kHighContrastFilterSrc = R"( +DEF_BENCH(return new SkSLInterpreterCFBench("hcf", 256, R"( half ucontrast_Stage2; half hue2rgb_Stage2(half p, half q, half t) { if (t < 0) t += 1; @@ -142,13 +129,7 @@ const char* kHighContrastFilterSrc = R"( color.rgb = sqrt(color.rgb); color.rgb *= color.a; } -)"; - -DEF_BENCH(return new SkSLInterpreterCFBench("lumaToAlpha", 256, false, kLumaToAlphaSrc)); -DEF_BENCH(return new SkSLInterpreterCFBench("lumaToAlpha", 256, true, kLumaToAlphaSrc)); - -DEF_BENCH(return new SkSLInterpreterCFBench("hcf", 256, false, kHighContrastFilterSrc)); -DEF_BENCH(return new SkSLInterpreterCFBench("hcf", 256, true, kHighContrastFilterSrc)); +)")); class SkSLInterpreterSortBench : public Benchmark { public: diff --git a/src/core/SkColorFilter.cpp b/src/core/SkColorFilter.cpp index a59df873e8..6874cb5823 100644 --- a/src/core/SkColorFilter.cpp +++ b/src/core/SkColorFilter.cpp @@ -404,7 +404,13 @@ public: }; rec.fPipeline->append(SkRasterPipeline::callback, ctx); } else { - auto ctx = rec.fAlloc->make(); + struct InterpreterCtx : public SkRasterPipeline_CallbackCtx { + SkSL::ByteCode* byteCode; + SkSL::ByteCodeFunction* main; + const void* inputs; + int ninputs; + }; + auto ctx = rec.fAlloc->make(); ctx->inputs = fInputs->data(); ctx->ninputs = fInputs->size() / 4; @@ -421,8 +427,13 @@ public: fByteCode = c.toByteCode(*prog); } ctx->byteCode = fByteCode.get(); - ctx->fn = ctx->byteCode->fFunctions[0].get(); - rec.fPipeline->append(SkRasterPipeline::interpreter, ctx); + ctx->main = ctx->byteCode->fFunctions[0].get(); + ctx->fn = [](SkRasterPipeline_CallbackCtx* arg, int active_pixels) { + auto ctx = (InterpreterCtx*)arg; + ctx->byteCode->run(ctx->main, ctx->rgba, nullptr, active_pixels, + (float*)ctx->inputs, ctx->ninputs); + }; + rec.fPipeline->append(SkRasterPipeline::callback, ctx); } return true; } diff --git a/src/core/SkRasterPipeline.h b/src/core/SkRasterPipeline.h index c40033129f..490acd262d 100644 --- a/src/core/SkRasterPipeline.h +++ b/src/core/SkRasterPipeline.h @@ -33,7 +33,7 @@ */ #define SK_RASTER_PIPELINE_STAGES(M) \ - M(callback) M(interpreter) \ + M(callback) \ M(move_src_dst) M(move_dst_src) \ M(clamp_0) M(clamp_1) M(clamp_a) M(clamp_gamut) \ M(unpremul) M(premul) M(premul_dst) \ @@ -150,19 +150,6 @@ struct SkRasterPipeline_CallbackCtx { float* read_from = rgba; }; -namespace SkSL { -struct ByteCode; -struct ByteCodeFunction; -} - -struct SkRasterPipeline_InterpreterCtx { - SkSL::ByteCode* byteCode; - SkSL::ByteCodeFunction* fn; - - const void* inputs; - int ninputs; -}; - struct SkRasterPipeline_GradientCtx { size_t stopCount; float* fs[4]; diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h index a214ae017f..6689066e73 100644 --- a/src/opts/SkRasterPipeline_opts.h +++ b/src/opts/SkRasterPipeline_opts.h @@ -10,7 +10,6 @@ #include "include/core/SkTypes.h" #include "src/core/SkUtils.h" // unaligned_{load,store} -#include "src/sksl/SkSLByteCode.h" // Every function in this file should be marked static and inline using SI. #if defined(__clang__) @@ -2553,27 +2552,6 @@ STAGE(callback, SkRasterPipeline_CallbackCtx* c) { load4(c->read_from,0, &r,&g,&b,&a); } -STAGE(interpreter, SkRasterPipeline_InterpreterCtx* c) { - float rr[N]; - float gg[N]; - float bb[N]; - float aa[N]; - - sk_unaligned_store(rr, r); - sk_unaligned_store(gg, g); - sk_unaligned_store(bb, b); - sk_unaligned_store(aa, a); - - float* args[] = { rr, gg, bb, aa }; - c->byteCode->runStriped(c->fn, args, SK_ARRAY_COUNT(args), tail ? tail : N, - (const float*)c->inputs, c->ninputs); - - r = sk_unaligned_load(rr); - g = sk_unaligned_load(gg); - b = sk_unaligned_load(bb); - a = sk_unaligned_load(aa); -} - STAGE(gauss_a_to_rgba, Ctx::None) { // x = 1 - x; // exp(-x * x * 4) - 0.018f; @@ -3852,7 +3830,6 @@ STAGE_PP(swizzle, void* ctx) { // If a pipeline uses these stages, it'll boot it out of lowp into highp. #define NOT_IMPLEMENTED(st) static void (*st)(void) = nullptr; NOT_IMPLEMENTED(callback) - NOT_IMPLEMENTED(interpreter) NOT_IMPLEMENTED(unbounded_set_rgb) NOT_IMPLEMENTED(unbounded_uniform_color) NOT_IMPLEMENTED(unpremul) diff --git a/src/sksl/SkSLByteCode.cpp b/src/sksl/SkSLByteCode.cpp index 23730c0ec5..8b9fc3e48b 100644 --- a/src/sksl/SkSLByteCode.cpp +++ b/src/sksl/SkSLByteCode.cpp @@ -1041,57 +1041,6 @@ void ByteCode::run(const ByteCodeFunction* f, float* args, float* outReturn, int } } -void ByteCode::runStriped(const ByteCodeFunction* f, float* args[], int nargs, int N, - const float* uniforms, int uniformCount) const { -#ifdef TRACE - disassemble(f); -#endif - Interpreter::VValue stack[128]; - - // Needs to be the first N non-negative integers, at least as large as VecWidth - static const Interpreter::I32 gLanes = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 - }; - - SkASSERT(f->fReturnCount == 0); - SkASSERT(nargs == f->fParameterCount); - SkASSERT(uniformCount == (int)fInputSlots.size()); - Interpreter::VValue globals[32]; - SkASSERT((int)SK_ARRAY_COUNT(globals) >= fGlobalCount); - for (uint8_t slot : fInputSlots) { - globals[slot].fFloat = *uniforms++; - } - - while (N) { - int w = std::min(N, Interpreter::VecWidth); - - // Copy args into stack - for (int i = 0; i < nargs; ++i) { - memcpy(stack + i, args[i], w * sizeof(float)); - } - - auto mask = w > gLanes; - innerRun(this, f, stack, nullptr, mask, globals); - - // Copy out parameters back - int slot = 0; - for (const auto& p : f->fParameters) { - if (p.fIsOutParameter) { - for (int i = slot; i < slot + p.fSlotCount; ++i) { - memcpy(args[i], stack + i, w * sizeof(float)); - } - } - slot += p.fSlotCount; - } - - // Step each argument pointer ahead - for (int i = 0; i < nargs; ++i) { - args[i] += w; - } - N -= w; - } -} - } // namespace SkSL #endif diff --git a/src/sksl/SkSLByteCode.h b/src/sksl/SkSLByteCode.h index 0c5b7f6a3b..c4d8549620 100644 --- a/src/sksl/SkSLByteCode.h +++ b/src/sksl/SkSLByteCode.h @@ -195,9 +195,6 @@ struct SK_API ByteCode { */ void run(const ByteCodeFunction*, float* args, float* outReturn, int N, const float* uniforms, int uniformCount) const; - - void runStriped(const ByteCodeFunction*, float* args[], int nargs, int N, - const float* uniforms, int uniformCount) const; }; } diff --git a/src/sksl/SkSLDefines.h b/src/sksl/SkSLDefines.h index 7a3b3e09be..74b794f355 100644 --- a/src/sksl/SkSLDefines.h +++ b/src/sksl/SkSLDefines.h @@ -41,7 +41,4 @@ #define NORETURN __attribute__((__noreturn__)) #endif -using SKSL_INT = int32_t; -using SKSL_FLOAT = float; - #endif diff --git a/src/sksl/SkSLString.h b/src/sksl/SkSLString.h index ef5826b8e7..5ac035e70f 100644 --- a/src/sksl/SkSLString.h +++ b/src/sksl/SkSLString.h @@ -8,7 +8,7 @@ #ifndef SKSL_STRING #define SKSL_STRING -#include "src/sksl/SkSLDefines.h" +#include "src/sksl/SkSLUtil.h" #include #include #include diff --git a/src/sksl/SkSLUtil.h b/src/sksl/SkSLUtil.h index 1320804c28..aed8af272c 100644 --- a/src/sksl/SkSLUtil.h +++ b/src/sksl/SkSLUtil.h @@ -23,6 +23,9 @@ #endif // SK_SUPPORT_GPU #endif // SKSL_STANDALONE +using SKSL_INT = int32_t; +using SKSL_FLOAT = float; + class GrShaderCaps; namespace SkSL {