diff --git a/bench/SkSLInterpreterBench.cpp b/bench/SkSLInterpreterBench.cpp index 22afd28458..568495578d 100644 --- a/bench/SkSLInterpreterBench.cpp +++ b/bench/SkSLInterpreterBench.cpp @@ -9,6 +9,7 @@ #include "include/utils/SkRandom.h" #include "src/sksl/SkSLByteCode.h" #include "src/sksl/SkSLCompiler.h" +#include "src/sksl/SkSLInterpreter.h" // Without this build flag, this bench isn't runnable. #if defined(SK_ENABLE_SKSL_INTERPRETER) @@ -22,6 +23,8 @@ public: , fCount(pixels) {} protected: + static constexpr int VecWidth = 16; + const char* onGetName() override { return fName.c_str(); } @@ -35,9 +38,10 @@ protected: SkSL::Program::Settings settings; auto program = compiler.convertProgram(SkSL::Program::kGeneric_Kind, fSrc, settings); SkASSERT(compiler.errorCount() == 0); - fByteCode = compiler.toByteCode(*program); + std::unique_ptr byteCode = compiler.toByteCode(*program); + fMain = byteCode->getFunction("main"); + fInterpreter.reset(new SkSL::Interpreter(std::move(byteCode))); SkASSERT(compiler.errorCount() == 0); - fMain = fByteCode->getFunction("main"); SkRandom rnd; fPixels.resize(fCount * 4); @@ -55,14 +59,14 @@ protected: fPixels.data() + 3 * fCount, }; - SkAssertResult(fByteCode->runStriped(fMain, fCount, args, 4, nullptr, 0, nullptr, 0)); + fInterpreter->runStriped(fMain, fCount, (float**) args); } } private: SkString fName; SkSL::String fSrc; - std::unique_ptr fByteCode; + std::unique_ptr> fInterpreter; const SkSL::ByteCodeFunction* fMain; int fCount; diff --git a/gn/sksl.gni b/gn/sksl.gni index acc3242215..182d312c8b 100644 --- a/gn/sksl.gni +++ b/gn/sksl.gni @@ -8,7 +8,6 @@ _src = get_path_info("../src", "abspath") skia_sksl_sources = [ "$_src/sksl/SkSLASTNode.cpp", - "$_src/sksl/SkSLByteCode.cpp", "$_src/sksl/SkSLByteCodeGenerator.cpp", "$_src/sksl/SkSLCFGGenerator.cpp", "$_src/sksl/SkSLCompiler.cpp", diff --git a/modules/particles/include/SkParticleEffect.h b/modules/particles/include/SkParticleEffect.h index b19ce2f635..b28eac8311 100644 --- 
a/modules/particles/include/SkParticleEffect.h +++ b/modules/particles/include/SkParticleEffect.h @@ -16,6 +16,7 @@ #include "include/private/SkTemplates.h" #include "include/utils/SkRandom.h" #include "modules/particles/include/SkParticleData.h" +#include "src/sksl/SkSLInterpreter.h" #include @@ -25,6 +26,8 @@ class SkParticleBinding; class SkParticleDrawable; class SkParticleExternalValue; +static constexpr int INTERPRETER_WIDTH = 8; + namespace skresources { class ResourceProvider; } @@ -122,13 +125,16 @@ private: friend class SkParticleEffect; // Cached + template struct Program { - std::unique_ptr fByteCode; + std::unique_ptr> fInterpreter; SkTArray> fExternalValues; }; - Program fEffectProgram; - Program fParticleProgram; + // for performance it would be better to run this with a Program<1>, but for code-size reasons + // we stick to INTERPRETER_WIDTH + Program fEffectProgram; + Program fParticleProgram; }; class SkParticleEffect : public SkRefCnt { @@ -183,8 +189,17 @@ public: void setFrame (float f) { fState.fFrame = f; } void setFlags (uint32_t f) { fState.fFlags = f; } - const SkSL::ByteCode* effectCode() const { return fParams->fEffectProgram.fByteCode.get(); } - const SkSL::ByteCode* particleCode() const { return fParams->fParticleProgram.fByteCode.get(); } + const SkSL::ByteCode* effectCode() const { + return fParams->fEffectProgram.fInterpreter ? + &fParams->fEffectProgram.fInterpreter->getCode() : + nullptr; + } + + const SkSL::ByteCode* particleCode() const { + return fParams->fParticleProgram.fInterpreter ? 
+ &fParams->fParticleProgram.fInterpreter->getCode() : + nullptr; + } float* effectUniforms() { return fEffectUniforms.data(); } float* particleUniforms() { return fParticleUniforms.data(); } diff --git a/modules/particles/src/SkParticleEffect.cpp b/modules/particles/src/SkParticleEffect.cpp index a1d39d0724..ab4e4e1979 100644 --- a/modules/particles/src/SkParticleEffect.cpp +++ b/modules/particles/src/SkParticleEffect.cpp @@ -119,7 +119,9 @@ void SkParticleEffectParams::prepare(const skresources::ResourceProvider* resour fDrawable->prepare(resourceProvider); } - auto buildProgram = [this](const SkSL::String& code, Program* p) { + auto buildProgram = [this](const SkSL::String& code) -> + std::pair, + SkTArray>> { SkSL::Compiler compiler; SkSL::Program::Settings settings; @@ -140,17 +142,15 @@ void SkParticleEffectParams::prepare(const skresources::ResourceProvider* resour auto program = compiler.convertProgram(SkSL::Program::kGeneric_Kind, code, settings); if (!program) { SkDebugf("%s\n", compiler.errorText().c_str()); - return; + return std::make_pair(nullptr, std::move(externalValues)); } auto byteCode = compiler.toByteCode(*program); if (!byteCode) { SkDebugf("%s\n", compiler.errorText().c_str()); - return; + return std::make_pair(nullptr, std::move(externalValues)); } - - p->fByteCode = std::move(byteCode); - p->fExternalValues.swap(externalValues); + return std::make_pair(std::move(byteCode), std::move(externalValues)); }; SkSL::String effectCode(kCommonHeader); @@ -160,8 +160,15 @@ void SkParticleEffectParams::prepare(const skresources::ResourceProvider* resour particleCode.append(kParticleHeader); particleCode.append(fParticleCode.c_str()); - buildProgram(effectCode, &fEffectProgram); - buildProgram(particleCode, &fParticleProgram); + auto effectProgram = buildProgram(effectCode); + fEffectProgram.fInterpreter.reset(new SkSL::Interpreter( + std::move(effectProgram.first))); + fEffectProgram.fExternalValues.swap(effectProgram.second); + + auto particleProgram 
= buildProgram(particleCode); + fParticleProgram.fInterpreter.reset(new SkSL::Interpreter( + std::move(particleProgram.first))); + fParticleProgram.fExternalValues.swap(particleProgram.second); } SkParticleEffect::SkParticleEffect(sk_sp params, const SkRandom& random) @@ -222,15 +229,22 @@ void SkParticleEffect::processEffectSpawnRequests(double now) { } void SkParticleEffect::runEffectScript(double now, const char* entry) { - if (const auto& byteCode = fParams->fEffectProgram.fByteCode) { - if (auto fun = byteCode->getFunction(entry)) { + SkSL::Interpreter* interpreter = fParams->fEffectProgram.fInterpreter.get(); + if (interpreter) { + const auto& byteCode = interpreter->getCode(); + if (auto fun = byteCode.getFunction(entry)) { for (const auto& value : fParams->fEffectProgram.fExternalValues) { value->setRandom(&fRandom); value->setEffect(this); } - SkAssertResult(byteCode->run(fun, &fState.fAge, sizeof(EffectState) / sizeof(float), - nullptr, 0, - fEffectUniforms.data(), fEffectUniforms.count())); + interpreter->setUniforms(fEffectUniforms.data()); + static constexpr int numChannels = sizeof(EffectState) / sizeof(float); + SkASSERT(numChannels == fun->getParameterSlotCount()); + float* args[numChannels]; + for (int i = 0; i < numChannels; ++i) { + args[i] = &fState.fAge + i; + } + SkAssertResult(interpreter->runStriped(fun, 1, args)); this->processEffectSpawnRequests(now); } } @@ -263,8 +277,11 @@ void SkParticleEffect::processParticleSpawnRequests(double now, int start) { } void SkParticleEffect::runParticleScript(double now, const char* entry, int start, int count) { - if (const auto& byteCode = fParams->fParticleProgram.fByteCode) { - if (auto fun = byteCode->getFunction(entry)) { + SkSL::Interpreter* interpreter = + fParams->fParticleProgram.fInterpreter.get(); + if (interpreter) { + const auto& byteCode = interpreter->getCode(); + if (auto fun = byteCode.getFunction(entry)) { float* args[SkParticles::kNumChannels]; for (int i = 0; i < 
SkParticles::kNumChannels; ++i) { args[i] = fParticles.fData[i].get() + start; @@ -275,10 +292,8 @@ void SkParticleEffect::runParticleScript(double now, const char* entry, int star value->setEffect(this); } memcpy(&fParticleUniforms[1], &fState.fAge, sizeof(EffectState)); - SkAssertResult(byteCode->runStriped(fun, count, args, SkParticles::kNumChannels, - nullptr, 0, - fParticleUniforms.data(), - fParticleUniforms.count())); + interpreter->setUniforms(fParticleUniforms.data()); + SkAssertResult(interpreter->runStriped(fun, count, (float**) args)); this->processParticleSpawnRequests(now, start); } } diff --git a/src/core/SkColorFilter.cpp b/src/core/SkColorFilter.cpp index 26836d4486..dfd34d27b2 100644 --- a/src/core/SkColorFilter.cpp +++ b/src/core/SkColorFilter.cpp @@ -19,6 +19,7 @@ #include "src/core/SkReadBuffer.h" #include "src/core/SkVM.h" #include "src/core/SkWriteBuffer.h" +#include "src/sksl/SkSLInterpreter.h" #if SK_SUPPORT_GPU #include "src/gpu/GrFragmentProcessor.h" @@ -420,17 +421,20 @@ public: ctx->ninputs = fEffect->uniformSize() / 4; ctx->shaderConvention = false; - SkAutoMutexExclusive ama(fByteCodeMutex); - if (!fByteCode) { + SkAutoMutexExclusive ama(fInterpreterMutex); + if (!fInterpreter) { auto [byteCode, errorText] = fEffect->toByteCode(fInputs->data()); if (!byteCode) { SkDebugf("%s\n", errorText.c_str()); return false; } - fByteCode = std::move(byteCode); + fMain = byteCode->getFunction("main"); + fInterpreter.reset( + new SkSL::Interpreter( + std::move(byteCode))); } - ctx->byteCode = fByteCode.get(); - ctx->fn = ctx->byteCode->getFunction("main"); + ctx->fn = fMain; + ctx->interpreter = fInterpreter.get(); rec.fPipeline->append(SkRasterPipeline::interpreter, ctx); return true; } @@ -453,8 +457,10 @@ private: sk_sp fEffect; sk_sp fInputs; - mutable SkMutex fByteCodeMutex; - mutable std::unique_ptr fByteCode; + mutable SkMutex fInterpreterMutex; + mutable std::unique_ptr> + fInterpreter; + mutable const SkSL::ByteCodeFunction* fMain; friend 
class SkColorFilter; diff --git a/src/core/SkRasterPipeline.h b/src/core/SkRasterPipeline.h index 98b009ac0d..1a6e582db2 100644 --- a/src/core/SkRasterPipeline.h +++ b/src/core/SkRasterPipeline.h @@ -161,12 +161,15 @@ struct SkRasterPipeline_CallbackCtx { }; namespace SkSL { -class ByteCode; class ByteCodeFunction; + +template +class Interpreter; } struct SkRasterPipeline_InterpreterCtx { - const SkSL::ByteCode* byteCode; + static constexpr int VECTOR_WIDTH = 8; + SkSL::Interpreter* interpreter; const SkSL::ByteCodeFunction* fn; SkColor4f paintColor; diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h index 00e2b67e93..0f75d54b04 100644 --- a/src/opts/SkRasterPipeline_opts.h +++ b/src/opts/SkRasterPipeline_opts.h @@ -10,7 +10,7 @@ #include "include/core/SkTypes.h" #include "src/core/SkUtils.h" // unaligned_{load,store} -#include "src/sksl/SkSLByteCode.h" +#include "src/sksl/SkSLInterpreter.h" // Every function in this file should be marked static and inline using SI. #if defined(__clang__) @@ -2711,7 +2711,6 @@ STAGE(interpreter, SkRasterPipeline_InterpreterCtx* c) { float* args[] = { xx, yy, rr, gg, bb, aa }; float** in_args = args; - int in_count = 6; if (c->shaderConvention) { // our caller must have called seed_shader to set these @@ -2723,15 +2722,14 @@ STAGE(interpreter, SkRasterPipeline_InterpreterCtx* c) { sk_unaligned_store(aa, F(c->paintColor.fA)); } else { in_args += 2; // skip x,y - in_count = 4; sk_unaligned_store(rr, r); sk_unaligned_store(gg, g); sk_unaligned_store(bb, b); sk_unaligned_store(aa, a); } - SkAssertResult(c->byteCode->runStriped(c->fn, tail ? tail : N, in_args, in_count, - nullptr, 0, (const float*)c->inputs, c->ninputs)); + c->interpreter->setUniforms((float*) c->inputs); + SkAssertResult(c->interpreter->runStriped(c->fn, tail ? 
tail : N, (float**) in_args)); r = sk_unaligned_load(rr); g = sk_unaligned_load(gg); diff --git a/src/shaders/SkRTShader.cpp b/src/shaders/SkRTShader.cpp index ce823eb3b8..90aeb1f789 100644 --- a/src/shaders/SkRTShader.cpp +++ b/src/shaders/SkRTShader.cpp @@ -14,6 +14,8 @@ #include "src/shaders/SkRTShader.h" #include "src/sksl/SkSLByteCode.h" +#include "src/sksl/SkSLCompiler.h" +#include "src/sksl/SkSLInterpreter.h" #if SK_SUPPORT_GPU #include "src/gpu/GrColorInfo.h" @@ -45,17 +47,19 @@ bool SkRTShader::onAppendStages(const SkStageRec& rec) const { ctx->ninputs = fEffect->uniformSize() / 4; ctx->shaderConvention = true; - SkAutoMutexExclusive ama(fByteCodeMutex); - if (!fByteCode) { + SkAutoMutexExclusive ama(fInterpreterMutex); + if (!fInterpreter) { auto [byteCode, errorText] = fEffect->toByteCode(fInputs->data()); if (!byteCode) { SkDebugf("%s\n", errorText.c_str()); return false; } - fByteCode = std::move(byteCode); + fMain = byteCode->getFunction("main"); + fInterpreter.reset(new SkSL::Interpreter( + std::move(byteCode))); } - ctx->byteCode = fByteCode.get(); - ctx->fn = ctx->byteCode->getFunction("main"); + ctx->fn = fMain; + ctx->interpreter = fInterpreter.get(); rec.fPipeline->append(SkRasterPipeline::seed_shader); rec.fPipeline->append_matrix(rec.fAlloc, inverse); diff --git a/src/shaders/SkRTShader.h b/src/shaders/SkRTShader.h index 5d44840b77..1ac56ec5f9 100644 --- a/src/shaders/SkRTShader.h +++ b/src/shaders/SkRTShader.h @@ -18,7 +18,12 @@ class SkData; class SkMatrix; class SkRuntimeEffect; -namespace SkSL { class ByteCode; } +namespace SkSL { + class ByteCodeFunction; + + template + class Interpreter; +} class SkRTShader : public SkShaderBase { public: @@ -37,6 +42,8 @@ protected: bool onAppendStages(const SkStageRec& rec) const override; private: + static constexpr int VECTOR_WIDTH = 8; + SK_FLATTENABLE_HOOKS(SkRTShader) sk_sp fEffect; @@ -45,8 +52,9 @@ private: sk_sp fInputs; std::vector> fChildren; - mutable SkMutex fByteCodeMutex; - mutable 
std::unique_ptr fByteCode; + mutable SkMutex fInterpreterMutex; + mutable std::unique_ptr> fInterpreter; + mutable const SkSL::ByteCodeFunction* fMain; typedef SkShaderBase INHERITED; }; diff --git a/src/sksl/SkSLByteCode.cpp b/src/sksl/SkSLByteCode.cpp deleted file mode 100644 index a9c3480900..0000000000 --- a/src/sksl/SkSLByteCode.cpp +++ /dev/null @@ -1,1760 +0,0 @@ -/* - * Copyright 2018 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#ifndef SKSL_STANDALONE - -#include "include/core/SkPoint3.h" -#include "include/private/SkVx.h" -#include "src/core/SkUtils.h" // sk_unaligned_load -#include "src/sksl/SkSLByteCode.h" -#include "src/sksl/SkSLByteCodeGenerator.h" -#include "src/sksl/SkSLExternalValue.h" - -#include - -namespace SkSL { - -#if defined(SK_ENABLE_SKSL_INTERPRETER) - -constexpr int VecWidth = ByteCode::kVecWidth; - -struct Interpreter { - -using F32 = skvx::Vec; -using I32 = skvx::Vec; -using U32 = skvx::Vec; - -#define READ8() (*(ip++)) -#define READ16() (ip += 2, sk_unaligned_load(ip - 2)) -#define READ32() (ip += 4, sk_unaligned_load(ip - 4)) -#define READ_INST() (ip += sizeof(instruction), \ - sk_unaligned_load(ip - sizeof(instruction))) - -#define VECTOR_DISASSEMBLE(op, text) \ - case ByteCodeInstruction::op: printf(text); ++ip; break; \ - case ByteCodeInstruction::op##2: printf(text "2"); ++ip; break; \ - case ByteCodeInstruction::op##3: printf(text "3"); ++ip; break; \ - case ByteCodeInstruction::op##4: printf(text "4"); ++ip; break; - -#define VECTOR_DISASSEMBLE_NO_COUNT(op, text) \ - case ByteCodeInstruction::op: printf(text); break; \ - case ByteCodeInstruction::op##2: printf(text "2"); break; \ - case ByteCodeInstruction::op##3: printf(text "3"); break; \ - case ByteCodeInstruction::op##4: printf(text "4"); break; - -#define VECTOR_MATRIX_DISASSEMBLE(op, text) \ - VECTOR_DISASSEMBLE(op, text) \ - case ByteCodeInstruction::op##N: printf(text "N %d", READ8()); 
break; - -#define VECTOR_MATRIX_DISASSEMBLE_NO_COUNT(op, text) \ - VECTOR_DISASSEMBLE_NO_COUNT(op, text) \ - case ByteCodeInstruction::op##N: printf(text "N %d", READ8()); break; - -static const uint8_t* DisassembleInstruction(const uint8_t* ip) { - switch ((ByteCodeInstruction) (intptr_t) READ_INST()) { - VECTOR_MATRIX_DISASSEMBLE(kAddF, "addf") - VECTOR_DISASSEMBLE(kAddI, "addi") - case ByteCodeInstruction::kAndB: printf("andb"); break; - case ByteCodeInstruction::kBranch: printf("branch %d", READ16()); break; - case ByteCodeInstruction::kCall: printf("call %d", READ8()); break; - case ByteCodeInstruction::kCallExternal: { - int argumentCount = READ8(); - int returnCount = READ8(); - int externalValue = READ8(); - printf("callexternal %d, %d, %d", argumentCount, returnCount, externalValue); - break; - } - case ByteCodeInstruction::kClampIndex: printf("clampindex %d", READ8()); break; - VECTOR_DISASSEMBLE(kCompareIEQ, "compareieq") - VECTOR_DISASSEMBLE(kCompareINEQ, "compareineq") - VECTOR_MATRIX_DISASSEMBLE(kCompareFEQ, "comparefeq") - VECTOR_MATRIX_DISASSEMBLE(kCompareFNEQ, "comparefneq") - VECTOR_DISASSEMBLE(kCompareFGT, "comparefgt") - VECTOR_DISASSEMBLE(kCompareFGTEQ, "comparefgteq") - VECTOR_DISASSEMBLE(kCompareFLT, "compareflt") - VECTOR_DISASSEMBLE(kCompareFLTEQ, "compareflteq") - VECTOR_DISASSEMBLE(kCompareSGT, "comparesgt") - VECTOR_DISASSEMBLE(kCompareSGTEQ, "comparesgteq") - VECTOR_DISASSEMBLE(kCompareSLT, "compareslt") - VECTOR_DISASSEMBLE(kCompareSLTEQ, "compareslteq") - VECTOR_DISASSEMBLE(kCompareUGT, "compareugt") - VECTOR_DISASSEMBLE(kCompareUGTEQ, "compareugteq") - VECTOR_DISASSEMBLE(kCompareULT, "compareult") - VECTOR_DISASSEMBLE(kCompareULTEQ, "compareulteq") - VECTOR_DISASSEMBLE_NO_COUNT(kConvertFtoI, "convertftoi") - VECTOR_DISASSEMBLE_NO_COUNT(kConvertStoF, "convertstof") - VECTOR_DISASSEMBLE_NO_COUNT(kConvertUtoF, "convertutof") - VECTOR_DISASSEMBLE(kCos, "cos") - VECTOR_MATRIX_DISASSEMBLE(kDivideF, "dividef") - VECTOR_DISASSEMBLE(kDivideS, 
"divideS") - VECTOR_DISASSEMBLE(kDivideU, "divideu") - VECTOR_MATRIX_DISASSEMBLE(kDup, "dup") - case ByteCodeInstruction::kInverse2x2: printf("inverse2x2"); break; - case ByteCodeInstruction::kInverse3x3: printf("inverse3x3"); break; - case ByteCodeInstruction::kInverse4x4: printf("inverse4x4"); break; - case ByteCodeInstruction::kLoad: printf("load %d", READ16() >> 8); break; - case ByteCodeInstruction::kLoad2: printf("load2 %d", READ16() >> 8); break; - case ByteCodeInstruction::kLoad3: printf("load3 %d", READ16() >> 8); break; - case ByteCodeInstruction::kLoad4: printf("load4 %d", READ16() >> 8); break; - case ByteCodeInstruction::kLoadGlobal: printf("loadglobal %d", READ16() >> 8); break; - case ByteCodeInstruction::kLoadGlobal2: printf("loadglobal2 %d", READ16() >> 8); break; - case ByteCodeInstruction::kLoadGlobal3: printf("loadglobal3 %d", READ16() >> 8); break; - case ByteCodeInstruction::kLoadGlobal4: printf("loadglobal4 %d", READ16() >> 8); break; - case ByteCodeInstruction::kLoadUniform: printf("loaduniform %d", READ16() >> 8); break; - case ByteCodeInstruction::kLoadUniform2: printf("loaduniform2 %d", READ16() >> 8); break; - case ByteCodeInstruction::kLoadUniform3: printf("loaduniform3 %d", READ16() >> 8); break; - case ByteCodeInstruction::kLoadUniform4: printf("loaduniform4 %d", READ16() >> 8); break; - case ByteCodeInstruction::kLoadSwizzle: { - int target = READ8(); - int count = READ8(); - printf("loadswizzle %d %d", target, count); - for (int i = 0; i < count; ++i) { - printf(", %d", READ8()); - } - break; - } - case ByteCodeInstruction::kLoadSwizzleGlobal: { - int target = READ8(); - int count = READ8(); - printf("loadswizzleglobal %d %d", target, count); - for (int i = 0; i < count; ++i) { - printf(", %d", READ8()); - } - break; - } - case ByteCodeInstruction::kLoadSwizzleUniform: { - int target = READ8(); - int count = READ8(); - printf("loadswizzleuniform %d %d", target, count); - for (int i = 0; i < count; ++i) { - printf(", %d", READ8()); - 
} - break; - } - case ByteCodeInstruction::kLoadExtended: printf("loadextended %d", READ8()); break; - case ByteCodeInstruction::kLoadExtendedGlobal: printf("loadextendedglobal %d", READ8()); - break; - case ByteCodeInstruction::kLoadExtendedUniform: printf("loadextendeduniform %d", READ8()); - break; - case ByteCodeInstruction::kMatrixToMatrix: { - int srcCols = READ8(); - int srcRows = READ8(); - int dstCols = READ8(); - int dstRows = READ8(); - printf("matrixtomatrix %dx%d %dx%d", srcCols, srcRows, dstCols, dstRows); - break; - } - case ByteCodeInstruction::kMatrixMultiply: { - int lCols = READ8(); - int lRows = READ8(); - int rCols = READ8(); - printf("matrixmultiply %dx%d %dx%d", lCols, lRows, rCols, lCols); - break; - } - VECTOR_MATRIX_DISASSEMBLE(kMultiplyF, "multiplyf") - VECTOR_DISASSEMBLE(kMultiplyI, "multiplyi") - VECTOR_MATRIX_DISASSEMBLE_NO_COUNT(kNegateF, "negatef") - VECTOR_DISASSEMBLE_NO_COUNT(kNegateI, "negatei") - case ByteCodeInstruction::kNotB: printf("notb"); break; - case ByteCodeInstruction::kOrB: printf("orb"); break; - VECTOR_MATRIX_DISASSEMBLE_NO_COUNT(kPop, "pop") - case ByteCodeInstruction::kPushImmediate: { - uint32_t v = READ32(); - union { uint32_t u; float f; } pun = { v }; - printf("pushimmediate %s", (to_string(v) + "(" + to_string(pun.f) + ")").c_str()); - break; - } - case ByteCodeInstruction::kReadExternal: printf("readexternal %d", READ16() >> 8); break; - case ByteCodeInstruction::kReadExternal2: printf("readexternal2 %d", READ16() >> 8); break; - case ByteCodeInstruction::kReadExternal3: printf("readexternal3 %d", READ16() >> 8); break; - case ByteCodeInstruction::kReadExternal4: printf("readexternal4 %d", READ16() >> 8); break; - VECTOR_DISASSEMBLE(kRemainderF, "remainderf") - VECTOR_DISASSEMBLE(kRemainderS, "remainders") - VECTOR_DISASSEMBLE(kRemainderU, "remainderu") - case ByteCodeInstruction::kReserve: printf("reserve %d", READ8()); break; - case ByteCodeInstruction::kReturn: printf("return %d", READ8()); break; - case 
ByteCodeInstruction::kScalarToMatrix: { - int cols = READ8(); - int rows = READ8(); - printf("scalartomatrix %dx%d", cols, rows); - break; - } - case ByteCodeInstruction::kShiftLeft: printf("shl %d", READ8()); break; - case ByteCodeInstruction::kShiftRightS: printf("shrs %d", READ8()); break; - case ByteCodeInstruction::kShiftRightU: printf("shru %d", READ8()); break; - VECTOR_DISASSEMBLE(kSin, "sin") - VECTOR_DISASSEMBLE_NO_COUNT(kSqrt, "sqrt") - case ByteCodeInstruction::kStore: printf("store %d", READ8()); break; - case ByteCodeInstruction::kStore2: printf("store2 %d", READ8()); break; - case ByteCodeInstruction::kStore3: printf("store3 %d", READ8()); break; - case ByteCodeInstruction::kStore4: printf("store4 %d", READ8()); break; - case ByteCodeInstruction::kStoreGlobal: printf("storeglobal %d", READ8()); break; - case ByteCodeInstruction::kStoreGlobal2: printf("storeglobal2 %d", READ8()); break; - case ByteCodeInstruction::kStoreGlobal3: printf("storeglobal3 %d", READ8()); break; - case ByteCodeInstruction::kStoreGlobal4: printf("storeglobal4 %d", READ8()); break; - case ByteCodeInstruction::kStoreSwizzle: { - int target = READ8(); - int count = READ8(); - printf("storeswizzle %d %d", target, count); - for (int i = 0; i < count; ++i) { - printf(", %d", READ8()); - } - break; - } - case ByteCodeInstruction::kStoreSwizzleGlobal: { - int target = READ8(); - int count = READ8(); - printf("storeswizzleglobal %d %d", target, count); - for (int i = 0; i < count; ++i) { - printf(", %d", READ8()); - } - break; - } - case ByteCodeInstruction::kStoreSwizzleIndirect: { - int count = READ8(); - printf("storeswizzleindirect %d", count); - for (int i = 0; i < count; ++i) { - printf(", %d", READ8()); - } - break; - } - case ByteCodeInstruction::kStoreSwizzleIndirectGlobal: { - int count = READ8(); - printf("storeswizzleindirectglobal %d", count); - for (int i = 0; i < count; ++i) { - printf(", %d", READ8()); - } - break; - } - case ByteCodeInstruction::kStoreExtended: 
printf("storeextended %d", READ8()); break; - case ByteCodeInstruction::kStoreExtendedGlobal: printf("storeextendedglobal %d", READ8()); - break; - VECTOR_MATRIX_DISASSEMBLE(kSubtractF, "subtractf") - VECTOR_DISASSEMBLE(kSubtractI, "subtracti") - case ByteCodeInstruction::kSwizzle: { - printf("swizzle %d, ", READ8()); - int count = READ8(); - printf("%d", count); - for (int i = 0; i < count; ++i) { - printf(", %d", READ8()); - } - break; - } - VECTOR_DISASSEMBLE(kTan, "tan") - case ByteCodeInstruction::kWriteExternal: printf("writeexternal %d", READ16() >> 8); break; - case ByteCodeInstruction::kWriteExternal2: printf("writeexternal2 %d", READ16() >> 8); break; - case ByteCodeInstruction::kWriteExternal3: printf("writeexternal3 %d", READ16() >> 8); break; - case ByteCodeInstruction::kWriteExternal4: printf("writeexternal4 %d", READ16() >> 8); break; - case ByteCodeInstruction::kXorB: printf("xorb"); break; - case ByteCodeInstruction::kMaskPush: printf("maskpush"); break; - case ByteCodeInstruction::kMaskPop: printf("maskpop"); break; - case ByteCodeInstruction::kMaskNegate: printf("masknegate"); break; - case ByteCodeInstruction::kMaskBlend: printf("maskblend %d", READ8()); break; - case ByteCodeInstruction::kBranchIfAllFalse: - printf("branchifallfalse %d", READ16()); - break; - case ByteCodeInstruction::kLoopBegin: printf("loopbegin"); break; - case ByteCodeInstruction::kLoopNext: printf("loopnext"); break; - case ByteCodeInstruction::kLoopMask: printf("loopmask"); break; - case ByteCodeInstruction::kLoopEnd: printf("loopend"); break; - case ByteCodeInstruction::kLoopContinue: printf("loopcontinue"); break; - case ByteCodeInstruction::kLoopBreak: printf("loopbreak"); break; - default: - ip -= sizeof(instruction); - printf("unknown(%d)\n", (int) (intptr_t) READ_INST()); - SkASSERT(false); - } - return ip; -} - -#ifdef SKSLC_THREADED_CODE - #define LABEL(name) name: - #ifdef TRACE - #define NEXT() goto next - #else - #define NEXT() goto *READ_INST() - #endif -#else 
- #define LABEL(name) case ByteCodeInstruction::name: - #define NEXT() continue -#endif - -#define VECTOR_BINARY_OP(base, field, op) \ - LABEL(base ## 4) \ - sp[-4] = sp[-4].field op sp[0].field; \ - POP(); \ - /* fall through */ \ - LABEL(base ## 3) { \ - sp[-ip[0]] = sp[-ip[0]].field op sp[0].field; \ - POP(); \ - } /* fall through */ \ - LABEL(base ## 2) { \ - sp[-ip[0]] = sp[-ip[0]].field op sp[0].field; \ - POP(); \ - } /* fall through */ \ - LABEL(base) { \ - sp[-ip[0]] = sp[-ip[0]].field op sp[0].field; \ - POP(); \ - ++ip; \ - NEXT(); \ - } - -// A naive implementation of / or % using skvx operations will likely crash with a divide by zero -// in inactive vector lanesm, so we need to be sure to avoid masked-off lanes. -#define VECTOR_BINARY_MASKED_OP(base, field, op) \ - LABEL(base ## 4) \ - for (int i = 0; i < VecWidth; ++i) { \ - if (mask()[i]) { \ - sp[-4].field[i] op ## = sp[0].field[i]; \ - } \ - } \ - POP(); \ - /* fall through */ \ - LABEL(base ## 3) { \ - for (int i = 0; i < VecWidth; ++i) { \ - if (mask()[i]) { \ - sp[-ip[0]].field[i] op ## = sp[0].field[i]; \ - } \ - } \ - POP(); \ - } /* fall through */ \ - LABEL(base ## 2) { \ - for (int i = 0; i < VecWidth; ++i) { \ - if (mask()[i]) { \ - sp[-ip[0]].field[i] op ## = sp[0].field[i]; \ - } \ - } \ - POP(); \ - } /* fall through */ \ - LABEL(base) { \ - for (int i = 0; i < VecWidth; ++i) { \ - if (mask()[i]) { \ - sp[-ip[0]].field[i] op ## = sp[0].field[i]; \ - } \ - } \ - POP(); \ - ++ip; \ - NEXT(); \ - } - - -#define VECTOR_MATRIX_BINARY_OP(base, field, op) \ - VECTOR_BINARY_OP(base, field, op) \ - LABEL(base ## N) { \ - int count = READ8(); \ - for (int i = count; i > 0; --i) { \ - sp[-count] = sp[-count].field op sp[0].field; \ - POP(); \ - } \ - NEXT(); \ - } - -#define VECTOR_BINARY_FN(base, field, fn) \ - LABEL(base ## 4) \ - sp[-4] = fn(sp[-4].field, sp[0].field); \ - POP(); \ - /* fall through */ \ - LABEL(base ## 3) { \ - sp[-ip[0]] = fn(sp[-ip[0]].field, sp[0].field); \ - POP(); \ - } 
/* fall through */ \ - LABEL(base ## 2) { \ - sp[-ip[0]] = fn(sp[-ip[0]].field, sp[0].field); \ - POP(); \ - } /* fall through */ \ - LABEL(base) { \ - sp[-ip[0]] = fn(sp[-ip[0]].field, sp[0].field); \ - POP(); \ - ++ip; \ - NEXT(); \ - } - -#define VECTOR_UNARY_FN(base, fn, field) \ - LABEL(base ## 4) sp[-3] = fn(sp[-3].field); \ - LABEL(base ## 3) sp[-2] = fn(sp[-2].field); \ - LABEL(base ## 2) sp[-1] = fn(sp[-1].field); \ - LABEL(base) sp[ 0] = fn(sp[ 0].field); \ - NEXT(); - -#define VECTOR_UNARY_FN_VEC(base, fn) \ - LABEL(base ## 4) \ - LABEL(base ## 3) \ - LABEL(base ## 2) \ - LABEL(base) { \ - int count = READ8(); \ - float* v = (float*)sp - count + 1; \ - for (int i = VecWidth * count; i > 0; --i, ++v) { \ - *v = fn(*v); \ - } \ - NEXT(); \ - } - -#define VECTOR_LABELS(base) \ - &&base ## 4, \ - &&base ## 3, \ - &&base ## 2, \ - &&base - -#define VECTOR_MATRIX_LABELS(base) \ - VECTOR_LABELS(base), \ - &&base ## N - -// If you trip this assert, it means that the order of the opcodes listed in ByteCodeInstruction -// does not match the order of the opcodes listed in the 'labels' array in innerRun(). 
-#define CHECK_LABEL(name) \ - SkASSERT(labels[(int) ByteCodeInstruction::name] == &&name) - -#define CHECK_VECTOR_LABELS(name) \ - CHECK_LABEL(name ## 4); \ - CHECK_LABEL(name ## 3); \ - CHECK_LABEL(name ## 2); \ - CHECK_LABEL(name) - -#define CHECK_VECTOR_MATRIX_LABELS(name) \ - CHECK_VECTOR_LABELS(name); \ - CHECK_LABEL(name ## N) - -union VValue { - VValue() {} - VValue(F32 f) : fFloat(f) {} - VValue(I32 s) : fSigned(s) {} - VValue(U32 u) : fUnsigned(u) {} - - F32 fFloat; - I32 fSigned; - U32 fUnsigned; -}; - -struct StackFrame { - const uint8_t* fCode; - const uint8_t* fIP; - VValue* fStack; - int fParameterCount; -}; - -static F32 VecMod(F32 a, F32 b) { - return a - skvx::trunc(a / b) * b; -} - -#define spf(index) sp[index].fFloat - -static void CallExternal(const ByteCode* byteCode, const uint8_t*& ip, VValue*& sp, - int baseIndex, I32 mask) { - int argumentCount = READ8(); - int returnCount = READ8(); - int target = READ8(); - ExternalValue* v = byteCode->fExternalValues[target]; - sp -= argumentCount - 1; - - float tmpArgs[4]; - float tmpReturn[4]; - SkASSERT(argumentCount <= (int)SK_ARRAY_COUNT(tmpArgs)); - SkASSERT(returnCount <= (int)SK_ARRAY_COUNT(tmpReturn)); - - for (int i = 0; i < VecWidth; ++i) { - if (mask[i]) { - for (int j = 0; j < argumentCount; ++j) { - tmpArgs[j] = sp[j].fFloat[i]; - } - v->call(baseIndex + i, tmpArgs, tmpReturn); - for (int j = 0; j < returnCount; ++j) { - sp[j].fFloat[i] = tmpReturn[j]; - } - } - } - sp += returnCount - 1; -} - -static void Inverse2x2(VValue* sp) { - F32 a = sp[-3].fFloat, - b = sp[-2].fFloat, - c = sp[-1].fFloat, - d = sp[ 0].fFloat; - F32 idet = F32(1) / (a*d - b*c); - sp[-3].fFloat = d * idet; - sp[-2].fFloat = -b * idet; - sp[-1].fFloat = -c * idet; - sp[ 0].fFloat = a * idet; -} - -static void Inverse3x3(VValue* sp) { - F32 a11 = sp[-8].fFloat, a12 = sp[-5].fFloat, a13 = sp[-2].fFloat, - a21 = sp[-7].fFloat, a22 = sp[-4].fFloat, a23 = sp[-1].fFloat, - a31 = sp[-6].fFloat, a32 = sp[-3].fFloat, a33 = sp[ 
0].fFloat; - F32 idet = F32(1) / (a11 * a22 * a33 + a12 * a23 * a31 + a13 * a21 * a32 - - a11 * a23 * a32 - a12 * a21 * a33 - a13 * a22 * a31); - sp[-8].fFloat = (a22 * a33 - a23 * a32) * idet; - sp[-7].fFloat = (a23 * a31 - a21 * a33) * idet; - sp[-6].fFloat = (a21 * a32 - a22 * a31) * idet; - sp[-5].fFloat = (a13 * a32 - a12 * a33) * idet; - sp[-4].fFloat = (a11 * a33 - a13 * a31) * idet; - sp[-3].fFloat = (a12 * a31 - a11 * a32) * idet; - sp[-2].fFloat = (a12 * a23 - a13 * a22) * idet; - sp[-1].fFloat = (a13 * a21 - a11 * a23) * idet; - sp[ 0].fFloat = (a11 * a22 - a12 * a21) * idet; -} - -static void Inverse4x4(VValue* sp) { - F32 a00 = spf(-15), a10 = spf(-11), a20 = spf( -7), a30 = spf( -3), - a01 = spf(-14), a11 = spf(-10), a21 = spf( -6), a31 = spf( -2), - a02 = spf(-13), a12 = spf( -9), a22 = spf( -5), a32 = spf( -1), - a03 = spf(-12), a13 = spf( -8), a23 = spf( -4), a33 = spf( 0); - - F32 b00 = a00 * a11 - a01 * a10, - b01 = a00 * a12 - a02 * a10, - b02 = a00 * a13 - a03 * a10, - b03 = a01 * a12 - a02 * a11, - b04 = a01 * a13 - a03 * a11, - b05 = a02 * a13 - a03 * a12, - b06 = a20 * a31 - a21 * a30, - b07 = a20 * a32 - a22 * a30, - b08 = a20 * a33 - a23 * a30, - b09 = a21 * a32 - a22 * a31, - b10 = a21 * a33 - a23 * a31, - b11 = a22 * a33 - a23 * a32; - - F32 idet = F32(1) / - (b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06); - - b00 *= idet; - b01 *= idet; - b02 *= idet; - b03 *= idet; - b04 *= idet; - b05 *= idet; - b06 *= idet; - b07 *= idet; - b08 *= idet; - b09 *= idet; - b10 *= idet; - b11 *= idet; - - spf(-15) = a11 * b11 - a12 * b10 + a13 * b09; - spf(-14) = a02 * b10 - a01 * b11 - a03 * b09; - spf(-13) = a31 * b05 - a32 * b04 + a33 * b03; - spf(-12) = a22 * b04 - a21 * b05 - a23 * b03; - spf(-11) = a12 * b08 - a10 * b11 - a13 * b07; - spf(-10) = a00 * b11 - a02 * b08 + a03 * b07; - spf( -9) = a32 * b02 - a30 * b05 - a33 * b01; - spf( -8) = a20 * b05 - a22 * b02 + a23 * b01; - spf( -7) = a10 * b10 - a11 * b08 + a13 * b06; - 
spf( -6) = a01 * b08 - a00 * b10 - a03 * b06; - spf( -5) = a30 * b04 - a31 * b02 + a33 * b00; - spf( -4) = a21 * b02 - a20 * b04 - a23 * b00; - spf( -3) = a11 * b07 - a10 * b09 - a12 * b06; - spf( -2) = a00 * b09 - a01 * b07 + a02 * b06; - spf( -1) = a31 * b01 - a30 * b03 - a32 * b00; - spf( 0) = a20 * b03 - a21 * b01 + a22 * b00; -} - -static bool InnerRun(const ByteCode* byteCode, const ByteCodeFunction* f, VValue* stack, - float* outReturn[], VValue globals[], const float uniforms[], - bool stripedOutput, int N, int baseIndex) { -#ifdef SKSLC_THREADED_CODE - static const void* labels[] = { - // If you aren't familiar with it, the &&label syntax is the GCC / Clang "labels as values" - // extension. If you add anything to this array, be sure to add the corresponding - // CHECK_LABEL() or CHECK_*_LABELS() assert below. - VECTOR_MATRIX_LABELS(kAddF), - VECTOR_LABELS(kAddI), - &&kAndB, - &&kBranch, - &&kCall, - &&kCallExternal, - &&kClampIndex, - VECTOR_LABELS(kCompareIEQ), - VECTOR_LABELS(kCompareINEQ), - VECTOR_MATRIX_LABELS(kCompareFEQ), - VECTOR_MATRIX_LABELS(kCompareFNEQ), - VECTOR_LABELS(kCompareFGT), - VECTOR_LABELS(kCompareFGTEQ), - VECTOR_LABELS(kCompareFLT), - VECTOR_LABELS(kCompareFLTEQ), - VECTOR_LABELS(kCompareSGT), - VECTOR_LABELS(kCompareSGTEQ), - VECTOR_LABELS(kCompareSLT), - VECTOR_LABELS(kCompareSLTEQ), - VECTOR_LABELS(kCompareUGT), - VECTOR_LABELS(kCompareUGTEQ), - VECTOR_LABELS(kCompareULT), - VECTOR_LABELS(kCompareULTEQ), - VECTOR_LABELS(kConvertFtoI), - VECTOR_LABELS(kConvertStoF), - VECTOR_LABELS(kConvertUtoF), - VECTOR_LABELS(kCos), - VECTOR_MATRIX_LABELS(kDivideF), - VECTOR_LABELS(kDivideS), - VECTOR_LABELS(kDivideU), - VECTOR_MATRIX_LABELS(kDup), - &&kInverse2x2, - &&kInverse3x3, - &&kInverse4x4, - VECTOR_LABELS(kLoad), - VECTOR_LABELS(kLoadGlobal), - VECTOR_LABELS(kLoadUniform), - &&kLoadSwizzle, - &&kLoadSwizzleGlobal, - &&kLoadSwizzleUniform, - &&kLoadExtended, - &&kLoadExtendedGlobal, - &&kLoadExtendedUniform, - &&kMatrixToMatrix, - 
&&kMatrixMultiply, - VECTOR_MATRIX_LABELS(kNegateF), - VECTOR_LABELS(kNegateI), - VECTOR_MATRIX_LABELS(kMultiplyF), - VECTOR_LABELS(kMultiplyI), - &&kNotB, - &&kOrB, - VECTOR_MATRIX_LABELS(kPop), - &&kPushImmediate, - VECTOR_LABELS(kReadExternal), - VECTOR_LABELS(kRemainderF), - VECTOR_LABELS(kRemainderS), - VECTOR_LABELS(kRemainderU), - &&kReserve, - &&kReturn, - &&kScalarToMatrix, - &&kShiftLeft, - &&kShiftRightS, - &&kShiftRightU, - VECTOR_LABELS(kSin), - VECTOR_LABELS(kSqrt), - VECTOR_LABELS(kStore), - VECTOR_LABELS(kStoreGlobal), - &&kStoreExtended, - &&kStoreExtendedGlobal, - &&kStoreSwizzle, - &&kStoreSwizzleGlobal, - &&kStoreSwizzleIndirect, - &&kStoreSwizzleIndirectGlobal, - &&kSwizzle, - VECTOR_MATRIX_LABELS(kSubtractF), - VECTOR_LABELS(kSubtractI), - VECTOR_LABELS(kTan), - VECTOR_LABELS(kWriteExternal), - &&kXorB, - - &&kMaskPush, - &&kMaskPop, - &&kMaskNegate, - &&kMaskBlend, - &&kBranchIfAllFalse, - - &&kLoopBegin, - &&kLoopNext, - &&kLoopMask, - &&kLoopEnd, - &&kLoopBreak, - &&kLoopContinue, - }; - // Verify that the order of the labels array matches the order of the ByteCodeInstruction enum. 
- CHECK_VECTOR_MATRIX_LABELS(kAddF); - CHECK_VECTOR_LABELS(kAddI); - CHECK_LABEL(kAndB); - CHECK_LABEL(kBranch); - CHECK_LABEL(kCall); - CHECK_LABEL(kCallExternal); - CHECK_LABEL(kClampIndex); - CHECK_VECTOR_LABELS(kCompareIEQ); - CHECK_VECTOR_LABELS(kCompareINEQ); - CHECK_VECTOR_MATRIX_LABELS(kCompareFEQ); - CHECK_VECTOR_MATRIX_LABELS(kCompareFNEQ); - CHECK_VECTOR_LABELS(kCompareFGT); - CHECK_VECTOR_LABELS(kCompareFGTEQ); - CHECK_VECTOR_LABELS(kCompareFLT); - CHECK_VECTOR_LABELS(kCompareFLTEQ); - CHECK_VECTOR_LABELS(kCompareSGT); - CHECK_VECTOR_LABELS(kCompareSGTEQ); - CHECK_VECTOR_LABELS(kCompareSLT); - CHECK_VECTOR_LABELS(kCompareSLTEQ); - CHECK_VECTOR_LABELS(kCompareUGT); - CHECK_VECTOR_LABELS(kCompareUGTEQ); - CHECK_VECTOR_LABELS(kCompareULT); - CHECK_VECTOR_LABELS(kCompareULTEQ); - CHECK_VECTOR_LABELS(kConvertFtoI); - CHECK_VECTOR_LABELS(kConvertStoF); - CHECK_VECTOR_LABELS(kConvertUtoF); - CHECK_VECTOR_LABELS(kCos); - CHECK_VECTOR_MATRIX_LABELS(kDivideF); - CHECK_VECTOR_LABELS(kDivideS); - CHECK_VECTOR_LABELS(kDivideU); - CHECK_VECTOR_MATRIX_LABELS(kDup); - CHECK_LABEL(kInverse2x2); - CHECK_LABEL(kInverse3x3); - CHECK_LABEL(kInverse4x4); - CHECK_VECTOR_LABELS(kLoad); - CHECK_VECTOR_LABELS(kLoadGlobal); - CHECK_VECTOR_LABELS(kLoadUniform); - CHECK_LABEL(kLoadSwizzle); - CHECK_LABEL(kLoadSwizzleGlobal); - CHECK_LABEL(kLoadSwizzleUniform); - CHECK_LABEL(kLoadExtended); - CHECK_LABEL(kLoadExtendedGlobal); - CHECK_LABEL(kLoadExtendedUniform); - CHECK_LABEL(kMatrixToMatrix); - CHECK_LABEL(kMatrixMultiply); - CHECK_VECTOR_MATRIX_LABELS(kNegateF); - CHECK_VECTOR_LABELS(kNegateI); - CHECK_VECTOR_MATRIX_LABELS(kMultiplyF); - CHECK_VECTOR_LABELS(kMultiplyI); - CHECK_LABEL(kNotB); - CHECK_LABEL(kOrB); - CHECK_VECTOR_MATRIX_LABELS(kPop); - CHECK_LABEL(kPushImmediate); - CHECK_VECTOR_LABELS(kReadExternal); - CHECK_VECTOR_LABELS(kRemainderF); - CHECK_VECTOR_LABELS(kRemainderS); - CHECK_VECTOR_LABELS(kRemainderU); - CHECK_LABEL(kReserve); - CHECK_LABEL(kReturn); - 
CHECK_LABEL(kScalarToMatrix); - CHECK_LABEL(kShiftLeft); - CHECK_LABEL(kShiftRightS); - CHECK_LABEL(kShiftRightU); - CHECK_VECTOR_LABELS(kSin); - CHECK_VECTOR_LABELS(kSqrt); - CHECK_VECTOR_LABELS(kStore); - CHECK_VECTOR_LABELS(kStoreGlobal); - CHECK_LABEL(kStoreExtended); - CHECK_LABEL(kStoreExtendedGlobal); - CHECK_LABEL(kStoreSwizzle); - CHECK_LABEL(kStoreSwizzleGlobal); - CHECK_LABEL(kStoreSwizzleIndirect); - CHECK_LABEL(kStoreSwizzleIndirectGlobal); - CHECK_LABEL(kSwizzle); - CHECK_VECTOR_MATRIX_LABELS(kSubtractF); - CHECK_VECTOR_LABELS(kSubtractI); - CHECK_VECTOR_LABELS(kTan); - CHECK_VECTOR_LABELS(kWriteExternal); - CHECK_LABEL(kXorB); - CHECK_LABEL(kMaskPush); - CHECK_LABEL(kMaskPop); - CHECK_LABEL(kMaskNegate); - CHECK_LABEL(kMaskBlend); - CHECK_LABEL(kBranchIfAllFalse); - CHECK_LABEL(kLoopBegin); - CHECK_LABEL(kLoopNext); - CHECK_LABEL(kLoopMask); - CHECK_LABEL(kLoopEnd); - CHECK_LABEL(kLoopBreak); - CHECK_LABEL(kLoopContinue); - f->fPreprocessOnce([f] { ((ByteCodeFunction*)f)->preprocess(labels); }); -#endif - - // Needs to be the first N non-negative integers, at least as large as VecWidth - static const Interpreter::I32 gLanes = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 - }; - - VValue* sp = stack + f->fParameterCount + f->fLocalCount - 1; - - #define POP() (*(sp--)) - #define PUSH(v) (sp[1] = v, ++sp) - - const uint8_t* code = f->fCode.data(); - const uint8_t* ip = code; - std::vector frames; - - I32 condStack[16]; // Independent condition masks - I32 maskStack[16]; // Combined masks (eg maskStack[0] & maskStack[1] & ...) 
- I32 contStack[16]; // Continue flags for loops - I32 loopStack[16]; // Loop execution masks - condStack[0] = maskStack[0] = (gLanes < N); - contStack[0] = I32( 0); - loopStack[0] = I32(~0); - I32* condPtr = condStack; - I32* maskPtr = maskStack; - I32* contPtr = contStack; - I32* loopPtr = loopStack; - - if (f->fConditionCount + 1 > (int)SK_ARRAY_COUNT(condStack) || - f->fLoopCount + 1 > (int)SK_ARRAY_COUNT(loopStack)) { - return false; - } - - auto mask = [&]() { return *maskPtr & *loopPtr; }; - -#ifdef SKSLC_THREADED_CODE - // If the "labels as values" extension is available, we implement this using threaded code. - // Instead of opcodes, the code directly contains the addresses of the labels to jump to. Then - // the code for each opcode simply grabs the address of the next opcode and uses a goto to jump - // there. - NEXT(); -#else - // Otherwise, we have to use a switch statement and a loop to execute the right label. - for (;;) { - #ifdef TRACE - printf("at %3d ", (int) (ip - code)); - disassemble_instruction(ip); - printf(" (stack: %d)\n", (int) (sp - stack) + 1); - #endif - switch ((ByteCodeInstruction) READ16()) { -#endif - - VECTOR_MATRIX_BINARY_OP(kAddF, fFloat, +) - VECTOR_BINARY_OP(kAddI, fSigned, +) - - // Booleans are integer masks: 0/~0 for false/true. So bitwise ops do what we want: - LABEL(kAndB) - sp[-1] = sp[-1].fSigned & sp[0].fSigned; - POP(); - NEXT(); - LABEL(kNotB) - sp[0] = ~sp[0].fSigned; - NEXT(); - LABEL(kOrB) - sp[-1] = sp[-1].fSigned | sp[0].fSigned; - POP(); - NEXT(); - LABEL(kXorB) - sp[-1] = sp[-1].fSigned ^ sp[0].fSigned; - POP(); - NEXT(); - - LABEL(kBranch) - ip = code + READ16(); - NEXT(); - - LABEL(kCall) { - // Precursor code reserved space for the return value, and pushed all parameters to - // the stack. Update our bottom of stack to point at the first parameter, and our - // sp to point past those parameters (plus space for locals). 
- int target = READ8(); - const ByteCodeFunction* fun = byteCode->fFunctions[target].get(); -#ifdef SKSLC_THREADED_CODE - fun->fPreprocessOnce([fun] { ((ByteCodeFunction*)fun)->preprocess(labels); }); -#endif - if (skvx::any(mask())) { - frames.push_back({ code, ip, stack, fun->fParameterCount }); - ip = code = fun->fCode.data(); - stack = sp - fun->fParameterCount + 1; - sp = stack + fun->fParameterCount + fun->fLocalCount - 1; - } - NEXT(); - } - - LABEL(kCallExternal) { - CallExternal(byteCode, ip, sp, baseIndex, mask()); - NEXT(); - } - - LABEL(kClampIndex) { - int length = READ8(); - if (skvx::any(mask() & ((sp[0].fSigned < 0) | (sp[0].fSigned >= length)))) { - return false; - } - NEXT(); - } - - VECTOR_BINARY_OP(kCompareIEQ, fSigned, ==) - VECTOR_MATRIX_BINARY_OP(kCompareFEQ, fFloat, ==) - VECTOR_BINARY_OP(kCompareINEQ, fSigned, !=) - VECTOR_MATRIX_BINARY_OP(kCompareFNEQ, fFloat, !=) - VECTOR_BINARY_OP(kCompareSGT, fSigned, >) - VECTOR_BINARY_OP(kCompareUGT, fUnsigned, >) - VECTOR_BINARY_OP(kCompareFGT, fFloat, >) - VECTOR_BINARY_OP(kCompareSGTEQ, fSigned, >=) - VECTOR_BINARY_OP(kCompareUGTEQ, fUnsigned, >=) - VECTOR_BINARY_OP(kCompareFGTEQ, fFloat, >=) - VECTOR_BINARY_OP(kCompareSLT, fSigned, <) - VECTOR_BINARY_OP(kCompareULT, fUnsigned, <) - VECTOR_BINARY_OP(kCompareFLT, fFloat, <) - VECTOR_BINARY_OP(kCompareSLTEQ, fSigned, <=) - VECTOR_BINARY_OP(kCompareULTEQ, fUnsigned, <=) - VECTOR_BINARY_OP(kCompareFLTEQ, fFloat, <=) - - LABEL(kConvertFtoI4) sp[-3] = skvx::cast(sp[-3].fFloat); - LABEL(kConvertFtoI3) sp[-2] = skvx::cast(sp[-2].fFloat); - LABEL(kConvertFtoI2) sp[-1] = skvx::cast(sp[-1].fFloat); - LABEL(kConvertFtoI) sp[ 0] = skvx::cast(sp[ 0].fFloat); - NEXT(); - - LABEL(kConvertStoF4) sp[-3] = skvx::cast(sp[-3].fSigned); - LABEL(kConvertStoF3) sp[-2] = skvx::cast(sp[-2].fSigned); - LABEL(kConvertStoF2) sp[-1] = skvx::cast(sp[-1].fSigned); - LABEL(kConvertStoF) sp[ 0] = skvx::cast(sp[ 0].fSigned); - NEXT(); - - LABEL(kConvertUtoF4) sp[-3] = 
skvx::cast(sp[-3].fUnsigned); - LABEL(kConvertUtoF3) sp[-2] = skvx::cast(sp[-2].fUnsigned); - LABEL(kConvertUtoF2) sp[-1] = skvx::cast(sp[-1].fUnsigned); - LABEL(kConvertUtoF) sp[ 0] = skvx::cast(sp[ 0].fUnsigned); - NEXT(); - - VECTOR_UNARY_FN_VEC(kCos, cosf) - - VECTOR_BINARY_MASKED_OP(kDivideS, fSigned, /) - VECTOR_BINARY_MASKED_OP(kDivideU, fUnsigned, /) - VECTOR_MATRIX_BINARY_OP(kDivideF, fFloat, /) - - LABEL(kDup4) PUSH(sp[1 - ip[0]]); - LABEL(kDup3) PUSH(sp[1 - ip[0]]); - LABEL(kDup2) PUSH(sp[1 - ip[0]]); - LABEL(kDup) PUSH(sp[1 - ip[0]]); - ++ip; - NEXT(); - - LABEL(kDupN) { - int count = READ8(); - memcpy(sp + 1, sp - count + 1, count * sizeof(VValue)); - sp += count; - NEXT(); - } - - LABEL(kInverse2x2) { - Inverse2x2(sp); - NEXT(); - } - LABEL(kInverse3x3) { - Inverse3x3(sp); - NEXT(); - } - LABEL(kInverse4x4) { - Inverse4x4(sp); - NEXT(); - } - - LABEL(kLoad4) sp[4] = stack[ip[1] + 3]; - LABEL(kLoad3) sp[3] = stack[ip[1] + 2]; - LABEL(kLoad2) sp[2] = stack[ip[1] + 1]; - LABEL(kLoad) sp[1] = stack[ip[1] + 0]; - sp += ip[0]; - ip += 2; - NEXT(); - - LABEL(kLoadGlobal4) sp[4] = globals[ip[1] + 3]; - LABEL(kLoadGlobal3) sp[3] = globals[ip[1] + 2]; - LABEL(kLoadGlobal2) sp[2] = globals[ip[1] + 1]; - LABEL(kLoadGlobal) sp[1] = globals[ip[1] + 0]; - sp += ip[0]; - ip += 2; - NEXT(); - - LABEL(kLoadUniform4) sp[4].fFloat = uniforms[ip[1] + 3]; - LABEL(kLoadUniform3) sp[3].fFloat = uniforms[ip[1] + 2]; - LABEL(kLoadUniform2) sp[2].fFloat = uniforms[ip[1] + 1]; - LABEL(kLoadUniform) sp[1].fFloat = uniforms[ip[1] + 0]; - sp += ip[0]; - ip += 2; - NEXT(); - - LABEL(kLoadExtended) { - int count = READ8(); - I32 src = POP().fSigned; - I32 m = mask(); - for (int i = 0; i < count; ++i) { - for (int j = 0; j < VecWidth; ++j) { - if (m[j]) { - sp[i + 1].fSigned[j] = stack[src[j] + i].fSigned[j]; - } - } - } - sp += count; - NEXT(); - } - - LABEL(kLoadExtendedGlobal) { - int count = READ8(); - I32 src = POP().fSigned; - I32 m = mask(); - for (int i = 0; i < count; ++i) { 
- for (int j = 0; j < VecWidth; ++j) { - if (m[j]) { - sp[i + 1].fSigned[j] = globals[src[j] + i].fSigned[j]; - } - } - } - sp += count; - NEXT(); - } - - LABEL(kLoadExtendedUniform) { - int count = READ8(); - I32 src = POP().fSigned; - I32 m = mask(); - for (int i = 0; i < count; ++i) { - for (int j = 0; j < VecWidth; ++j) { - if (m[j]) { - sp[i + 1].fFloat[j] = uniforms[src[j] + i]; - } - } - } - sp += count; - NEXT(); - } - - LABEL(kLoadSwizzle) { - int src = READ8(); - int count = READ8(); - for (int i = 0; i < count; ++i) { - PUSH(stack[src + *(ip + i)]); - } - ip += count; - NEXT(); - } - - LABEL(kLoadSwizzleGlobal) { - int src = READ8(); - int count = READ8(); - for (int i = 0; i < count; ++i) { - PUSH(globals[src + *(ip + i)]); - } - ip += count; - NEXT(); - } - - LABEL(kLoadSwizzleUniform) { - int src = READ8(); - int count = READ8(); - for (int i = 0; i < count; ++i) { - PUSH(F32(uniforms[src + *(ip + i)])); - } - ip += count; - NEXT(); - } - - LABEL(kMatrixToMatrix) { - int srcCols = READ8(); - int srcRows = READ8(); - int dstCols = READ8(); - int dstRows = READ8(); - SkASSERT(srcCols >= 2 && srcCols <= 4); - SkASSERT(srcRows >= 2 && srcRows <= 4); - SkASSERT(dstCols >= 2 && dstCols <= 4); - SkASSERT(dstRows >= 2 && dstRows <= 4); - F32 tmp[16]; - memset(tmp, 0, sizeof(tmp)); - tmp[0] = tmp[5] = tmp[10] = tmp[15] = F32(1.0f); - for (int c = srcCols - 1; c >= 0; --c) { - for (int r = srcRows - 1; r >= 0; --r) { - tmp[c*4 + r] = POP().fFloat; - } - } - for (int c = 0; c < dstCols; ++c) { - for (int r = 0; r < dstRows; ++r) { - PUSH(tmp[c*4 + r]); - } - } - NEXT(); - } - - LABEL(kMatrixMultiply) { - int lCols = READ8(); - int lRows = READ8(); - int rCols = READ8(); - int rRows = lCols; - F32 tmp[16] = { 0.0f }; - F32* B = &(sp - (rCols * rRows) + 1)->fFloat; - F32* A = B - (lCols * lRows); - for (int c = 0; c < rCols; ++c) { - for (int r = 0; r < lRows; ++r) { - for (int j = 0; j < lCols; ++j) { - tmp[c*lRows + r] += A[j*lRows + r] * B[c*rRows + j]; - } - } 
- } - sp -= (lCols * lRows) + (rCols * rRows); - memcpy(sp + 1, tmp, rCols * lRows * sizeof(VValue)); - sp += (rCols * lRows); - NEXT(); - } - - VECTOR_BINARY_OP(kMultiplyI, fSigned, *) - VECTOR_MATRIX_BINARY_OP(kMultiplyF, fFloat, *) - - LABEL(kNegateF4) sp[-3] = -sp[-3].fFloat; - LABEL(kNegateF3) sp[-2] = -sp[-2].fFloat; - LABEL(kNegateF2) sp[-1] = -sp[-1].fFloat; - LABEL(kNegateF) sp[ 0] = -sp[ 0].fFloat; - NEXT(); - - LABEL(kNegateFN) { - int count = READ8(); - for (int i = count - 1; i >= 0; --i) { - sp[-i] = -sp[-i].fFloat; - } - NEXT(); - } - - LABEL(kNegateI4) sp[-3] = -sp[-3].fSigned; - LABEL(kNegateI3) sp[-2] = -sp[-2].fSigned; - LABEL(kNegateI2) sp[-1] = -sp[-1].fSigned; - LABEL(kNegateI) sp[ 0] = -sp[ 0].fSigned; - NEXT(); - - LABEL(kPop4) POP(); - LABEL(kPop3) POP(); - LABEL(kPop2) POP(); - LABEL(kPop) POP(); - NEXT(); - - LABEL(kPopN) - sp -= READ8(); - NEXT(); - - LABEL(kPushImmediate) - PUSH(U32(READ32())); - NEXT(); - - LABEL(kReadExternal) - LABEL(kReadExternal2) - LABEL(kReadExternal3) - LABEL(kReadExternal4) { - int count = READ8(); - int src = READ8(); - float tmp[4]; - I32 m = mask(); - for (int i = 0; i < VecWidth; ++i) { - if (m[i]) { - byteCode->fExternalValues[src]->read(baseIndex + i, tmp); - for (int j = 0; j < count; ++j) { - sp[j + 1].fFloat[i] = tmp[j]; - } - } - } - sp += count; - NEXT(); - } - - VECTOR_BINARY_FN(kRemainderF, fFloat, VecMod) - VECTOR_BINARY_MASKED_OP(kRemainderS, fSigned, %) - VECTOR_BINARY_MASKED_OP(kRemainderU, fUnsigned, %) - - LABEL(kReserve) - sp += READ8(); - NEXT(); - - LABEL(kReturn) { - int count = READ8(); - if (frames.empty()) { - if (outReturn) { - VValue* src = sp - count + 1; - if (stripedOutput) { - for (int i = 0; i < count; ++i) { - memcpy(outReturn[i], &src->fFloat, N * sizeof(float)); - ++src; - } - } else { - float* outPtr = outReturn[0]; - for (int i = 0; i < count; ++i) { - for (int j = 0; j < N; ++j) { - outPtr[count * j] = src->fFloat[j]; - } - ++outPtr; - ++src; - } - } - } - return true; - } 
else { - // When we were called, the caller reserved stack space for their copy of our - // return value, then 'stack' was positioned after that, where our parameters - // were placed. Copy our return values to their reserved area. - memcpy(stack - count, sp - count + 1, count * sizeof(VValue)); - - // Now move the stack pointer to the end of the passed-in parameters. This odd - // calling convention requires the caller to pop the arguments after calling, - // but allows them to store any out-parameters back during that unwinding. - // After that sequence finishes, the return value will be the top of the stack. - const StackFrame& frame(frames.back()); - sp = stack + frame.fParameterCount - 1; - stack = frame.fStack; - code = frame.fCode; - ip = frame.fIP; - frames.pop_back(); - NEXT(); - } - } - - LABEL(kScalarToMatrix) { - int cols = READ8(); - int rows = READ8(); - VValue v = POP(); - for (int c = 0; c < cols; ++c) { - for (int r = 0; r < rows; ++r) { - PUSH(c == r ? v : F32(0.0f)); - } - } - NEXT(); - } - - LABEL(kShiftLeft) - sp[0] = sp[0].fSigned << READ8(); - NEXT(); - LABEL(kShiftRightS) - sp[0] = sp[0].fSigned >> READ8(); - NEXT(); - LABEL(kShiftRightU) - sp[0] = sp[0].fUnsigned >> READ8(); - NEXT(); - - VECTOR_UNARY_FN_VEC(kSin, sinf) - VECTOR_UNARY_FN(kSqrt, skvx::sqrt, fFloat) - - LABEL(kStore4) - stack[*ip+3] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+3].fFloat); - LABEL(kStore3) - stack[*ip+2] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+2].fFloat); - LABEL(kStore2) - stack[*ip+1] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+1].fFloat); - LABEL(kStore) - stack[*ip+0] = skvx::if_then_else(mask(), POP().fFloat, stack[*ip+0].fFloat); - ++ip; - NEXT(); - - LABEL(kStoreGlobal4) - globals[*ip+3] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+3].fFloat); - LABEL(kStoreGlobal3) - globals[*ip+2] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+2].fFloat); - LABEL(kStoreGlobal2) - globals[*ip+1] = 
skvx::if_then_else(mask(), POP().fFloat, globals[*ip+1].fFloat); - LABEL(kStoreGlobal) - globals[*ip+0] = skvx::if_then_else(mask(), POP().fFloat, globals[*ip+0].fFloat); - ++ip; - NEXT(); - - LABEL(kStoreExtended) { - int count = READ8(); - I32 target = POP().fSigned; - VValue* src = sp - count + 1; - I32 m = mask(); - for (int i = 0; i < count; ++i) { - for (int j = 0; j < VecWidth; ++j) { - if (m[j]) { - stack[target[j] + i].fSigned[j] = src[i].fSigned[j]; - } - } - } - sp -= count; - NEXT(); - } - LABEL(kStoreExtendedGlobal) { - int count = READ8(); - I32 target = POP().fSigned; - VValue* src = sp - count + 1; - I32 m = mask(); - for (int i = 0; i < count; ++i) { - for (int j = 0; j < VecWidth; ++j) { - if (m[j]) { - globals[target[j] + i].fSigned[j] = src[i].fSigned[j]; - } - } - } - sp -= count; - NEXT(); - } - - LABEL(kStoreSwizzle) { - int target = READ8(); - int count = READ8(); - for (int i = count - 1; i >= 0; --i) { - stack[target + *(ip + i)] = skvx::if_then_else( - mask(), POP().fFloat, stack[target + *(ip + i)].fFloat); - } - ip += count; - NEXT(); - } - - LABEL(kStoreSwizzleGlobal) { - int target = READ8(); - int count = READ8(); - for (int i = count - 1; i >= 0; --i) { - globals[target + *(ip + i)] = skvx::if_then_else( - mask(), POP().fFloat, globals[target + *(ip + i)].fFloat); - } - ip += count; - NEXT(); - } - - LABEL(kStoreSwizzleIndirect) { - int count = READ8(); - I32 target = POP().fSigned; - I32 m = mask(); - for (int i = count - 1; i >= 0; --i) { - I32 v = POP().fSigned; - for (int j = 0; j < VecWidth; ++j) { - if (m[j]) { - stack[target[j] + *(ip + i)].fSigned[j] = v[j]; - } - } - } - ip += count; - NEXT(); - } - - LABEL(kStoreSwizzleIndirectGlobal) { - int count = READ8(); - I32 target = POP().fSigned; - I32 m = mask(); - for (int i = count - 1; i >= 0; --i) { - I32 v = POP().fSigned; - for (int j = 0; j < VecWidth; ++j) { - if (m[j]) { - globals[target[j] + *(ip + i)].fSigned[j] = v[j]; - } - } - } - ip += count; - NEXT(); - } - - 
VECTOR_BINARY_OP(kSubtractI, fSigned, -) - VECTOR_MATRIX_BINARY_OP(kSubtractF, fFloat, -) - - LABEL(kSwizzle) { - VValue tmp[4]; - for (int i = READ8() - 1; i >= 0; --i) { - tmp[i] = POP(); - } - for (int i = READ8() - 1; i >= 0; --i) { - PUSH(tmp[READ8()]); - } - NEXT(); - } - - VECTOR_UNARY_FN_VEC(kTan, tanf) - - LABEL(kWriteExternal4) - LABEL(kWriteExternal3) - LABEL(kWriteExternal2) - LABEL(kWriteExternal) { - int count = READ8(); - int target = READ8(); - float tmp[4]; - I32 m = mask(); - sp -= count; - for (int i = 0; i < VecWidth; ++i) { - if (m[i]) { - for (int j = 0; j < count; ++j) { - tmp[j] = sp[j + 1].fFloat[i]; - } - byteCode->fExternalValues[target]->write(baseIndex + i, tmp); - } - } - NEXT(); - } - - LABEL(kMaskPush) - condPtr[1] = POP().fSigned; - maskPtr[1] = maskPtr[0] & condPtr[1]; - ++condPtr; ++maskPtr; - NEXT(); - LABEL(kMaskPop) - --condPtr; --maskPtr; - NEXT(); - LABEL(kMaskNegate) - maskPtr[0] = maskPtr[-1] & ~condPtr[0]; - NEXT(); - LABEL(kMaskBlend) { - int count = READ8(); - I32 m = condPtr[0]; - --condPtr; --maskPtr; - for (int i = 0; i < count; ++i) { - sp[-count] = skvx::if_then_else(m, sp[-count].fFloat, sp[0].fFloat); - --sp; - } - NEXT(); - } - LABEL(kBranchIfAllFalse) { - int target = READ16(); - if (!skvx::any(mask())) { - ip = code + target; - } - NEXT(); - } - - LABEL(kLoopBegin) - contPtr[1] = 0; - loopPtr[1] = loopPtr[0]; - ++contPtr; ++loopPtr; - NEXT(); - LABEL(kLoopNext) - *loopPtr |= *contPtr; - *contPtr = 0; - NEXT(); - LABEL(kLoopMask) - *loopPtr &= POP().fSigned; - NEXT(); - LABEL(kLoopEnd) - --contPtr; --loopPtr; - NEXT(); - LABEL(kLoopBreak) - *loopPtr &= ~mask(); - NEXT(); - LABEL(kLoopContinue) { - I32 m = mask(); - *contPtr |= m; - *loopPtr &= ~m; - NEXT(); - } -#ifdef SKSLC_THREADED_CODE - #ifdef TRACE - next: - printf("at %3d (stack: %d) (disable threaded code for disassembly)\n", - (int) (ip - code), (int) (sp - stack) + 1); - goto *READ_INST(); - #endif -#else - } - } -#endif -} - -}; // class Interpreter - 
-#endif // SK_ENABLE_SKSL_INTERPRETER - -#undef spf - -void ByteCodeFunction::disassemble() const { -#if defined(SK_ENABLE_SKSL_INTERPRETER) - const uint8_t* ip = fCode.data(); - while (ip < fCode.data() + fCode.size()) { - printf("%d: ", (int)(ip - fCode.data())); - ip = Interpreter::DisassembleInstruction(ip); - printf("\n"); - } -#endif -} - -#define VECTOR_PREPROCESS(base) \ - case ByteCodeInstruction::base ## 4: \ - case ByteCodeInstruction::base ## 3: \ - case ByteCodeInstruction::base ## 2: \ - case ByteCodeInstruction::base: READ8(); break; - -#define VECTOR_PREPROCESS_NO_COUNT(base) \ - case ByteCodeInstruction::base ## 4: \ - case ByteCodeInstruction::base ## 3: \ - case ByteCodeInstruction::base ## 2: \ - case ByteCodeInstruction::base: break; - -#define VECTOR_MATRIX_PREPROCESS(base) \ - VECTOR_PREPROCESS(base) \ - case ByteCodeInstruction::base ## N: READ8(); break; - -#define VECTOR_MATRIX_PREPROCESS_NO_COUNT(base) \ - VECTOR_PREPROCESS_NO_COUNT(base) \ - case ByteCodeInstruction::base ## N: READ8(); break; - -void ByteCodeFunction::preprocess(const void* labels[]) { -#if defined(SK_ENABLE_SKSL_INTERPRETER) -#ifdef TRACE - this->disassemble(); -#endif - uint8_t* ip = fCode.data(); - while (ip < fCode.data() + fCode.size()) { - ByteCodeInstruction inst = (ByteCodeInstruction) (intptr_t) READ_INST(); - const void* label = labels[(int) inst]; - memcpy(ip - sizeof(instruction), &label, sizeof(label)); - switch (inst) { - VECTOR_MATRIX_PREPROCESS(kAddF) - VECTOR_PREPROCESS(kAddI) - case ByteCodeInstruction::kAndB: break; - case ByteCodeInstruction::kBranch: READ16(); break; - case ByteCodeInstruction::kCall: READ8(); break; - case ByteCodeInstruction::kCallExternal: { - READ8(); - READ8(); - READ8(); - break; - } - case ByteCodeInstruction::kClampIndex: READ8(); break; - VECTOR_PREPROCESS(kCompareIEQ) - VECTOR_PREPROCESS(kCompareINEQ) - VECTOR_MATRIX_PREPROCESS(kCompareFEQ) - VECTOR_MATRIX_PREPROCESS(kCompareFNEQ) - VECTOR_PREPROCESS(kCompareFGT) - 
VECTOR_PREPROCESS(kCompareFGTEQ) - VECTOR_PREPROCESS(kCompareFLT) - VECTOR_PREPROCESS(kCompareFLTEQ) - VECTOR_PREPROCESS(kCompareSGT) - VECTOR_PREPROCESS(kCompareSGTEQ) - VECTOR_PREPROCESS(kCompareSLT) - VECTOR_PREPROCESS(kCompareSLTEQ) - VECTOR_PREPROCESS(kCompareUGT) - VECTOR_PREPROCESS(kCompareUGTEQ) - VECTOR_PREPROCESS(kCompareULT) - VECTOR_PREPROCESS(kCompareULTEQ) - VECTOR_PREPROCESS_NO_COUNT(kConvertFtoI) - VECTOR_PREPROCESS_NO_COUNT(kConvertStoF) - VECTOR_PREPROCESS_NO_COUNT(kConvertUtoF) - VECTOR_PREPROCESS(kCos) - VECTOR_MATRIX_PREPROCESS(kDivideF) - VECTOR_PREPROCESS(kDivideS) - VECTOR_PREPROCESS(kDivideU) - VECTOR_MATRIX_PREPROCESS(kDup) - - case ByteCodeInstruction::kInverse2x2: - case ByteCodeInstruction::kInverse3x3: - case ByteCodeInstruction::kInverse4x4: break; - - case ByteCodeInstruction::kLoad: - case ByteCodeInstruction::kLoad2: - case ByteCodeInstruction::kLoad3: - case ByteCodeInstruction::kLoad4: - case ByteCodeInstruction::kLoadGlobal: - case ByteCodeInstruction::kLoadGlobal2: - case ByteCodeInstruction::kLoadGlobal3: - case ByteCodeInstruction::kLoadGlobal4: - case ByteCodeInstruction::kLoadUniform: - case ByteCodeInstruction::kLoadUniform2: - case ByteCodeInstruction::kLoadUniform3: - case ByteCodeInstruction::kLoadUniform4: READ16(); break; - - case ByteCodeInstruction::kLoadSwizzle: - case ByteCodeInstruction::kLoadSwizzleGlobal: - case ByteCodeInstruction::kLoadSwizzleUniform: { - READ8(); - int count = READ8(); - ip += count; - break; - } - - case ByteCodeInstruction::kLoadExtended: - case ByteCodeInstruction::kLoadExtendedGlobal: - case ByteCodeInstruction::kLoadExtendedUniform: - READ8(); - break; - - case ByteCodeInstruction::kMatrixToMatrix: { - READ8(); - READ8(); - READ8(); - READ8(); - break; - } - case ByteCodeInstruction::kMatrixMultiply: { - READ8(); - READ8(); - READ8(); - break; - } - VECTOR_MATRIX_PREPROCESS(kMultiplyF) - VECTOR_PREPROCESS(kMultiplyI) - VECTOR_MATRIX_PREPROCESS_NO_COUNT(kNegateF) - 
VECTOR_PREPROCESS_NO_COUNT(kNegateI) - case ByteCodeInstruction::kNotB: break; - case ByteCodeInstruction::kOrB: break; - VECTOR_MATRIX_PREPROCESS_NO_COUNT(kPop) - case ByteCodeInstruction::kPushImmediate: READ32(); break; - - case ByteCodeInstruction::kReadExternal: - case ByteCodeInstruction::kReadExternal2: - case ByteCodeInstruction::kReadExternal3: - case ByteCodeInstruction::kReadExternal4: READ16(); break; - - VECTOR_PREPROCESS(kRemainderF) - VECTOR_PREPROCESS(kRemainderS) - VECTOR_PREPROCESS(kRemainderU) - case ByteCodeInstruction::kReserve: READ8(); break; - case ByteCodeInstruction::kReturn: READ8(); break; - case ByteCodeInstruction::kScalarToMatrix: READ8(); READ8(); break; - case ByteCodeInstruction::kShiftLeft: READ8(); break; - case ByteCodeInstruction::kShiftRightS: READ8(); break; - case ByteCodeInstruction::kShiftRightU: READ8(); break; - VECTOR_PREPROCESS(kSin) - VECTOR_PREPROCESS_NO_COUNT(kSqrt) - - case ByteCodeInstruction::kStore: - case ByteCodeInstruction::kStore2: - case ByteCodeInstruction::kStore3: - case ByteCodeInstruction::kStore4: - case ByteCodeInstruction::kStoreGlobal: - case ByteCodeInstruction::kStoreGlobal2: - case ByteCodeInstruction::kStoreGlobal3: - case ByteCodeInstruction::kStoreGlobal4: READ8(); break; - - case ByteCodeInstruction::kStoreSwizzle: - case ByteCodeInstruction::kStoreSwizzleGlobal: { - READ8(); - int count = READ8(); - ip += count; - break; - } - - case ByteCodeInstruction::kStoreSwizzleIndirect: - case ByteCodeInstruction::kStoreSwizzleIndirectGlobal: { - int count = READ8(); - ip += count; - break; - } - - case ByteCodeInstruction::kStoreExtended: READ8(); break; - case ByteCodeInstruction::kStoreExtendedGlobal: READ8(); break; - - VECTOR_MATRIX_PREPROCESS(kSubtractF) - VECTOR_PREPROCESS(kSubtractI) - - case ByteCodeInstruction::kSwizzle: { - READ8(); - int count = READ8(); - ip += count; - break; - } - VECTOR_PREPROCESS(kTan) - case ByteCodeInstruction::kWriteExternal: - case 
ByteCodeInstruction::kWriteExternal2: - case ByteCodeInstruction::kWriteExternal3: - case ByteCodeInstruction::kWriteExternal4: READ16(); break; - - case ByteCodeInstruction::kXorB: break; - case ByteCodeInstruction::kMaskPush: break; - case ByteCodeInstruction::kMaskPop: break; - case ByteCodeInstruction::kMaskNegate: break; - case ByteCodeInstruction::kMaskBlend: READ8(); break; - case ByteCodeInstruction::kBranchIfAllFalse: READ16(); break; - case ByteCodeInstruction::kLoopBegin: break; - case ByteCodeInstruction::kLoopNext: break; - case ByteCodeInstruction::kLoopMask: break; - case ByteCodeInstruction::kLoopEnd: break; - case ByteCodeInstruction::kLoopContinue: break; - case ByteCodeInstruction::kLoopBreak: break; - default: - ip -= 2; - printf("unknown(%d)\n", READ16()); - SkASSERT(false); - } - } -#endif -} - -bool ByteCode::run(const ByteCodeFunction* f, - float* args, int argCount, - float* outReturn, int returnCount, - const float* uniforms, int uniformCount) const { -#if defined(SK_ENABLE_SKSL_INTERPRETER) - Interpreter::VValue stack[128]; - int stackNeeded = f->fParameterCount + f->fLocalCount + f->fStackCount; - if (stackNeeded > (int)SK_ARRAY_COUNT(stack)) { - return false; - } - - if (argCount != f->fParameterCount || - returnCount != f->fReturnCount || - uniformCount != fUniformSlotCount) { - return false; - } - - Interpreter::VValue globals[32]; - if (fGlobalSlotCount > (int)SK_ARRAY_COUNT(globals)) { - return false; - } - - // Transpose args into stack - { - float* src = args; - float* dst = (float*)stack; - for (int i = 0; i < argCount; ++i) { - *dst = *src++; - dst += VecWidth; - } - } - - bool stripedOutput = false; - float** outArray = outReturn ? 
&outReturn : nullptr; - if (!Interpreter::InnerRun(this, f, stack, outArray, globals, uniforms, stripedOutput, 1, 0)) { - return false; - } - - // Transpose out parameters back - { - float* dst = args; - float* src = (float*)stack; - for (const auto& p : f->fParameters) { - if (p.fIsOutParameter) { - for (int i = p.fSlotCount; i > 0; --i) { - *dst++ = *src; - src += VecWidth; - } - } else { - dst += p.fSlotCount; - src += p.fSlotCount * VecWidth; - } - } - } - - return true; -#else - SkDEBUGFAIL("ByteCode interpreter not enabled"); - return false; -#endif -} - -bool ByteCode::runStriped(const ByteCodeFunction* f, int N, - float* args[], int argCount, - float* outReturn[], int returnCount, - const float* uniforms, int uniformCount) const { -#if defined(SK_ENABLE_SKSL_INTERPRETER) - Interpreter::VValue stack[128]; - int stackNeeded = f->fParameterCount + f->fLocalCount + f->fStackCount; - if (stackNeeded > (int)SK_ARRAY_COUNT(stack)) { - return false; - } - - if (argCount != f->fParameterCount || - returnCount != f->fReturnCount || - uniformCount != fUniformSlotCount) { - return false; - } - - Interpreter::VValue globals[32]; - if (fGlobalSlotCount > (int)SK_ARRAY_COUNT(globals)) { - return false; - } - - // innerRun just takes outArgs, so clear it if the count is zero - if (returnCount == 0) { - outReturn = nullptr; - } - - int baseIndex = 0; - - while (N) { - int w = std::min(N, VecWidth); - - // Copy args into stack - for (int i = 0; i < argCount; ++i) { - memcpy((void*)(stack + i), args[i], w * sizeof(float)); - } - - bool stripedOutput = true; - if (!Interpreter::InnerRun(this, f, stack, outReturn, globals, uniforms, stripedOutput, w, - baseIndex)) { - return false; - } - - // Copy out parameters back - int slot = 0; - for (const auto& p : f->fParameters) { - if (p.fIsOutParameter) { - for (int i = slot; i < slot + p.fSlotCount; ++i) { - memcpy(args[i], stack + i, w * sizeof(float)); - } - } - slot += p.fSlotCount; - } - - // Step each argument pointer ahead - 
for (int i = 0; i < argCount; ++i) { - args[i] += w; - } - N -= w; - baseIndex += w; - } - - return true; -#else - SkDEBUGFAIL("ByteCode interpreter not enabled"); - return false; -#endif -} - -} // namespace SkSL - -#endif diff --git a/src/sksl/SkSLByteCode.h b/src/sksl/SkSLByteCode.h index f917eec49d..adc4a0bb5f 100644 --- a/src/sksl/SkSLByteCode.h +++ b/src/sksl/SkSLByteCode.h @@ -9,206 +9,59 @@ #define SKSL_BYTECODE #include "include/private/SkOnce.h" +#include "include/private/SkVx.h" #include "src/sksl/SkSLString.h" +#include "src/sksl/ir/SkSLFunctionDeclaration.h" #include #include namespace SkSL { -class ExternalValue; -struct FunctionDeclaration; - -// GCC and Clang support the "labels as values" extension which we need to implement the interpreter -// using threaded code. Otherwise, we fall back to using a switch statement in a for loop. -#if defined(__GNUC__) || defined(__clang__) - #define SKSLC_THREADED_CODE - using instruction = void*; -#else - using instruction = uint16_t; -#endif - -#define VECTOR(name) name ## 4, name ## 3, name ## 2, name -#define VECTOR_MATRIX(name) name ## 4, name ## 3, name ## 2, name, name ## N - -enum class ByteCodeInstruction : uint16_t { - // B = bool, F = float, I = int, S = signed, U = unsigned - // All binary VECTOR instructions (kAddF, KSubtractI, kCompareIEQ, etc.) are followed by a byte - // indicating the count, even though it is redundant due to the count appearing in the opcode. - // This is because the original opcodes are lost after we preprocess it into threaded code, and - // we need to still be able to access the count so as to permit the implementation to use opcode - // fallthrough. 
- VECTOR_MATRIX(kAddF), - VECTOR(kAddI), - kAndB, - kBranch, - // Followed by a byte indicating the index of the function to call - kCall, - // Followed by three bytes indicating: the number of argument slots, the number of return slots, - // and the index of the external value to call - kCallExternal, - // For dynamic array access: Followed by byte indicating length of array - kClampIndex, - VECTOR(kCompareIEQ), - VECTOR(kCompareINEQ), - VECTOR_MATRIX(kCompareFEQ), - VECTOR_MATRIX(kCompareFNEQ), - VECTOR(kCompareFGT), - VECTOR(kCompareFGTEQ), - VECTOR(kCompareFLT), - VECTOR(kCompareFLTEQ), - VECTOR(kCompareSGT), - VECTOR(kCompareSGTEQ), - VECTOR(kCompareSLT), - VECTOR(kCompareSLTEQ), - VECTOR(kCompareUGT), - VECTOR(kCompareUGTEQ), - VECTOR(kCompareULT), - VECTOR(kCompareULTEQ), - VECTOR(kConvertFtoI), - VECTOR(kConvertStoF), - VECTOR(kConvertUtoF), - // Followed by a (redundant) byte indicating the count - VECTOR(kCos), - VECTOR_MATRIX(kDivideF), - VECTOR(kDivideS), - VECTOR(kDivideU), - // Duplicates the top stack value. Followed by a (redundant) byte indicating the count. - VECTOR_MATRIX(kDup), - kInverse2x2, - kInverse3x3, - kInverse4x4, - // kLoad/kLoadGlobal are followed by a byte indicating the count, and a byte indicating the - // local/global slot to load - VECTOR(kLoad), - VECTOR(kLoadGlobal), - VECTOR(kLoadUniform), - // As kLoad/kLoadGlobal, then a count byte (1-4), and then one byte per swizzle component (0-3). - kLoadSwizzle, - kLoadSwizzleGlobal, - kLoadSwizzleUniform, - // kLoadExtended* are fallback load ops when we lack a specialization. They are followed by a - // count byte, and get the slot to load from the top of the stack. - kLoadExtended, - kLoadExtendedGlobal, - kLoadExtendedUniform, - // Followed by four bytes: srcCols, srcRows, dstCols, dstRows. Consumes the src matrix from the - // stack, and replaces it with the dst matrix. Per GLSL rules, there are no restrictions on - // dimensions. 
Any overlapping values are copied, and any other values are filled in with the - // identity matrix. - kMatrixToMatrix, - // Followed by three bytes: leftCols (== rightRows), leftRows, rightCols - kMatrixMultiply, - VECTOR_MATRIX(kNegateF), - VECTOR(kNegateI), - VECTOR_MATRIX(kMultiplyF), - VECTOR(kMultiplyI), - kNotB, - kOrB, - VECTOR_MATRIX(kPop), - // Followed by a 32 bit value containing the value to push - kPushImmediate, - // Followed by a byte indicating external value to read - VECTOR(kReadExternal), - VECTOR(kRemainderF), - VECTOR(kRemainderS), - VECTOR(kRemainderU), - // Followed by a byte indicating the number of slots to reserve on the stack (for later return) - kReserve, - // Followed by a byte indicating the number of slots being returned - kReturn, - // Followed by two bytes indicating columns and rows of matrix (2, 3, or 4 each). - // Takes a single value from the top of the stack, and converts to a CxR matrix with that value - // replicated along the diagonal (and zero elsewhere), per the GLSL matrix construction rules. - kScalarToMatrix, - // Followed by a byte indicating the number of bits to shift - kShiftLeft, - kShiftRightS, - kShiftRightU, - // Followed by a (redundant) byte indicating the count - VECTOR(kSin), - VECTOR(kSqrt), - // kStore/kStoreGlobal are followed by a byte indicating the local/global slot to store - VECTOR(kStore), - VECTOR(kStoreGlobal), - // Fallback stores. Followed by count byte, and get the slot to store from the top of the stack - kStoreExtended, - kStoreExtendedGlobal, - // As kStore/kStoreGlobal, then a count byte (1-4), then one byte per swizzle component (0-3). - // Expects the stack to look like: ... v1 v2 v3 v4, where the number of 'v's is equal to the - // number of swizzle components. After the store, all v's are popped from the stack. 
- kStoreSwizzle, - kStoreSwizzleGlobal, - // As above, but gets the store slot from the top of the stack (before values to be stored) - kStoreSwizzleIndirect, - kStoreSwizzleIndirectGlobal, - // Followed by two count bytes (1-4), and then one byte per swizzle component (0-3). The first - // count byte provides the current vector size (the vector is the top n stack elements), and the - // second count byte provides the swizzle component count. - kSwizzle, - VECTOR_MATRIX(kSubtractF), - VECTOR(kSubtractI), - // Followed by a (redundant) byte indicating the count - VECTOR(kTan), - // Followed by a byte indicating external value to write - VECTOR(kWriteExternal), - kXorB, - - kMaskPush, - kMaskPop, - kMaskNegate, - // Followed by count byte - kMaskBlend, - // Followed by address - kBranchIfAllFalse, - - kLoopBegin, - kLoopNext, - kLoopMask, - kLoopEnd, - kLoopBreak, - kLoopContinue, -}; -#undef VECTOR +class ByteCode; +class ExternalValue; class ByteCodeFunction { public: - int getParameterCount() const { return fParameterCount; } - int getReturnCount() const { return fReturnCount; } - - /** - * Print bytecode disassembly to stdout. - */ - void disassemble() const; - -private: - ByteCodeFunction(const FunctionDeclaration* declaration); - - friend class ByteCode; - friend class ByteCodeGenerator; - friend struct Interpreter; - + // all counts are of 32-bit values, so a float4 counts as 4 parameter or return slots struct Parameter { int fSlotCount; bool fIsOutParameter; }; - SkSL::String fName; - std::vector fParameters; - int fParameterCount; - int fReturnCount = 0; + /** + * Note that this is the actual number of parameters, not the number of parameter slots. 
+ */ + int getParameterCount() const { return fParameters.size(); } + + Parameter getParameter(int idx) const { return fParameters[idx]; } + + int getParameterSlotCount() const { return fParameterSlotCount; } + + int getReturnSlotCount() const { return fReturnSlotCount; } + + void disassemble() const { } + +private: + ByteCodeFunction(const FunctionDeclaration* declaration) + : fName(declaration->fName) {} + + String fName; + + std::vector fParameters; + + int fParameterSlotCount; + + int fReturnSlotCount; + + int fStackSlotCount; - int fLocalCount = 0; - int fStackCount = 0; - int fConditionCount = 0; - int fLoopCount = 0; - mutable SkOnce fPreprocessOnce; std::vector fCode; - /** - * Replace each opcode with the corresponding entry from the labels array. - */ - void preprocess(const void* labels[]); + friend class ByteCode; + friend class ByteCodeGenerator; + template + friend class Interpreter; }; enum class TypeCategory { @@ -220,9 +73,260 @@ enum class TypeCategory { class SK_API ByteCode { public: - static constexpr int kVecWidth = 8; + template + union Vector { + skvx::Vec fFloat; + skvx::Vec fInt; + skvx::Vec fUInt; - ByteCode() = default; + Vector() = default; + + Vector(skvx::Vec f) + : fFloat(f) {} + + Vector(skvx::Vec i) + : fInt(i) {} + + Vector(skvx::Vec u) + : fUInt(u) {} + }; + + enum class Instruction : uint8_t { + // no parameters + kNop, + // no parameters + kAbort, + // Register target, Register src1, Register src2 + kAddF, + // Register target, Register src1, Register src2 + kAddI, + // Register target, Register src1, Register src2 + kAnd, + // Register index, int arrayLength + kBoundsCheck, + // Pointer target + kBranch, + // Pointer target + kBranchIfAllFalse, + // no parameters + kBreak, + // Register target, uint8_t functionIndex, Register parameters + kCall, + // Register target, uint8_t externalValueIndex, uint8_t targetSize, Register arguments, + // uint8_t argumentSize + kCallExternal, + // Register target, Register src1, Register src2 
+ kCompareEQF, + // Register target, Register src1, Register src2 + kCompareEQI, + // Register target, Register src1, Register src2 + kCompareNEQF, + // Register target, Register src1, Register src2 + kCompareNEQI, + // Register target, Register src1, Register src2 + kCompareGTF, + // Register target, Register src1, Register src2 + kCompareGTS, + // Register target, Register src1, Register src2 + kCompareGTU, + // Register target, Register src1, Register src2 + kCompareGTEQF, + // Register target, Register src1, Register src2 + kCompareGTEQS, + // Register target, Register src1, Register src2 + kCompareGTEQU, + // Register target, Register src1, Register src2 + kCompareLTF, + // Register target, Register src1, Register src2 + kCompareLTS, + // Register target, Register src1, Register src2 + kCompareLTU, + // Register target, Register src1, Register src2 + kCompareLTEQF, + // Register target, Register src1, Register src2 + kCompareLTEQS, + // Register target, Register src1, Register src2 + kCompareLTEQU, + // no parameters + kContinue, + // Register target, Register src + kCopy, + // Register target, Register src, + kCos, + // Register target, Register src1, Register src2 + kDivideF, + // Register target, Register src1, Register src2 + kDivideS, + // Register target, Register src1, Register src2 + kDivideU, + // Register target, Register src + kFloatToSigned, + // Register target, Register src + kFloatToUnsigned, + // Load a constant into a register + // Register target, Immediate value + kImmediate, + // Register target, Register src + kInverse2x2, + // Register target, Register src + kInverse3x3, + // Register target, Register src + kInverse4x4, + // Load the memory cell pointed to by srcPtr into a register + // Register target, Register srcPtr + kLoad, + // Load the memory cell pointed to by src into a register + // Register target, Pointer src + kLoadDirect, + // Load the parameter slot pointed to by srcPtr into a register + // Register target, Register srcPtr + 
kLoadParameter, + // Load the parameter slot pointed to by src into a register + // Register target, Pointer src + kLoadParameterDirect, + // Load the stack cell pointed to by srcPtr + sp into a register + // Register target, Register srcPtr + kLoadStack, + // Load the stack cell pointed to by src + sp into a register + // Register target, Pointer src + kLoadStackDirect, + // Pushes a new loop onto the loop and continue stacks + // no parameters + kLoopBegin, + // Pops the loop and continue stacks + // no parameters + kLoopEnd, + // Register mask + kLoopMask, + // no parameters + kLoopNext, + // no parameters + kMaskNegate, + // no parameters + kMaskPop, + // Register mask + kMaskPush, + // Register target, Register left, Register right, uint8_t leftColsAndRightRows, + // uint8_t leftRows, uint8_t rightCols + kMatrixMultiply, + // Register target, Register src, uint8_t srcColumns, uint8_t srcRows, uint8_t dstColumns, + // uint8_t dstRows + kMatrixToMatrix, + // Register target, Register src1, Register src2 + kMultiplyF, + // Register target, Register src1, Register src2 + kMultiplyI, + // Register target, Register src + kNegateF, + // Register target, Register src + kNegateS, + // Register target, Register src + kNot, + // Register target, Register src1, Register src2 + kOr, + // Register src + kPrint, + // Register target, uint8_t count, uint8_t index + kReadExternal, + // Register target, Register src1, Register src2 + kRemainderF, + // Register target, Register src1, Register src2 + kRemainderS, + // Register target, Register src1, Register src2 + kRemainderU, + // no parameters + kReturn, + // Register value + kReturnValue, + // Register target, Register src, uint8_t columns, uint8_t rows + kScalarToMatrix, + // Register target, Register test, Register ifTrue, Register ifFalse + kSelect, + // Register target, Register src, uint8_t count + kShiftLeft, + // Register target, Register src, uint8_t count + kShiftRightS, + // Register target, Register src, uint8_t 
count + kShiftRightU, + // Register target, Register src + kSignedToFloat, + // Register target, Register src, + kSin, + // Register target, Register src, + kSqrt, + // Store to the memory cell pointed to by dstPtr + // Register dstPtr, Register src + kStore, + // Store to the memory cell pointed to by dst + // Pointer dst, Register src + kStoreDirect, + // Store to the parameter slot pointed to by dstPtr + // Register dstPtr, Register src + kStoreParameter, + // Store to the parameter slot pointed to by dst + // Pointer dst, Register src + kStoreParameterDirect, + // Stores a register into the stack cell pointed to by dst + sp + // Register dst, Register src + kStoreStack, + // Stores a register into the stack cell pointed to by dstPtr + sp + // Pointer dst, Register src + kStoreStackDirect, + // Register target, Register src1, Register src2 + kSubtractF, + // Register target, Register src1, Register src2 + kSubtractI, + // Register target, Register src, + kTan, + // Register target, Register src, + kUnsignedToFloat, + // uint8_t index, uint8_t count, Register src + kWriteExternal, + // Register target, Register src1, Register src2 + kXor, + }; + + + // Compound values like vectors span multiple Registers or Pointer addresses. We always refer to + // them by the address of their first slot, so for instance if you add two float4's together, + // the resulting Register contains the first channel of the result, with the other three + // channels following in the next three Registers. 
+ + struct Register { + uint16_t fIndex; + + Register operator+(uint16_t offset) const { + return Register{(uint16_t) (fIndex + offset)}; + } + }; + + struct Pointer { + uint16_t fAddress; + + Pointer operator+(uint16_t offset) const { + return Pointer{(uint16_t) (fAddress + offset)}; + } + }; + + union Immediate { + float fFloat; + int32_t fInt; + uint32_t fUInt; + + Immediate() {} + + Immediate(float f) + : fFloat(f) {} + + Immediate(int32_t i) + : fInt(i) {} + + Immediate(uint32_t u) + : fUInt(u) {} + }; + + static constexpr int kPointerMax = 65535; + static constexpr int kRegisterMax = 65535; const ByteCodeFunction* getFunction(const char* name) const { for (const auto& f : fFunctions) { @@ -233,36 +337,9 @@ public: return nullptr; } - /** - * Invokes the specified function once, with the given arguments. - * 'args', 'outReturn', and 'uniforms' are collections of 32-bit values (typically floats, - * but possibly int32_t or uint32_t, depending on the types used in the SkSL). - * Any 'out' or 'inout' parameters will result in the 'args' array being modified. - * The return value is stored in 'outReturn' (may be null, to discard the return value). - * 'uniforms' are mapped to 'uniform' globals, in order. - */ - bool SKSL_WARN_UNUSED_RESULT run(const ByteCodeFunction*, - float* args, int argCount, - float* outReturn, int returnCount, - const float* uniforms, int uniformCount) const; - - /** - * Invokes the specified function with the given arguments, 'N' times. 'args' and 'outReturn' - * are accepted and returned in structure-of-arrays form: - * args[0] points to an array of N values, the first argument for each invocation - * ... - * args[argCount - 1] points to an array of N values, the last argument for each invocation - * - * All values in 'args', 'outReturn', and 'uniforms' are 32-bit values (typically floats, - * but possibly int32_t or uint32_t, depending on the types used in the SkSL). 
- * Any 'out' or 'inout' parameters will result in the 'args' array being modified. - * The return value is stored in 'outReturn' (may be null, to discard the return value). - * 'uniforms' are mapped to 'uniform' globals, in order. - */ - bool SKSL_WARN_UNUSED_RESULT runStriped(const ByteCodeFunction*, int N, - float* args[], int argCount, - float* outReturn[], int returnCount, - const float* uniforms, int uniformCount) const; + int getGlobalSlotCount() const { + return fGlobalSlotCount; + } struct Uniform { SkSL::String fName; @@ -285,20 +362,19 @@ public: const Uniform& getUniform(int i) const { return fUniforms[i]; } private: - ByteCode(const ByteCode&) = delete; - ByteCode& operator=(const ByteCode&) = delete; + std::vector> fFunctions; + std::vector fExternalValues; - friend class ByteCodeGenerator; - friend struct Interpreter; + int fGlobalSlotCount; - int fGlobalSlotCount = 0; int fUniformSlotCount = 0; std::vector fUniforms; - std::vector> fFunctions; - std::vector fExternalValues; + friend class ByteCodeGenerator; + template + friend class Interpreter; }; -} +} // namespace #endif diff --git a/src/sksl/SkSLByteCodeGenerator.cpp b/src/sksl/SkSLByteCodeGenerator.cpp index 36a1338de8..75b3d22a0e 100644 --- a/src/sksl/SkSLByteCodeGenerator.cpp +++ b/src/sksl/SkSLByteCodeGenerator.cpp @@ -7,50 +7,21 @@ #include "src/sksl/SkSLByteCodeGenerator.h" -#include - namespace SkSL { -static TypeCategory type_category(const Type& type) { - switch (type.kind()) { - case Type::Kind::kVector_Kind: - case Type::Kind::kMatrix_Kind: - return type_category(type.componentType()); - default: - if (type.fName == "bool") { - return TypeCategory::kBool; - } else if (type.fName == "int" || - type.fName == "short" || - type.fName == "$intLiteral") { - return TypeCategory::kSigned; - } else if (type.fName == "uint" || - type.fName == "ushort") { - return TypeCategory::kUnsigned; - } else { - SkASSERT(type.fName == "float" || - type.fName == "half" || - type.fName == "$floatLiteral"); - 
return TypeCategory::kFloat; - } - ABORT("unsupported type: %s\n", type.displayName().c_str()); - } -} - - -ByteCodeGenerator::ByteCodeGenerator(const Context* context, const Program* program, ErrorReporter* errors, - ByteCode* output) +ByteCodeGenerator::ByteCodeGenerator(const Program* program, ErrorReporter* errors, + ByteCode* output) : INHERITED(program, errors, nullptr) - , fContext(*context) , fOutput(output) , fIntrinsics { - { "cos", ByteCodeInstruction::kCos }, + { "cos", ByteCode::Instruction::kCos }, { "dot", SpecialIntrinsic::kDot }, - { "inverse", ByteCodeInstruction::kInverse2x2 }, - { "sin", ByteCodeInstruction::kSin }, - { "sqrt", ByteCodeInstruction::kSqrt }, - { "tan", ByteCodeInstruction::kTan }, - } {} - + { "inverse", SpecialIntrinsic::kInverse }, + { "print", ByteCode::Instruction::kPrint }, + { "sin", ByteCode::Instruction::kSin }, + { "sqrt", ByteCode::Instruction::kSqrt }, + { "tan", ByteCode::Instruction::kTan }, + } {} int ByteCodeGenerator::SlotCount(const Type& type) { if (type.kind() == Type::kOther_Kind) { @@ -80,89 +51,73 @@ static inline bool is_uniform(const SkSL::Variable& var) { static inline bool is_in(const SkSL::Variable& var) { return var.fModifiers.fFlags & Modifiers::kIn_Flag; } - -void ByteCodeGenerator::gatherUniforms(const Type& type, const String& name) { - if (type.kind() == Type::kOther_Kind) { - return; - } else if (type.kind() == Type::kStruct_Kind) { - for (const auto& f : type.fields()) { - this->gatherUniforms(*f.fType, name + "." 
+ f.fName); - } - } else if (type.kind() == Type::kArray_Kind) { - for (int i = 0; i < type.columns(); ++i) { - this->gatherUniforms(type.componentType(), String::printf("%s[%d]", name.c_str(), i)); - } - } else { - fOutput->fUniforms.push_back({ name, type_category(type), type.rows(), type.columns(), - fOutput->fUniformSlotCount }); - fOutput->fUniformSlotCount += type.columns() * type.rows(); - } -} - -bool ByteCodeGenerator::generateCode() { - for (const auto& e : fProgram) { - switch (e.fKind) { - case ProgramElement::kFunction_Kind: { - std::unique_ptr f = this->writeFunction((FunctionDefinition&) e); - if (!f) { - return false; +ByteCodeGenerator::Location ByteCodeGenerator::getLocation(const Variable& var) { + // given that we seldom have more than a couple of variables, linear search is probably the most + // efficient way to handle lookups + switch (var.fStorage) { + case Variable::kLocal_Storage: { + for (int i = fLocals.size() - 1; i >= 0; --i) { + if (fLocals[i] == &var) { + return ByteCode::Pointer{(uint16_t) (i + fParameterCount)}; } - fOutput->fFunctions.push_back(std::move(f)); - fFunctions.push_back(&(FunctionDefinition&)e); - break; } - case ProgramElement::kVar_Kind: { - VarDeclarations& decl = (VarDeclarations&) e; - for (const auto& v : decl.fVars) { - const Variable* declVar = ((VarDeclaration&) *v).fVar; - if (declVar->fModifiers.fLayout.fBuiltin >= 0 || is_in(*declVar)) { - continue; - } - if (is_uniform(*declVar)) { - this->gatherUniforms(declVar->fType, declVar->fName); - } else { - fOutput->fGlobalSlotCount += SlotCount(declVar->fType); + int result = fLocals.size() + fParameterCount; + fLocals.push_back(&var); + for (int i = 0; i < SlotCount(var.fType) - 1; ++i) { + fLocals.push_back(nullptr); + } + SkASSERT(result <= ByteCode::kPointerMax); + return ByteCode::Pointer{(uint16_t) result}; + } + case Variable::kParameter_Storage: { + int offset = 0; + for (const auto& p : fFunction->fDeclaration.fParameters) { + if (p == &var) { + 
SkASSERT(offset <= ByteCode::kPointerMax); + return ByteCode::Pointer{(uint16_t) offset}; + } + offset += SlotCount(p->fType); + } + SkASSERT(false); + return ByteCode::Pointer{0}; + } + case Variable::kGlobal_Storage: { + if (is_in(var)) { + // If you trip this assert, it means the program is using raw 'in' variables. You + // should either specialize the program (Compiler::specialize) to bake in the final + // values of the 'in' variables, or not use 'in' variables (maybe you meant to use + // 'uniform' instead?). + SkASSERT(false); + return ByteCode::Pointer{0}; + } + bool isUniform = is_uniform(var); + int offset = isUniform ? fOutput->getGlobalSlotCount() : 0; + for (const auto& e : fProgram) { + if (e.fKind == ProgramElement::kVar_Kind) { + VarDeclarations& decl = (VarDeclarations&) e; + for (const auto& v : decl.fVars) { + const Variable* declVar = ((VarDeclaration&) *v).fVar; + if (declVar->fModifiers.fLayout.fBuiltin >= 0 || is_in(*declVar)) { + continue; + } + if (isUniform != is_uniform(*declVar)) { + continue; + } + if (declVar == &var) { + SkASSERT(offset <= ByteCode::kPointerMax); + return ByteCode::Pointer{(uint16_t) offset}; + } + offset += SlotCount(declVar->fType); } } - break; } - default: - ; // ignore + SkASSERT(false); + return ByteCode::Pointer{0}; } + default: + SkASSERT(false); + return ByteCode::Pointer{0}; } - return 0 == fErrors.errorCount(); -} - -std::unique_ptr ByteCodeGenerator::writeFunction(const FunctionDefinition& f) { - fFunction = &f; - std::unique_ptr result(new ByteCodeFunction(&f.fDeclaration)); - fParameterCount = result->fParameterCount; - fLoopCount = fMaxLoopCount = 0; - fConditionCount = fMaxConditionCount = 0; - fStackCount = fMaxStackCount = 0; - fCode = &result->fCode; - - this->writeStatement(*f.fBody); - if (0 == fErrors.errorCount()) { - SkASSERT(fLoopCount == 0); - SkASSERT(fConditionCount == 0); - SkASSERT(fStackCount == 0); - } - this->write(ByteCodeInstruction::kReturn, 0); - this->write8(0); - - 
result->fLocalCount = fLocals.size(); - result->fConditionCount = fMaxConditionCount; - result->fLoopCount = fMaxLoopCount; - result->fStackCount = fMaxStackCount; - - const Type& returnType = f.fDeclaration.fReturnType; - if (returnType != *fContext.fVoid_Type) { - result->fReturnCount = SlotCount(returnType); - } - fLocals.clear(); - fFunction = nullptr; - return result; } // A "simple" Swizzle is based on a variable (or a compound variable like a struct or array), and @@ -186,1273 +141,240 @@ static bool swizzle_is_simple(const Swizzle& s) { return true; } -int ByteCodeGenerator::StackUsage(ByteCodeInstruction inst, int count_) { - // Ensures that we use count iff we're passed a non-default value. Most instructions have an - // implicit count, so the caller shouldn't need to worry about it (or count makes no sense). - // The asserts avoids callers thinking they're supplying useful information in that scenario, - // or failing to supply necessary information for the ops that need a count. 
- struct CountValue { - operator int() { - SkASSERT(val != ByteCodeGenerator::kUnusedStackCount); - SkDEBUGCODE(used = true); - return val; - } - ~CountValue() { - SkASSERT(used || val == ByteCodeGenerator::kUnusedStackCount); - } - int val; - SkDEBUGCODE(bool used = false;) - } count = { count_ }; - - switch (inst) { - // Unary functions/operators that don't change stack depth at all: -#define VECTOR_UNARY_OP(base) \ - case ByteCodeInstruction::base: \ - case ByteCodeInstruction::base ## 2: \ - case ByteCodeInstruction::base ## 3: \ - case ByteCodeInstruction::base ## 4: \ - return 0; - - VECTOR_UNARY_OP(kConvertFtoI) - VECTOR_UNARY_OP(kConvertStoF) - VECTOR_UNARY_OP(kConvertUtoF) - - VECTOR_UNARY_OP(kCos) - VECTOR_UNARY_OP(kSin) - VECTOR_UNARY_OP(kSqrt) - VECTOR_UNARY_OP(kTan) - - VECTOR_UNARY_OP(kNegateF) - VECTOR_UNARY_OP(kNegateI) - - case ByteCodeInstruction::kInverse2x2: - case ByteCodeInstruction::kInverse3x3: - case ByteCodeInstruction::kInverse4x4: return 0; - - case ByteCodeInstruction::kClampIndex: return 0; - case ByteCodeInstruction::kNotB: return 0; - case ByteCodeInstruction::kNegateFN: return 0; - case ByteCodeInstruction::kShiftLeft: return 0; - case ByteCodeInstruction::kShiftRightS: return 0; - case ByteCodeInstruction::kShiftRightU: return 0; - -#undef VECTOR_UNARY_OP - - // Binary functions/operators that do a 2 -> 1 reduction (possibly N times) -#define VECTOR_BINARY_OP(base) \ - case ByteCodeInstruction::base: return -1; \ - case ByteCodeInstruction::base ## 2: return -2; \ - case ByteCodeInstruction::base ## 3: return -3; \ - case ByteCodeInstruction::base ## 4: return -4; - -#define VECTOR_MATRIX_BINARY_OP(base) \ - VECTOR_BINARY_OP(base) \ - case ByteCodeInstruction::base ## N: return -count; - - case ByteCodeInstruction::kAndB: return -1; - case ByteCodeInstruction::kOrB: return -1; - case ByteCodeInstruction::kXorB: return -1; - - VECTOR_BINARY_OP(kAddI) - VECTOR_MATRIX_BINARY_OP(kAddF) - - VECTOR_BINARY_OP(kCompareIEQ) - 
VECTOR_MATRIX_BINARY_OP(kCompareFEQ) - VECTOR_BINARY_OP(kCompareINEQ) - VECTOR_MATRIX_BINARY_OP(kCompareFNEQ) - VECTOR_BINARY_OP(kCompareSGT) - VECTOR_BINARY_OP(kCompareUGT) - VECTOR_BINARY_OP(kCompareFGT) - VECTOR_BINARY_OP(kCompareSGTEQ) - VECTOR_BINARY_OP(kCompareUGTEQ) - VECTOR_BINARY_OP(kCompareFGTEQ) - VECTOR_BINARY_OP(kCompareSLT) - VECTOR_BINARY_OP(kCompareULT) - VECTOR_BINARY_OP(kCompareFLT) - VECTOR_BINARY_OP(kCompareSLTEQ) - VECTOR_BINARY_OP(kCompareULTEQ) - VECTOR_BINARY_OP(kCompareFLTEQ) - - VECTOR_BINARY_OP(kDivideS) - VECTOR_BINARY_OP(kDivideU) - VECTOR_MATRIX_BINARY_OP(kDivideF) - VECTOR_BINARY_OP(kMultiplyI) - VECTOR_MATRIX_BINARY_OP(kMultiplyF) - VECTOR_BINARY_OP(kRemainderF) - VECTOR_BINARY_OP(kRemainderS) - VECTOR_BINARY_OP(kRemainderU) - VECTOR_BINARY_OP(kSubtractI) - VECTOR_MATRIX_BINARY_OP(kSubtractF) - -#undef VECTOR_BINARY_OP -#undef VECTOR_MATRIX_BINARY_OP - - // Ops that push or load data to grow the stack: - case ByteCodeInstruction::kDup: - case ByteCodeInstruction::kLoad: - case ByteCodeInstruction::kLoadGlobal: - case ByteCodeInstruction::kLoadUniform: - case ByteCodeInstruction::kReadExternal: - case ByteCodeInstruction::kPushImmediate: - return 1; - - case ByteCodeInstruction::kDup2: - case ByteCodeInstruction::kLoad2: - case ByteCodeInstruction::kLoadGlobal2: - case ByteCodeInstruction::kLoadUniform2: - case ByteCodeInstruction::kReadExternal2: - return 2; - - case ByteCodeInstruction::kDup3: - case ByteCodeInstruction::kLoad3: - case ByteCodeInstruction::kLoadGlobal3: - case ByteCodeInstruction::kLoadUniform3: - case ByteCodeInstruction::kReadExternal3: - return 3; - - case ByteCodeInstruction::kDup4: - case ByteCodeInstruction::kLoad4: - case ByteCodeInstruction::kLoadGlobal4: - case ByteCodeInstruction::kLoadUniform4: - case ByteCodeInstruction::kReadExternal4: - return 4; - - case ByteCodeInstruction::kDupN: - case ByteCodeInstruction::kLoadSwizzle: - case ByteCodeInstruction::kLoadSwizzleGlobal: - case 
ByteCodeInstruction::kLoadSwizzleUniform: - return count; - - // Pushes 'count' values, minus one for the 'address' that's consumed first - case ByteCodeInstruction::kLoadExtended: - case ByteCodeInstruction::kLoadExtendedGlobal: - case ByteCodeInstruction::kLoadExtendedUniform: - return count - 1; - - // Ops that pop or store data to shrink the stack: - case ByteCodeInstruction::kPop: - case ByteCodeInstruction::kStore: - case ByteCodeInstruction::kStoreGlobal: - case ByteCodeInstruction::kWriteExternal: - return -1; - - case ByteCodeInstruction::kPop2: - case ByteCodeInstruction::kStore2: - case ByteCodeInstruction::kStoreGlobal2: - case ByteCodeInstruction::kWriteExternal2: - return -2; - - case ByteCodeInstruction::kPop3: - case ByteCodeInstruction::kStore3: - case ByteCodeInstruction::kStoreGlobal3: - case ByteCodeInstruction::kWriteExternal3: - return -3; - - case ByteCodeInstruction::kPop4: - case ByteCodeInstruction::kStore4: - case ByteCodeInstruction::kStoreGlobal4: - case ByteCodeInstruction::kWriteExternal4: - return -4; - - case ByteCodeInstruction::kPopN: - case ByteCodeInstruction::kStoreSwizzle: - case ByteCodeInstruction::kStoreSwizzleGlobal: - return -count; - - // Consumes 'count' values, plus one for the 'address' - case ByteCodeInstruction::kStoreExtended: - case ByteCodeInstruction::kStoreExtendedGlobal: - case ByteCodeInstruction::kStoreSwizzleIndirect: - case ByteCodeInstruction::kStoreSwizzleIndirectGlobal: - return -count - 1; - - // Strange ops where the caller computes the delta for us: - case ByteCodeInstruction::kCallExternal: - case ByteCodeInstruction::kMatrixToMatrix: - case ByteCodeInstruction::kMatrixMultiply: - case ByteCodeInstruction::kReserve: - case ByteCodeInstruction::kReturn: - case ByteCodeInstruction::kScalarToMatrix: - case ByteCodeInstruction::kSwizzle: - return count; - - // Miscellaneous - - // kCall is net-zero. Max stack depth is adjusted in writeFunctionCall. 
- case ByteCodeInstruction::kCall: return 0; - case ByteCodeInstruction::kBranch: return 0; - case ByteCodeInstruction::kBranchIfAllFalse: return 0; - - case ByteCodeInstruction::kMaskPush: return -1; - case ByteCodeInstruction::kMaskPop: return 0; - case ByteCodeInstruction::kMaskNegate: return 0; - case ByteCodeInstruction::kMaskBlend: return -count; - - case ByteCodeInstruction::kLoopBegin: return 0; - case ByteCodeInstruction::kLoopNext: return 0; - case ByteCodeInstruction::kLoopMask: return -1; - case ByteCodeInstruction::kLoopEnd: return 0; - case ByteCodeInstruction::kLoopBreak: return 0; - case ByteCodeInstruction::kLoopContinue: return 0; - - default: - ABORT("unsupported instruction %d\n", (int)inst); - return 0; - } -} - -ByteCodeGenerator::Location ByteCodeGenerator::getLocation(const Variable& var) { - // given that we seldom have more than a couple of variables, linear search is probably the most - // efficient way to handle lookups - switch (var.fStorage) { - case Variable::kLocal_Storage: { - for (int i = fLocals.size() - 1; i >= 0; --i) { - if (fLocals[i] == &var) { - SkASSERT(fParameterCount + i <= 255); - return { fParameterCount + i, Storage::kLocal }; - } - } - int result = fParameterCount + fLocals.size(); - fLocals.push_back(&var); - for (int i = 0; i < SlotCount(var.fType) - 1; ++i) { - fLocals.push_back(nullptr); - } - SkASSERT(result <= 255); - return { result, Storage::kLocal }; - } - case Variable::kParameter_Storage: { - int offset = 0; - for (const auto& p : fFunction->fDeclaration.fParameters) { - if (p == &var) { - SkASSERT(offset <= 255); - return { offset, Storage::kLocal }; - } - offset += SlotCount(p->fType); - } - SkASSERT(false); - return Location::MakeInvalid(); - } - case Variable::kGlobal_Storage: { - if (is_in(var)) { - // If you trip this assert, it means the program is using raw 'in' variables. 
You - // should either specialize the program (Compiler::specialize) to bake in the final - // values of the 'in' variables, or not use 'in' variables (maybe you meant to use - // 'uniform' instead?). - SkASSERT(false); - return Location::MakeInvalid(); - } - int offset = 0; - bool isUniform = is_uniform(var); - for (const auto& e : fProgram) { - if (e.fKind == ProgramElement::kVar_Kind) { - VarDeclarations& decl = (VarDeclarations&) e; - for (const auto& v : decl.fVars) { - const Variable* declVar = ((VarDeclaration&) *v).fVar; - if (declVar->fModifiers.fLayout.fBuiltin >= 0 || is_in(*declVar)) { - continue; - } - if (isUniform != is_uniform(*declVar)) { - continue; - } - if (declVar == &var) { - SkASSERT(offset <= 255); - return { offset, isUniform ? Storage::kUniform : Storage::kGlobal }; - } - offset += SlotCount(declVar->fType); - } - } - } - SkASSERT(false); - return Location::MakeInvalid(); - } - default: - SkASSERT(false); - return Location::MakeInvalid(); - } -} - ByteCodeGenerator::Location ByteCodeGenerator::getLocation(const Expression& expr) { switch (expr.fKind) { case Expression::kFieldAccess_Kind: { - const FieldAccess& f = (const FieldAccess&)expr; - Location baseLoc = this->getLocation(*f.fBase); + const FieldAccess& f = (const FieldAccess&) expr; + Location result = this->getLocation(*f.fBase); int offset = 0; for (int i = 0; i < f.fFieldIndex; ++i) { offset += SlotCount(*f.fBase->fType.fields()[i].fType); } - if (baseLoc.isOnStack()) { - if (offset != 0) { - this->write(ByteCodeInstruction::kPushImmediate); - this->write32(offset); - this->write(ByteCodeInstruction::kAddI); - this->write8(1); - } - return baseLoc; - } else { - return baseLoc + offset; - } + return result.offset(*this, offset); } case Expression::kIndex_Kind: { - const IndexExpression& i = (const IndexExpression&)expr; - int stride = SlotCount(i.fType); - int length = i.fBase->fType.columns(); - SkASSERT(length <= 255); - int offset = -1; - if (i.fIndex->isConstant()) { - int64_t 
index = i.fIndex->getConstantInt(); + const IndexExpression& idx = (const IndexExpression&) expr; + int stride = SlotCount(idx.fType); + int length = idx.fBase->fType.columns(); + Location result = this->getLocation(*idx.fBase); + if (idx.fIndex->isConstant()) { + int64_t index = idx.fIndex->getConstantInt(); if (index < 0 || index >= length) { - fErrors.error(i.fIndex->fOffset, "Array index out of bounds."); - return Location::MakeInvalid(); + fErrors.error(idx.fIndex->fOffset, "Array index out of bounds"); + return result; } - offset = index * stride; + return result.offset(*this, index * stride); } else { - if (i.fIndex->hasSideEffects()) { - // Having a side-effect in an indexer is technically safe for an rvalue, - // but with lvalues we have to evaluate the indexer twice, so make it an error. - fErrors.error(i.fIndex->fOffset, - "Index expressions with side-effects not supported in byte code."); - return Location::MakeInvalid(); - } - this->writeExpression(*i.fIndex); - this->write(ByteCodeInstruction::kClampIndex); - this->write8(length); - if (stride != 1) { - this->write(ByteCodeInstruction::kPushImmediate); - this->write32(stride); - this->write(ByteCodeInstruction::kMultiplyI); - this->write8(1); - } + ByteCode::Register index = this->next(1); + this->writeExpression(*idx.fIndex, index); + this->write(ByteCode::Instruction::kBoundsCheck); + this->write(index); + this->write(length); + ByteCode::Register imm = this->next(1); + this->write(ByteCode::Instruction::kImmediate); + this->write(imm); + this->write(ByteCode::Immediate{stride}); + ByteCode::Register offset = this->next(1); + this->write(ByteCode::Instruction::kMultiplyI); + this->write(offset); + this->write(index); + this->write(imm); + return result.offset(*this, offset); } - Location baseLoc = this->getLocation(*i.fBase); - - // Are both components known statically? 
- if (!baseLoc.isOnStack() && offset >= 0) { - return baseLoc + offset; - } - - // At least one component is dynamic (and on the stack). - - // If the other component is zero, we're done - if (baseLoc.fSlot == 0 || offset == 0) { - return baseLoc.makeOnStack(); - } - - // Push the non-dynamic component (if any) to the stack, then add the two - if (!baseLoc.isOnStack()) { - this->write(ByteCodeInstruction::kPushImmediate); - this->write32(baseLoc.fSlot); - } - if (offset >= 0) { - this->write(ByteCodeInstruction::kPushImmediate); - this->write32(offset); - } - this->write(ByteCodeInstruction::kAddI); - this->write8(1); - return baseLoc.makeOnStack(); } case Expression::kSwizzle_Kind: { - const Swizzle& s = (const Swizzle&)expr; + const Swizzle& s = (const Swizzle&) expr; SkASSERT(swizzle_is_simple(s)); - Location baseLoc = this->getLocation(*s.fBase); - int offset = s.fComponents[0]; - if (baseLoc.isOnStack()) { - if (offset != 0) { - this->write(ByteCodeInstruction::kPushImmediate); - this->write32(offset); - this->write(ByteCodeInstruction::kAddI); - this->write8(1); - } - return baseLoc; - } else { - return baseLoc + offset; - } + return this->getLocation(*s.fBase).offset(*this, s.fComponents[0]); } case Expression::kVariableReference_Kind: { - const Variable& var = ((const VariableReference&)expr).fVariable; + const Variable& var = ((const VariableReference&) expr).fVariable; return this->getLocation(var); } default: SkASSERT(false); - return Location::MakeInvalid(); + return ByteCode::Pointer{0}; } } -void ByteCodeGenerator::write8(uint8_t b) { - fCode->push_back(b); -} - -void ByteCodeGenerator::write16(uint16_t i) { - size_t n = fCode->size(); - fCode->resize(n+2); - memcpy(fCode->data() + n, &i, 2); -} - -void ByteCodeGenerator::write32(uint32_t i) { - size_t n = fCode->size(); - fCode->resize(n+4); - memcpy(fCode->data() + n, &i, 4); -} - -void ByteCodeGenerator::write(ByteCodeInstruction i, int count) { - switch (i) { - case 
ByteCodeInstruction::kLoopBegin: this->enterLoop(); break; - case ByteCodeInstruction::kLoopEnd: this->exitLoop(); break; - - case ByteCodeInstruction::kMaskPush: this->enterCondition(); break; - case ByteCodeInstruction::kMaskPop: - case ByteCodeInstruction::kMaskBlend: this->exitCondition(); break; - default: /* Do nothing */ break; - } - instruction val = (instruction) i; - size_t n = fCode->size(); - fCode->resize(n + sizeof(val)); - memcpy(fCode->data() + n, &val, sizeof(val)); - fStackCount += StackUsage(i, count); - fMaxStackCount = std::max(fMaxStackCount, fStackCount); -} - -static ByteCodeInstruction vector_instruction(ByteCodeInstruction base, int count) { - SkASSERT(count >= 1 && count <= 4); - return ((ByteCodeInstruction) ((int) base + 1 - count)); -} - -void ByteCodeGenerator::writeTypedInstruction(const Type& type, ByteCodeInstruction s, - ByteCodeInstruction u, ByteCodeInstruction f, - int count, bool writeCount) { - switch (type_category(type)) { - case TypeCategory::kSigned: - this->write(vector_instruction(s, count)); - break; - case TypeCategory::kUnsigned: - this->write(vector_instruction(u, count)); - break; - case TypeCategory::kFloat: { - if (count > 4) { - this->write((ByteCodeInstruction)((int)f + 1), count); - } else { - this->write(vector_instruction(f, count)); - } - break; +Variable::Storage ByteCodeGenerator::getStorage(const Expression& expr) { + switch (expr.fKind) { + case Expression::kFieldAccess_Kind: { + const FieldAccess& f = (const FieldAccess&) expr; + return this->getStorage(*f.fBase); } - default: - SkASSERT(false); - } - if (writeCount) { - this->write8(count); - } -} - -bool ByteCodeGenerator::writeBinaryExpression(const BinaryExpression& b, bool discard) { - if (b.fOperator == Token::Kind::EQ) { - std::unique_ptr lvalue = this->getLValue(*b.fLeft); - this->writeExpression(*b.fRight); - lvalue->store(discard); - discard = false; - return discard; - } - const Type& lType = b.fLeft->fType; - const Type& rType = 
b.fRight->fType; - bool lVecOrMtx = (lType.kind() == Type::kVector_Kind || lType.kind() == Type::kMatrix_Kind); - bool rVecOrMtx = (rType.kind() == Type::kVector_Kind || rType.kind() == Type::kMatrix_Kind); - Token::Kind op; - std::unique_ptr lvalue; - if (is_assignment(b.fOperator)) { - lvalue = this->getLValue(*b.fLeft); - lvalue->load(); - op = remove_assignment(b.fOperator); - } else { - this->writeExpression(*b.fLeft); - op = b.fOperator; - if (!lVecOrMtx && rVecOrMtx) { - for (int i = SlotCount(rType); i > 1; --i) { - this->write(ByteCodeInstruction::kDup); - this->write8(1); - } + case Expression::kIndex_Kind: { + const IndexExpression& idx = (const IndexExpression&) expr; + return this->getStorage(*idx.fBase); } - } - int count = std::max(SlotCount(lType), SlotCount(rType)); - SkDEBUGCODE(TypeCategory tc = type_category(lType)); - switch (op) { - case Token::Kind::LOGICALAND: { - SkASSERT(tc == SkSL::TypeCategory::kBool && count == 1); - this->write(ByteCodeInstruction::kDup); - this->write8(1); - this->write(ByteCodeInstruction::kMaskPush); - this->write(ByteCodeInstruction::kBranchIfAllFalse); - DeferredLocation falseLocation(this); - this->writeExpression(*b.fRight); - this->write(ByteCodeInstruction::kAndB); - falseLocation.set(); - this->write(ByteCodeInstruction::kMaskPop); - return false; + case Expression::kSwizzle_Kind: { + const Swizzle& s = (const Swizzle&) expr; + return this->getStorage(*s.fBase); } - case Token::Kind::LOGICALOR: { - SkASSERT(tc == SkSL::TypeCategory::kBool && count == 1); - this->write(ByteCodeInstruction::kDup); - this->write8(1); - this->write(ByteCodeInstruction::kNotB); - this->write(ByteCodeInstruction::kMaskPush); - this->write(ByteCodeInstruction::kBranchIfAllFalse); - DeferredLocation falseLocation(this); - this->writeExpression(*b.fRight); - this->write(ByteCodeInstruction::kOrB); - falseLocation.set(); - this->write(ByteCodeInstruction::kMaskPop); - return false; - } - case Token::Kind::SHL: - case Token::Kind::SHR: 
{ - SkASSERT(count == 1 && (tc == SkSL::TypeCategory::kSigned || - tc == SkSL::TypeCategory::kUnsigned)); - if (!b.fRight->isConstant()) { - fErrors.error(b.fRight->fOffset, "Shift amounts must be constant"); - return false; - } - int64_t shift = b.fRight->getConstantInt(); - if (shift < 0 || shift > 31) { - fErrors.error(b.fRight->fOffset, "Shift amount out of range"); - return false; - } - - if (op == Token::Kind::SHL) { - this->write(ByteCodeInstruction::kShiftLeft); - } else { - this->write(type_category(lType) == TypeCategory::kSigned - ? ByteCodeInstruction::kShiftRightS - : ByteCodeInstruction::kShiftRightU); - } - this->write8(shift); - return false; - } - - default: - break; - } - this->writeExpression(*b.fRight); - if (lVecOrMtx && !rVecOrMtx) { - for (int i = SlotCount(lType); i > 1; --i) { - this->write(ByteCodeInstruction::kDup); - this->write8(1); - } - } - // Special case for M*V, V*M, M*M (but not V*V!) - if (op == Token::Kind::STAR && lVecOrMtx && rVecOrMtx && - !(lType.kind() == Type::kVector_Kind && rType.kind() == Type::kVector_Kind)) { - this->write(ByteCodeInstruction::kMatrixMultiply, - SlotCount(b.fType) - (SlotCount(lType) + SlotCount(rType))); - int rCols = rType.columns(), - rRows = rType.rows(), - lCols = lType.columns(), - lRows = lType.rows(); - // M*V treats the vector as a column - if (rType.kind() == Type::kVector_Kind) { - std::swap(rCols, rRows); - } - SkASSERT(lCols == rRows); - SkASSERT(SlotCount(b.fType) == lRows * rCols); - this->write8(lCols); - this->write8(lRows); - this->write8(rCols); - } else { - switch (op) { - case Token::Kind::EQEQ: - this->writeTypedInstruction(lType, ByteCodeInstruction::kCompareIEQ, - ByteCodeInstruction::kCompareIEQ, - ByteCodeInstruction::kCompareFEQ, - count); - // Collapse to a single bool - for (int i = count; i > 1; --i) { - this->write(ByteCodeInstruction::kAndB); - } - break; - case Token::Kind::GT: - this->writeTypedInstruction(lType, ByteCodeInstruction::kCompareSGT, - 
ByteCodeInstruction::kCompareUGT, - ByteCodeInstruction::kCompareFGT, - count); - break; - case Token::Kind::GTEQ: - this->writeTypedInstruction(lType, ByteCodeInstruction::kCompareSGTEQ, - ByteCodeInstruction::kCompareUGTEQ, - ByteCodeInstruction::kCompareFGTEQ, - count); - break; - case Token::Kind::LT: - this->writeTypedInstruction(lType, ByteCodeInstruction::kCompareSLT, - ByteCodeInstruction::kCompareULT, - ByteCodeInstruction::kCompareFLT, - count); - break; - case Token::Kind::LTEQ: - this->writeTypedInstruction(lType, ByteCodeInstruction::kCompareSLTEQ, - ByteCodeInstruction::kCompareULTEQ, - ByteCodeInstruction::kCompareFLTEQ, - count); - break; - case Token::Kind::MINUS: - this->writeTypedInstruction(lType, ByteCodeInstruction::kSubtractI, - ByteCodeInstruction::kSubtractI, - ByteCodeInstruction::kSubtractF, - count); - break; - case Token::Kind::NEQ: - this->writeTypedInstruction(lType, ByteCodeInstruction::kCompareINEQ, - ByteCodeInstruction::kCompareINEQ, - ByteCodeInstruction::kCompareFNEQ, - count); - // Collapse to a single bool - for (int i = count; i > 1; --i) { - this->write(ByteCodeInstruction::kOrB); - } - break; - case Token::Kind::PERCENT: - this->writeTypedInstruction(lType, ByteCodeInstruction::kRemainderS, - ByteCodeInstruction::kRemainderU, - ByteCodeInstruction::kRemainderF, - count); - break; - case Token::Kind::PLUS: - this->writeTypedInstruction(lType, ByteCodeInstruction::kAddI, - ByteCodeInstruction::kAddI, - ByteCodeInstruction::kAddF, - count); - break; - case Token::Kind::SLASH: - this->writeTypedInstruction(lType, ByteCodeInstruction::kDivideS, - ByteCodeInstruction::kDivideU, - ByteCodeInstruction::kDivideF, - count); - break; - case Token::Kind::STAR: - this->writeTypedInstruction(lType, ByteCodeInstruction::kMultiplyI, - ByteCodeInstruction::kMultiplyI, - ByteCodeInstruction::kMultiplyF, - count); - break; - - case Token::Kind::LOGICALXOR: - SkASSERT(tc == SkSL::TypeCategory::kBool && count == 1); - 
this->write(ByteCodeInstruction::kXorB); - break; - - case Token::Kind::BITWISEAND: - SkASSERT(count == 1 && (tc == SkSL::TypeCategory::kSigned || - tc == SkSL::TypeCategory::kUnsigned)); - this->write(ByteCodeInstruction::kAndB); - break; - case Token::Kind::BITWISEOR: - SkASSERT(count == 1 && (tc == SkSL::TypeCategory::kSigned || - tc == SkSL::TypeCategory::kUnsigned)); - this->write(ByteCodeInstruction::kOrB); - break; - case Token::Kind::BITWISEXOR: - SkASSERT(count == 1 && (tc == SkSL::TypeCategory::kSigned || - tc == SkSL::TypeCategory::kUnsigned)); - this->write(ByteCodeInstruction::kXorB); - break; - - default: - fErrors.error(b.fOffset, SkSL::String::printf("Unsupported binary operator '%s'", - Compiler::OperatorName(op))); - break; - } - } - if (lvalue) { - lvalue->store(discard); - discard = false; - } - return discard; -} - -void ByteCodeGenerator::writeBoolLiteral(const BoolLiteral& b) { - this->write(ByteCodeInstruction::kPushImmediate); - this->write32(b.fValue ? ~0 : 0); -} - -void ByteCodeGenerator::writeConstructor(const Constructor& c) { - for (const auto& arg : c.fArguments) { - this->writeExpression(*arg); - } - if (c.fArguments.size() == 1) { - const Type& inType = c.fArguments[0]->fType; - const Type& outType = c.fType; - TypeCategory inCategory = type_category(inType); - TypeCategory outCategory = type_category(outType); - int inCount = SlotCount(inType); - int outCount = SlotCount(outType); - if (inCategory != outCategory) { - SkASSERT(inCount == outCount); - if (inCategory == TypeCategory::kFloat) { - SkASSERT(outCategory == TypeCategory::kSigned || - outCategory == TypeCategory::kUnsigned); - this->write(vector_instruction(ByteCodeInstruction::kConvertFtoI, outCount)); - } else if (outCategory == TypeCategory::kFloat) { - if (inCategory == TypeCategory::kSigned) { - this->write(vector_instruction(ByteCodeInstruction::kConvertStoF, outCount)); - } else { - SkASSERT(inCategory == TypeCategory::kUnsigned); - 
this->write(vector_instruction(ByteCodeInstruction::kConvertUtoF, outCount)); - } - } else { - SkASSERT(false); - } - } - if (inType.kind() == Type::kMatrix_Kind && outType.kind() == Type::kMatrix_Kind) { - this->write(ByteCodeInstruction::kMatrixToMatrix, - SlotCount(outType) - SlotCount(inType)); - this->write8(inType.columns()); - this->write8(inType.rows()); - this->write8(outType.columns()); - this->write8(outType.rows()); - } else if (inCount != outCount) { - SkASSERT(inCount == 1); - if (outType.kind() == Type::kMatrix_Kind) { - this->write(ByteCodeInstruction::kScalarToMatrix, SlotCount(outType) - 1); - this->write8(outType.columns()); - this->write8(outType.rows()); - } else { - SkASSERT(outType.kind() == Type::kVector_Kind); - for (; inCount != outCount; ++inCount) { - this->write(ByteCodeInstruction::kDup); - this->write8(1); - } - } - } - } -} - -void ByteCodeGenerator::writeExternalFunctionCall(const ExternalFunctionCall& f) { - int argumentCount = 0; - for (const auto& arg : f.fArguments) { - this->writeExpression(*arg); - argumentCount += SlotCount(arg->fType); - } - this->write(ByteCodeInstruction::kCallExternal, SlotCount(f.fType) - argumentCount); - SkASSERT(argumentCount <= 255); - this->write8(argumentCount); - this->write8(SlotCount(f.fType)); - int index = fOutput->fExternalValues.size(); - fOutput->fExternalValues.push_back(f.fFunction); - SkASSERT(index <= 255); - this->write8(index); -} - -void ByteCodeGenerator::writeExternalValue(const ExternalValueReference& e) { - int count = SlotCount(e.fValue->type()); - this->write(vector_instruction(ByteCodeInstruction::kReadExternal, count)); - this->write8(count); - int index = fOutput->fExternalValues.size(); - fOutput->fExternalValues.push_back(e.fValue); - SkASSERT(index <= 255); - this->write8(index); -} - -void ByteCodeGenerator::writeVariableExpression(const Expression& expr) { - Location location = this->getLocation(expr); - int count = SlotCount(expr.fType); - if (location.isOnStack() || 
count > 4) { - if (!location.isOnStack()) { - this->write(ByteCodeInstruction::kPushImmediate); - this->write32(location.fSlot); - } - this->write(location.selectLoad(ByteCodeInstruction::kLoadExtended, - ByteCodeInstruction::kLoadExtendedGlobal, - ByteCodeInstruction::kLoadExtendedUniform), - count); - this->write8(count); - } else { - this->write(vector_instruction(location.selectLoad(ByteCodeInstruction::kLoad, - ByteCodeInstruction::kLoadGlobal, - ByteCodeInstruction::kLoadUniform), - count)); - this->write8(count); - this->write8(location.fSlot); - } -} - -static inline uint32_t float_to_bits(float x) { - uint32_t u; - memcpy(&u, &x, sizeof(uint32_t)); - return u; -} - -void ByteCodeGenerator::writeFloatLiteral(const FloatLiteral& f) { - this->write(ByteCodeInstruction::kPushImmediate); - this->write32(float_to_bits(f.fValue)); -} - -void ByteCodeGenerator::writeIntrinsicCall(const FunctionCall& c) { - auto found = fIntrinsics.find(c.fFunction.fName); - if (found == fIntrinsics.end()) { - fErrors.error(c.fOffset, String::printf("Unsupported intrinsic: '%s'", - String(c.fFunction.fName).c_str())); - return; - } - int count = SlotCount(c.fArguments[0]->fType); - if (found->second.fIsSpecial) { - SpecialIntrinsic special = found->second.fValue.fSpecial; - switch (special) { - case SpecialIntrinsic::kDot: { - SkASSERT(c.fArguments.size() == 2); - SkASSERT(count == SlotCount(c.fArguments[1]->fType)); - this->write(vector_instruction(ByteCodeInstruction::kMultiplyF, count)); - this->write8(count); - for (int i = count; i > 1; --i) { - this->write(ByteCodeInstruction::kAddF); - this->write8(1); - } - break; - } - default: - SkASSERT(false); - } - } else { - switch (found->second.fValue.fInstruction) { - case ByteCodeInstruction::kCos: - case ByteCodeInstruction::kSin: - case ByteCodeInstruction::kTan: - SkASSERT(c.fArguments.size() > 0); - this->write(vector_instruction(found->second.fValue.fInstruction, count)); - this->write8(count); - break; - case 
ByteCodeInstruction::kSqrt: - SkASSERT(c.fArguments.size() > 0); - this->write(vector_instruction(found->second.fValue.fInstruction, count)); - break; - case ByteCodeInstruction::kInverse2x2: { - SkASSERT(c.fArguments.size() > 0); - auto op = ByteCodeInstruction::kInverse2x2; - switch (count) { - case 4: break; // float2x2 - case 9: op = ByteCodeInstruction::kInverse3x3; break; - case 16: op = ByteCodeInstruction::kInverse4x4; break; - default: SkASSERT(false); - } - this->write(op); - break; - } - default: - SkASSERT(false); - } - } -} - -void ByteCodeGenerator::writeFunctionCall(const FunctionCall& f) { - // Find the index of the function we're calling. We explicitly do not allow calls to functions - // before they're defined. This is an easy-to-understand rule that prevents recursion. - int idx = -1; - for (size_t i = 0; i < fFunctions.size(); ++i) { - if (f.fFunction.matches(fFunctions[i]->fDeclaration)) { - idx = i; - break; - } - } - if (idx == -1) { - for (const auto& arg : f.fArguments) { - this->writeExpression(*arg); - } - this->writeIntrinsicCall(f); - return; - } - - - if (idx > 255) { - fErrors.error(f.fOffset, "Function count limit exceeded"); - return; - } else if (idx >= (int) fFunctions.size()) { - fErrors.error(f.fOffset, "Call to undefined function"); - return; - } - - // We may need to deal with out parameters, so the sequence is tricky - if (int returnCount = SlotCount(f.fType)) { - this->write(ByteCodeInstruction::kReserve, returnCount); - this->write8(returnCount); - } - - int argCount = f.fArguments.size(); - std::vector> lvalues; - for (int i = 0; i < argCount; ++i) { - const auto& param = f.fFunction.fParameters[i]; - const auto& arg = f.fArguments[i]; - if (param->fModifiers.fFlags & Modifiers::kOut_Flag) { - lvalues.emplace_back(this->getLValue(*arg)); - lvalues.back()->load(); - } else { - this->writeExpression(*arg); - } - } - - // The space used by the call is based on the callee, but it also unwinds all of that before - // we 
continue execution. We adjust our max stack depths below. - this->write(ByteCodeInstruction::kCall); - this->write8(idx); - - const ByteCodeFunction* callee = fOutput->fFunctions[idx].get(); - fMaxLoopCount = std::max(fMaxLoopCount, fLoopCount + callee->fLoopCount); - fMaxConditionCount = std::max(fMaxConditionCount, fConditionCount + callee->fConditionCount); - fMaxStackCount = std::max(fMaxStackCount, fStackCount + callee->fLocalCount - + callee->fStackCount); - - // After the called function returns, the stack will still contain our arguments. We have to - // pop them (storing any out parameters back to their lvalues as we go). We glob together slot - // counts for all parameters that aren't out-params, so we can pop them in one big chunk. - int popCount = 0; - auto pop = [&]() { - if (popCount > 4) { - this->write(ByteCodeInstruction::kPopN, popCount); - this->write8(popCount); - } else if (popCount > 0) { - this->write(vector_instruction(ByteCodeInstruction::kPop, popCount)); - } - popCount = 0; - }; - - for (int i = argCount - 1; i >= 0; --i) { - const auto& param = f.fFunction.fParameters[i]; - const auto& arg = f.fArguments[i]; - if (param->fModifiers.fFlags & Modifiers::kOut_Flag) { - pop(); - lvalues.back()->store(true); - lvalues.pop_back(); - } else { - popCount += SlotCount(arg->fType); - } - } - pop(); -} - -void ByteCodeGenerator::writeIntLiteral(const IntLiteral& i) { - this->write(ByteCodeInstruction::kPushImmediate); - this->write32(i.fValue); -} - -void ByteCodeGenerator::writeNullLiteral(const NullLiteral& n) { - // not yet implemented - abort(); -} - -bool ByteCodeGenerator::writePrefixExpression(const PrefixExpression& p, bool discard) { - switch (p.fOperator) { - case Token::Kind::PLUSPLUS: // fall through - case Token::Kind::MINUSMINUS: { - SkASSERT(SlotCount(p.fOperand->fType) == 1); - std::unique_ptr lvalue = this->getLValue(*p.fOperand); - lvalue->load(); - this->write(ByteCodeInstruction::kPushImmediate); - 
this->write32(type_category(p.fType) == TypeCategory::kFloat ? float_to_bits(1.0f) : 1); - if (p.fOperator == Token::Kind::PLUSPLUS) { - this->writeTypedInstruction(p.fType, - ByteCodeInstruction::kAddI, - ByteCodeInstruction::kAddI, - ByteCodeInstruction::kAddF, - 1); - } else { - this->writeTypedInstruction(p.fType, - ByteCodeInstruction::kSubtractI, - ByteCodeInstruction::kSubtractI, - ByteCodeInstruction::kSubtractF, - 1); - } - lvalue->store(discard); - discard = false; - break; - } - case Token::Kind::MINUS: { - this->writeExpression(*p.fOperand); - this->writeTypedInstruction(p.fType, - ByteCodeInstruction::kNegateI, - ByteCodeInstruction::kNegateI, - ByteCodeInstruction::kNegateF, - SlotCount(p.fOperand->fType), - false); - break; - } - case Token::Kind::LOGICALNOT: - case Token::Kind::BITWISENOT: { - SkASSERT(SlotCount(p.fOperand->fType) == 1); - SkDEBUGCODE(TypeCategory tc = type_category(p.fOperand->fType)); - SkASSERT((p.fOperator == Token::Kind::LOGICALNOT && tc == TypeCategory::kBool) || - (p.fOperator == Token::Kind::BITWISENOT && (tc == TypeCategory::kSigned || - tc == TypeCategory::kUnsigned))); - this->writeExpression(*p.fOperand); - this->write(ByteCodeInstruction::kNotB); - break; - } - default: - SkASSERT(false); - } - return discard; -} - -bool ByteCodeGenerator::writePostfixExpression(const PostfixExpression& p, bool discard) { - switch (p.fOperator) { - case Token::Kind::PLUSPLUS: // fall through - case Token::Kind::MINUSMINUS: { - SkASSERT(SlotCount(p.fOperand->fType) == 1); - std::unique_ptr lvalue = this->getLValue(*p.fOperand); - lvalue->load(); - // If we're not supposed to discard the result, then make a copy *before* the +/- - if (!discard) { - this->write(ByteCodeInstruction::kDup); - this->write8(1); - } - this->write(ByteCodeInstruction::kPushImmediate); - this->write32(type_category(p.fType) == TypeCategory::kFloat ? 
float_to_bits(1.0f) : 1); - if (p.fOperator == Token::Kind::PLUSPLUS) { - this->writeTypedInstruction(p.fType, - ByteCodeInstruction::kAddI, - ByteCodeInstruction::kAddI, - ByteCodeInstruction::kAddF, - 1); - } else { - this->writeTypedInstruction(p.fType, - ByteCodeInstruction::kSubtractI, - ByteCodeInstruction::kSubtractI, - ByteCodeInstruction::kSubtractF, - 1); - } - // Always consume the result as part of the store - lvalue->store(true); - discard = false; - break; - } - default: - SkASSERT(false); - } - return discard; -} - -void ByteCodeGenerator::writeSwizzle(const Swizzle& s) { - if (swizzle_is_simple(s)) { - this->writeVariableExpression(s); - return; - } - - switch (s.fBase->fKind) { case Expression::kVariableReference_Kind: { - Location location = this->getLocation(*s.fBase); - this->write(location.selectLoad(ByteCodeInstruction::kLoadSwizzle, - ByteCodeInstruction::kLoadSwizzleGlobal, - ByteCodeInstruction::kLoadSwizzleUniform), - s.fComponents.size()); - this->write8(location.fSlot); - this->write8(s.fComponents.size()); - for (int c : s.fComponents) { - this->write8(c); - } - break; + const Variable& var = ((const VariableReference&) expr).fVariable; + return var.fStorage; } default: - this->writeExpression(*s.fBase); - this->write(ByteCodeInstruction::kSwizzle, - s.fComponents.size() - s.fBase->fType.columns()); - this->write8(s.fBase->fType.columns()); - this->write8(s.fComponents.size()); - for (int c : s.fComponents) { - this->write8(c); - } - } -} - -void ByteCodeGenerator::writeTernaryExpression(const TernaryExpression& t) { - int count = SlotCount(t.fType); - SkASSERT(count == SlotCount(t.fIfTrue->fType)); - SkASSERT(count == SlotCount(t.fIfFalse->fType)); - - this->writeExpression(*t.fTest); - this->write(ByteCodeInstruction::kMaskPush); - this->writeExpression(*t.fIfTrue); - this->write(ByteCodeInstruction::kMaskNegate); - this->writeExpression(*t.fIfFalse); - this->write(ByteCodeInstruction::kMaskBlend, count); - this->write8(count); -} - 
-void ByteCodeGenerator::writeExpression(const Expression& e, bool discard) { - switch (e.fKind) { - case Expression::kBinary_Kind: - discard = this->writeBinaryExpression((BinaryExpression&) e, discard); - break; - case Expression::kBoolLiteral_Kind: - this->writeBoolLiteral((BoolLiteral&) e); - break; - case Expression::kConstructor_Kind: - this->writeConstructor((Constructor&) e); - break; - case Expression::kExternalFunctionCall_Kind: - this->writeExternalFunctionCall((ExternalFunctionCall&) e); - break; - case Expression::kExternalValue_Kind: - this->writeExternalValue((ExternalValueReference&) e); - break; - case Expression::kFieldAccess_Kind: - case Expression::kIndex_Kind: - case Expression::kVariableReference_Kind: - this->writeVariableExpression(e); - break; - case Expression::kFloatLiteral_Kind: - this->writeFloatLiteral((FloatLiteral&) e); - break; - case Expression::kFunctionCall_Kind: - this->writeFunctionCall((FunctionCall&) e); - break; - case Expression::kIntLiteral_Kind: - this->writeIntLiteral((IntLiteral&) e); - break; - case Expression::kNullLiteral_Kind: - this->writeNullLiteral((NullLiteral&) e); - break; - case Expression::kPrefix_Kind: - discard = this->writePrefixExpression((PrefixExpression&) e, discard); - break; - case Expression::kPostfix_Kind: - discard = this->writePostfixExpression((PostfixExpression&) e, discard); - break; - case Expression::kSwizzle_Kind: - this->writeSwizzle((Swizzle&) e); - break; - case Expression::kTernary_Kind: - this->writeTernaryExpression((TernaryExpression&) e); - break; - default: -#ifdef SK_DEBUG - printf("unsupported expression %s\n", e.description().c_str()); -#endif SkASSERT(false); - } - if (discard) { - int count = SlotCount(e.fType); - if (count > 4) { - this->write(ByteCodeInstruction::kPopN, count); - this->write8(count); - } else if (count != 0) { - this->write(vector_instruction(ByteCodeInstruction::kPop, count)); - } - discard = false; + return Variable::kLocal_Storage; } } -class 
ByteCodeExternalValueLValue : public ByteCodeGenerator::LValue { -public: - ByteCodeExternalValueLValue(ByteCodeGenerator* generator, ExternalValue& value, int index) - : INHERITED(*generator) - , fCount(ByteCodeGenerator::SlotCount(value.type())) - , fIndex(index) {} +ByteCode::Instruction ByteCodeGenerator::getLoadInstruction(ByteCodeGenerator::Location location, + Variable::Storage storage) { + switch (storage) { + case Variable::kGlobal_Storage: + switch (location.fKind) { + case Location::kPointer_Kind: return ByteCode::Instruction::kLoadDirect; + case Location::kRegister_Kind: return ByteCode::Instruction::kLoad; + } + case Variable::kParameter_Storage: + switch (location.fKind) { + case Location::kPointer_Kind: return ByteCode::Instruction::kLoadParameterDirect; + case Location::kRegister_Kind: return ByteCode::Instruction::kLoadParameter; + } + case Variable::kLocal_Storage: + switch (location.fKind) { + case Location::kPointer_Kind: return ByteCode::Instruction::kLoadStackDirect; + case Location::kRegister_Kind: return ByteCode::Instruction::kLoadStack; + } + default: + break; + } + SkASSERT(false); + return ByteCode::Instruction::kNop; +} - void load() override { - fGenerator.write(vector_instruction(ByteCodeInstruction::kReadExternal, fCount)); - fGenerator.write8(fCount); - fGenerator.write8(fIndex); +ByteCode::Instruction ByteCodeGenerator::getStoreInstruction(ByteCodeGenerator::Location location, + Variable::Storage storage) { + switch (storage) { + case Variable::kGlobal_Storage: + switch (location.fKind) { + case Location::kPointer_Kind: return ByteCode::Instruction::kStoreDirect; + case Location::kRegister_Kind: return ByteCode::Instruction::kStore; + } + case Variable::kParameter_Storage: + switch (location.fKind) { + case Location::kPointer_Kind: return ByteCode::Instruction::kStoreParameterDirect; + case Location::kRegister_Kind: return ByteCode::Instruction::kStoreParameter; + } + case Variable::kLocal_Storage: + switch (location.fKind) { + 
case Location::kPointer_Kind: return ByteCode::Instruction::kStoreStackDirect; + case Location::kRegister_Kind: return ByteCode::Instruction::kStoreStack; + } + default: + break; + } + SkASSERT(false); + return ByteCode::Instruction::kNop; +} + +class ByteCodeSimpleLValue : public ByteCodeGenerator::LValue { +public: + ByteCodeSimpleLValue(ByteCodeGenerator* generator, ByteCodeGenerator::Location location, + int count, ByteCode::Instruction load, ByteCode::Instruction store) + : INHERITED(*generator) + , fLocation(location) + , fCount(count) + , fLoad(load) + , fStore(store) {} + + void load(ByteCode::Register result) override { + for (int i = 0; i < fCount; ++i) { + ByteCodeGenerator::Location final = fLocation.offset(fGenerator, i); + fGenerator.write(fLoad); + fGenerator.write(result + i); + fGenerator.write(final); + } } - void store(bool discard) override { - if (!discard) { - fGenerator.write(vector_instruction(ByteCodeInstruction::kDup, fCount)); - fGenerator.write8(fCount); + void store(ByteCode::Register src) override { + for (int i = 0; i < fCount; ++i) { + ByteCodeGenerator::Location final = fLocation.offset(fGenerator, i); + fGenerator.write(fStore); + fGenerator.write(final); + fGenerator.write(src + i); } - fGenerator.write(vector_instruction(ByteCodeInstruction::kWriteExternal, fCount)); - fGenerator.write8(fCount); - fGenerator.write8(fIndex); } private: - typedef LValue INHERITED; + ByteCodeGenerator::Location fLocation; int fCount; - int fIndex; + ByteCode::Instruction fLoad; + + ByteCode::Instruction fStore; + + typedef ByteCodeGenerator::LValue INHERITED; }; class ByteCodeSwizzleLValue : public ByteCodeGenerator::LValue { public: - ByteCodeSwizzleLValue(ByteCodeGenerator* generator, const Swizzle& swizzle) + ByteCodeSwizzleLValue(ByteCodeGenerator* generator, const Swizzle* swizzle) : INHERITED(*generator) - , fSwizzle(swizzle) {} + , fSwizzle(*swizzle) {} - void load() override { - fGenerator.writeSwizzle(fSwizzle); + void 
load(ByteCode::Register result) override { + fGenerator.writeSwizzle(fSwizzle, result); } - void store(bool discard) override { - int count = fSwizzle.fComponents.size(); - if (!discard) { - fGenerator.write(vector_instruction(ByteCodeInstruction::kDup, count)); - fGenerator.write8(count); - } - ByteCodeGenerator::Location location = fGenerator.getLocation(*fSwizzle.fBase); - if (location.isOnStack()) { - fGenerator.write(location.selectStore(ByteCodeInstruction::kStoreSwizzleIndirect, - ByteCodeInstruction::kStoreSwizzleIndirectGlobal), - count); - } else { - fGenerator.write(location.selectStore(ByteCodeInstruction::kStoreSwizzle, - ByteCodeInstruction::kStoreSwizzleGlobal), - count); - fGenerator.write8(location.fSlot); - } - fGenerator.write8(count); - for (int c : fSwizzle.fComponents) { - fGenerator.write8(c); + void store(ByteCode::Register src) override { + ByteCodeGenerator::Location target = fGenerator.getLocation(*fSwizzle.fBase); + ByteCode::Instruction inst = fGenerator.getStoreInstruction( + target, + fGenerator.getStorage(*fSwizzle.fBase)); + for (size_t i = 0; i < fSwizzle.fComponents.size(); ++i) { + ByteCodeGenerator::Location final = target.offset(fGenerator, fSwizzle.fComponents[i]); + fGenerator.write(inst); + fGenerator.write(final); + fGenerator.write(src + i); } } private: const Swizzle& fSwizzle; - typedef LValue INHERITED; + typedef ByteCodeGenerator::LValue INHERITED; }; -class ByteCodeExpressionLValue : public ByteCodeGenerator::LValue { +class ByteCodeExternalValueLValue : public ByteCodeGenerator::LValue { public: - ByteCodeExpressionLValue(ByteCodeGenerator* generator, const Expression& expr) + ByteCodeExternalValueLValue(ByteCodeGenerator* generator, ExternalValue& value, int index) : INHERITED(*generator) - , fExpression(expr) {} - - void load() override { - fGenerator.writeVariableExpression(fExpression); + , fIndex(index) + , fSlotCount(ByteCodeGenerator::SlotCount(value.type())) { + SkASSERT(fSlotCount <= 4); } - void store(bool 
discard) override { - int count = ByteCodeGenerator::SlotCount(fExpression.fType); - if (!discard) { - if (count > 4) { - fGenerator.write(ByteCodeInstruction::kDupN, count); - fGenerator.write8(count); - } else { - fGenerator.write(vector_instruction(ByteCodeInstruction::kDup, count)); - fGenerator.write8(count); - } - } - ByteCodeGenerator::Location location = fGenerator.getLocation(fExpression); - if (location.isOnStack() || count > 4) { - if (!location.isOnStack()) { - fGenerator.write(ByteCodeInstruction::kPushImmediate); - fGenerator.write32(location.fSlot); - } - fGenerator.write(location.selectStore(ByteCodeInstruction::kStoreExtended, - ByteCodeInstruction::kStoreExtendedGlobal), - count); - fGenerator.write8(count); - } else { - fGenerator.write( - vector_instruction(location.selectStore(ByteCodeInstruction::kStore, - ByteCodeInstruction::kStoreGlobal), - count)); - fGenerator.write8(location.fSlot); - } + void load(ByteCode::Register result) override { + fGenerator.write(ByteCode::Instruction::kReadExternal); + fGenerator.write(result); + fGenerator.write((uint8_t) fSlotCount); + fGenerator.write((uint8_t) fIndex); + } + + void store(ByteCode::Register src) override { + fGenerator.write(ByteCode::Instruction::kWriteExternal); + fGenerator.write((uint8_t) fIndex); + fGenerator.write((uint8_t) fSlotCount); + fGenerator.write(src); } private: typedef LValue INHERITED; - const Expression& fExpression; + int fIndex; + + int fSlotCount; }; -std::unique_ptr ByteCodeGenerator::getLValue(const Expression& e) { - switch (e.fKind) { +std::unique_ptr ByteCodeGenerator::getLValue(const Expression& expr) { + switch (expr.fKind) { case Expression::kExternalValue_Kind: { - ExternalValue* value = ((ExternalValueReference&) e).fValue; + ExternalValue* value = ((ExternalValueReference&) expr).fValue; int index = fOutput->fExternalValues.size(); fOutput->fExternalValues.push_back(value); SkASSERT(index <= 255); @@ -1460,169 +382,954 @@ std::unique_ptr 
ByteCodeGenerator::getLValue(const Ex } case Expression::kFieldAccess_Kind: case Expression::kIndex_Kind: - case Expression::kVariableReference_Kind: - return std::unique_ptr(new ByteCodeExpressionLValue(this, e)); - case Expression::kSwizzle_Kind: { - const Swizzle& s = (const Swizzle&) e; - return swizzle_is_simple(s) - ? std::unique_ptr(new ByteCodeExpressionLValue(this, e)) - : std::unique_ptr(new ByteCodeSwizzleLValue(this, s)); + case Expression::kVariableReference_Kind: { + Location location = this->getLocation(expr); + Variable::Storage storage = this->getStorage(expr); + ByteCode::Instruction loadInst = this->getLoadInstruction(location, storage); + ByteCode::Instruction storeInst = this->getStoreInstruction(location, storage); + return std::unique_ptr(new ByteCodeSimpleLValue(this, location, + SlotCount(expr.fType), + loadInst, storeInst)); } + case Expression::kSwizzle_Kind: + return std::unique_ptr(new ByteCodeSwizzleLValue(this, &(Swizzle&) expr)); + default: + ABORT("unsupported lvalue\n"); + } +} + +ByteCode::Register ByteCodeGenerator::next(int count) { + SkASSERT(fNextRegister + count <= ByteCode::kRegisterMax); + fNextRegister += count; + return ByteCode::Register{(uint16_t) (fNextRegister - count)}; +} + +static TypeCategory type_category(const Type& type) { + switch (type.kind()) { + case Type::Kind::kVector_Kind: + case Type::Kind::kMatrix_Kind: + return type_category(type.componentType()); + default: + String name = type.displayName(); + if (name == "bool") { + return TypeCategory::kBool; + } else if (name == "int" || name == "short") { + return TypeCategory::kSigned; + } else if (name == "uint" || name == "ushort") { + return TypeCategory::kUnsigned; + } else { + SkASSERT(name == "float" || name == "half"); + return TypeCategory::kFloat; + } + ABORT("unsupported type: %s\n", name.c_str()); + } +} + +void ByteCodeGenerator::writeTypedInstruction(const Type& type, ByteCode::Instruction s, + ByteCode::Instruction u, ByteCode::Instruction f) { + 
switch (type_category(type)) { + case TypeCategory::kSigned: + this->write(s); + break; + case TypeCategory::kUnsigned: + this->write(u); + break; + case TypeCategory::kFloat: { + this->write(f); + break; + } + default: + SkASSERT(false); + } +} + +void ByteCodeGenerator::writeBinaryInstruction(const Type& operandType, + ByteCode::Register left, + ByteCode::Register right, + ByteCode::Instruction s, + ByteCode::Instruction u, + ByteCode::Instruction f, + ByteCode::Register result) { + for (int i = 0; i < SlotCount(operandType); ++i) { + this->writeTypedInstruction(operandType, s, u, f); + this->write(result + i); + this->write(left + i); + this->write(right + i); + } +} + +void ByteCodeGenerator::writeBinaryExpression(const BinaryExpression& b, + ByteCode::Register result) { + if (b.fOperator == Token::Kind::EQ) { + std::unique_ptr lvalue = this->getLValue(*b.fLeft); + this->writeExpression(*b.fRight, result); + lvalue->store(result); + return; + } + const Type& lType = b.fLeft->fType; + const Type& rType = b.fRight->fType; + bool lVecOrMtx = (lType.kind() == Type::kVector_Kind || lType.kind() == Type::kMatrix_Kind); + bool rVecOrMtx = (rType.kind() == Type::kVector_Kind || rType.kind() == Type::kMatrix_Kind); + const Type* operandType; + if (!lVecOrMtx && rVecOrMtx) { + operandType = &rType; + } else { + operandType = &lType; + } + Token::Kind op; + std::unique_ptr lvalue; + ByteCode::Register left; + switch (b.fOperator) { + case Token::Kind::LOGICALAND: + case Token::Kind::LOGICALANDEQ: + case Token::Kind::LOGICALOR: + case Token::Kind::LOGICALOREQ: + left = result; + break; + default: + left = this->next(SlotCount(*operandType)); + } + if (is_assignment(b.fOperator)) { + lvalue = this->getLValue(*b.fLeft); + lvalue->load(left); + op = remove_assignment(b.fOperator); + } else { + this->writeExpression(*b.fLeft, left); + op = b.fOperator; + if (!lVecOrMtx && rVecOrMtx) { + for (int i = 1; i < SlotCount(rType); ++i) { + this->write(ByteCode::Instruction::kCopy); + 
this->write(left + i); + this->write(left); + } + } + } + SkDEBUGCODE(TypeCategory tc = type_category(lType)); + int count = std::max(SlotCount(lType), SlotCount(rType)); + switch (op) { + case Token::Kind::LOGICALAND: { + SkASSERT(left.fIndex == result.fIndex); + this->write(ByteCode::Instruction::kMaskPush); + ++fConditionCount; + this->write(left); + this->write(ByteCode::Instruction::kBranchIfAllFalse); + DeferredLocation falseLocation(this); + SkASSERT(SlotCount(b.fRight->fType) == 1); + ByteCode::Register right = this->next(1); + this->writeExpression(*b.fRight, right); + this->write(ByteCode::Instruction::kAnd); + this->write(result); + this->write(left); + this->write(right); + falseLocation.set(); + --fConditionCount; + this->write(ByteCode::Instruction::kMaskPop); + return; + } + case Token::Kind::LOGICALOR: { + SkASSERT(left.fIndex == result.fIndex); + ByteCode::Register mask = this->next(1); + this->write(ByteCode::Instruction::kNot); + this->write(mask); + this->write(left); + this->write(ByteCode::Instruction::kMaskPush); + ++fConditionCount; + this->write(mask); + this->write(ByteCode::Instruction::kBranchIfAllFalse); + DeferredLocation falseLocation(this); + SkASSERT(SlotCount(b.fRight->fType) == 1); + ByteCode::Register right = this->next(1); + this->writeExpression(*b.fRight, right); + this->write(ByteCode::Instruction::kOr); + this->write(result); + this->write(left); + this->write(right); + falseLocation.set(); + --fConditionCount; + this->write(ByteCode::Instruction::kMaskPop); + return; + } + case Token::Kind::SHL: + case Token::Kind::SHR: { + SkASSERT(count == 1 && (tc == SkSL::TypeCategory::kSigned || + tc == SkSL::TypeCategory::kUnsigned)); + if (!b.fRight->isConstant()) { + fErrors.error(b.fRight->fOffset, "Shift amounts must be constant"); + return; + } + int64_t shift = b.fRight->getConstantInt(); + if (shift < 0 || shift > 31) { + fErrors.error(b.fRight->fOffset, "Shift amount out of range"); + return; + } + + if (op == 
Token::Kind::SHL) { + this->write(ByteCode::Instruction::kShiftLeft); + } else { + this->write(type_category(lType) == TypeCategory::kSigned + ? ByteCode::Instruction::kShiftRightS + : ByteCode::Instruction::kShiftRightU); + } + this->write(result); + this->write(left); + this->write((uint8_t) shift); + return; + } + case Token::Kind::STAR: + // Special case for M*V, V*M, M*M (but not V*V!) + if (lType.columns() > 1 && rType.columns() > 1 && + (lType.rows() > 1 || rType.rows() > 1)) { + ByteCode::Register right = this->next(SlotCount(rType)); + this->writeExpression(*b.fRight, right); + int rCols = rType.columns(), + rRows = rType.rows(), + lCols = lType.columns(), + lRows = lType.rows(); + // M*V treats the vector as a column + if (rType.kind() == Type::kVector_Kind) { + std::swap(rCols, rRows); + } + SkASSERT(lCols == rRows); + SkASSERT(SlotCount(b.fType) == lRows * rCols); + this->write(ByteCode::Instruction::kMatrixMultiply); + this->write(result); + this->write(left); + this->write(right); + this->write((uint8_t) lCols); + this->write((uint8_t) lRows); + this->write((uint8_t) rCols); + return; + } + + default: + break; + } + ByteCode::Register right = this->next(SlotCount(*operandType)); + this->writeExpression(*b.fRight, right); + if (lVecOrMtx && !rVecOrMtx) { + for (int i = 1; i < SlotCount(*operandType); ++i) { + this->write(ByteCode::Instruction::kCopy); + this->write(right + i); + this->write(right); + } + } + switch (op) { + case Token::Kind::EQEQ: + this->writeBinaryInstruction(*operandType, left, right, + ByteCode::Instruction::kCompareEQI, + ByteCode::Instruction::kCompareEQI, + ByteCode::Instruction::kCompareEQF, + result); + // Collapse to a single bool + for (int i = 1; i < count; ++i) { + this->write(ByteCode::Instruction::kAnd); + this->write(result); + this->write(result); + this->write(result + i); + } + break; + case Token::Kind::GT: + this->writeBinaryInstruction(*operandType, left, right, + ByteCode::Instruction::kCompareGTS, + 
ByteCode::Instruction::kCompareGTU, + ByteCode::Instruction::kCompareGTF, + result); + break; + case Token::Kind::GTEQ: + this->writeBinaryInstruction(*operandType, left, right, + ByteCode::Instruction::kCompareGTEQS, + ByteCode::Instruction::kCompareGTEQU, + ByteCode::Instruction::kCompareGTEQF, + result); + break; + case Token::Kind::LT: + this->writeBinaryInstruction(*operandType, left, right, + ByteCode::Instruction::kCompareLTS, + ByteCode::Instruction::kCompareLTU, + ByteCode::Instruction::kCompareLTF, + result); + break; + case Token::Kind::LTEQ: + this->writeBinaryInstruction(*operandType, left, right, + ByteCode::Instruction::kCompareLTEQS, + ByteCode::Instruction::kCompareLTEQU, + ByteCode::Instruction::kCompareLTEQF, + result); + break; + case Token::Kind::MINUS: + this->writeBinaryInstruction(*operandType, left, right, + ByteCode::Instruction::kSubtractI, + ByteCode::Instruction::kSubtractI, + ByteCode::Instruction::kSubtractF, + result); + break; + case Token::Kind::NEQ: + this->writeBinaryInstruction(*operandType, left, right, + ByteCode::Instruction::kCompareNEQI, + ByteCode::Instruction::kCompareNEQI, + ByteCode::Instruction::kCompareNEQF, + result); + // Collapse to a single bool + for (int i = 1; i < count; ++i) { + this->write(ByteCode::Instruction::kOr); + this->write(result); + this->write(result); + this->write(result + i); + } + break; + case Token::Kind::PERCENT: + this->writeBinaryInstruction(*operandType, left, right, + ByteCode::Instruction::kRemainderS, + ByteCode::Instruction::kRemainderU, + ByteCode::Instruction::kRemainderF, + result); + break; + case Token::Kind::PLUS: + this->writeBinaryInstruction(*operandType, left, right, + ByteCode::Instruction::kAddI, + ByteCode::Instruction::kAddI, + ByteCode::Instruction::kAddF, + result); + break; + case Token::Kind::SLASH: + this->writeBinaryInstruction(*operandType, left, right, + ByteCode::Instruction::kDivideS, + ByteCode::Instruction::kDivideU, + ByteCode::Instruction::kDivideF, + 
result); + break; + case Token::Kind::STAR: + this->writeBinaryInstruction(*operandType, left, right, + ByteCode::Instruction::kMultiplyI, + ByteCode::Instruction::kMultiplyI, + ByteCode::Instruction::kMultiplyF, + result); + break; + case Token::Kind::LOGICALXOR: { + SkASSERT(tc == SkSL::TypeCategory::kBool); + this->write(ByteCode::Instruction::kXor); + this->write(result); + this->write(left); + this->write(right); + break; + } + case Token::Kind::BITWISEAND: { + SkASSERT(tc == SkSL::TypeCategory::kSigned || tc == SkSL::TypeCategory::kUnsigned); + this->write(ByteCode::Instruction::kAnd); + this->write(result); + this->write(left); + this->write(right); + break; + } + case Token::Kind::BITWISEOR: { + SkASSERT(tc == SkSL::TypeCategory::kSigned || tc == SkSL::TypeCategory::kUnsigned); + this->write(ByteCode::Instruction::kOr); + this->write(result); + this->write(left); + this->write(right); + break; + } + case Token::Kind::BITWISEXOR: { + SkASSERT(tc == SkSL::TypeCategory::kSigned || tc == SkSL::TypeCategory::kUnsigned); + this->write(ByteCode::Instruction::kXor); + this->write(result); + this->write(left); + this->write(right); + break; + } + default: + fErrors.error(b.fOffset, SkSL::String::printf("Unsupported binary operator '%s'", + Compiler::OperatorName(op))); + break; + } + if (lvalue) { + lvalue->store(result); + } +} + +void ByteCodeGenerator::writeConstructor(const Constructor& c, ByteCode::Register result) { + if (c.fType.rows() > 1) { + if (c.fArguments.size() == 1) { + if (SlotCount(c.fArguments[0]->fType) == 1) { + ByteCode::Register v = this->next(1); + this->writeExpression(*c.fArguments[0], v); + this->write(ByteCode::Instruction::kScalarToMatrix); + this->write(result); + this->write(v); + this->write((uint8_t) c.fType.columns()); + this->write((uint8_t) c.fType.rows()); + return; + } else if (c.fArguments[0]->fType.rows() > 1) { + ByteCode::Register v = this->next(SlotCount(c.fArguments[0]->fType)); + this->writeExpression(*c.fArguments[0], v); 
+ this->write(ByteCode::Instruction::kMatrixToMatrix); + this->write(result); + this->write(v); + this->write((uint8_t) c.fArguments[0]->fType.columns()); + this->write((uint8_t) c.fArguments[0]->fType.rows()); + this->write((uint8_t) c.fType.columns()); + this->write((uint8_t) c.fType.rows()); + return; + } + } + int offset = 0; + for (const auto& arg : c.fArguments) { + this->writeExpression(*arg, ByteCode::Register{(uint16_t) (result.fIndex + offset)}); + offset += SlotCount(arg->fType); + } + return; + } + if (c.fArguments.size() == 1 && c.fArguments[0]->fType.columns() == 1 && + c.fType.columns() > 1) { + SkASSERT(SlotCount(c.fArguments[0]->fType) == 1); + ByteCode::Register v = result; + this->writeExpression(*c.fArguments[0], v); + for (int i = 1; i < c.fType.columns(); ++i) { + this->write(ByteCode::Instruction::kCopy); + this->write(v + i); + this->write(v); + } + return; + } + ByteCode::Instruction inst; + switch (type_category(c.fArguments[0]->fType)) { + case TypeCategory::kSigned: + if (type_category(c.fType) == TypeCategory::kFloat) { + inst = ByteCode::Instruction::kSignedToFloat; + } else { + inst = ByteCode::Instruction::kNop; + } + break; + case TypeCategory::kUnsigned: + if (type_category(c.fType) == TypeCategory::kFloat) { + inst = ByteCode::Instruction::kUnsignedToFloat; + } else { + inst = ByteCode::Instruction::kNop; + } + break; + case TypeCategory::kFloat: + if (type_category(c.fType) == TypeCategory::kSigned) { + inst = ByteCode::Instruction::kFloatToSigned; + } else if (type_category(c.fType) == TypeCategory::kUnsigned) { + inst = ByteCode::Instruction::kFloatToUnsigned; + } else { + inst = ByteCode::Instruction::kNop; + } + break; + default: + SkASSERT(false); + return; + } + ByteCode::Register values; + if (inst == ByteCode::Instruction::kNop) { + values = result; + } else { + values = this->next(SlotCount(c.fType)); + } + ByteCode::Register v = values; + for (size_t i = 0; i < c.fArguments.size(); ++i) { + 
this->writeExpression(*c.fArguments[i], v); + v.fIndex += SlotCount(c.fArguments[i]->fType); + } + if (inst != ByteCode::Instruction::kNop) { + v = values; + ByteCode::Register target = result; + for (size_t i = 0; i < c.fArguments.size(); ++i) { + int count = SlotCount(c.fArguments[i]->fType); + for (int j = 0; j < count; ++j) { + this->write(inst); + this->write(target); + ++target.fIndex; + this->write(v + j); + } + } + } +} + +void ByteCodeGenerator::writeExternalFunctionCall(const ExternalFunctionCall& f, + ByteCode::Register result) { + int argumentCount = 0; + for (const auto& arg : f.fArguments) { + argumentCount += SlotCount(arg->fType); + } + ByteCode::Register args = this->next(argumentCount); + argumentCount = 0; + for (const auto& arg : f.fArguments) { + this->writeExpression(*arg, args + argumentCount); + argumentCount += SlotCount(arg->fType); + } + this->write(ByteCode::Instruction::kCallExternal); + this->write(result); + int index = fOutput->fExternalValues.size(); + fOutput->fExternalValues.push_back(f.fFunction); + SkASSERT(index <= 255); + this->write((uint8_t) index); + SkASSERT(SlotCount(f.fType) <= 255); + this->write((uint8_t) SlotCount(f.fType)); + this->write(args); + SkASSERT(argumentCount <= 255); + this->write((uint8_t) argumentCount); +} + +void ByteCodeGenerator::writeExternalValue(const ExternalValueReference& e, + ByteCode::Register result) { + this->write(ByteCode::Instruction::kReadExternal); + this->write(result); + this->write((uint8_t) SlotCount(e.fValue->type())); + int index = fOutput->fExternalValues.size(); + fOutput->fExternalValues.push_back(e.fValue); + SkASSERT(index <= 255); + this->write((uint8_t) index); +} + +void ByteCodeGenerator::writeIntrinsicCall(const FunctionCall& c, Intrinsic intrinsic, + ByteCode::Register result) { + if (intrinsic.fIsSpecial) { + switch (intrinsic.fValue.fSpecial) { + case SpecialIntrinsic::kDot: { + SkASSERT(c.fArguments.size() == 2); + int count = SlotCount(c.fArguments[0]->fType); + 
ByteCode::Register left = this->next(count); + this->writeExpression(*c.fArguments[0], left); + ByteCode::Register right = this->next(count); + this->writeExpression(*c.fArguments[1], right); + ByteCode::Register product = this->next(count); + for (int i = 0; i < count; ++i) { + this->writeTypedInstruction(c.fType, + ByteCode::Instruction::kMultiplyI, + ByteCode::Instruction::kMultiplyI, + ByteCode::Instruction::kMultiplyF); + this->write(product + i); + this->write(left + i); + this->write(right + i); + } + ByteCode::Register total = product; + for (int i = 1; i < count; ++i) { + this->writeTypedInstruction(c.fType, + ByteCode::Instruction::kAddI, + ByteCode::Instruction::kAddI, + ByteCode::Instruction::kAddF); + ByteCode::Register sum = i == count - 1 ? result : this->next(1); + this->write(sum); + this->write(total); + this->write(product + i); + total = sum; + } + break; + } + case SpecialIntrinsic::kInverse: { + SkASSERT(c.fArguments.size() == 1); + int count = SlotCount(c.fArguments[0]->fType); + ByteCode::Register arg = this->next(count); + this->writeExpression(*c.fArguments[0], arg); + switch (SlotCount(c.fArguments[0]->fType)) { + case 4: this->write(ByteCode::Instruction::kInverse2x2); break; + case 9: this->write(ByteCode::Instruction::kInverse3x3); break; + case 16: this->write(ByteCode::Instruction::kInverse4x4); break; + default: SkASSERT(false); + } + this->write(result); + this->write(arg); + break; + } + } + } else { + std::vector argRegs; + for (const auto& expr : c.fArguments) { + ByteCode::Register reg = this->next(SlotCount(expr->fType)); + this->writeExpression(*expr, reg); + argRegs.push_back(reg); + } + this->write(intrinsic.fValue.fInstruction); + if (c.fType.fName != "void") { + this->write(result); + } + for (ByteCode::Register arg : argRegs) { + this->write(arg); + } + } +} + +void ByteCodeGenerator::writeFunctionCall(const FunctionCall& c, ByteCode::Register result) { + auto found = fIntrinsics.find(c.fFunction.fName); + if (found != 
fIntrinsics.end()) { + return this->writeIntrinsicCall(c, found->second, result); + } + int argCount = c.fArguments.size(); + std::vector> lvalues; + int parameterSlotCount = 0; + for (const auto& p : c.fFunction.fParameters) { + parameterSlotCount += SlotCount(p->fType); + } + ByteCode::Register argStart = this->next(parameterSlotCount); + ByteCode::Register nextArg = argStart; + for (int i = 0; i < argCount; ++i) { + const auto& param = c.fFunction.fParameters[i]; + const auto& arg = c.fArguments[i]; + if (param->fModifiers.fFlags & Modifiers::kOut_Flag) { + lvalues.emplace_back(this->getLValue(*arg)); + lvalues.back()->load(nextArg); + } else { + this->writeExpression(*arg, nextArg); + } + nextArg.fIndex += SlotCount(arg->fType); + } + // Find the index of the function we're calling. We explicitly do not allow calls to functions + // before they're defined. This is an easy-to-understand rule that prevents recursion. + size_t idx; + for (idx = 0; idx < fFunctions.size(); ++idx) { + if (c.fFunction.matches(fFunctions[idx]->fDeclaration)) { + break; + } + } + if (idx > 255) { + fErrors.error(c.fOffset, "Function count limit exceeded"); + return; + } else if (idx >= fOutput->fFunctions.size()) { + fErrors.error(c.fOffset, "Call to undefined function"); + return; + } + + this->write(ByteCode::Instruction::kCall); + this->write(result); + this->write((uint8_t) idx); + this->write(argStart); + nextArg = argStart; + auto lvalue = lvalues.begin(); + for (int i = 0; i < argCount; ++i) { + const auto& param = c.fFunction.fParameters[i]; + if (param->fModifiers.fFlags & Modifiers::kOut_Flag) { + (*(lvalue++))->store(nextArg); + } + nextArg.fIndex += SlotCount(param->fType); + } +} + +void ByteCodeGenerator::incOrDec(Token::Kind op, Expression& operand, bool prefix, + ByteCode::Register result) { + SkASSERT(op == Token::Kind::PLUSPLUS || op == Token::Kind::MINUSMINUS); + std::unique_ptr lvalue = this->getLValue(operand); + SkASSERT(SlotCount(operand.fType) == 1); + 
ByteCode::Register value; + if (prefix) { + value = this->next(1); + } else { + value = result; + } + lvalue->load(value); + ByteCode::Register one = this->next(1); + this->write(ByteCode::Instruction::kImmediate); + this->write(one); + if (type_category(operand.fType) == TypeCategory::kFloat) { + this->write(ByteCode::Immediate(1.0f)); + } else { + this->write(ByteCode::Immediate((int32_t) 1)); + } + if (op == Token::Kind::PLUSPLUS) { + this->writeTypedInstruction(operand.fType, + ByteCode::Instruction::kAddI, + ByteCode::Instruction::kAddI, + ByteCode::Instruction::kAddF); + } else { + this->writeTypedInstruction(operand.fType, + ByteCode::Instruction::kSubtractI, + ByteCode::Instruction::kSubtractI, + ByteCode::Instruction::kSubtractF); + } + if (prefix) { + this->write(result); + this->write(value); + this->write(one); + lvalue->store(result); + } else { + ByteCode::Register temp = this->next(1); + this->write(temp); + this->write(value); + this->write(one); + lvalue->store(temp); + } +} + +void ByteCodeGenerator::writePostfixExpression(const PostfixExpression& p, + ByteCode::Register result) { + this->incOrDec(p.fOperator, *p.fOperand, false, result); +} + +void ByteCodeGenerator::writePrefixExpression(const PrefixExpression& p, + ByteCode::Register result) { + switch (p.fOperator) { + case Token::Kind::PLUSPLUS: + case Token::Kind::MINUSMINUS: { + return this->incOrDec(p.fOperator, *p.fOperand, true, result); + } + case Token::Kind::MINUS: { + ByteCode::Register src = this->next(SlotCount(p.fType)); + this->writeExpression(*p.fOperand, src); + for (int i = 0; i < SlotCount(p.fType); ++i) { + this->writeTypedInstruction(p.fType, + ByteCode::Instruction::kNegateS, + ByteCode::Instruction::kNegateS, + ByteCode::Instruction::kNegateF); + this->write(result + i); + this->write(src + i); + } + break; + } + case Token::Kind::LOGICALNOT: + case Token::Kind::BITWISENOT: { + ByteCode::Register src = this->next(SlotCount(p.fType)); + this->writeExpression(*p.fOperand, 
src); + for (int i = 0; i < SlotCount(p.fType); ++i) { + this->write(ByteCode::Instruction::kNot); + this->write(result + i); + this->write(src + i); + } + break; + } + default: + SkASSERT(false); + } +} + +void ByteCodeGenerator::writeSwizzle(const Swizzle& s, ByteCode::Register result) { + if (swizzle_is_simple(s)) { + this->writeVariableExpression(s, result); + return; + } + ByteCode::Register base = this->writeExpression(*s.fBase); + for (int i = 0; i < (int) s.fComponents.size(); ++i) { + this->write(ByteCode::Instruction::kCopy); + this->write(result + i); + this->write(base + s.fComponents[i]); + } +} + +void ByteCodeGenerator::writeTernaryExpression(const TernaryExpression& t, + ByteCode::Register result) { + int count = SlotCount(t.fType); + SkASSERT(count == SlotCount(t.fIfTrue->fType)); + SkASSERT(count == SlotCount(t.fIfFalse->fType)); + + ByteCode::Register test = this->writeExpression(*t.fTest); + this->write(ByteCode::Instruction::kMaskPush); + ++fConditionCount; + this->write(test); + ByteCode::Register ifTrue = this->writeExpression(*t.fIfTrue); + this->write(ByteCode::Instruction::kMaskNegate); + ByteCode::Register ifFalse = this->writeExpression(*t.fIfFalse); + --fConditionCount; + this->write(ByteCode::Instruction::kMaskPop); + for (int i = 0; i < count; ++i) { + this->write(ByteCode::Instruction::kSelect); + this->write(result + i); + this->write(test); + this->write(ifTrue + i); + this->write(ifFalse + i); + } +} + +void ByteCodeGenerator::writeVariableExpression(const Expression& expr, + ByteCode::Register result) { + ByteCodeGenerator::Location location = this->getLocation(expr); + int count = SlotCount(expr.fType); + for (int i = 0; i < count; ++i) { + ByteCodeGenerator::Location final = location.offset(*this, i); + this->write(this->getLoadInstruction(location, this->getStorage(expr))); + this->write(result + i); + this->write(final); + } +} + +void ByteCodeGenerator::writeExpression(const Expression& expr, ByteCode::Register result) { + 
switch (expr.fKind) { + case Expression::kBoolLiteral_Kind: { + this->write(ByteCode::Instruction::kImmediate); + this->write(result); + this->write(ByteCode::Immediate((int32_t) (((BoolLiteral&) expr).fValue ? -1 : 0))); + break; + } + case Expression::kBinary_Kind: { + this->writeBinaryExpression((BinaryExpression&) expr, result); + break; + } + case Expression::kConstructor_Kind: { + this->writeConstructor((Constructor&) expr, result); + break; + } + case Expression::kExternalFunctionCall_Kind: + this->writeExternalFunctionCall((ExternalFunctionCall&) expr, result); + break; + case Expression::kExternalValue_Kind: + this->writeExternalValue((ExternalValueReference&) expr, result); + break; + case Expression::kFloatLiteral_Kind: { + this->write(ByteCode::Instruction::kImmediate); + this->write(result); + this->write(ByteCode::Immediate((float) ((FloatLiteral&) expr).fValue)); + break; + } + case Expression::kFunctionCall_Kind: { + this->writeFunctionCall((FunctionCall&) expr, result); + break; + } + case Expression::kIntLiteral_Kind: { + this->write(ByteCode::Instruction::kImmediate); + this->write(result); + this->write(ByteCode::Immediate((int32_t) ((IntLiteral&) expr).fValue)); + break; + } + case Expression::kPostfix_Kind: + this->writePostfixExpression((PostfixExpression&) expr, result); + break; + case Expression::kPrefix_Kind: + this->writePrefixExpression((PrefixExpression&) expr, result); + break; + case Expression::kSwizzle_Kind: + this->writeSwizzle((Swizzle&) expr, result); + break; case Expression::kTernary_Kind: + this->writeTernaryExpression((TernaryExpression&) expr, result); + break; + case Expression::kFieldAccess_Kind: + case Expression::kIndex_Kind: + case Expression::kVariableReference_Kind: + this->writeVariableExpression(expr, result); + break; default: #ifdef SK_DEBUG - ABORT("unsupported lvalue %s\n", e.description().c_str()); + ABORT("unsupported lvalue %s\n", expr.description().c_str()); #endif - return nullptr; + break; } } 
+ByteCode::Register ByteCodeGenerator::writeExpression(const Expression& expr) { + ByteCode::Register result = this->next(SlotCount(expr.fType)); + this->writeExpression(expr, result); + return result; +} + void ByteCodeGenerator::writeBlock(const Block& b) { for (const auto& s : b.fStatements) { this->writeStatement(*s); } } -void ByteCodeGenerator::setBreakTargets() { - std::vector& breaks = fBreakTargets.top(); - for (DeferredLocation& b : breaks) { - b.set(); - } - fBreakTargets.pop(); -} - -void ByteCodeGenerator::setContinueTargets() { - std::vector& continues = fContinueTargets.top(); - for (DeferredLocation& c : continues) { - c.set(); - } - fContinueTargets.pop(); -} - -void ByteCodeGenerator::writeBreakStatement(const BreakStatement& b) { - // TODO: Include BranchIfAllFalse to top-most LoopNext - this->write(ByteCodeInstruction::kLoopBreak); -} - -void ByteCodeGenerator::writeContinueStatement(const ContinueStatement& c) { - // TODO: Include BranchIfAllFalse to top-most LoopNext - this->write(ByteCodeInstruction::kLoopContinue); -} - void ByteCodeGenerator::writeDoStatement(const DoStatement& d) { - this->write(ByteCodeInstruction::kLoopBegin); - size_t start = fCode->size(); + this->write(ByteCode::Instruction::kLoopBegin); + ++fConditionCount; + SkASSERT(fCode->size() < ByteCode::kPointerMax); + ByteCode::Pointer start{(uint16_t) fCode->size()}; this->writeStatement(*d.fStatement); - this->write(ByteCodeInstruction::kLoopNext); - this->writeExpression(*d.fTest); - this->write(ByteCodeInstruction::kLoopMask); - // TODO: Could shorten this with kBranchIfAnyTrue - this->write(ByteCodeInstruction::kBranchIfAllFalse); + ByteCode::Register test = this->writeExpression(*d.fTest); + this->write(ByteCode::Instruction::kLoopNext); + this->write(ByteCode::Instruction::kLoopMask); + this->write(test); + this->write(ByteCode::Instruction::kBranchIfAllFalse); DeferredLocation endLocation(this); - this->write(ByteCodeInstruction::kBranch); - this->write16(start); + 
this->write(ByteCode::Instruction::kBranch); + this->write(start); endLocation.set(); - this->write(ByteCodeInstruction::kLoopEnd); + --fConditionCount; + this->write(ByteCode::Instruction::kLoopEnd); } void ByteCodeGenerator::writeForStatement(const ForStatement& f) { - fContinueTargets.emplace(); - fBreakTargets.emplace(); if (f.fInitializer) { this->writeStatement(*f.fInitializer); } - this->write(ByteCodeInstruction::kLoopBegin); - size_t start = fCode->size(); + this->write(ByteCode::Instruction::kLoopBegin); + ++fConditionCount; + ByteCode::Pointer start{(uint16_t) fCode->size()}; if (f.fTest) { - this->writeExpression(*f.fTest); - this->write(ByteCodeInstruction::kLoopMask); + ByteCode::Register test = this->writeExpression(*f.fTest); + this->write(ByteCode::Instruction::kLoopMask); + this->write(test); } - this->write(ByteCodeInstruction::kBranchIfAllFalse); + this->write(ByteCode::Instruction::kBranchIfAllFalse); DeferredLocation endLocation(this); this->writeStatement(*f.fStatement); - this->write(ByteCodeInstruction::kLoopNext); + this->write(ByteCode::Instruction::kLoopNext); if (f.fNext) { - this->writeExpression(*f.fNext, true); + this->writeExpression(*f.fNext); } - this->write(ByteCodeInstruction::kBranch); - this->write16(start); + this->write(ByteCode::Instruction::kBranch); + this->write(start); endLocation.set(); - this->write(ByteCodeInstruction::kLoopEnd); + --fConditionCount; + this->write(ByteCode::Instruction::kLoopEnd); } void ByteCodeGenerator::writeIfStatement(const IfStatement& i) { - this->writeExpression(*i.fTest); - this->write(ByteCodeInstruction::kMaskPush); - this->write(ByteCodeInstruction::kBranchIfAllFalse); + ByteCode::Register test = this->writeExpression(*i.fTest); + this->write(ByteCode::Instruction::kMaskPush); + ++fConditionCount; + this->write(test); + this->write(ByteCode::Instruction::kBranchIfAllFalse); DeferredLocation falseLocation(this); this->writeStatement(*i.fIfTrue); falseLocation.set(); if (i.fIfFalse) { - 
this->write(ByteCodeInstruction::kMaskNegate); - this->write(ByteCodeInstruction::kBranchIfAllFalse); + this->write(ByteCode::Instruction::kMaskNegate); + this->write(ByteCode::Instruction::kBranchIfAllFalse); DeferredLocation endLocation(this); this->writeStatement(*i.fIfFalse); endLocation.set(); } - this->write(ByteCodeInstruction::kMaskPop); + --fConditionCount; + this->write(ByteCode::Instruction::kMaskPop); } -void ByteCodeGenerator::writeReturnStatement(const ReturnStatement& r) { - if (fLoopCount || fConditionCount) { +void ByteCodeGenerator::writeReturn(const ReturnStatement& r) { + if (fConditionCount) { fErrors.error(r.fOffset, "return not allowed inside conditional or loop"); return; } - int count = SlotCount(r.fExpression->fType); - this->writeExpression(*r.fExpression); - - // Technically, the kReturn also pops fOutput->fLocalCount values from the stack, too, but we - // haven't counted pushing those (they're outside the scope of our stack tracking). Instead, - // we account for those in writeFunction(). - - // This is all fine because we don't allow conditional returns, so we only return once anyway. 
- this->write(ByteCodeInstruction::kReturn, -count); - this->write8(count); -} - -void ByteCodeGenerator::writeSwitchStatement(const SwitchStatement& r) { - // not yet implemented - abort(); + if (r.fExpression) { + ByteCode::Register value = this->writeExpression(*r.fExpression); + this->write(ByteCode::Instruction::kReturnValue); + this->write(value); + } + else { + this->write(ByteCode::Instruction::kReturn); + } } void ByteCodeGenerator::writeVarDeclarations(const VarDeclarations& v) { for (const auto& declStatement : v.fVars) { const VarDeclaration& decl = (VarDeclaration&) *declStatement; - // we need to grab the location even if we don't use it, to ensure it has been allocated - Location location = this->getLocation(*decl.fVar); + // we need to grab the location even if we don't use it, to ensure it + // has been allocated + ByteCodeGenerator::Location location = this->getLocation(*decl.fVar); if (decl.fValue) { - this->writeExpression(*decl.fValue); - int count = SlotCount(decl.fValue->fType); - if (count > 4) { - this->write(ByteCodeInstruction::kPushImmediate); - this->write32(location.fSlot); - this->write(ByteCodeInstruction::kStoreExtended, count); - this->write8(count); - } else { - this->write(vector_instruction(ByteCodeInstruction::kStore, count)); - this->write8(location.fSlot); + ByteCode::Register src = this->writeExpression(*decl.fValue); + for (int i = 0; i < SlotCount(decl.fVar->fType); ++i) { + ByteCodeGenerator::Location final = location.offset(*this, i); + this->write(ByteCode::Instruction::kStoreStackDirect); + this->write(final); + this->write(src + i); } } } } void ByteCodeGenerator::writeWhileStatement(const WhileStatement& w) { - this->write(ByteCodeInstruction::kLoopBegin); - size_t cond = fCode->size(); - this->writeExpression(*w.fTest); - this->write(ByteCodeInstruction::kLoopMask); - this->write(ByteCodeInstruction::kBranchIfAllFalse); + this->write(ByteCode::Instruction::kLoopBegin); + ++fConditionCount; + SkASSERT(fCode->size() < 
ByteCode::kPointerMax); + ByteCode::Pointer start{(uint16_t) fCode->size()}; + ByteCode::Register test = this->writeExpression(*w.fTest); + this->write(ByteCode::Instruction::kLoopMask); + this->write(test); + this->write(ByteCode::Instruction::kBranchIfAllFalse); DeferredLocation endLocation(this); this->writeStatement(*w.fStatement); - this->write(ByteCodeInstruction::kLoopNext); - this->write(ByteCodeInstruction::kBranch); - this->write16(cond); + this->write(ByteCode::Instruction::kLoopNext); + this->write(ByteCode::Instruction::kBranch); + this->write(start); endLocation.set(); - this->write(ByteCodeInstruction::kLoopEnd); + --fConditionCount; + this->write(ByteCode::Instruction::kLoopEnd); } void ByteCodeGenerator::writeStatement(const Statement& s) { @@ -1631,19 +1338,16 @@ void ByteCodeGenerator::writeStatement(const Statement& s) { this->writeBlock((Block&) s); break; case Statement::kBreak_Kind: - this->writeBreakStatement((BreakStatement&) s); + this->write(ByteCode::Instruction::kBreak); break; case Statement::kContinue_Kind: - this->writeContinueStatement((ContinueStatement&) s); + this->write(ByteCode::Instruction::kContinue); break; - case Statement::kDiscard_Kind: - // not yet implemented - abort(); case Statement::kDo_Kind: this->writeDoStatement((DoStatement&) s); break; case Statement::kExpression_Kind: - this->writeExpression(*((ExpressionStatement&) s).fExpression, true); + this->writeExpression(*((ExpressionStatement&) s).fExpression); break; case Statement::kFor_Kind: this->writeForStatement((ForStatement&) s); @@ -1654,10 +1358,7 @@ void ByteCodeGenerator::writeStatement(const Statement& s) { case Statement::kNop_Kind: break; case Statement::kReturn_Kind: - this->writeReturnStatement((ReturnStatement&) s); - break; - case Statement::kSwitch_Kind: - this->writeSwitchStatement((SwitchStatement&) s); + this->writeReturn((ReturnStatement&) s); break; case Statement::kVarDeclarations_Kind: this->writeVarDeclarations(*((VarDeclarationsStatement&) 
s).fDeclaration); @@ -1666,18 +1367,80 @@ void ByteCodeGenerator::writeStatement(const Statement& s) { this->writeWhileStatement((WhileStatement&) s); break; default: - SkASSERT(false); + ABORT("unsupported statement\n"); } } -ByteCodeFunction::ByteCodeFunction(const FunctionDeclaration* declaration) - : fName(declaration->fName) { +void ByteCodeGenerator::writeFunction(const FunctionDefinition& f) { + fFunction = &f; + std::unique_ptr result(new ByteCodeFunction(&f.fDeclaration)); + result->fReturnSlotCount = SlotCount(f.fDeclaration.fReturnType); fParameterCount = 0; - for (const auto& p : declaration->fParameters) { - int slots = ByteCodeGenerator::SlotCount(p->fType); - fParameters.push_back({ slots, (bool)(p->fModifiers.fFlags & Modifiers::kOut_Flag) }); - fParameterCount += slots; + fConditionCount = 0; + for (const auto& p : f.fDeclaration.fParameters) { + int count = SlotCount(p->fType); + bool isOut = ((p->fModifiers.fFlags & Modifiers::kOut_Flag) != 0); + result->fParameters.push_back(ByteCodeFunction::Parameter{count, isOut}); + fParameterCount += count; + } + result->fParameterSlotCount = fParameterCount; + fCode = &result->fCode; + this->writeStatement(*f.fBody); + result->fStackSlotCount = fLocals.size(); + if (f.fDeclaration.fReturnType.fName == "void") { + this->write(ByteCode::Instruction::kReturn); + } else { + this->write(ByteCode::Instruction::kAbort); + } + fOutput->fFunctions.push_back(std::move(result)); + SkASSERT(fConditionCount == 0); +} + +void ByteCodeGenerator::gatherUniforms(const Type& type, const String& name) { + if (type.kind() == Type::kOther_Kind) { + return; + } else if (type.kind() == Type::kStruct_Kind) { + for (const auto& f : type.fields()) { + this->gatherUniforms(*f.fType, name + "." 
+ f.fName); + } + } else if (type.kind() == Type::kArray_Kind) { + for (int i = 0; i < type.columns(); ++i) { + this->gatherUniforms(type.componentType(), String::printf("%s[%d]", name.c_str(), i)); + } + } else { + fOutput->fUniforms.push_back({ name, type_category(type), type.rows(), type.columns(), + fOutput->fUniformSlotCount }); + fOutput->fUniformSlotCount += type.columns() * type.rows(); } } +bool ByteCodeGenerator::generateCode() { + fOutput->fGlobalSlotCount = 0; + fOutput->fUniformSlotCount = 0; + for (const auto& pe : fProgram) { + if (pe.fKind == ProgramElement::kVar_Kind) { + VarDeclarations& decl = (VarDeclarations&) pe; + for (const auto& v : decl.fVars) { + const Variable* declVar = ((VarDeclaration&) *v).fVar; + if (declVar->fModifiers.fLayout.fBuiltin >= 0 || is_in(*declVar)) { + continue; + } + if (is_uniform(*declVar)) { + this->gatherUniforms(declVar->fType, declVar->fName); + } else { + fOutput->fGlobalSlotCount += SlotCount(declVar->fType); + } + } + } + } + for (const auto& pe : fProgram) { + if (pe.fKind == ProgramElement::kFunction_Kind) { + FunctionDefinition& f = (FunctionDefinition&) pe; + fFunctions.push_back(&f); + this->writeFunction(f); + } + } + return fErrors.errorCount() == 0; } + +} // namespace diff --git a/src/sksl/SkSLByteCodeGenerator.h b/src/sksl/SkSLByteCodeGenerator.h index 4e3accd49d..ab232c4b1e 100644 --- a/src/sksl/SkSLByteCodeGenerator.h +++ b/src/sksl/SkSLByteCodeGenerator.h @@ -54,95 +54,19 @@ namespace SkSL { class ByteCodeGenerator : public CodeGenerator { public: - class LValue { - public: - LValue(ByteCodeGenerator& generator) - : fGenerator(generator) {} - - virtual ~LValue() {} - - /** - * Stack before call: ... lvalue - * Stack after call: ... lvalue load - */ - virtual void load() = 0; - - /** - * Stack before call: ... lvalue value - * Stack after call: ... 
- */ - virtual void store(bool discard) = 0; - - protected: - ByteCodeGenerator& fGenerator; - }; - - ByteCodeGenerator(const Context* context, const Program* program, ErrorReporter* errors, - ByteCode* output); + ByteCodeGenerator(const Program* program, ErrorReporter* errors, ByteCode* output); bool generateCode() override; - void write8(uint8_t b); - - void write16(uint16_t b); - - void write32(uint32_t b); - - void write(ByteCodeInstruction inst, int count = kUnusedStackCount); - - /** - * Based on 'type', writes the s (signed), u (unsigned), or f (float) instruction. - */ - void writeTypedInstruction(const Type& type, ByteCodeInstruction s, ByteCodeInstruction u, - ByteCodeInstruction f, int count, bool writeCount = true); - - static int SlotCount(const Type& type); - private: - static constexpr int kUnusedStackCount = INT32_MAX; - static int StackUsage(ByteCodeInstruction, int count); - - // reserves 16 bits in the output code, to be filled in later with an address once we determine - // it - class DeferredLocation { - public: - DeferredLocation(ByteCodeGenerator* generator) - : fGenerator(*generator) - , fOffset(generator->fCode->size()) { - generator->write16(0); - } - -#ifdef SK_DEBUG - ~DeferredLocation() { - SkASSERT(fSet); - } -#endif - - void set() { - int target = fGenerator.fCode->size(); - SkASSERT(target <= 65535); - (*fGenerator.fCode)[fOffset] = target; - (*fGenerator.fCode)[fOffset + 1] = target >> 8; -#ifdef SK_DEBUG - fSet = true; -#endif - } - - private: - ByteCodeGenerator& fGenerator; - size_t fOffset; -#ifdef SK_DEBUG - bool fSet = false; -#endif - }; - // Intrinsics which do not simply map to a single opcode enum class SpecialIntrinsic { kDot, + kInverse, }; struct Intrinsic { - Intrinsic(ByteCodeInstruction instruction) + Intrinsic(ByteCode::Instruction instruction) : fIsSpecial(false) , fValue(instruction) {} @@ -153,201 +77,250 @@ private: bool fIsSpecial; union Value { - Value(ByteCodeInstruction instruction) + 
Value(ByteCode::Instruction instruction) : fInstruction(instruction) {} Value(SpecialIntrinsic special) : fSpecial(special) {} - ByteCodeInstruction fInstruction; + ByteCode::Instruction fInstruction; SpecialIntrinsic fSpecial; } fValue; }; + class LValue { + public: + LValue(ByteCodeGenerator& generator) + : fGenerator(generator) {} - // Similar to Variable::Storage, but locals and parameters are grouped together, and globals - // are further subidivided into uniforms and other (writable) globals. - enum class Storage { - kLocal, // include parameters - kGlobal, // non-uniform globals - kUniform, // uniform globals + virtual ~LValue() {} + + virtual void load(ByteCode::Register result) = 0; + + virtual void store(ByteCode::Register src) = 0; + + protected: + ByteCodeGenerator& fGenerator; }; struct Location { - int fSlot; - Storage fStorage; + enum { + kPointer_Kind, + kRegister_Kind + } fKind; - // Not really invalid, but a "safe" placeholder to be more explicit at call-sites - static Location MakeInvalid() { return { 0, Storage::kLocal }; } + union { + ByteCode::Pointer fPointer; + ByteCode::Register fRegister; + }; - Location makeOnStack() { return { -1, fStorage }; } - bool isOnStack() const { return fSlot < 0; } + Location(ByteCode::Pointer p) + : fKind(kPointer_Kind) + , fPointer(p) {} - Location operator+(int offset) { - SkASSERT(fSlot >= 0); - return { fSlot + offset, fStorage }; + Location(ByteCode::Register r) + : fKind(kRegister_Kind) + , fRegister(r) {} + + /** + * Returns this location offset by 'offset' bytes. For pointers, this is a compile-time + * operation, while for registers there will be CPU instructions output to handle the + * runtime calculation of the address. 
+ */ + Location offset(ByteCodeGenerator& generator, int offset) { + if (!offset) { + return *this; + } + if (fKind == kPointer_Kind) { + return Location(fPointer + offset); + } + ByteCode::Register a = generator.next(1); + generator.write(ByteCode::Instruction::kImmediate); + generator.write(a); + generator.write(ByteCode::Immediate{offset}); + ByteCode::Register result = generator.next(1); + generator.write(ByteCode::Instruction::kAddI); + generator.write(result); + generator.write(fRegister); + generator.write(a); + return result; } - ByteCodeInstruction selectLoad(ByteCodeInstruction local, - ByteCodeInstruction global, - ByteCodeInstruction uniform) const { - switch (fStorage) { - case Storage::kLocal: return local; - case Storage::kGlobal: return global; - case Storage::kUniform: return uniform; + /** + * Returns this location offset by the number of bytes stored in the 'offset' register. This + * will output the necessary CPU instructions to perform the math and return a new register + * location. 
+ */ + Location offset(ByteCodeGenerator& generator, ByteCode::Register offset) { + ByteCode::Register current; + switch (fKind) { + case kPointer_Kind: + current = generator.next(1); + generator.write(ByteCode::Instruction::kImmediate); + generator.write(current); + generator.write(ByteCode::Immediate{fPointer.fAddress}); + break; + case kRegister_Kind: + current = fRegister; } - SkUNREACHABLE; - } - - ByteCodeInstruction selectStore(ByteCodeInstruction local, - ByteCodeInstruction global) const { - switch (fStorage) { - case Storage::kLocal: return local; - case Storage::kGlobal: return global; - case Storage::kUniform: ABORT("Trying to store to a uniform"); break; - } - return local; + ByteCode::Register result = generator.next(1); + generator.write(ByteCode::Instruction::kAddI); + generator.write(result); + generator.write(current); + generator.write(offset); + return result; } }; + // reserves 16 bits in the output code, to be filled in later with an address once we determine + // it + class DeferredLocation { + public: + explicit DeferredLocation(ByteCodeGenerator* generator) + : fGenerator(*generator) + , fOffset(generator->fCode->size()) { + generator->write(ByteCode::Pointer{65535}); + } + + void set() { + SkASSERT(fGenerator.fCode->size() <= ByteCode::kPointerMax); + static_assert(sizeof(ByteCode::Pointer) == 2, + "failed assumption that ByteCode::Pointer is uint16_t"); + void* dst = &(*fGenerator.fCode)[fOffset]; + // ensure that the placeholder value 65535 hasn't been modified yet + SkASSERT(((uint8_t*) dst)[0] == 255 && ((uint8_t*) dst)[1] == 255); + ByteCode::Pointer target{(uint16_t) fGenerator.fCode->size()}; + memcpy(dst, &target, sizeof(target)); + } + + private: + ByteCodeGenerator& fGenerator; + size_t fOffset; + }; + + template + void write(T value) { + size_t n = fCode->size(); + fCode->resize(n + sizeof(value)); + memcpy(fCode->data() + n, &value, sizeof(value)); + } + + ByteCode::Register next(int slotCount); + /** - * Returns the local slot 
into which var should be stored, allocating a new slot if it has not - * already been assigned one. Compound variables (e.g. vectors) will consume more than one local - * slot, with the getLocation return value indicating where the first element should be stored. + * Based on 'type', writes the s (signed), u (unsigned), or f (float) instruction. */ + void writeTypedInstruction(const Type& type, ByteCode::Instruction s, ByteCode::Instruction u, + ByteCode::Instruction f); + + ByteCode::Instruction getLoadInstruction(Location location, Variable::Storage storage); + + ByteCode::Instruction getStoreInstruction(Location location, Variable::Storage storage); + + static int SlotCount(const Type& type); + Location getLocation(const Variable& var); - /** - * As above, but computes the (possibly dynamic) address of an expression involving indexing & - * field access. If the address is known, it's returned. If not, -1 is returned, and the - * location will be left on the top of the stack. - */ Location getLocation(const Expression& expr); - void gatherUniforms(const Type& type, const String& name); + Variable::Storage getStorage(const Expression& expr); - std::unique_ptr writeFunction(const FunctionDefinition& f); - - void writeVarDeclarations(const VarDeclarations& decl); - - void writeVariableExpression(const Expression& expr); - - void writeExpression(const Expression& expr, bool discard = false); - - /** - * Pushes whatever values are required by the lvalue onto the stack, and returns an LValue - * permitting loads and stores to it. - */ std::unique_ptr getLValue(const Expression& expr); - void writeIntrinsicCall(const FunctionCall& c); + void writeFunction(const FunctionDefinition& f); - void writeFunctionCall(const FunctionCall& c); + // For compound values, the result argument specifies the first component. Subsequent components + // will be in subsequent registers. 
- void writeConstructor(const Constructor& c); + void writeBinaryInstruction(const Type& operandType, ByteCode::Register left, + ByteCode::Register right, ByteCode::Instruction s, + ByteCode::Instruction u, ByteCode::Instruction f, + ByteCode::Register result); - void writeExternalFunctionCall(const ExternalFunctionCall& c); + void writeBinaryExpression(const BinaryExpression& expr, ByteCode::Register result); - void writeExternalValue(const ExternalValueReference& r); + void writeConstructor(const Constructor& c, ByteCode::Register result); - void writeSwizzle(const Swizzle& swizzle); + void writeExternalFunctionCall(const ExternalFunctionCall& f, ByteCode::Register result); - bool writeBinaryExpression(const BinaryExpression& b, bool discard); + void writeExternalValue(const ExternalValueReference& e, ByteCode::Register result); - void writeTernaryExpression(const TernaryExpression& t); + void writeIntrinsicCall(const FunctionCall& c, Intrinsic intrinsic, ByteCode::Register result); - void writeNullLiteral(const NullLiteral& n); + void writeFunctionCall(const FunctionCall& c, ByteCode::Register result); - bool writePrefixExpression(const PrefixExpression& p, bool discard); + void incOrDec(Token::Kind op, Expression& operand, bool prefix, ByteCode::Register result); - bool writePostfixExpression(const PostfixExpression& p, bool discard); + void writePostfixExpression(const PostfixExpression& p, ByteCode::Register result); - void writeBoolLiteral(const BoolLiteral& b); + void writePrefixExpression(const PrefixExpression& p, ByteCode::Register result); - void writeIntLiteral(const IntLiteral& i); + void writeSwizzle(const Swizzle& s, ByteCode::Register result); - void writeFloatLiteral(const FloatLiteral& f); + void writeTernaryExpression(const TernaryExpression& t, ByteCode::Register result); - void writeStatement(const Statement& s); + void writeVariableExpression(const Expression& e, ByteCode::Register result); + + void writeExpression(const Expression& expr, 
ByteCode::Register result); + + ByteCode::Register writeExpression(const Expression& expr); void writeBlock(const Block& b); - void writeBreakStatement(const BreakStatement& b); - - void writeContinueStatement(const ContinueStatement& c); - - void writeIfStatement(const IfStatement& stmt); + void writeDoStatement(const DoStatement& d); void writeForStatement(const ForStatement& f); + void writeIfStatement(const IfStatement& i); + + void writeReturn(const ReturnStatement& r); + + void writeVarDeclarations(const VarDeclarations& v); + void writeWhileStatement(const WhileStatement& w); - void writeDoStatement(const DoStatement& d); + void writeStatement(const Statement& s); - void writeSwitchStatement(const SwitchStatement& s); - - void writeReturnStatement(const ReturnStatement& r); - - // updates the current set of breaks to branch to the current location - void setBreakTargets(); - - // updates the current set of continues to branch to the current location - void setContinueTargets(); - - void enterLoop() { - fLoopCount++; - fMaxLoopCount = std::max(fMaxLoopCount, fLoopCount); - } - - void exitLoop() { - SkASSERT(fLoopCount > 0); - fLoopCount--; - } - - void enterCondition() { - fConditionCount++; - fMaxConditionCount = std::max(fMaxConditionCount, fConditionCount); - } - - void exitCondition() { - SkASSERT(fConditionCount > 0); - fConditionCount--; - } - - const Context& fContext; + void gatherUniforms(const Type& type, const String& name); ByteCode* fOutput; + int fNextRegister = 0; + const FunctionDefinition* fFunction; + std::vector fFunctions; + std::vector* fCode; std::vector fLocals; - std::stack> fContinueTargets; - - std::stack> fBreakTargets; - - std::vector fFunctions; - int fParameterCount; - int fStackCount; - int fMaxStackCount; - int fLoopCount; - int fMaxLoopCount; int fConditionCount; - int fMaxConditionCount; const std::unordered_map fIntrinsics; friend class DeferredLocation; - friend class ByteCodeExpressionLValue; + friend class 
ByteCodeExternalValueLValue; + friend class ByteCodeSimpleLValue; friend class ByteCodeSwizzleLValue; typedef CodeGenerator INHERITED; }; +template<> +inline void ByteCodeGenerator::write(ByteCodeGenerator::Location loc) { + switch (loc.fKind) { + case ByteCodeGenerator::Location::kPointer_Kind: + this->write(loc.fPointer); + break; + case ByteCodeGenerator::Location::kRegister_Kind: + this->write(loc.fRegister); + break; + } +} + } #endif diff --git a/src/sksl/SkSLCompiler.cpp b/src/sksl/SkSLCompiler.cpp index 7bfdce13ff..6e84b9864d 100644 --- a/src/sksl/SkSLCompiler.cpp +++ b/src/sksl/SkSLCompiler.cpp @@ -77,14 +77,17 @@ static const char* SKSL_PIPELINE_INCLUDE = namespace SkSL { static void grab_intrinsics(std::vector>* src, - std::map, bool>>* target) { - for (auto& element : *src) { + std::map, bool>>* target) { + for (auto iter = src->begin(); iter != src->end(); ) { + std::unique_ptr& element = *iter; switch (element->fKind) { case ProgramElement::kFunction_Kind: { FunctionDefinition& f = (FunctionDefinition&) *element; - StringFragment name = f.fDeclaration.fName; - SkASSERT(target->find(name) == target->end()); - (*target)[name] = std::make_pair(std::move(element), false); + SkASSERT(f.fDeclaration.fBuiltin); + String key = f.fDeclaration.declaration(); + SkASSERT(target->find(key) == target->end()); + (*target)[key] = std::make_pair(std::move(element), false); + iter = src->erase(iter); break; } case ProgramElement::kEnum_Kind: { @@ -92,6 +95,7 @@ static void grab_intrinsics(std::vector>* src, StringFragment name = e.fTypeName; SkASSERT(target->find(name) == target->end()); (*target)[name] = std::make_pair(std::move(element), false); + iter = src->erase(iter); break; } default: @@ -278,11 +282,13 @@ Compiler::Compiler(Flags flags) this->processIncludeFile(Program::kPipelineStage_Kind, SKSL_PIPELINE_INCLUDE, strlen(SKSL_PIPELINE_INCLUDE), fGpuSymbolTable, &fPipelineInclude, &fPipelineSymbolTable); - std::vector> interpIntrinsics; 
this->processIncludeFile(Program::kGeneric_Kind, SKSL_INTERP_INCLUDE, strlen(SKSL_INTERP_INCLUDE), symbols, &fInterpreterInclude, &fInterpreterSymbolTable); - grab_intrinsics(&interpIntrinsics, &fInterpreterIntrinsics); + grab_intrinsics(&fInterpreterInclude, &fInterpreterIntrinsics); + // need to hang on to the source so that FunctionDefinition.fSource pointers in this file + // remain valid + fInterpreterIncludeSource = std::move(fIRGenerator->fFile); } Compiler::~Compiler() { @@ -1624,7 +1630,7 @@ std::unique_ptr Compiler::toByteCode(Program& program) { } fSource = program.fSource.get(); std::unique_ptr result(new ByteCode()); - ByteCodeGenerator cg(fContext.get(), &program, this, result.get()); + ByteCodeGenerator cg(&program, this, result.get()); bool success = cg.generateCode(); fSource = nullptr; if (success) { diff --git a/src/sksl/SkSLCompiler.h b/src/sksl/SkSLCompiler.h index fb4b4fba82..25762e80ca 100644 --- a/src/sksl/SkSLCompiler.h +++ b/src/sksl/SkSLCompiler.h @@ -215,8 +215,8 @@ private: Position position(int offset); - std::map, bool>> fGPUIntrinsics; - std::map, bool>> fInterpreterIntrinsics; + std::map, bool>> fGPUIntrinsics; + std::map, bool>> fInterpreterIntrinsics; std::unique_ptr fGpuIncludeSource; std::shared_ptr fGpuSymbolTable; std::vector> fVertexInclude; @@ -227,6 +227,7 @@ private: std::shared_ptr fGeometrySymbolTable; std::vector> fPipelineInclude; std::shared_ptr fPipelineSymbolTable; + std::unique_ptr fInterpreterIncludeSource; std::vector> fInterpreterInclude; std::shared_ptr fInterpreterSymbolTable; diff --git a/src/sksl/SkSLIRGenerator.cpp b/src/sksl/SkSLIRGenerator.cpp index 44b420070d..8d9d42da1e 100644 --- a/src/sksl/SkSLIRGenerator.cpp +++ b/src/sksl/SkSLIRGenerator.cpp @@ -1776,7 +1776,7 @@ std::unique_ptr IRGenerator::call(int offset, const FunctionDeclaration& function, std::vector> arguments) { if (function.fBuiltin) { - auto found = fIntrinsics->find(function.fName); + auto found = 
fIntrinsics->find(function.declaration()); if (found != fIntrinsics->end() && !found->second.second) { found->second.second = true; const FunctionDeclaration* old = fCurrentFunction; @@ -2186,7 +2186,7 @@ std::unique_ptr IRGenerator::convertField(std::unique_ptrfOffset, "type '" + base->fType.displayName() + "' does not have a " - "field named '" + field + ""); + "field named '" + field + "'"); return nullptr; } diff --git a/src/sksl/SkSLIRGenerator.h b/src/sksl/SkSLIRGenerator.h index a0884445f4..14ea097422 100644 --- a/src/sksl/SkSLIRGenerator.h +++ b/src/sksl/SkSLIRGenerator.h @@ -159,7 +159,7 @@ private: std::shared_ptr fSymbolTable; // Symbols which have definitions in the include files. The bool tells us whether this // intrinsic has been included already. - std::map, bool>>* fIntrinsics = nullptr; + std::map, bool>>* fIntrinsics = nullptr; // holds extra temp variable declarations needed for the current function std::vector> fExtraVars; int fLoopLevel; diff --git a/src/sksl/SkSLInterpreter.h b/src/sksl/SkSLInterpreter.h new file mode 100644 index 0000000000..398a9b794a --- /dev/null +++ b/src/sksl/SkSLInterpreter.h @@ -0,0 +1,1353 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "include/private/GrTypesPriv.h" // GrAlignTo +#include "src/core/SkUtils.h" // sk_unaligned_load +#include "src/sksl/SkSLByteCode.h" +#include "src/sksl/SkSLExternalValue.h" + +#include + +#ifndef SKSL_INTERPRETER +#define SKSL_INTERPRETER + +namespace SkSL { + +// GCC and Clang support the "labels as values" extension which we need to implement the interpreter +// using threaded code. Otherwise, we fall back to using a switch statement in a for loop. 
+#if defined(__GNUC__) || defined(__clang__) + #define SKSL_THREADED_CODE +#endif + +#ifdef SKSL_THREADED_CODE + using instruction = void*; + #define LABEL(name) name: + #ifdef TRACE + #define NEXT() \ + { \ + const uint8_t* trace_ip = ip; \ + printf("%d: ", (int) (trace_ip - code)); \ + disassemble(&trace_ip); \ + } \ + goto *labels[(int) read(&ip)] + #else + #define NEXT() goto *labels[(int) read(&ip)] + #endif +#else + using instruction = uint16_t; + #define LABEL(name) case ByteCode::Instruction::name: + #define NEXT() continue +#endif + +// If you trip this assert, it means that the order of the opcodes listed in ByteCodeInstruction +// does not match the order of the opcodes listed in the 'labels' array in innerRun(). +#define CHECK_LABEL(name) \ + SkASSERT(labels[(int) ByteCode::Instruction::name] == &&name) + +template +static T read(const uint8_t** ip) { + *ip += sizeof(T); + return sk_unaligned_load(*ip - sizeof(T)); +} + +#define BINARY_OP(inst, src, result, op) \ + LABEL(inst) { \ + ByteCode::Register target = read(&ip); \ + ByteCode::Register src1 = read(&ip); \ + ByteCode::Register src2 = read(&ip); \ + fRegisters[target.fIndex].result = fRegisters[src1.fIndex].src op \ + fRegisters[src2.fIndex].src; \ + NEXT(); \ + } + +#define MASKED_BINARY_OP(inst, src, result, op) \ + LABEL(inst) { \ + ByteCode::Register target = read(&ip); \ + ByteCode::Register src1 = read(&ip); \ + ByteCode::Register src2 = read(&ip); \ + auto m = mask(); \ + for (int i = 0; i < width; ++i) { \ + if (m[i]) { \ + fRegisters[target.fIndex].result[i] = fRegisters[src1.fIndex].src[i] op \ + fRegisters[src2.fIndex].src[i]; \ + } \ + } \ + NEXT(); \ + } + +#define VECTOR_UNARY_FN(inst, fn) \ + LABEL(inst) { \ + ByteCode::Register target = read(&ip); \ + ByteCode::Register src = read(&ip); \ + for (int i = 0; i < width; ++ i) { \ + fRegisters[target.fIndex].fFloat[i] = fn(fRegisters[src.fIndex].fFloat[i]); \ + } \ + NEXT(); \ + } + +#define DISASSEMBLE_0(inst, name) \ + case 
ByteCode::Instruction::inst: printf(name "\n"); break; + +#define DISASSEMBLE_1(inst, name) \ + case ByteCode::Instruction::inst: \ + printf(name " $%d\n", read(ip).fIndex); \ + break; + +#define DISASSEMBLE_UNARY(inst, name) \ + case ByteCode::Instruction::inst: { \ + ByteCode::Register target = read(ip); \ + ByteCode::Register src = read(ip); \ + printf(name " $%d -> $%d\n", src.fIndex, target.fIndex); \ + break; \ + } + +#define DISASSEMBLE_BINARY(inst, name) \ + case ByteCode::Instruction::inst: { \ + ByteCode::Register target = read(ip); \ + ByteCode::Register src1 = read(ip); \ + ByteCode::Register src2 = read(ip); \ + printf(name " $%d, $%d -> $%d\n", src1.fIndex, src2.fIndex, target.fIndex); \ + break; \ + } + +/** + * Operates on vectors of the specified width, so creating an Interpreter<16> means that all inputs, + * outputs, and internal calculations will be 16-wide vectors. + */ +template +class Interpreter { +public: + using Vector = ByteCode::Vector; + using VectorI = skvx::Vec; + using VectorF = skvx::Vec; + + Interpreter(std::unique_ptr code) + : fCode(std::move(code)) { + // C++ doesn't guarantee proper alignment of naively-allocated vectors, so we can't have the + // registers and memory directly as fields of this object without jumping through some hoops + // during Interpreter allocation and deallocation. We simplify this by having the backing + // store be a separate allocation, jumping through the hoops ourselves rather than require + // Interpreter's clients to be aware of alignment. + // Ideally, we could use std::aligned_alloc here, but as of this writing it is not available + // on some compilers despite claiming to support C++17. 
+ fBackingStore = calloc(sizeof(Vector), MEMORY_SIZE + REGISTER_COUNT + 1); + fMemory = (Vector*) GrAlignTo((size_t) fBackingStore, alignof(Vector)); + fRegisters = fMemory + MEMORY_SIZE; + } + + ~Interpreter() { + free(fBackingStore); + } + + void setUniforms(const float uniforms[]) { + for (int i = 0; i < fCode->getUniformSlotCount(); ++i) { + fMemory[fCode->getGlobalSlotCount() + i].fFloat = VectorF(uniforms[i]); + } + } + + /** + * Returns true on success and stores a pointer to the first slot of the result into outResult. + * This pointer is only guaranteed to be valid until the next run() call. + */ + bool run(const ByteCodeFunction* f, Vector args[], Vector** outResult) { + SkASSERT(f); + VectorI condStack[MASK_STACK_SIZE]; + memset(condStack, 255, sizeof(VectorI)); + VectorI maskStack[MASK_STACK_SIZE]; + memset(maskStack, 255, sizeof(VectorI)); + VectorI loopStack[LOOP_STACK_SIZE]; + memset(loopStack, 255, sizeof(VectorI)); + VectorI continueStack[LOOP_STACK_SIZE]; + memset(continueStack, 0, sizeof(VectorI)); + Vector* stack = fMemory + MEMORY_SIZE; + int stackCount = f->fStackSlotCount + f->fParameterSlotCount; + stack -= stackCount; + memcpy(stack, args, f->fParameterSlotCount * sizeof(Vector)); + Context context(fMemory, stack, condStack, maskStack, loopStack, continueStack); + if (this->innerRun(f, context, 0, outResult)) { + int slot = 0; + for (const auto& p : f->fParameters) { + if (p.fIsOutParameter) { + memcpy(&args[slot], &stack[slot], p.fSlotCount * sizeof(Vector)); + } + slot += p.fSlotCount; + } + return true; + } + return false; + } + + /** + * Invokes the specified function with the given arguments, 'count' times. 'args' and + * 'outResult' are accepted and returned in structure-of-arrays form: + * args[0] points to an array of N values, the first argument for each invocation + * ... 
+ * args[argCount - 1] points to an array of N values, the last argument for each invocation + * + * All values in 'args', 'outReturn', and 'uniforms' are 32-bit values (typically floats, + * but possibly int32_t or uint32_t, depending on the types used in the SkSL). + * Any 'out' or 'inout' parameters will result in the 'args' array being modified. + */ + bool runStriped(const ByteCodeFunction* f, int count, float* args[]) { + SkASSERT(f); + Vector* stack = fMemory + MEMORY_SIZE; + int stackCount = f->fStackSlotCount + f->fParameterSlotCount; + stack -= stackCount; + VectorI condStack[MASK_STACK_SIZE]; + VectorI maskStack[MASK_STACK_SIZE]; + VectorI loopStack[LOOP_STACK_SIZE]; + VectorI continueStack[LOOP_STACK_SIZE]; + Context context(fMemory, stack, condStack, maskStack, loopStack, continueStack); + for (int i = 0; i < count; i += width) { + int lanes = std::min(width, count - i); + size_t size = lanes * sizeof(float); + memset(maskStack, 255, sizeof(VectorI)); + memset(loopStack, 255, sizeof(VectorI)); + for (int j = lanes; j < width; ++j) { + maskStack[0][j] = 0; + loopStack[0][j] = 0; + } + memset(continueStack, 0, sizeof(VectorI)); + for (int j = 0; j < f->fParameterSlotCount; ++j) { + memcpy(stack + j, &args[j][i], size); + } + if (!this->innerRun(f, context, i, nullptr)) { + return false; + } + int slot = 0; + for (const auto& p : f->fParameters) { + if (p.fIsOutParameter) { + for (int j = 0; j < p.fSlotCount; ++j) { + memcpy(&args[slot + j][i], stack + slot + j, size); + } + } + slot += p.fSlotCount; + } + } + return true; + } + + const ByteCode& getCode() { + return *fCode; + } + +private: + static constexpr size_t REGISTER_COUNT = 1024; + + static constexpr size_t MEMORY_SIZE = 1024; + + static constexpr size_t MASK_STACK_SIZE = 64; + + static constexpr size_t LOOP_STACK_SIZE = 16; + + struct StackFrame { + StackFrame(const ByteCodeFunction* function, const uint8_t* ip, const int stackSlotCount, + Vector* parameters, Vector* returnValue) + : 
fFunction(function) + , fIP(ip) + , fStackSlotCount(stackSlotCount) + , fParameters(parameters) + , fReturnValue(returnValue) {} + + const ByteCodeFunction* fFunction; + const uint8_t* fIP; + const int fStackSlotCount; + Vector* fParameters; + Vector* fReturnValue; + }; + + struct Context { + Context(Vector* memory, Vector* stack, VectorI* condStack, VectorI* maskStack, + VectorI* loopStack,VectorI* continueStack) + : fMemory(memory) + , fStack(stack) + , fCondStack(condStack) + , fMaskStack(maskStack) + , fLoopStack(loopStack) + , fContinueStack(continueStack) {} + + Vector* fMemory; + Vector* fStack; + VectorI* fCondStack; + VectorI* fMaskStack; + VectorI* fLoopStack; + VectorI* fContinueStack; + std::stack fCallStack; + }; + + // $x = register + // @x = memory cell + // &x = parameter + void disassemble(const uint8_t** ip) { + ByteCode::Instruction inst = read(ip); + switch (inst) { + DISASSEMBLE_BINARY(kAddF, "addF") + DISASSEMBLE_BINARY(kAddI, "addI") + DISASSEMBLE_BINARY(kAnd, "and") + DISASSEMBLE_BINARY(kCompareEQF, "compare eqF") + DISASSEMBLE_BINARY(kCompareEQI, "compare eqI") + DISASSEMBLE_BINARY(kCompareNEQF, "compare neqF") + DISASSEMBLE_BINARY(kCompareNEQI, "compare neqI") + DISASSEMBLE_BINARY(kCompareGTF, "compare gtF") + DISASSEMBLE_BINARY(kCompareGTS, "compare gtS") + DISASSEMBLE_BINARY(kCompareGTU, "compare gtU") + DISASSEMBLE_BINARY(kCompareGTEQF, "compare gteqF") + DISASSEMBLE_BINARY(kCompareGTEQS, "compare gteqS") + DISASSEMBLE_BINARY(kCompareGTEQU, "compare gteqU") + DISASSEMBLE_BINARY(kCompareLTF, "compare ltF") + DISASSEMBLE_BINARY(kCompareLTS, "compare ltS") + DISASSEMBLE_BINARY(kCompareLTU, "compare ltU") + DISASSEMBLE_BINARY(kCompareLTEQF, "compare lteqF") + DISASSEMBLE_BINARY(kCompareLTEQS, "compare lteqS") + DISASSEMBLE_BINARY(kCompareLTEQU, "compare lteqU") + DISASSEMBLE_BINARY(kSubtractF, "subF") + DISASSEMBLE_BINARY(kSubtractI, "subI") + DISASSEMBLE_BINARY(kDivideF, "divF") + DISASSEMBLE_BINARY(kDivideS, "divS") + 
DISASSEMBLE_BINARY(kDivideU, "divU") + DISASSEMBLE_BINARY(kRemainderS, "remS") + DISASSEMBLE_BINARY(kRemainderU, "remU") + DISASSEMBLE_BINARY(kMultiplyF, "mulF") + DISASSEMBLE_BINARY(kMultiplyI, "mulI") + DISASSEMBLE_BINARY(kOr, "or") + DISASSEMBLE_BINARY(kXor, "xor") + DISASSEMBLE_0(kNop, "nop") + DISASSEMBLE_BINARY(kRemainderF, "remF") + case ByteCode::Instruction::kBoundsCheck: { + ByteCode::Register r = read(ip); + int length = read(ip); + printf("boundsCheck 0 <= $%d < %d\n", r.fIndex, length); + break; + } + case ByteCode::Instruction::kBranch: + printf("branch %d\n", read(ip).fAddress); + break; + case ByteCode::Instruction::kBranchIfAllFalse: + printf("branchIfAllFalse %d\n", read(ip).fAddress); + break; + DISASSEMBLE_0(kBreak, "break") + case ByteCode::Instruction::kCall: { + ByteCode::Register target = read(ip); + uint8_t idx = read(ip); + ByteCode::Register args = read(ip); + ByteCodeFunction* f = fCode->fFunctions[idx].get(); + printf("call %s($%d...) -> $%d", f->fName.c_str(), args.fIndex, target.fIndex); + printf("\n"); + break; + } + case ByteCode::Instruction::kCallExternal: { + ByteCode::Register target = read(ip); + uint8_t idx = read(ip); + uint8_t targetCount = read(ip); + ByteCode::Register args = read(ip); + uint8_t argCount = read(ip); + ExternalValue* ev = fCode->fExternalValues[idx]; + printf("callExternal %s($%d(%d)...) 
-> $%d(%d)", String(ev->fName).c_str(), + args.fIndex, argCount, target.fIndex, targetCount); + printf("\n"); + break; + } + DISASSEMBLE_0(kContinue, "continue") + DISASSEMBLE_UNARY(kCopy, "copy") + DISASSEMBLE_UNARY(kCos, "cos") + DISASSEMBLE_UNARY(kFloatToSigned, "FtoS") + DISASSEMBLE_UNARY(kFloatToUnsigned, "FtoU") + case ByteCode::Instruction::kImmediate: { + ByteCode::Register target = read(ip); + ByteCode::Immediate src = read(ip); + printf("immediate (%d | %f) -> $%d\n", src.fInt, src.fFloat, target.fIndex); + break; + } + DISASSEMBLE_UNARY(kInverse2x2, "inverse2x2") + DISASSEMBLE_UNARY(kInverse3x3, "inverse3x3") + DISASSEMBLE_UNARY(kInverse4x4, "inverse4x4") + DISASSEMBLE_UNARY(kLoad, "load") + case ByteCode::Instruction::kLoadDirect: { + ByteCode::Register target = read(ip); + ByteCode::Pointer src = read(ip); + printf("loadDirect @%d -> $%d\n", src.fAddress, target.fIndex); + break; + } + DISASSEMBLE_UNARY(kLoadParameter, "loadParameter") + case ByteCode::Instruction::kLoadParameterDirect: { + ByteCode::Register target = read(ip); + ByteCode::Pointer src = read(ip); + printf("loadParameterDirect &%d -> $%d\n", src.fAddress, target.fIndex); + break; + } + DISASSEMBLE_UNARY(kLoadStack, "loadStack") + case ByteCode::Instruction::kLoadStackDirect: { + ByteCode::Register target = read(ip); + ByteCode::Pointer src = read(ip); + printf("loadStackDirect @%d -> $%d\n", src.fAddress, target.fIndex); + break; + } + DISASSEMBLE_0(kLoopBegin, "loopBegin") + DISASSEMBLE_0(kLoopEnd, "loopEnd") + DISASSEMBLE_1(kLoopMask, "loopMask") + DISASSEMBLE_0(kLoopNext, "loopNext") + DISASSEMBLE_0(kMaskNegate, "maskNegate") + DISASSEMBLE_0(kMaskPop, "maskPop") + DISASSEMBLE_1(kMaskPush, "maskPush") + case ByteCode::Instruction::kMatrixMultiply: { + ByteCode::Register target = read(ip); + ByteCode::Register left = read(ip); + ByteCode::Register right = read(ip); + uint8_t leftColsAndRightRows = read(ip); + uint8_t leftRows = read(ip); + uint8_t rightColumns = read(ip); + 
printf("matrixMultiply $%d, $%d, %d, %d, %d -> $%d\n", left.fIndex, right.fIndex, + leftColsAndRightRows, leftRows, rightColumns, target.fIndex); + break; + } + case ByteCode::Instruction::kMatrixToMatrix: { + ByteCode::Register target = read(ip); + ByteCode::Register src = read(ip); + uint8_t srcColumns = read(ip); + uint8_t srcRows = read(ip); + uint8_t dstColumns = read(ip); + uint8_t dstRows = read(ip); + printf("matrixToMatrix $%d, %dx%d to %dx%d -> $%d\n", src.fIndex, srcColumns, + srcRows, dstColumns, dstRows, target.fIndex); + break; + } + DISASSEMBLE_UNARY(kNegateF, "negateF") + DISASSEMBLE_UNARY(kNegateS, "negateS") + DISASSEMBLE_UNARY(kNot, "not") + case ByteCode::Instruction::kReadExternal: { + ByteCode::Register target = read(ip); + uint8_t count = read(ip); + uint8_t index = read(ip); + printf("readExternal %d, %d -> $%d\n", count, index, target.fIndex); + break; + } + DISASSEMBLE_1(kPrint, "print") + DISASSEMBLE_0(kReturn, "return") + DISASSEMBLE_1(kReturnValue, "returnValue") + case ByteCode::Instruction::kScalarToMatrix: { + ByteCode::Register target = read(ip); + ByteCode::Register src = read(ip); + uint8_t columns = read(ip); + uint8_t rows = read(ip); + printf("scalarToMatrix $%d, %dx%d -> $%d\n", src.fIndex, columns, rows, + target.fIndex); + break; + } + case ByteCode::Instruction::kSelect: { + ByteCode::Register target = read(ip); + ByteCode::Register test = read(ip); + ByteCode::Register src1 = read(ip); + ByteCode::Register src2 = read(ip); + printf("select $%d, $%d, $%d -> %d\n", test.fIndex, src1.fIndex, src2.fIndex, + target.fIndex); + break; + } + DISASSEMBLE_BINARY(kShiftLeft, "shiftLeft") + DISASSEMBLE_BINARY(kShiftRightS, "shiftRightS") + DISASSEMBLE_BINARY(kShiftRightU, "shiftRightU") + DISASSEMBLE_UNARY(kSignedToFloat, "signedToFloat") + DISASSEMBLE_UNARY(kSin, "sin") + DISASSEMBLE_UNARY(kSqrt, "sqrt") + DISASSEMBLE_UNARY(kStore, "store") + case ByteCode::Instruction::kStoreDirect: { + ByteCode::Pointer target = read(ip); + 
ByteCode::Register src = read(ip); + printf("store $%d -> @%d\n", src.fIndex, target.fAddress); + break; + } + DISASSEMBLE_UNARY(kStoreParameter, "storeParameter") + case ByteCode::Instruction::kStoreParameterDirect: { + ByteCode::Pointer target = read(ip); + ByteCode::Register src = read(ip); + printf("storeParameter $%d -> &%d\n", src.fIndex, target.fAddress); + break; + } + DISASSEMBLE_UNARY(kStoreStack, "storeStack") + case ByteCode::Instruction::kStoreStackDirect: { + ByteCode::Pointer target = read(ip); + ByteCode::Register src = read(ip); + printf("storeStackDirect $%d -> @%d\n", src.fIndex, target.fAddress); + break; + } + DISASSEMBLE_UNARY(kTan, "tan") + DISASSEMBLE_UNARY(kUnsignedToFloat, "unsignedToFloat") + case ByteCode::Instruction::kWriteExternal: { + uint8_t index = read(ip); + uint8_t count = read(ip); + ByteCode::Register src = read(ip); + printf("writeExternal $%d, %d -> %d\n", src.fIndex, count, index); + break; + } + default: + printf("unsupported: %d\n", (int) inst); + SkASSERT(false); + } + } + + static Vector VecMod(Vector x, Vector y) { + return Vector(x.fFloat - skvx::trunc(x.fFloat / y.fFloat) * y.fFloat); + } + + #define CHECK_STACK_BOUNDS(address) \ + SkASSERT(context.fStack + address >= fMemory && \ + context.fStack + address <= fMemory + MEMORY_SIZE) + + static void Inverse2x2(Vector* in, Vector* out) { + VectorF a = in[0].fFloat, + b = in[1].fFloat, + c = in[2].fFloat, + d = in[3].fFloat; + VectorF idet = VectorF(1) / (a*d - b*c); + out[0].fFloat = d * idet; + out[1].fFloat = -b * idet; + out[2].fFloat = -c * idet; + out[3].fFloat = a * idet; + } + + static void Inverse3x3(Vector* in, Vector* out) { + VectorF a11 = in[0].fFloat, a12 = in[3].fFloat, a13 = in[6].fFloat, + a21 = in[1].fFloat, a22 = in[4].fFloat, a23 = in[7].fFloat, + a31 = in[2].fFloat, a32 = in[5].fFloat, a33 = in[8].fFloat; + VectorF idet = VectorF(1) / (a11 * a22 * a33 + a12 * a23 * a31 + a13 * a21 * a32 - + a11 * a23 * a32 - a12 * a21 * a33 - a13 * a22 * a31); + 
out[0].fFloat = (a22 * a33 - a23 * a32) * idet; + out[1].fFloat = (a23 * a31 - a21 * a33) * idet; + out[2].fFloat = (a21 * a32 - a22 * a31) * idet; + out[3].fFloat = (a13 * a32 - a12 * a33) * idet; + out[4].fFloat = (a11 * a33 - a13 * a31) * idet; + out[5].fFloat = (a12 * a31 - a11 * a32) * idet; + out[6].fFloat = (a12 * a23 - a13 * a22) * idet; + out[7].fFloat = (a13 * a21 - a11 * a23) * idet; + out[8].fFloat = (a11 * a22 - a12 * a21) * idet; + } + + + static void Inverse4x4(Vector* in, Vector* out) { + #define inf(index) in[index].fFloat + #define outf(index) out[index].fFloat + VectorF a00 = inf(0), a10 = inf(4), a20 = inf( 8), a30 = inf(12), + a01 = inf(1), a11 = inf(5), a21 = inf( 9), a31 = inf(13), + a02 = inf(2), a12 = inf(6), a22 = inf(10), a32 = inf(14), + a03 = inf(3), a13 = inf(7), a23 = inf(11), a33 = inf(15); + + VectorF b00 = a00 * a11 - a01 * a10, + b01 = a00 * a12 - a02 * a10, + b02 = a00 * a13 - a03 * a10, + b03 = a01 * a12 - a02 * a11, + b04 = a01 * a13 - a03 * a11, + b05 = a02 * a13 - a03 * a12, + b06 = a20 * a31 - a21 * a30, + b07 = a20 * a32 - a22 * a30, + b08 = a20 * a33 - a23 * a30, + b09 = a21 * a32 - a22 * a31, + b10 = a21 * a33 - a23 * a31, + b11 = a22 * a33 - a23 * a32; + + VectorF idet = VectorF(1) / + (b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06); + + b00 *= idet; + b01 *= idet; + b02 *= idet; + b03 *= idet; + b04 *= idet; + b05 *= idet; + b06 *= idet; + b07 *= idet; + b08 *= idet; + b09 *= idet; + b10 *= idet; + b11 *= idet; + + outf( 0) = a11 * b11 - a12 * b10 + a13 * b09; + outf( 1) = a02 * b10 - a01 * b11 - a03 * b09; + outf( 2) = a31 * b05 - a32 * b04 + a33 * b03; + outf( 3) = a22 * b04 - a21 * b05 - a23 * b03; + outf( 4) = a12 * b08 - a10 * b11 - a13 * b07; + outf( 5) = a00 * b11 - a02 * b08 + a03 * b07; + outf( 6) = a32 * b02 - a30 * b05 - a33 * b01; + outf( 7) = a20 * b05 - a22 * b02 + a23 * b01; + outf( 8) = a10 * b10 - a11 * b08 + a13 * b06; + outf( 9) = a01 * b08 - a00 * b10 - a03 * b06; + outf(10) = 
                   a30 * b04 - a31 * b02 + a33 * b00;
        outf(11) = a21 * b02 - a20 * b04 - a23 * b00;
        outf(12) = a11 * b07 - a10 * b09 - a12 * b06;
        outf(13) = a00 * b09 - a01 * b07 + a02 * b06;
        outf(14) = a31 * b01 - a30 * b03 - a32 * b00;
        outf(15) = a20 * b03 - a21 * b01 + a22 * b00;
        #undef inf
        #undef outf
    }

    // The interpreter main loop: executes f's bytecode until it returns. 'context' is taken by
    // value, so the call stack starts empty for every invocation. 'baseIndex' is the index of the
    // first active lane (used when calling out to ExternalValues). On success, if outResult is
    // non-null it receives a pointer to the first return-value register.
    bool innerRun(const ByteCodeFunction* f, Context context, int baseIndex, Vector** outResult) {
#ifdef SKSL_THREADED_CODE
        static const void* labels[] = {
            // If you aren't familiar with it, the &&label syntax is the GCC / Clang "labels as
            // values" extension. If you add anything to this array, be sure to add the
            // corresponding CHECK_LABEL() assert below.
            &&kNop,
            &&kAbort,
            &&kAddF,
            &&kAddI,
            &&kAnd,
            &&kBoundsCheck,
            &&kBranch,
            &&kBranchIfAllFalse,
            &&kBreak,
            &&kCall,
            &&kCallExternal,
            &&kCompareEQF,
            &&kCompareEQI,
            &&kCompareNEQF,
            &&kCompareNEQI,
            &&kCompareGTF,
            &&kCompareGTS,
            &&kCompareGTU,
            &&kCompareGTEQF,
            &&kCompareGTEQS,
            &&kCompareGTEQU,
            &&kCompareLTF,
            &&kCompareLTS,
            &&kCompareLTU,
            &&kCompareLTEQF,
            &&kCompareLTEQS,
            &&kCompareLTEQU,
            &&kContinue,
            &&kCopy,
            &&kCos,
            &&kDivideF,
            &&kDivideS,
            &&kDivideU,
            &&kFloatToSigned,
            &&kFloatToUnsigned,
            &&kImmediate,
            &&kInverse2x2,
            &&kInverse3x3,
            &&kInverse4x4,
            &&kLoad,
            &&kLoadDirect,
            &&kLoadParameter,
            &&kLoadParameterDirect,
            &&kLoadStack,
            &&kLoadStackDirect,
            &&kLoopBegin,
            &&kLoopEnd,
            &&kLoopMask,
            &&kLoopNext,
            &&kMaskNegate,
            &&kMaskPop,
            &&kMaskPush,
            &&kMatrixMultiply,
            &&kMatrixToMatrix,
            &&kMultiplyF,
            &&kMultiplyI,
            &&kNegateF,
            &&kNegateS,
            &&kNot,
            &&kOr,
            &&kPrint,
            &&kReadExternal,
            &&kRemainderF,
            &&kRemainderS,
            &&kRemainderU,
            &&kReturn,
            &&kReturnValue,
            &&kScalarToMatrix,
            &&kSelect,
            &&kShiftLeft,
            &&kShiftRightS,
            &&kShiftRightU,
            &&kSignedToFloat,
            &&kSin,
            &&kSqrt,
            &&kStore,
            &&kStoreDirect,
            &&kStoreParameter,
            &&kStoreParameterDirect,
            &&kStoreStack,
            &&kStoreStackDirect,
            &&kSubtractF,
            &&kSubtractI,
            &&kTan,
            &&kUnsignedToFloat,
            &&kWriteExternal,
            &&kXor
        };
        // Verify each label sits at the array index matching its Instruction value.
        CHECK_LABEL(kNop);
        CHECK_LABEL(kAbort);
        CHECK_LABEL(kAddF);
        CHECK_LABEL(kAddI);
        CHECK_LABEL(kAnd);
        CHECK_LABEL(kBoundsCheck);
        CHECK_LABEL(kBranch);
        CHECK_LABEL(kBranchIfAllFalse);
        CHECK_LABEL(kBreak);
        CHECK_LABEL(kCall);
        CHECK_LABEL(kCallExternal);
        CHECK_LABEL(kCompareEQF);
        CHECK_LABEL(kCompareEQI);
        CHECK_LABEL(kCompareNEQF);
        CHECK_LABEL(kCompareNEQI);
        CHECK_LABEL(kCompareGTF);
        CHECK_LABEL(kCompareGTS);
        CHECK_LABEL(kCompareGTU);
        CHECK_LABEL(kCompareGTEQF);
        CHECK_LABEL(kCompareGTEQS);
        CHECK_LABEL(kCompareGTEQU);
        CHECK_LABEL(kCompareLTF);
        CHECK_LABEL(kCompareLTS);
        CHECK_LABEL(kCompareLTU);
        CHECK_LABEL(kCompareLTEQF);
        CHECK_LABEL(kCompareLTEQS);
        CHECK_LABEL(kCompareLTEQU);
        CHECK_LABEL(kContinue);
        CHECK_LABEL(kCopy);
        CHECK_LABEL(kCos);
        CHECK_LABEL(kDivideF);
        CHECK_LABEL(kDivideS);
        CHECK_LABEL(kDivideU);
        CHECK_LABEL(kFloatToSigned);
        CHECK_LABEL(kFloatToUnsigned);
        CHECK_LABEL(kImmediate);
        CHECK_LABEL(kInverse2x2);
        CHECK_LABEL(kInverse3x3);
        CHECK_LABEL(kInverse4x4);
        CHECK_LABEL(kLoad);
        CHECK_LABEL(kLoadDirect);
        CHECK_LABEL(kLoadParameter);
        CHECK_LABEL(kLoadParameterDirect);
        CHECK_LABEL(kLoadStack);
        CHECK_LABEL(kLoadStackDirect);
        CHECK_LABEL(kLoopBegin);
        CHECK_LABEL(kLoopEnd);
        CHECK_LABEL(kLoopMask);
        CHECK_LABEL(kLoopNext);
        CHECK_LABEL(kMaskNegate);
        CHECK_LABEL(kMaskPop);
        CHECK_LABEL(kMaskPush);
        CHECK_LABEL(kMatrixMultiply);
        CHECK_LABEL(kMatrixToMatrix);
        CHECK_LABEL(kMultiplyF);
        CHECK_LABEL(kMultiplyI);
        CHECK_LABEL(kNegateF);
        CHECK_LABEL(kNegateS);
        CHECK_LABEL(kNot);
        CHECK_LABEL(kOr);
        CHECK_LABEL(kPrint);
        CHECK_LABEL(kReadExternal);
        CHECK_LABEL(kRemainderF);
        CHECK_LABEL(kRemainderS);
        CHECK_LABEL(kRemainderU);
        CHECK_LABEL(kReturn);
        CHECK_LABEL(kReturnValue);
        CHECK_LABEL(kScalarToMatrix);
        CHECK_LABEL(kSelect);
        CHECK_LABEL(kShiftLeft);
        CHECK_LABEL(kShiftRightS);
        CHECK_LABEL(kShiftRightU);
        CHECK_LABEL(kSignedToFloat);
        CHECK_LABEL(kSin);
        CHECK_LABEL(kSqrt);
        CHECK_LABEL(kStore);
        CHECK_LABEL(kStoreDirect);
        CHECK_LABEL(kStoreParameter);
        CHECK_LABEL(kStoreParameterDirect);
        CHECK_LABEL(kStoreStack);
        CHECK_LABEL(kStoreStackDirect);
        CHECK_LABEL(kSubtractF);
        CHECK_LABEL(kSubtractI);
        CHECK_LABEL(kTan);
        CHECK_LABEL(kUnsignedToFloat);
        CHECK_LABEL(kWriteExternal);
        CHECK_LABEL(kXor);
#endif
        // Active lanes are those enabled by both the condition-mask stack and the loop stack.
        auto mask = [&]() { return *context.fMaskStack & *context.fLoopStack; };
        // Parameters live on the caller's stack region (or the base stack at the top level).
        auto parameterBase = [&]() {
            return context.fCallStack.empty() ? context.fStack
                                              : context.fCallStack.top().fParameters;
        };
        const uint8_t* code = f->fCode.data();
        const uint8_t* ip = code;
#ifdef SKSL_THREADED_CODE
    #ifdef TRACE
        const uint8_t* trace_ip = ip;
        printf("0: ");
        disassemble(&trace_ip);
    #endif
        goto *labels[(int) read<ByteCode::Instruction>(&ip)];
#else
        for (;;) {
    #ifdef TRACE
            const uint8_t* trace_ip = ip;
            disassemble(&trace_ip);
    #endif
            ByteCode::Instruction inst = read<ByteCode::Instruction>(&ip);
            switch (inst) {
#endif
        BINARY_OP(kAddF, fFloat, fFloat, +)
        BINARY_OP(kAddI, fInt, fInt, +)
        BINARY_OP(kAnd, fInt, fInt, &)
        BINARY_OP(kCompareEQF, fFloat, fInt, ==)
        BINARY_OP(kCompareEQI, fInt, fInt, ==)
        BINARY_OP(kCompareNEQF, fFloat, fInt, !=)
        BINARY_OP(kCompareNEQI, fInt, fInt, !=)
        BINARY_OP(kCompareGTF, fFloat, fInt, >)
        BINARY_OP(kCompareGTS, fInt, fInt, >)
        BINARY_OP(kCompareGTU, fUInt, fUInt, >)
        BINARY_OP(kCompareGTEQF, fFloat, fInt, >=)
        BINARY_OP(kCompareGTEQS, fInt, fInt, >=)
        BINARY_OP(kCompareGTEQU, fUInt, fUInt, >=)
        BINARY_OP(kCompareLTF, fFloat, fInt, <)
        BINARY_OP(kCompareLTS, fInt, fInt, <)
        BINARY_OP(kCompareLTU, fUInt, fUInt, <)
        BINARY_OP(kCompareLTEQF, fFloat, fInt, <=)
        BINARY_OP(kCompareLTEQS, fInt, fInt, <=)
        BINARY_OP(kCompareLTEQU, fUInt, fUInt, <=)
        BINARY_OP(kSubtractF, fFloat, fFloat, -)
        BINARY_OP(kSubtractI, fInt, fInt, -)
        BINARY_OP(kDivideF, fFloat, fFloat, /)
        // Integer divide / remainder only execute on active lanes, so inactive lanes can't
        // fault on divide-by-zero.
        MASKED_BINARY_OP(kDivideS, fInt, fInt, /)
        MASKED_BINARY_OP(kDivideU, fUInt, fUInt, /)
        MASKED_BINARY_OP(kRemainderS, fInt, fInt, %)
        MASKED_BINARY_OP(kRemainderU, fUInt, fUInt, %)
        BINARY_OP(kMultiplyF, fFloat, fFloat, *)
        BINARY_OP(kMultiplyI, fInt, fInt, *)
        BINARY_OP(kOr, fInt, fInt, |)
        BINARY_OP(kXor, fInt, fInt, ^)
        LABEL(kAbort)
            SkASSERT(false);
            return false;
        LABEL(kBoundsCheck) {
            ByteCode::Register r = read<ByteCode::Register>(&ip);
            int length = read<int>(&ip);
            // Fail if any active lane holds an out-of-range index.
            if (skvx::any(mask() & ((fRegisters[r.fIndex].fInt < 0) |
                                    (fRegisters[r.fIndex].fInt >= length)))) {
                return false;
            }
            NEXT();
        }
        LABEL(kBranch) {
            ByteCode::Pointer target = read<ByteCode::Pointer>(&ip);
            ip = code + target.fAddress;
            NEXT();
        }
        LABEL(kBranchIfAllFalse) {
            ByteCode::Pointer target = read<ByteCode::Pointer>(&ip);
            if (!skvx::any(mask())) {
                ip = code + target.fAddress;
            }
            NEXT();
        }
        LABEL(kBreak)
            // Disable the breaking lanes for the remainder of the loop.
            *context.fLoopStack &= ~mask();
            NEXT();
        LABEL(kCall) {
            ByteCode::Register returnValue = read<ByteCode::Register>(&ip);
            uint8_t idx = read<uint8_t>(&ip);
            ByteCode::Register args = read<ByteCode::Register>(&ip);
            const ByteCodeFunction* target = fCode->fFunctions[idx].get();
            int stackSlotCount = target->fStackSlotCount + target->fParameterSlotCount;
            // Save the caller's state, then switch execution to the callee's bytecode with a
            // freshly carved-out stack region seeded with the argument registers.
            context.fCallStack.push(StackFrame(f, ip, stackSlotCount,
                                               &fRegisters[args.fIndex],
                                               &fRegisters[returnValue.fIndex]));
            f = target;
            code = f->fCode.data();
            ip = code;
            context.fStack -= stackSlotCount;
            memcpy(context.fStack, &fRegisters[args.fIndex],
                   f->fParameterSlotCount * sizeof(Vector));
            NEXT();
        }
        LABEL(kCallExternal) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            uint8_t index = read<uint8_t>(&ip);
            uint8_t targetSize = read<uint8_t>(&ip);
            ByteCode::Register arguments = read<ByteCode::Register>(&ip);
            uint8_t argumentSize = read<uint8_t>(&ip);
            ExternalValue* v = fCode->fExternalValues[index];
            float tmpReturn[64];
            SkASSERT(targetSize < 64);
            float tmpArgs[64];
            SkASSERT(argumentSize < 64);
            // External calls are scalar: marshal each active lane through a temp buffer.
            VectorI m = mask();
            for (int i = 0; i < width; ++i) {
                if (m[i]) {
                    for (int j = 0; j < argumentSize; j++) {
                        tmpArgs[j] = fRegisters[arguments.fIndex + j].fFloat[i];
                    }
                    v->call(baseIndex + i, tmpArgs, tmpReturn);
                    for (int j = 0; j < targetSize; j++) {
                        fRegisters[target.fIndex + j].fFloat[i] = tmpReturn[j];
                    }
                }
            }
            NEXT();
        }
        LABEL(kContinue) {
            // Park the continuing lanes until kLoopNext re-enables them.
            VectorI m = mask();
            *context.fContinueStack |= m;
            *context.fLoopStack &= ~m;
            NEXT();
        }
        LABEL(kCopy) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            fRegisters[target.fIndex].fInt = fRegisters[src.fIndex].fInt;
            NEXT();
        }
        VECTOR_UNARY_FN(kCos, cosf)
        LABEL(kFloatToSigned) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            fRegisters[target.fIndex] = Vector(skvx::cast<int32_t>(
                                                               fRegisters[src.fIndex].fFloat));
            NEXT();
        }
        LABEL(kFloatToUnsigned) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            fRegisters[target.fIndex] = Vector(skvx::cast<uint32_t>(
                                                               fRegisters[src.fIndex].fFloat));
            NEXT();
        }
        LABEL(kImmediate) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Immediate src = read<ByteCode::Immediate>(&ip);
            // Splat the scalar immediate across all lanes.
            fRegisters[target.fIndex].fInt = src.fInt;
            NEXT();
        }
        LABEL(kInverse2x2) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            Inverse2x2(&fRegisters[src.fIndex], &fRegisters[target.fIndex]);
            NEXT();
        }
        LABEL(kInverse3x3) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            Inverse3x3(&fRegisters[src.fIndex], &fRegisters[target.fIndex]);
            NEXT();
        }
        LABEL(kInverse4x4) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            Inverse4x4(&fRegisters[src.fIndex], &fRegisters[target.fIndex]);
            NEXT();
        }
        LABEL(kLoad) {
            // Indirect load: each active lane gathers from its own per-lane address.
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            VectorI m = mask();
            for (int i = 0; i < width; ++i) {
                if (m[i]) {
                    fRegisters[target.fIndex].fInt[i] =
                                                fMemory[fRegisters[src.fIndex].fInt[i]].fInt[i];
                }
            }
            NEXT();
        }
        LABEL(kLoadDirect) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Pointer src = read<ByteCode::Pointer>(&ip);
            fRegisters[target.fIndex].fInt = fMemory[src.fAddress].fInt;
            NEXT();
        }
        LABEL(kLoadParameter) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            Vector* base = parameterBase();
            VectorI m = mask();
            for (int i = 0; i < width; ++i) {
                if (m[i]) {
                    fRegisters[target.fIndex].fInt[i] =
                                                  base[fRegisters[src.fIndex].fInt[i]].fInt[i];
                }
            }
            NEXT();
        }
        LABEL(kLoadParameterDirect) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Pointer src = read<ByteCode::Pointer>(&ip);
            Vector* base = parameterBase();
            fRegisters[target.fIndex].fInt = base[src.fAddress].fInt;
            NEXT();
        }
        LABEL(kLoadStack) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            VectorI m = mask();
            for (int i = 0; i < width; ++i) {
                if (m[i]) {
                    fRegisters[target.fIndex].fInt[i] =
                                         context.fStack[fRegisters[src.fIndex].fInt[i]].fInt[i];
                }
            }
            NEXT();
        }
        LABEL(kLoadStackDirect) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Pointer src = read<ByteCode::Pointer>(&ip);
            CHECK_STACK_BOUNDS(src.fAddress);
            fRegisters[target.fIndex].fInt = context.fStack[src.fAddress].fInt;
            NEXT();
        }
        LABEL(kLoopBegin) {
            // Push a copy of the current loop mask and a cleared continue mask.
            context.fLoopStack[1] = context.fLoopStack[0];
            ++context.fLoopStack;
            context.fContinueStack[1] = 0;
            ++context.fContinueStack;
            NEXT();
        }
        LABEL(kLoopEnd) {
            --context.fLoopStack;
            --context.fContinueStack;
            NEXT();
        }
        LABEL(kLoopMask) {
            ByteCode::Register value = read<ByteCode::Register>(&ip);
            *context.fLoopStack &= fRegisters[value.fIndex].fInt;
            NEXT();
        }
        LABEL(kLoopNext) {
            // Re-enable lanes parked by kContinue for the next iteration.
            *context.fLoopStack |= *context.fContinueStack;
            *context.fContinueStack = 0;
            NEXT();
        }
        LABEL(kMaskNegate) {
            // 'else' branch: enclosing mask AND NOT the condition pushed for the 'if'.
            *context.fMaskStack = context.fMaskStack[-1] & ~context.fCondStack[0];
            NEXT();
        }
        LABEL(kMaskPop) {
            --context.fMaskStack;
            --context.fCondStack;
            NEXT();
        }
        LABEL(kMaskPush) {
            ByteCode::Register value = read<ByteCode::Register>(&ip);
            context.fCondStack[1] = fRegisters[value.fIndex].fInt;
            context.fMaskStack[1] = context.fMaskStack[0] & context.fCondStack[1];
            ++context.fCondStack;
            ++context.fMaskStack;
            NEXT();
        }
        LABEL(kMatrixMultiply) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register left = read<ByteCode::Register>(&ip);
            ByteCode::Register right = read<ByteCode::Register>(&ip);
            uint8_t lCols = read<uint8_t>(&ip);
            uint8_t lRows = read<uint8_t>(&ip);
            uint8_t rCols = read<uint8_t>(&ip);
            uint8_t rRows = lCols;
            // Column-major multiply-accumulate into a zeroed target block.
            memset(&fRegisters[target.fIndex], 0, sizeof(Vector) * rCols * lRows);
            for (int c = 0; c < rCols; ++c) {
                for (int r = 0; r < lRows; ++r) {
                    for (int j = 0; j < lCols; ++j) {
                        fRegisters[target.fIndex + c * lRows + r].fFloat +=
                                fRegisters[left.fIndex + j * lRows + r].fFloat *
                                fRegisters[right.fIndex + c * rRows + j].fFloat;
                    }
                }
            }
            NEXT();
        }
        LABEL(kMatrixToMatrix) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            uint8_t srcColumns = read<uint8_t>(&ip);
            uint8_t srcRows = read<uint8_t>(&ip);
            uint8_t dstColumns = read<uint8_t>(&ip);
            uint8_t dstRows = read<uint8_t>(&ip);
            // Resize: copy the overlapping region, fill the rest from the identity matrix.
            int offset = 0;
            for (int i = 0; i < dstColumns; ++i) {
                for (int j = 0; j < dstRows; ++j) {
                    if (i < srcColumns && j < srcRows) {
                        fRegisters[target.fIndex + offset] =
                                                      fRegisters[src.fIndex + (srcRows * i) + j];
                    } else {
                        if (i == j) {
                            fRegisters[target.fIndex + offset].fFloat = 1;
                        } else {
                            fRegisters[target.fIndex + offset].fFloat = 0;
                        }
                    }
                    ++offset;
                }
            }
            NEXT();
        }
        LABEL(kNegateF) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            fRegisters[target.fIndex].fFloat = -fRegisters[src.fIndex].fFloat;
            NEXT();
        }
        LABEL(kNegateS) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            fRegisters[target.fIndex].fInt = -fRegisters[src.fIndex].fInt;
            NEXT();
        }
        LABEL(kNop)
            NEXT();
        LABEL(kNot) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            fRegisters[target.fIndex].fInt = ~fRegisters[src.fIndex].fInt;
            NEXT();
        }
        LABEL(kPrint) {
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            // Debug output: active lanes print their value, masked-off lanes print '-'.
            if (skvx::any(mask())) {
                printf("[");
                const char* separator = "";
                for (int i = 0; i < width; ++i) {
                    if (mask()[i]) {
                        printf("%s%f", separator, fRegisters[src.fIndex].fFloat[i]);
                    }
                    else {
                        printf("%s-", separator);
                    }
                    separator = ", ";
                }
                printf("]\n");
            }
            NEXT();
        }
        LABEL(kReadExternal) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            uint8_t count = read<uint8_t>(&ip);
            uint8_t index = read<uint8_t>(&ip);
            SkASSERT(count <= 4);
            SkASSERT(fCode->fExternalValues.size() > index);
            // External reads are scalar: marshal each active lane through a temp buffer.
            float tmp[4];
            VectorI m = mask();
            for (int i = 0; i < width; ++i) {
                if (m[i]) {
                    fCode->fExternalValues[index]->read(baseIndex + i, tmp);
                    for (int j = 0; j < count; ++j) {
                        fRegisters[target.fIndex + j].fFloat[i] = tmp[j];
                    }
                }
            }
            NEXT();
        }
        LABEL(kRemainderF) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src1 = read<ByteCode::Register>(&ip);
            ByteCode::Register src2 = read<ByteCode::Register>(&ip);
            fRegisters[target.fIndex] = VecMod(fRegisters[src1.fIndex],
                                               fRegisters[src2.fIndex]);
            NEXT();
        }
        LABEL(kReturn) {
            if (context.fCallStack.empty()) {
                return true;
            }
            // Restore the caller's function, code pointer, and stack region.
            StackFrame frame = context.fCallStack.top();
            f = frame.fFunction;
            code = f->fCode.data();
            ip = frame.fIP;
            context.fStack += frame.fStackSlotCount;
            context.fCallStack.pop();
            NEXT();
        }
        LABEL(kReturnValue) {
            ByteCode::Register returnValue = read<ByteCode::Register>(&ip);
            if (context.fCallStack.empty()) {
                if (outResult) {
                    *outResult = &fRegisters[returnValue.fIndex];
                }
                return true;
            }
            StackFrame frame = context.fCallStack.top();
            ip = frame.fIP;
            context.fStack += frame.fStackSlotCount;
            // Copy the return slots before reassigning 'f' -- fReturnSlotCount belongs to the
            // callee that is returning.
            memcpy(frame.fReturnValue, &fRegisters[returnValue.fIndex],
                   sizeof(Vector) * f->fReturnSlotCount);
            f = frame.fFunction;
            code = f->fCode.data();
            context.fCallStack.pop();
            NEXT();
        }
        LABEL(kScalarToMatrix) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            uint8_t columns = read<uint8_t>(&ip);
            uint8_t rows = read<uint8_t>(&ip);
            // Build a matrix with the scalar on the diagonal and zero elsewhere.
            int offset = 0;
            for (int i = 0; i < columns; ++i) {
                for (int j = 0; j < rows; ++j) {
                    if (i == j) {
                        fRegisters[target.fIndex + offset] = fRegisters[src.fIndex];
                    } else {
                        fRegisters[target.fIndex + offset].fFloat = 0;
                    }
                    ++offset;
                }
            }
            NEXT();
        }
        LABEL(kSelect) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register test = read<ByteCode::Register>(&ip);
            ByteCode::Register src1 = read<ByteCode::Register>(&ip);
            ByteCode::Register src2 = read<ByteCode::Register>(&ip);
            fRegisters[target.fIndex] = skvx::if_then_else(fRegisters[test.fIndex].fInt,
                                                           fRegisters[src1.fIndex].fFloat,
                                                           fRegisters[src2.fIndex].fFloat);
            NEXT();
        }
        LABEL(kShiftLeft) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            uint8_t count = read<uint8_t>(&ip);
            fRegisters[target.fIndex].fInt = fRegisters[src.fIndex].fInt << count;
            NEXT();
        }
        LABEL(kShiftRightS) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            int8_t count = read<int8_t>(&ip);
            // Arithmetic (sign-extending) shift on the signed lanes.
            fRegisters[target.fIndex].fInt = fRegisters[src.fIndex].fInt >> count;
            NEXT();
        }
        LABEL(kShiftRightU) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            uint8_t count = read<uint8_t>(&ip);
            // Logical (zero-filling) shift on the unsigned lanes.
            fRegisters[target.fIndex].fUInt = fRegisters[src.fIndex].fUInt >> count;
            NEXT();
        }
        LABEL(kSignedToFloat) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            fRegisters[target.fIndex] = Vector(skvx::cast<float>(
                                                                 fRegisters[src.fIndex].fInt));
            NEXT();
        }
        VECTOR_UNARY_FN(kSin, sinf)
        LABEL(kSqrt) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            fRegisters[target.fIndex].fFloat = skvx::sqrt(fRegisters[src.fIndex].fFloat);
            NEXT();
        }
        LABEL(kStore) {
            // Indirect store: each active lane scatters to its own per-lane address.
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            VectorI m = mask();
            for (int i = 0; i < width; ++i) {
                if (m[i]) {
                    fMemory[fRegisters[target.fIndex].fInt[i]].fInt[i] =
                                                              fRegisters[src.fIndex].fInt[i];
                }
            }
            NEXT();
        }
        LABEL(kStoreDirect) {
            ByteCode::Pointer target = read<ByteCode::Pointer>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            // Blend: only active lanes are overwritten.
            fMemory[target.fAddress] = skvx::if_then_else(mask(),
                                                          fRegisters[src.fIndex].fFloat,
                                                          fMemory[target.fAddress].fFloat);
            NEXT();
        }
        LABEL(kStoreParameter) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            Vector* base = parameterBase();
            VectorI m = mask();
            for (int i = 0; i < width; ++i) {
                if (m[i]) {
                    base[fRegisters[target.fIndex].fInt[i]].fInt[i] =
                                                              fRegisters[src.fIndex].fInt[i];
                }
            }
            NEXT();
        }
        LABEL(kStoreParameterDirect) {
            ByteCode::Pointer target = read<ByteCode::Pointer>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            Vector* base = parameterBase();
            base[target.fAddress] = skvx::if_then_else(mask(),
                                                       fRegisters[src.fIndex].fFloat,
                                                       base[target.fAddress].fFloat);
            NEXT();
        }
        LABEL(kStoreStack) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            VectorI m = mask();
            for (int i = 0; i < width; ++i) {
                if (m[i]) {
                    context.fStack[fRegisters[target.fIndex].fInt[i]].fInt[i] =
                                                              fRegisters[src.fIndex].fInt[i];
                }
            }
            NEXT();
        }
        LABEL(kStoreStackDirect) {
            ByteCode::Pointer target = read<ByteCode::Pointer>(&ip);
            CHECK_STACK_BOUNDS(target.fAddress);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            context.fStack[target.fAddress] = skvx::if_then_else(
                                                      mask(),
                                                      fRegisters[src.fIndex].fFloat,
                                                      context.fStack[target.fAddress].fFloat);
            NEXT();
        }
        VECTOR_UNARY_FN(kTan, tanf)
        LABEL(kUnsignedToFloat) {
            ByteCode::Register target = read<ByteCode::Register>(&ip);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            fRegisters[target.fIndex] = Vector(skvx::cast<float>(
                                                                fRegisters[src.fIndex].fUInt));
            NEXT();
        }
        LABEL(kWriteExternal) {
            uint8_t index = read<uint8_t>(&ip);
            uint8_t count = read<uint8_t>(&ip);
            SkASSERT(count <= 4);
            SkASSERT(fCode->fExternalValues.size() > index);
            ByteCode::Register src = read<ByteCode::Register>(&ip);
            // External writes are scalar: marshal each active lane through a temp buffer.
            float tmp[4];
            VectorI m = mask();
            for (int i = 0; i < width; ++i) {
                if (m[i]) {
                    for (int j = 0; j < count; ++j) {
                        tmp[j] = fRegisters[src.fIndex + j].fFloat[i];
                    }
fCode->fExternalValues[index]->write(baseIndex + i, tmp); + } + } + NEXT(); + } +#ifndef SKSL_THREADED_CODE + } + } +#endif + } + + const std::unique_ptr fCode; + + void* fBackingStore; + + Vector* fRegisters; + + Vector* fMemory; + + friend class ByteCode; + + friend class ByteCodeGenerator; +}; + +#undef BINARY_OP +#undef CHECK_STACK_BOUNDS + +} // namespace + +#endif diff --git a/src/sksl/ir/SkSLFunctionDeclaration.h b/src/sksl/ir/SkSLFunctionDeclaration.h index 11b04a5d89..f7ce904b36 100644 --- a/src/sksl/ir/SkSLFunctionDeclaration.h +++ b/src/sksl/ir/SkSLFunctionDeclaration.h @@ -36,7 +36,7 @@ struct FunctionDeclaration : public Symbol { for (auto p : fParameters) { result += separator; separator = ", "; - result += p->fName; + result += p->fType.displayName(); } result += ")"; return result; diff --git a/src/sksl/ir/SkSLSymbolTable.cpp b/src/sksl/ir/SkSLSymbolTable.cpp index ed2cb4d565..bbf001d110 100644 --- a/src/sksl/ir/SkSLSymbolTable.cpp +++ b/src/sksl/ir/SkSLSymbolTable.cpp @@ -114,9 +114,7 @@ void SymbolTable::markAllFunctionsBuiltin() { break; case Symbol::kUnresolvedFunction_Kind: for (auto& f : ((UnresolvedFunction&) *pair.second).fFunctions) { - if (!((FunctionDeclaration*)f)->fDefined) { - ((FunctionDeclaration*)f)->fBuiltin = true; - } + ((FunctionDeclaration*)f)->fBuiltin = true; } break; default: diff --git a/src/sksl/sksl_interp.inc b/src/sksl/sksl_interp.inc index f43f05f8bf..e576f9fc3e 100644 --- a/src/sksl/sksl_interp.inc +++ b/src/sksl/sksl_interp.inc @@ -1,5 +1,7 @@ STRINGIFY( +sk_has_side_effects void print(float f); + $genType cos($genType y); $genHType cos($genHType y); float dot($genType x, $genType y); diff --git a/tests/SkSLInterpreterTest.cpp b/tests/SkSLInterpreterTest.cpp index 8251269138..9b2ab2c07e 100644 --- a/tests/SkSLInterpreterTest.cpp +++ b/tests/SkSLInterpreterTest.cpp @@ -9,19 +9,11 @@ #include "src/sksl/SkSLByteCode.h" #include "src/sksl/SkSLCompiler.h" #include "src/sksl/SkSLExternalValue.h" +#include 
"src/sksl/SkSLInterpreter.h" #include "src/utils/SkJSON.h" #include "tests/Test.h" -static bool nearly_equal(const float a[], const float b[], int count) { - for (int i = 0; i < count; ++i) { - if (!SkScalarNearlyEqual(a[i], b[i])) { - return false; - } - } - return true; -} - void test(skiatest::Reporter* r, const char* src, float* in, float* expected, bool exactCompare = true) { SkSL::Compiler compiler; @@ -39,30 +31,11 @@ void test(skiatest::Reporter* r, const char* src, float* in, float* expected, return; } const SkSL::ByteCodeFunction* main = byteCode->getFunction("main"); - int returnCount = main->getReturnCount(); - std::unique_ptr out = std::unique_ptr(new float[returnCount]); - SkAssertResult(byteCode->run(main, in, main->getParameterCount(), out.get(), returnCount, - nullptr, 0)); - bool valid = exactCompare ? !memcmp(out.get(), expected, sizeof(float) * returnCount) - : nearly_equal(out.get(), expected, returnCount); - if (!valid) { - printf("for program: %s\n", src); - printf(" expected ("); - const char* separator = ""; - for (int i = 0; i < returnCount; ++i) { - printf("%s%f", separator, expected[i]); - separator = ", "; - } - printf("), but received ("); - separator = ""; - for (int i = 0; i < returnCount; ++i) { - printf("%s%f", separator, out.get()[i]); - separator = ", "; - } - printf(")\n"); - main->disassemble(); - } - REPORTER_ASSERT(r, valid); + SkSL::Interpreter<1> interpreter(std::move(byteCode)); + SkSL::ByteCode::Vector<1>* result; + bool success = interpreter.run(main, (SkSL::ByteCode::Vector<1>*) in, &result); + REPORTER_ASSERT(r, success); + REPORTER_ASSERT(r, result->fFloat[0] == expected[0]); } else { printf("%s\n%s", src, compiler.errorText().c_str()); } @@ -83,7 +56,8 @@ void vec_test(skiatest::Reporter* r, const char* src) { return; } - const SkSL::ByteCodeFunction* main = byteCode->getFunction("main"); + const SkSL::ByteCodeFunction* main1 = byteCode->getFunction("main"); + SkSL::Interpreter<1> interpreter1(std::move(byteCode)); 
// Test on four different vectors (with varying orderings to get divergent control flow) const float input[16] = { 1, 2, 3, 4, @@ -97,9 +71,16 @@ void vec_test(skiatest::Reporter* r, const char* src) { // First run in scalar mode to determine the expected output for (int i = 0; i < 4; ++i) { - SkAssertResult(byteCode->run(main, out_s + i * 4, 4, nullptr, 0, nullptr, 0)); + SkAssertResult(interpreter1.run(main1, (SkSL::ByteCode::Vector<1>*) (out_s + i * 4), + nullptr)); } + byteCode = compiler.toByteCode(*program); + SkASSERT(compiler.errorCount() == 0); + + const SkSL::ByteCodeFunction* main4 = byteCode->getFunction("main"); + SkSL::Interpreter<4> interpreter4(std::move(byteCode)); + // Need to transpose input vectors for striped execution auto transpose = [](float* v) { for (int r = 0; r < 4; ++r) @@ -112,7 +93,7 @@ void vec_test(skiatest::Reporter* r, const char* src) { float* args[] = { out_v, out_v + 4, out_v + 8, out_v + 12 }; // Now run in parallel and compare results - SkAssertResult(byteCode->runStriped(main, 4, args, 4, nullptr, 0, nullptr, 0)); + SkAssertResult(interpreter4.runStriped(main4, 4, (float**) args)); // Transpose striped outputs back transpose(out_v); @@ -125,7 +106,7 @@ void vec_test(skiatest::Reporter* r, const char* src) { out_v[4*i + 0], out_v[4*i + 1], out_v[4*i + 2], out_v[4*i + 3], out_s[4*i + 0], out_s[4*i + 1], out_s[4*i + 2], out_s[4*i + 3]); } - main->disassemble(); + main4->disassemble(); REPORT_FAILURE(r, "VecInterpreter mismatch", SkString()); } } @@ -147,20 +128,26 @@ void test(skiatest::Reporter* r, const char* src, float inR, float inG, float in return; } const SkSL::ByteCodeFunction* main = byteCode->getFunction("main"); - float inoutColor[4] = { inR, inG, inB, inA }; - SkAssertResult(byteCode->run(main, inoutColor, 4, nullptr, 0, nullptr, 0)); - if (inoutColor[0] != expectedR || inoutColor[1] != expectedG || - inoutColor[2] != expectedB || inoutColor[3] != expectedA) { + SkSL::ByteCode::Vector<1> inoutColor[4]; + 
inoutColor[0].fFloat[0] = inR; + inoutColor[1].fFloat[0] = inG; + inoutColor[2].fFloat[0] = inB; + inoutColor[3].fFloat[0] = inA; + SkSL::Interpreter<1> interpreter(std::move(byteCode)); + bool success = interpreter.run(main, inoutColor, nullptr); + REPORTER_ASSERT(r, success); + if (inoutColor[0].fFloat[0] != expectedR || inoutColor[1].fFloat[0] != expectedG || + inoutColor[2].fFloat[0] != expectedB || inoutColor[3].fFloat[0] != expectedA) { printf("for program: %s\n", src); printf(" expected (%f, %f, %f, %f), but received (%f, %f, %f, %f)\n", expectedR, - expectedG, expectedB, expectedA, inoutColor[0], inoutColor[1], inoutColor[2], - inoutColor[3]); + expectedG, expectedB, expectedA, inoutColor[0].fFloat[0], + inoutColor[1].fFloat[0], inoutColor[2].fFloat[0], inoutColor[3].fFloat[0]); main->disassemble(); } - REPORTER_ASSERT(r, inoutColor[0] == expectedR); - REPORTER_ASSERT(r, inoutColor[1] == expectedG); - REPORTER_ASSERT(r, inoutColor[2] == expectedB); - REPORTER_ASSERT(r, inoutColor[3] == expectedA); + REPORTER_ASSERT(r, inoutColor[0].fFloat[0] == expectedR); + REPORTER_ASSERT(r, inoutColor[1].fFloat[0] == expectedG); + REPORTER_ASSERT(r, inoutColor[2].fFloat[0] == expectedB); + REPORTER_ASSERT(r, inoutColor[3].fFloat[0] == expectedA); } else { printf("%s\n%s", src, compiler.errorText().c_str()); } @@ -177,6 +164,10 @@ DEF_TEST(SkSLInterpreterAdd, r) { 0.5, 1, 1.5, 2); test(r, "void main(inout half4 color) { color.r = int(color.r) + int(color.g); }", 1, 3, 0, 0, 4, 3, 0, 0); + test(r, "void main(inout half4 color) { color.rg = color.r + color.gb; }", 1, 2, 3, 4, + 3, 4, 3, 4); + test(r, "void main(inout half4 color) { color.rg = color.rg + color.b; }", 1, 2, 3, 4, + 4, 5, 3, 4); } DEF_TEST(SkSLInterpreterSubtract, r) { @@ -189,6 +180,10 @@ DEF_TEST(SkSLInterpreterSubtract, r) { test(r, "void main(inout half4 color) { color = -color; }", 4, 3, 2, 1, -4, -3, -2, -1); test(r, "void main(inout half4 color) { color.r = int(color.r) - int(color.g); }", 3, 1, 0, 0, 
2, 1, 0, 0); + test(r, "void main(inout half4 color) { color.rg = color.r - color.gb; }", 1, 2, 3, 4, + -1, -2, 3, 4); + test(r, "void main(inout half4 color) { color.rg = color.rg - color.b; }", 1, 2, 3, 4, + -2, -1, 3, 4); } DEF_TEST(SkSLInterpreterMultiply, r) { @@ -200,6 +195,10 @@ DEF_TEST(SkSLInterpreterMultiply, r) { 16, 9, 4, 1); test(r, "void main(inout half4 color) { color.r = int(color.r) * int(color.g); }", 3, -2, 0, 0, -6, -2, 0, 0); + test(r, "void main(inout half4 color) { color.rg = color.r * color.gb; }", 5, 2, 3, 4, + 10, 15, 3, 4); + test(r, "void main(inout half4 color) { color.rg = color.rg * color.b; }", 1, 2, 3, 4, + 3, 6, 3, 4); } DEF_TEST(SkSLInterpreterDivide, r) { @@ -211,6 +210,10 @@ DEF_TEST(SkSLInterpreterDivide, r) { 1, 1, 1, 1); test(r, "void main(inout half4 color) { color.r = int(color.r) / int(color.g); }", 8, -2, 0, 0, -4, -2, 0, 0); + test(r, "void main(inout half4 color) { color.rg = color.r / color.gb; }", 12, 2, 3, 4, + 6, 4, 3, 4); + test(r, "void main(inout half4 color) { color.rg = color.rg / color.b; }", 6, 3, 3, 4, + 2, 1, 3, 4); } DEF_TEST(SkSLInterpreterRemainder, r) { @@ -222,6 +225,14 @@ DEF_TEST(SkSLInterpreterRemainder, r) { 2, 3, 0, 0); test(r, "void main(inout half4 color) { color.rg = half2(int2(int(color.r), int(color.g)) % " "int(color.b)); }", 8, 10, 6, 0, 2, 4, 6, 0); + test(r, "void main(inout half4 color) { color.rg = color.r + color.gb; }", 1, 2, 3, 4, + 3, 4, 3, 4); + test(r, "void main(inout half4 color) { color.rg = color.rg + color.b; }", 1, 2, 3, 4, + 4, 5, 3, 4); + test(r, "void main(inout half4 color) { color.rg = color.r % color.gb; }", 10, 2, 3, 4, + 0, 1, 3, 4); + test(r, "void main(inout half4 color) { color.rg = color.rg % color.b; }", 6, 3, 4, 4, + 2, 3, 4, 4); } DEF_TEST(SkSLInterpreterAnd, r) { @@ -278,7 +289,7 @@ DEF_TEST(SkSLInterpreterBitwise, r) { unsigned out; out = 0x00000088; - test(r, "int main(int x) { return x << 3; }", (float*)&in, (float*)&out); + test(r, "int main(int x) { 
return x << 3; }", (float*)&in, (float*)&out); out = 0xF0000002; test(r, "int main(int x) { return x >> 3; }", (float*)&in, (float*)&out); @@ -473,12 +484,16 @@ DEF_TEST(SkSLInterpreterIf, r) { DEF_TEST(SkSLInterpreterIfVector, r) { test(r, "void main(inout half4 color) { if (color.rg == color.ba) color.a = 1; }", 1, 2, 1, 2, 1, 2, 1, 1); + test(r, "void main(inout half4 color) { if (color.rg == color.ba) color.a = 1; }", + 1, 2, 1, 3, 1, 2, 1, 3); test(r, "void main(inout half4 color) { if (color.rg == color.ba) color.a = 1; }", 1, 2, 3, 2, 1, 2, 3, 2); test(r, "void main(inout half4 color) { if (color.rg != color.ba) color.a = 1; }", 1, 2, 1, 2, 1, 2, 1, 2); test(r, "void main(inout half4 color) { if (color.rg != color.ba) color.a = 1; }", 1, 2, 3, 2, 1, 2, 3, 1); + test(r, "void main(inout half4 color) { if (color.rg != color.ba) color.a = 1; }", + 1, 2, 1, 3, 1, 2, 1, 1); } DEF_TEST(SkSLInterpreterWhile, r) { @@ -641,51 +656,67 @@ DEF_TEST(SkSLInterpreterCompound, r) { SkIRect gRects[4] = { { 1,2,3,4 }, { 5,6,7,8 }, { 9,10,11,12 }, { 13,14,15,16 } }; const float* fRects = (const float*)gRects; + SkSL::Interpreter<1> interpreter(std::move(byteCode)); + auto geti = [](SkSL::Interpreter<1>::Vector* v) { return v->fInt[0]; }; + auto getf = [](SkSL::Interpreter<1>::Vector* v) { return v->fFloat[0]; }; + { SkIRect in = SkIRect::MakeXYWH(10, 10, 20, 30); - int out = 0; - SkAssertResult(byteCode->run(rect_height, (float*)&in, 4, (float*)&out, 1, fRects, 16)); - REPORTER_ASSERT(r, out == 30); + SkSL::Interpreter<1>::Vector* out; + bool success = interpreter.run(rect_height, (SkSL::Interpreter<1>::Vector*) &in, &out); + REPORTER_ASSERT(r, success); + REPORTER_ASSERT(r, geti(out) == 30); } { int in[2] = { 15, 25 }; - RectAndColor out; - SkAssertResult(byteCode->run(make_blue_rect, (float*)in, 2, (float*)&out, 8, fRects, 16)); - REPORTER_ASSERT(r, out.fRect.width() == 15); - REPORTER_ASSERT(r, out.fRect.height() == 25); + SkSL::Interpreter<1>::Vector* out; + bool success = 
interpreter.run(make_blue_rect, (SkSL::Interpreter<1>::Vector*) in, &out); + REPORTER_ASSERT(r, success); + RectAndColor result{ { geti(out), geti(out + 1), geti(out + 2), geti(out + 3) }, + { getf(out + 4), getf(out + 5), getf(out + 6), getf(out + 7) } }; + REPORTER_ASSERT(r, result.fRect.width() == 15); + REPORTER_ASSERT(r, result.fRect.height() == 25); SkColor4f blue = { 0.0f, 1.0f, 0.0f, 1.0f }; - REPORTER_ASSERT(r, out.fColor == blue); + REPORTER_ASSERT(r, result.fColor == blue); } { int in[15] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; - int out = 0; - SkAssertResult(byteCode->run(median, (float*)in, 15, (float*)&out, 1, fRects, 16)); - REPORTER_ASSERT(r, out == 8); + SkSL::Interpreter<1>::Vector* out; + bool success = interpreter.run(median, (SkSL::Interpreter<1>::Vector*) in, &out); + REPORTER_ASSERT(r, success); + REPORTER_ASSERT(r, geti(out) == 8); } { float in[8] = { 1, 2, 3, 4, 5, 6, 7, 8 }; - float out[8] = { 0 }; - SkAssertResult(byteCode->run(sums, in, 8, out, 8, fRects, 16)); + SkSL::Interpreter<1>::Vector* out; + bool success = interpreter.run(sums, (SkSL::Interpreter<1>::Vector*) in, &out); + REPORTER_ASSERT(r, success); for (int i = 0; i < 8; ++i) { - REPORTER_ASSERT(r, out[i] == static_cast((i + 1) * (i + 2) / 2)); + REPORTER_ASSERT(r, getf(out + i) == static_cast((i + 1) * (i + 2) / 2)); } } { int in = 2; - SkIRect out = SkIRect::MakeEmpty(); - SkAssertResult(byteCode->run(get_rect, (float*)&in, 1, (float*)&out, 4, fRects, 16)); - REPORTER_ASSERT(r, out == gRects[2]); + interpreter.setUniforms(fRects); + SkSL::Interpreter<1>::Vector* out; + bool success = interpreter.run(get_rect, (SkSL::Interpreter<1>::Vector*) &in, &out); + REPORTER_ASSERT(r, success); + REPORTER_ASSERT(r, geti(out) == gRects[2].fLeft); + REPORTER_ASSERT(r, geti(out + 1) == gRects[2].fTop); + REPORTER_ASSERT(r, geti(out + 2) == gRects[2].fRight); + REPORTER_ASSERT(r, geti(out + 3) == gRects[2].fBottom); } { ManyRects in; memset(&in, 0, sizeof(in)); in.fNumRects 
= 2; - SkAssertResult(byteCode->run(fill_rects, (float*)&in, 33, nullptr, 0, fRects, 16)); + bool success = interpreter.run(fill_rects, (SkSL::Interpreter<1>::Vector*) &in, nullptr); + REPORTER_ASSERT(r, success); ManyRects expected; memset(&expected, 0, sizeof(expected)); expected.fNumRects = 2; @@ -718,9 +749,11 @@ static void expect_run_failure(skiatest::Reporter* r, const char* src, float* in auto byteCode = compiler.toByteCode(*program); REPORTER_ASSERT(r, byteCode); - auto fun = byteCode->getFunction("main"); - bool result = byteCode->run(fun, in, fun->getParameterCount(), nullptr, 0, nullptr, 0); - REPORTER_ASSERT(r, !result); + auto main = byteCode->getFunction("main"); + SkSL::Interpreter<1> interpreter(std::move(byteCode)); + SkSL::ByteCode::Vector<1>* result; + bool success = interpreter.run(main, (SkSL::ByteCode::Vector<1>*) in, &result); + REPORTER_ASSERT(r, !success); } DEF_TEST(SkSLInterpreterRestrictFunctionCalls, r) { @@ -786,16 +819,21 @@ DEF_TEST(SkSLInterpreterFunctions, r) { REPORTER_ASSERT(r, dot3); REPORTER_ASSERT(r, dot2); - float out = 0.0f; + SkSL::Interpreter<1> interpreter(std::move(byteCode)); float in = 3.0f; - SkAssertResult(byteCode->run(main, &in, 1, &out, 1, nullptr, 0)); - REPORTER_ASSERT(r, out = 6.0f); - SkAssertResult(byteCode->run(dot3, &in, 1, &out, 1, nullptr, 0)); - REPORTER_ASSERT(r, out = 9.0f); + SkSL::Interpreter<1>::Vector* out; + bool success = interpreter.run(main, (SkSL::Interpreter<1>::Vector*) &in, &out); + REPORTER_ASSERT(r, success); + REPORTER_ASSERT(r, out->fFloat[0] = 6.0f); - SkAssertResult(byteCode->run(dot2, &in, 1, &out, 1, nullptr, 0)); - REPORTER_ASSERT(r, out = -1.0f); + success = interpreter.run(dot3, (SkSL::Interpreter<1>::Vector*) &in, &out); + REPORTER_ASSERT(r, success); + REPORTER_ASSERT(r, out->fFloat[0] = 9.0f); + + success = interpreter.run(dot2, (SkSL::Interpreter<1>::Vector*) &in, &out); + REPORTER_ASSERT(r, success); + REPORTER_ASSERT(r, out->fFloat[0] = -1.0f); } 
DEF_TEST(SkSLInterpreterOutParams, r) { @@ -804,15 +842,18 @@ DEF_TEST(SkSLInterpreterOutParams, r) { "void main(inout half4 color) { oneAlpha(color); }", 0, 0, 0, 0, 0, 0, 0, 1); test(r, - "half2 tricky(half x, half y, inout half2 color, half z) {" + "half2 tricky(half x, half y, inout half2 color, half z, out half w) {" " color.xy = color.yx;" + " w = 47;" " return half2(x + y, z);" "}" "void main(inout half4 color) {" - " half2 t = tricky(1, 2, color.rb, 5);" + " half w;" + " half2 t = tricky(1, 2, color.rb, 5, w);" + " color.r += w;" " color.ga = t;" "}", - 1, 2, 3, 4, 3, 3, 1, 5); + 1, 2, 3, 4, 50, 3, 1, 5); } DEF_TEST(SkSLInterpreterMathFunctions, r) { @@ -1029,9 +1070,11 @@ DEF_TEST(SkSLInterpreterExternalValues, r) { return; } const SkSL::ByteCodeFunction* main = byteCode->getFunction("main"); - float out; - SkAssertResult(byteCode->run(main, nullptr, 0, &out, 1, nullptr, 0)); - REPORTER_ASSERT(r, out == 66.0); + SkSL::Interpreter<1> interpreter(std::move(byteCode)); + SkSL::ByteCode::Vector<1>* result; + bool success = interpreter.run(main, nullptr, &result); + REPORTER_ASSERT(r, success); + REPORTER_ASSERT(r, result->fFloat[0] == 66.0); REPORTER_ASSERT(r, outValue == 152); } else { printf("%s\n%s", src, compiler.errorText().c_str()); @@ -1062,7 +1105,9 @@ DEF_TEST(SkSLInterpreterExternalValuesVector, r) { return; } const SkSL::ByteCodeFunction* main = byteCode->getFunction("main"); - SkAssertResult(byteCode->run(main, nullptr, 0, nullptr, 0, nullptr, 0)); + SkSL::Interpreter<1> interpreter(std::move(byteCode)); + bool success = interpreter.run(main, nullptr, nullptr); + REPORTER_ASSERT(r, success); REPORTER_ASSERT(r, value[0] == 2); REPORTER_ASSERT(r, value[1] == 4); REPORTER_ASSERT(r, value[2] == 6); @@ -1127,9 +1172,11 @@ DEF_TEST(SkSLInterpreterExternalValuesCall, r) { return; } const SkSL::ByteCodeFunction* main = byteCode->getFunction("main"); - float out; - SkAssertResult(byteCode->run(main, nullptr, 0, &out, 1, nullptr, 0)); - REPORTER_ASSERT(r, 
out == 5.0); + SkSL::Interpreter<1> interpreter(std::move(byteCode)); + SkSL::ByteCode::Vector<1>* result; + bool success = interpreter.run(main, nullptr, &result); + REPORTER_ASSERT(r, success); + REPORTER_ASSERT(r, result->fFloat[0] == 5.0); } else { printf("%s\n%s", src, compiler.errorText().c_str()); } @@ -1142,32 +1189,23 @@ public: : INHERITED(name, *compiler.context().fFloat4_Type) , fCompiler(compiler) , fFunction(function) {} - bool canCall() const override { return true; } - int callParameterCount() const override { return 1; } - void getCallParameterTypes(const SkSL::Type** outTypes) const override { outTypes[0] = fCompiler.context().fFloat4_Type.get(); } - void call(int /*unusedIndex*/, float* arguments, float* outReturn) override { fFunction(arguments, outReturn); } - private: SkSL::Compiler& fCompiler; - void (*fFunction)(float[4], float[4]); - typedef SkSL::ExternalValue INHERITED; }; - - DEF_TEST(SkSLInterpreterExternalValuesVectorCall, r) { SkSL::Compiler compiler; SkSL::Program::Settings settings; @@ -1195,12 +1233,14 @@ DEF_TEST(SkSLInterpreterExternalValuesVectorCall, r) { return; } const SkSL::ByteCodeFunction* main = byteCode->getFunction("main"); - float out[4]; - SkAssertResult(byteCode->run(main, nullptr, 0, out, 4, nullptr, 0)); - REPORTER_ASSERT(r, out[0] == 1.0); - REPORTER_ASSERT(r, out[1] == 2.0); - REPORTER_ASSERT(r, out[2] == 3.0); - REPORTER_ASSERT(r, out[3] == 4.0); + SkSL::Interpreter<1> interpreter(std::move(byteCode)); + SkSL::ByteCode::Vector<1>* result; + bool success = interpreter.run(main, nullptr, &result); + REPORTER_ASSERT(r, success); + REPORTER_ASSERT(r, result[0].fFloat[0] == 1.0); + REPORTER_ASSERT(r, result[1].fFloat[0] == 2.0); + REPORTER_ASSERT(r, result[2].fFloat[0] == 3.0); + REPORTER_ASSERT(r, result[3].fFloat[0] == 4.0); } else { printf("%s\n%s", src, compiler.errorText().c_str()); }