move skvm debug tools back to core
I happened to have this on when profiling skottie_tool and got curious why I was seeing the interpreter run and not JIT code.

Mostly this moves the code in bulk out of SkVMTest.cpp to SkVM.cpp so that code in SkVM.cpp can call dump() on itself.

Also this CL has the skvm::Program hang onto the original value-based builder program (in addition to its own interpreter program and JIT program if we can). This is entirely so that when JIT bails out I can have it dump out both the builder and interpreter programs for more debugging aid.

I'm still going to need more debug tools somewhere to figure out what the program that needs 17 registers is, and what to do about it.

Finally, remove skvmtool. It's annoying to maintain its build rules, and I don't use it much if ever anymore.

Change-Id: I995d15d04bda79ddfc4d68bda8aaa3b5b9261f08
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/242520
Auto-Submit: Mike Klein <mtklein@google.com>
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
This commit is contained in:
parent
356bb86b55
commit
6b4143e11f
20
BUILD.gn
20
BUILD.gn
@ -2427,26 +2427,6 @@ if (skia_enable_tools) {
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
if (skia_enable_skvm_jit) {
|
||||
test_app("skvmtool") {
|
||||
defines = [
|
||||
"SKVM_JIT",
|
||||
"SKVM_PERF_DUMPS",
|
||||
]
|
||||
include_dirs = [ "." ]
|
||||
sources = [
|
||||
"src/core/SkSpinlock.cpp",
|
||||
"src/core/SkThreadID.cpp",
|
||||
"src/core/SkVM.cpp",
|
||||
"tools/SkVMBuilders.cpp",
|
||||
"tools/SkVMTool.cpp",
|
||||
]
|
||||
if (target_cpu == "x64") {
|
||||
sources += [ "src/core/SkCpu.cpp" ]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (is_ios && skia_use_metal) {
|
||||
|
@ -5,6 +5,7 @@
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#include "include/core/SkStream.h"
|
||||
#include "include/private/SkSpinlock.h"
|
||||
#include "include/private/SkTFitsIn.h"
|
||||
#include "include/private/SkThreadID.h"
|
||||
@ -20,8 +21,272 @@
|
||||
#include <time.h>
|
||||
#endif
|
||||
|
||||
|
||||
namespace skvm {
|
||||
|
||||
// Debugging tools, mostly for printing various data structures out to a stream.
|
||||
|
||||
namespace {
|
||||
class SkDebugfStream final : public SkWStream {
|
||||
size_t fBytesWritten = 0;
|
||||
|
||||
bool write(const void* buffer, size_t size) override {
|
||||
SkDebugf("%.*s", size, buffer);
|
||||
fBytesWritten += size;
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t bytesWritten() const override {
|
||||
return fBytesWritten;
|
||||
}
|
||||
};
|
||||
|
||||
struct V { Val id; };
|
||||
struct R { Reg id; };
|
||||
struct Shift { int bits; };
|
||||
struct Splat { int bits; };
|
||||
struct Hex { int bits; };
|
||||
|
||||
static void write(SkWStream* o, const char* s) {
|
||||
o->writeText(s);
|
||||
}
|
||||
|
||||
static void write(SkWStream* o, Arg a) {
|
||||
write(o, "arg(");
|
||||
o->writeDecAsText(a.ix);
|
||||
write(o, ")");
|
||||
}
|
||||
static void write(SkWStream* o, V v) {
|
||||
write(o, "v");
|
||||
o->writeDecAsText(v.id);
|
||||
}
|
||||
static void write(SkWStream* o, R r) {
|
||||
write(o, "r");
|
||||
o->writeDecAsText(r.id);
|
||||
}
|
||||
static void write(SkWStream* o, Shift s) {
|
||||
o->writeDecAsText(s.bits);
|
||||
}
|
||||
static void write(SkWStream* o, Splat s) {
|
||||
float f;
|
||||
memcpy(&f, &s.bits, 4);
|
||||
o->writeHexAsText(s.bits);
|
||||
write(o, " (");
|
||||
o->writeScalarAsText(f);
|
||||
write(o, ")");
|
||||
}
|
||||
static void write(SkWStream* o, Hex h) {
|
||||
o->writeHexAsText(h.bits);
|
||||
}
|
||||
|
||||
template <typename T, typename... Ts>
|
||||
static void write(SkWStream* o, T first, Ts... rest) {
|
||||
write(o, first);
|
||||
write(o, " ");
|
||||
write(o, rest...);
|
||||
}
|
||||
}
|
||||
|
||||
// Writes one text line per Builder instruction to `o`, in program order.
// Each line begins with a marker chosen from the instruction's `death` and `hoist` fields,
// followed by the instruction spelled out in terms of value ids (v0, v1, ...).
static void dump_builder_program(const std::vector<Builder::Instruction>& program,
                                 SkWStream* o) {
    for (Val id = 0; id < (Val)program.size(); id++) {
        const Builder::Instruction& inst = program[id];
        const Val x = inst.x,
                  y = inst.y,
                  z = inst.z;
        const int imm = inst.imm;

        // Marker column: death == 0 takes priority over hoist.
        const char* marker = " ";
        if (inst.death == 0) {
            marker = "☠️ ";
        } else if (inst.hoist) {
            marker = "↑ ";
        }
        write(o, marker);

        // One small printer per operand layout; `name` carries the op spelling.
        auto store   = [&](const char* name) { write(o, name, Arg{imm}, V{x}); };
        auto load    = [&](const char* name) { write(o, V{id}, name, Arg{imm}); };
        auto gather  = [&](const char* name) { write(o, V{id}, name, Arg{imm}, V{x}); };
        auto uniform = [&](const char* name) {
            write(o, V{id}, name, Arg{imm & 0xffff}, Hex{imm>>16});
        };
        auto unary   = [&](const char* name) { write(o, V{id}, name, V{x}); };
        auto binary  = [&](const char* name) { write(o, V{id}, name, V{x}, V{y}); };
        auto ternary = [&](const char* name) { write(o, V{id}, name, V{x}, V{y}, V{z}); };
        auto shift   = [&](const char* name) { write(o, V{id}, name, V{x}, Shift{imm}); };

        switch (inst.op) {
            case Op::store8:  store("store8" ); break;
            case Op::store16: store("store16"); break;
            case Op::store32: store("store32"); break;

            case Op::load8:  load("= load8" ); break;
            case Op::load16: load("= load16"); break;
            case Op::load32: load("= load32"); break;

            case Op::gather8:  gather("= gather8" ); break;
            case Op::gather16: gather("= gather16"); break;
            case Op::gather32: gather("= gather32"); break;

            case Op::uniform8:  uniform("= uniform8" ); break;
            case Op::uniform16: uniform("= uniform16"); break;
            case Op::uniform32: uniform("= uniform32"); break;

            case Op::splat: write(o, V{id}, "= splat", Splat{imm}); break;

            case Op::add_f32: binary ("= add_f32"); break;
            case Op::sub_f32: binary ("= sub_f32"); break;
            case Op::mul_f32: binary ("= mul_f32"); break;
            case Op::div_f32: binary ("= div_f32"); break;
            case Op::mad_f32: ternary("= mad_f32"); break;

            case Op:: eq_f32: binary("= eq_f32");  break;
            case Op::neq_f32: binary("= neq_f32"); break;
            case Op:: lt_f32: binary("= lt_f32");  break;
            case Op::lte_f32: binary("= lte_f32"); break;
            case Op:: gt_f32: binary("= gt_f32");  break;
            case Op::gte_f32: binary("= gte_f32"); break;

            case Op::add_i32: binary("= add_i32"); break;
            case Op::sub_i32: binary("= sub_i32"); break;
            case Op::mul_i32: binary("= mul_i32"); break;

            case Op::shl_i32: shift("= shl_i32"); break;
            case Op::shr_i32: shift("= shr_i32"); break;
            case Op::sra_i32: shift("= sra_i32"); break;

            case Op:: eq_i32: binary("= eq_i32");  break;
            case Op::neq_i32: binary("= neq_i32"); break;
            case Op:: lt_i32: binary("= lt_i32");  break;
            case Op::lte_i32: binary("= lte_i32"); break;
            case Op:: gt_i32: binary("= gt_i32");  break;
            case Op::gte_i32: binary("= gte_i32"); break;

            case Op::add_i16x2: binary("= add_i16x2"); break;
            case Op::sub_i16x2: binary("= sub_i16x2"); break;
            case Op::mul_i16x2: binary("= mul_i16x2"); break;

            case Op::shl_i16x2: shift("= shl_i16x2"); break;
            case Op::shr_i16x2: shift("= shr_i16x2"); break;
            case Op::sra_i16x2: shift("= sra_i16x2"); break;

            case Op:: eq_i16x2: binary("= eq_i16x2");  break;
            case Op::neq_i16x2: binary("= neq_i16x2"); break;
            case Op:: lt_i16x2: binary("= lt_i16x2");  break;
            case Op::lte_i16x2: binary("= lte_i16x2"); break;
            case Op:: gt_i16x2: binary("= gt_i16x2");  break;
            case Op::gte_i16x2: binary("= gte_i16x2"); break;

            case Op::bit_and  : binary ("= bit_and"  ); break;
            case Op::bit_or   : binary ("= bit_or"   ); break;
            case Op::bit_xor  : binary ("= bit_xor"  ); break;
            case Op::bit_clear: binary ("= bit_clear"); break;
            case Op::select   : ternary("= select"   ); break;

            case Op::bytes:   write(o, V{id}, "= bytes", V{x}, Hex{imm}); break;
            case Op::extract: write(o, V{id}, "= extract", V{x}, Shift{imm}, V{y}); break;
            case Op::pack:    write(o, V{id}, "= pack", V{x}, V{y}, Shift{imm}); break;

            case Op::to_f32: unary("= to_f32"); break;
            case Op::to_i32: unary("= to_i32"); break;
        }

        write(o, "\n");
    }
}
|
||||
|
||||
// Prints "<N> values:" followed by one line per instruction of this Builder's program.
void Builder::dump(SkWStream* o) const {
    const std::vector<Instruction>& values = fProgram;

    o->writeDecAsText(values.size());
    o->writeText(" values:\n");
    dump_builder_program(values, o);
}
|
||||
|
||||
// Prints a "<regs> registers, <N> instructions:" header and then one line per interpreter
// instruction, spelled in terms of registers (r0, r1, ...).  A "loop:" label is emitted
// just before the instruction whose index equals fLoop.
void Program::dump(SkWStream* o) const {
    o->writeDecAsText(fRegs);
    o->writeText(" registers, ");
    o->writeDecAsText(fInstructions.size());
    o->writeText(" instructions:\n");

    for (int i = 0; i < (int)fInstructions.size(); i++) {
        if (i == fLoop) {
            write(o, "loop:\n");
        }

        const Program::Instruction& inst = fInstructions[i];
        const Reg d = inst.d,
                  x = inst.x,
                  y = inst.y,
                  z = inst.z;
        const int imm = inst.imm;

        // One small printer per operand layout; `name` carries the op spelling.
        auto store   = [&](const char* name) { write(o, name, Arg{imm}, R{x}); };
        auto load    = [&](const char* name) { write(o, R{d}, name, Arg{imm}); };
        auto gather  = [&](const char* name) { write(o, R{d}, name, Arg{imm}, R{x}); };
        auto uniform = [&](const char* name) {
            write(o, R{d}, name, Arg{imm & 0xffff}, Hex{imm>>16});
        };
        auto unary   = [&](const char* name) { write(o, R{d}, name, R{x}); };
        auto binary  = [&](const char* name) { write(o, R{d}, name, R{x}, R{y}); };
        auto ternary = [&](const char* name) { write(o, R{d}, name, R{x}, R{y}, R{z}); };
        auto shift   = [&](const char* name) { write(o, R{d}, name, R{x}, Shift{imm}); };

        switch (inst.op) {
            case Op::store8:  store("store8" ); break;
            case Op::store16: store("store16"); break;
            case Op::store32: store("store32"); break;

            case Op::load8:  load("= load8" ); break;
            case Op::load16: load("= load16"); break;
            case Op::load32: load("= load32"); break;

            case Op::gather8:  gather("= gather8" ); break;
            case Op::gather16: gather("= gather16"); break;
            case Op::gather32: gather("= gather32"); break;

            case Op::uniform8:  uniform("= uniform8" ); break;
            case Op::uniform16: uniform("= uniform16"); break;
            case Op::uniform32: uniform("= uniform32"); break;

            case Op::splat: write(o, R{d}, "= splat", Splat{imm}); break;

            case Op::add_f32: binary ("= add_f32"); break;
            case Op::sub_f32: binary ("= sub_f32"); break;
            case Op::mul_f32: binary ("= mul_f32"); break;
            case Op::div_f32: binary ("= div_f32"); break;
            case Op::mad_f32: ternary("= mad_f32"); break;

            case Op:: eq_f32: binary("= eq_f32");  break;
            case Op::neq_f32: binary("= neq_f32"); break;
            case Op:: lt_f32: binary("= lt_f32");  break;
            case Op::lte_f32: binary("= lte_f32"); break;
            case Op:: gt_f32: binary("= gt_f32");  break;
            case Op::gte_f32: binary("= gte_f32"); break;

            case Op::add_i32: binary("= add_i32"); break;
            case Op::sub_i32: binary("= sub_i32"); break;
            case Op::mul_i32: binary("= mul_i32"); break;

            case Op::shl_i32: shift("= shl_i32"); break;
            case Op::shr_i32: shift("= shr_i32"); break;
            case Op::sra_i32: shift("= sra_i32"); break;

            case Op:: eq_i32: binary("= eq_i32");  break;
            case Op::neq_i32: binary("= neq_i32"); break;
            case Op:: lt_i32: binary("= lt_i32");  break;
            case Op::lte_i32: binary("= lte_i32"); break;
            case Op:: gt_i32: binary("= gt_i32");  break;
            case Op::gte_i32: binary("= gte_i32"); break;

            case Op::add_i16x2: binary("= add_i16x2"); break;
            case Op::sub_i16x2: binary("= sub_i16x2"); break;
            case Op::mul_i16x2: binary("= mul_i16x2"); break;

            case Op::shl_i16x2: shift("= shl_i16x2"); break;
            case Op::shr_i16x2: shift("= shr_i16x2"); break;
            case Op::sra_i16x2: shift("= sra_i16x2"); break;

            case Op:: eq_i16x2: binary("= eq_i16x2");  break;
            case Op::neq_i16x2: binary("= neq_i16x2"); break;
            case Op:: lt_i16x2: binary("= lt_i16x2");  break;
            case Op::lte_i16x2: binary("= lte_i16x2"); break;
            case Op:: gt_i16x2: binary("= gt_i16x2");  break;
            case Op::gte_i16x2: binary("= gte_i16x2"); break;

            case Op::bit_and  : binary ("= bit_and"  ); break;
            case Op::bit_or   : binary ("= bit_or"   ); break;
            case Op::bit_xor  : binary ("= bit_xor"  ); break;
            case Op::bit_clear: binary ("= bit_clear"); break;
            case Op::select   : ternary("= select"   ); break;

            case Op::bytes:   write(o, R{d}, "= bytes", R{x}, Hex{imm}); break;
            case Op::extract: write(o, R{d}, "= extract", R{x}, Shift{imm}, R{y}); break;
            case Op::pack:    write(o, R{d}, "= pack", R{x}, R{y}, Shift{imm}); break;

            case Op::to_f32: unary("= to_f32"); break;
            case Op::to_i32: unary("= to_i32"); break;
        }
        write(o, "\n");
    }
}
|
||||
|
||||
// Builder -> Program, with liveness and loop hoisting analysis.
|
||||
|
||||
Program Builder::done(const char* debug_name) {
|
||||
// Basic liveness analysis:
|
||||
// an instruction is live until all live instructions that need its input have retired.
|
||||
@ -1084,20 +1349,22 @@ namespace skvm {
|
||||
Program::~Program() { this->dropJIT(); }
|
||||
|
||||
Program::Program(Program&& other) {
|
||||
fInstructions = std::move(other.fInstructions);
|
||||
fRegs = other.fRegs;
|
||||
fLoop = other.fLoop;
|
||||
fStrides = std::move(other.fStrides);
|
||||
fInstructions = std::move(other.fInstructions);
|
||||
fRegs = other.fRegs;
|
||||
fLoop = other.fLoop;
|
||||
fStrides = std::move(other.fStrides);
|
||||
fOriginalProgram = std::move(other.fOriginalProgram);
|
||||
|
||||
std::swap(fJITBuf , other.fJITBuf);
|
||||
std::swap(fJITSize , other.fJITSize);
|
||||
}
|
||||
|
||||
Program& Program::operator=(Program&& other) {
|
||||
fInstructions = std::move(other.fInstructions);
|
||||
fRegs = other.fRegs;
|
||||
fLoop = other.fLoop;
|
||||
fStrides = std::move(other.fStrides);
|
||||
fInstructions = std::move(other.fInstructions);
|
||||
fRegs = other.fRegs;
|
||||
fLoop = other.fLoop;
|
||||
fStrides = std::move(other.fStrides);
|
||||
fOriginalProgram = std::move(other.fOriginalProgram);
|
||||
|
||||
std::swap(fJITBuf , other.fJITBuf);
|
||||
std::swap(fJITSize , other.fJITSize);
|
||||
@ -1108,7 +1375,10 @@ namespace skvm {
|
||||
|
||||
Program::Program(const std::vector<Builder::Instruction>& instructions,
|
||||
const std::vector<int>& strides,
|
||||
const char* debug_name) : fStrides(strides) {
|
||||
const char* debug_name)
|
||||
: fStrides(strides)
|
||||
, fOriginalProgram(instructions)
|
||||
{
|
||||
this->setupInterpreter(instructions);
|
||||
#if defined(SKVM_JIT)
|
||||
this->setupJIT(instructions, debug_name);
|
||||
@ -1255,6 +1525,17 @@ namespace skvm {
|
||||
Assembler* a) const {
|
||||
using A = Assembler;
|
||||
|
||||
auto debug_dump = [&] {
|
||||
#if 0
|
||||
SkDebugfStream stream;
|
||||
this->dump(&stream);
|
||||
dump_builder_program(fOriginalProgram, &stream);
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
};
|
||||
|
||||
#if defined(__x86_64__)
|
||||
if (!SkCpu::Supports(SkCpu::HSW)) {
|
||||
return false;
|
||||
@ -1370,6 +1651,9 @@ namespace skvm {
|
||||
} else {
|
||||
// We needed a tmp register but couldn't find one available. :'(
|
||||
// This will cause emit() to return false, in turn causing jit() to fail.
|
||||
if (debug_dump()) {
|
||||
SkDebugf("\nCould not find a register to hold tmp\n");
|
||||
}
|
||||
ok = false;
|
||||
}
|
||||
}
|
||||
@ -1402,6 +1686,9 @@ namespace skvm {
|
||||
set_dst((Reg)(found-1));
|
||||
} else {
|
||||
// Same deal as with tmp... all the registers are occupied. Time to fail!
|
||||
if (debug_dump()) {
|
||||
SkDebugf("\nCould not find a register to hold value %d\n", id);
|
||||
}
|
||||
ok = false;
|
||||
}
|
||||
}
|
||||
@ -1419,9 +1706,9 @@ namespace skvm {
|
||||
// Now let's actually assemble the instruction!
|
||||
switch (op) {
|
||||
default:
|
||||
#if 0
|
||||
SkDEBUGFAILF("\n%d not yet implemented\n", op);
|
||||
#endif
|
||||
if (debug_dump()) {
|
||||
SkDEBUGFAILF("\n%d not yet implemented\n", op);
|
||||
}
|
||||
return false; // TODO: many new ops
|
||||
|
||||
#if defined(__x86_64__)
|
||||
@ -1877,4 +2164,5 @@ namespace skvm {
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
} // namespace skvm
|
||||
|
@ -13,6 +13,8 @@
|
||||
#include <functional> // std::hash
|
||||
#include <vector> // std::vector
|
||||
|
||||
class SkWStream;
|
||||
|
||||
namespace skvm {
|
||||
|
||||
class Assembler {
|
||||
@ -425,6 +427,8 @@ namespace skvm {
|
||||
I32 extract(I32 x, int bits, I32 y); // (x >> bits) & y
|
||||
I32 pack (I32 x, I32 y, int bits); // x | (y << bits), assuming (x & (y << bits)) == 0
|
||||
|
||||
void dump(SkWStream*) const;
|
||||
|
||||
private:
|
||||
struct InstructionHash {
|
||||
template <typename T>
|
||||
@ -490,6 +494,8 @@ namespace skvm {
|
||||
// If this Program has been JITted, drop it, forcing interpreter fallback.
|
||||
void dropJIT();
|
||||
|
||||
void dump(SkWStream*) const;
|
||||
|
||||
private:
|
||||
void setupInterpreter(const std::vector<Builder::Instruction>&);
|
||||
void setupJIT (const std::vector<Builder::Instruction>&, const char* debug_name);
|
||||
@ -506,6 +512,9 @@ namespace skvm {
|
||||
int fLoop = 0;
|
||||
std::vector<int> fStrides;
|
||||
|
||||
// We only hang onto these to help debugging.
|
||||
std::vector<Builder::Instruction> fOriginalProgram;
|
||||
|
||||
void* fJITBuf = nullptr;
|
||||
size_t fJITSize = 0;
|
||||
};
|
||||
|
@ -22,262 +22,13 @@ const char* fmt_name(Fmt fmt) {
|
||||
return "";
|
||||
}
|
||||
|
||||
namespace {
|
||||
using namespace skvm;
|
||||
|
||||
struct V { Val id; };
|
||||
struct R { Reg id; };
|
||||
struct Shift { int bits; };
|
||||
struct Splat { int bits; };
|
||||
struct Hex { int bits; };
|
||||
|
||||
static void write(SkWStream* o, const char* s) {
|
||||
o->writeText(s);
|
||||
}
|
||||
|
||||
static void write(SkWStream* o, Arg a) {
|
||||
write(o, "arg(");
|
||||
o->writeDecAsText(a.ix);
|
||||
write(o, ")");
|
||||
}
|
||||
static void write(SkWStream* o, V v) {
|
||||
write(o, "v");
|
||||
o->writeDecAsText(v.id);
|
||||
}
|
||||
static void write(SkWStream* o, R r) {
|
||||
write(o, "r");
|
||||
o->writeDecAsText(r.id);
|
||||
}
|
||||
static void write(SkWStream* o, Shift s) {
|
||||
o->writeDecAsText(s.bits);
|
||||
}
|
||||
static void write(SkWStream* o, Splat s) {
|
||||
float f;
|
||||
memcpy(&f, &s.bits, 4);
|
||||
o->writeHexAsText(s.bits);
|
||||
write(o, " (");
|
||||
o->writeScalarAsText(f);
|
||||
write(o, ")");
|
||||
}
|
||||
static void write(SkWStream* o, Hex h) {
|
||||
o->writeHexAsText(h.bits);
|
||||
}
|
||||
|
||||
template <typename T, typename... Ts>
|
||||
static void write(SkWStream* o, T first, Ts... rest) {
|
||||
write(o, first);
|
||||
write(o, " ");
|
||||
write(o, rest...);
|
||||
}
|
||||
|
||||
static void dump_builder(const Builder& builder, SkWStream* o) {
|
||||
const std::vector<Builder::Instruction> program = builder.program();
|
||||
|
||||
o->writeDecAsText(program.size());
|
||||
o->writeText(" values:\n");
|
||||
for (Val id = 0; id < (Val)program.size(); id++) {
|
||||
const Builder::Instruction& inst = program[id];
|
||||
Op op = inst.op;
|
||||
Val x = inst.x,
|
||||
y = inst.y,
|
||||
z = inst.z;
|
||||
int imm = inst.imm;
|
||||
write(o, inst.death == 0 ? "☠️ " :
|
||||
inst.hoist ? "↑ " : " ");
|
||||
switch (op) {
|
||||
case Op::store8: write(o, "store8" , Arg{imm}, V{x}); break;
|
||||
case Op::store16: write(o, "store16", Arg{imm}, V{x}); break;
|
||||
case Op::store32: write(o, "store32", Arg{imm}, V{x}); break;
|
||||
|
||||
case Op::load8: write(o, V{id}, "= load8" , Arg{imm}); break;
|
||||
case Op::load16: write(o, V{id}, "= load16", Arg{imm}); break;
|
||||
case Op::load32: write(o, V{id}, "= load32", Arg{imm}); break;
|
||||
|
||||
case Op::gather8: write(o, V{id}, "= gather8" , Arg{imm}, V{x}); break;
|
||||
case Op::gather16: write(o, V{id}, "= gather16", Arg{imm}, V{x}); break;
|
||||
case Op::gather32: write(o, V{id}, "= gather32", Arg{imm}, V{x}); break;
|
||||
|
||||
case Op::uniform8: write(o, V{id}, "= uniform8" , Arg{imm & 0xffff}, Hex{imm>>16}); break;
|
||||
case Op::uniform16: write(o, V{id}, "= uniform16", Arg{imm & 0xffff}, Hex{imm>>16}); break;
|
||||
case Op::uniform32: write(o, V{id}, "= uniform32", Arg{imm & 0xffff}, Hex{imm>>16}); break;
|
||||
|
||||
case Op::splat: write(o, V{id}, "= splat", Splat{imm}); break;
|
||||
|
||||
|
||||
case Op::add_f32: write(o, V{id}, "= add_f32", V{x}, V{y} ); break;
|
||||
case Op::sub_f32: write(o, V{id}, "= sub_f32", V{x}, V{y} ); break;
|
||||
case Op::mul_f32: write(o, V{id}, "= mul_f32", V{x}, V{y} ); break;
|
||||
case Op::div_f32: write(o, V{id}, "= div_f32", V{x}, V{y} ); break;
|
||||
case Op::mad_f32: write(o, V{id}, "= mad_f32", V{x}, V{y}, V{z}); break;
|
||||
|
||||
case Op:: eq_f32: write(o, V{id}, "= eq_f32", V{x}, V{y}); break;
|
||||
case Op::neq_f32: write(o, V{id}, "= neq_f32", V{x}, V{y}); break;
|
||||
case Op:: lt_f32: write(o, V{id}, "= lt_f32", V{x}, V{y}); break;
|
||||
case Op::lte_f32: write(o, V{id}, "= lte_f32", V{x}, V{y}); break;
|
||||
case Op:: gt_f32: write(o, V{id}, "= gt_f32", V{x}, V{y}); break;
|
||||
case Op::gte_f32: write(o, V{id}, "= gte_f32", V{x}, V{y}); break;
|
||||
|
||||
|
||||
case Op::add_i32: write(o, V{id}, "= add_i32", V{x}, V{y}); break;
|
||||
case Op::sub_i32: write(o, V{id}, "= sub_i32", V{x}, V{y}); break;
|
||||
case Op::mul_i32: write(o, V{id}, "= mul_i32", V{x}, V{y}); break;
|
||||
|
||||
case Op::shl_i32: write(o, V{id}, "= shl_i32", V{x}, Shift{imm}); break;
|
||||
case Op::shr_i32: write(o, V{id}, "= shr_i32", V{x}, Shift{imm}); break;
|
||||
case Op::sra_i32: write(o, V{id}, "= sra_i32", V{x}, Shift{imm}); break;
|
||||
|
||||
case Op:: eq_i32: write(o, V{id}, "= eq_i32", V{x}, V{y}); break;
|
||||
case Op::neq_i32: write(o, V{id}, "= neq_i32", V{x}, V{y}); break;
|
||||
case Op:: lt_i32: write(o, V{id}, "= lt_i32", V{x}, V{y}); break;
|
||||
case Op::lte_i32: write(o, V{id}, "= lte_i32", V{x}, V{y}); break;
|
||||
case Op:: gt_i32: write(o, V{id}, "= gt_i32", V{x}, V{y}); break;
|
||||
case Op::gte_i32: write(o, V{id}, "= gte_i32", V{x}, V{y}); break;
|
||||
|
||||
case Op::add_i16x2: write(o, V{id}, "= add_i16x2", V{x}, V{y}); break;
|
||||
case Op::sub_i16x2: write(o, V{id}, "= sub_i16x2", V{x}, V{y}); break;
|
||||
case Op::mul_i16x2: write(o, V{id}, "= mul_i16x2", V{x}, V{y}); break;
|
||||
|
||||
case Op::shl_i16x2: write(o, V{id}, "= shl_i16x2", V{x}, Shift{imm}); break;
|
||||
case Op::shr_i16x2: write(o, V{id}, "= shr_i16x2", V{x}, Shift{imm}); break;
|
||||
case Op::sra_i16x2: write(o, V{id}, "= sra_i16x2", V{x}, Shift{imm}); break;
|
||||
|
||||
case Op:: eq_i16x2: write(o, V{id}, "= eq_i16x2", V{x}, V{y}); break;
|
||||
case Op::neq_i16x2: write(o, V{id}, "= neq_i16x2", V{x}, V{y}); break;
|
||||
case Op:: lt_i16x2: write(o, V{id}, "= lt_i16x2", V{x}, V{y}); break;
|
||||
case Op::lte_i16x2: write(o, V{id}, "= lte_i16x2", V{x}, V{y}); break;
|
||||
case Op:: gt_i16x2: write(o, V{id}, "= gt_i16x2", V{x}, V{y}); break;
|
||||
case Op::gte_i16x2: write(o, V{id}, "= gte_i16x2", V{x}, V{y}); break;
|
||||
|
||||
case Op::bit_and : write(o, V{id}, "= bit_and" , V{x}, V{y} ); break;
|
||||
case Op::bit_or : write(o, V{id}, "= bit_or" , V{x}, V{y} ); break;
|
||||
case Op::bit_xor : write(o, V{id}, "= bit_xor" , V{x}, V{y} ); break;
|
||||
case Op::bit_clear: write(o, V{id}, "= bit_clear", V{x}, V{y} ); break;
|
||||
case Op::select : write(o, V{id}, "= select" , V{x}, V{y}, V{z}); break;
|
||||
|
||||
case Op::bytes: write(o, V{id}, "= bytes", V{x}, Hex{imm}); break;
|
||||
case Op::extract: write(o, V{id}, "= extract", V{x}, Shift{imm}, V{y}); break;
|
||||
case Op::pack: write(o, V{id}, "= pack", V{x}, V{y}, Shift{imm}); break;
|
||||
|
||||
case Op::to_f32: write(o, V{id}, "= to_f32", V{x}); break;
|
||||
case Op::to_i32: write(o, V{id}, "= to_i32", V{x}); break;
|
||||
}
|
||||
|
||||
write(o, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
static void dump_program(const Program& program, SkWStream* o) {
|
||||
const std::vector<Program::Instruction> instructions = program.instructions();
|
||||
const int nregs = program.nregs();
|
||||
const int loop = program.loop();
|
||||
|
||||
o->writeDecAsText(nregs);
|
||||
o->writeText(" registers, ");
|
||||
o->writeDecAsText(instructions.size());
|
||||
o->writeText(" instructions:\n");
|
||||
for (int i = 0; i < (int)instructions.size(); i++) {
|
||||
if (i == loop) {
|
||||
write(o, "loop:\n");
|
||||
}
|
||||
const Program::Instruction& inst = instructions[i];
|
||||
Op op = inst.op;
|
||||
Reg d = inst.d,
|
||||
x = inst.x,
|
||||
y = inst.y,
|
||||
z = inst.z;
|
||||
int imm = inst.imm;
|
||||
switch (op) {
|
||||
case Op::store8: write(o, "store8" , Arg{imm}, R{x}); break;
|
||||
case Op::store16: write(o, "store16", Arg{imm}, R{x}); break;
|
||||
case Op::store32: write(o, "store32", Arg{imm}, R{x}); break;
|
||||
|
||||
case Op::load8: write(o, R{d}, "= load8" , Arg{imm}); break;
|
||||
case Op::load16: write(o, R{d}, "= load16", Arg{imm}); break;
|
||||
case Op::load32: write(o, R{d}, "= load32", Arg{imm}); break;
|
||||
|
||||
case Op::gather8: write(o, R{d}, "= gather8" , Arg{imm}, R{x}); break;
|
||||
case Op::gather16: write(o, R{d}, "= gather16", Arg{imm}, R{x}); break;
|
||||
case Op::gather32: write(o, R{d}, "= gather32", Arg{imm}, R{x}); break;
|
||||
|
||||
case Op::uniform8: write(o, R{d}, "= uniform8" , Arg{imm & 0xffff}, Hex{imm>>16}); break;
|
||||
case Op::uniform16: write(o, R{d}, "= uniform16", Arg{imm & 0xffff}, Hex{imm>>16}); break;
|
||||
case Op::uniform32: write(o, R{d}, "= uniform32", Arg{imm & 0xffff}, Hex{imm>>16}); break;
|
||||
|
||||
case Op::splat: write(o, R{d}, "= splat", Splat{imm}); break;
|
||||
|
||||
|
||||
case Op::add_f32: write(o, R{d}, "= add_f32", R{x}, R{y} ); break;
|
||||
case Op::sub_f32: write(o, R{d}, "= sub_f32", R{x}, R{y} ); break;
|
||||
case Op::mul_f32: write(o, R{d}, "= mul_f32", R{x}, R{y} ); break;
|
||||
case Op::div_f32: write(o, R{d}, "= div_f32", R{x}, R{y} ); break;
|
||||
case Op::mad_f32: write(o, R{d}, "= mad_f32", R{x}, R{y}, R{z}); break;
|
||||
|
||||
case Op:: eq_f32: write(o, R{d}, "= eq_f32", R{x}, R{y}); break;
|
||||
case Op::neq_f32: write(o, R{d}, "= neq_f32", R{x}, R{y}); break;
|
||||
case Op:: lt_f32: write(o, R{d}, "= lt_f32", R{x}, R{y}); break;
|
||||
case Op::lte_f32: write(o, R{d}, "= lte_f32", R{x}, R{y}); break;
|
||||
case Op:: gt_f32: write(o, R{d}, "= gt_f32", R{x}, R{y}); break;
|
||||
case Op::gte_f32: write(o, R{d}, "= gte_f32", R{x}, R{y}); break;
|
||||
|
||||
|
||||
case Op::add_i32: write(o, R{d}, "= add_i32", R{x}, R{y}); break;
|
||||
case Op::sub_i32: write(o, R{d}, "= sub_i32", R{x}, R{y}); break;
|
||||
case Op::mul_i32: write(o, R{d}, "= mul_i32", R{x}, R{y}); break;
|
||||
|
||||
case Op::shl_i32: write(o, R{d}, "= shl_i32", R{x}, Shift{imm}); break;
|
||||
case Op::shr_i32: write(o, R{d}, "= shr_i32", R{x}, Shift{imm}); break;
|
||||
case Op::sra_i32: write(o, R{d}, "= sra_i32", R{x}, Shift{imm}); break;
|
||||
|
||||
case Op:: eq_i32: write(o, R{d}, "= eq_i32", R{x}, R{y}); break;
|
||||
case Op::neq_i32: write(o, R{d}, "= neq_i32", R{x}, R{y}); break;
|
||||
case Op:: lt_i32: write(o, R{d}, "= lt_i32", R{x}, R{y}); break;
|
||||
case Op::lte_i32: write(o, R{d}, "= lte_i32", R{x}, R{y}); break;
|
||||
case Op:: gt_i32: write(o, R{d}, "= gt_i32", R{x}, R{y}); break;
|
||||
case Op::gte_i32: write(o, R{d}, "= gte_i32", R{x}, R{y}); break;
|
||||
|
||||
|
||||
case Op::add_i16x2: write(o, R{d}, "= add_i16x2", R{x}, R{y}); break;
|
||||
case Op::sub_i16x2: write(o, R{d}, "= sub_i16x2", R{x}, R{y}); break;
|
||||
case Op::mul_i16x2: write(o, R{d}, "= mul_i16x2", R{x}, R{y}); break;
|
||||
|
||||
case Op::shl_i16x2: write(o, R{d}, "= shl_i16x2", R{x}, Shift{imm}); break;
|
||||
case Op::shr_i16x2: write(o, R{d}, "= shr_i16x2", R{x}, Shift{imm}); break;
|
||||
case Op::sra_i16x2: write(o, R{d}, "= sra_i16x2", R{x}, Shift{imm}); break;
|
||||
|
||||
case Op:: eq_i16x2: write(o, R{d}, "= eq_i16x2", R{x}, R{y}); break;
|
||||
case Op::neq_i16x2: write(o, R{d}, "= neq_i16x2", R{x}, R{y}); break;
|
||||
case Op:: lt_i16x2: write(o, R{d}, "= lt_i16x2", R{x}, R{y}); break;
|
||||
case Op::lte_i16x2: write(o, R{d}, "= lte_i16x2", R{x}, R{y}); break;
|
||||
case Op:: gt_i16x2: write(o, R{d}, "= gt_i16x2", R{x}, R{y}); break;
|
||||
case Op::gte_i16x2: write(o, R{d}, "= gte_i16x2", R{x}, R{y}); break;
|
||||
|
||||
|
||||
case Op::bit_and : write(o, R{d}, "= bit_and" , R{x}, R{y} ); break;
|
||||
case Op::bit_or : write(o, R{d}, "= bit_or" , R{x}, R{y} ); break;
|
||||
case Op::bit_xor : write(o, R{d}, "= bit_xor" , R{x}, R{y} ); break;
|
||||
case Op::bit_clear: write(o, R{d}, "= bit_clear", R{x}, R{y} ); break;
|
||||
case Op::select : write(o, R{d}, "= select" , R{x}, R{y}, R{z}); break;
|
||||
|
||||
case Op::bytes: write(o, R{d}, "= bytes", R{x}, Hex{imm}); break;
|
||||
case Op::extract: write(o, R{d}, "= extract", R{x}, Shift{imm}, R{y}); break;
|
||||
case Op::pack: write(o, R{d}, "= pack", R{x}, R{y}, Shift{imm}); break;
|
||||
|
||||
case Op::to_f32: write(o, R{d}, "= to_f32", R{x}); break;
|
||||
case Op::to_i32: write(o, R{d}, "= to_i32", R{x}); break;
|
||||
}
|
||||
write(o, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
static void dump(Builder& builder, SkWStream* o) {
|
||||
skvm::Program program = builder.done();
|
||||
dump_builder(builder, o);
|
||||
o->writeText("\n");
|
||||
dump_program(program, o);
|
||||
o->writeText("\n");
|
||||
}
|
||||
|
||||
} // namespace
|
||||
// Writes the builder's value listing and then the finished program's instruction listing
// to `o`, each followed by a blank line.
static void dump(skvm::Builder& builder, SkWStream* o) {
    // done() runs before anything is printed.
    // NOTE(review): presumably so the builder's liveness/hoist annotations are populated
    // by the time builder.dump() reads them -- confirm against Builder::done().
    const skvm::Program finished = builder.done();

    builder.dump(o);
    o->writeText("\n");

    finished.dump(o);
    o->writeText("\n");
}
|
||||
|
||||
template <typename Fn>
|
||||
static void test_jit_and_interpreter(skvm::Program&& program, Fn&& test) {
|
||||
|
@ -1,105 +0,0 @@
|
||||
/*
|
||||
* Copyright 2019 Google LLC
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#include "src/core/SkCpu.h"
|
||||
#include "src/core/SkVM.h"
|
||||
#include "tools/SkVMBuilders.h"
|
||||
#include <chrono>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// Tool-local definition of sk_abort_no_print(): terminates the process via abort().
// NOTE(review): presumably defined here because the tool links only a handful of Skia
// sources -- confirm against the build rules.
void sk_abort_no_print() { ::abort(); }
|
||||
|
||||
// Tool-local SkDebugf(): printf-style formatting written to stderr.
void SkDebugf(const char* fmt, ...) {
    va_list ap;
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
}
|
||||
|
||||
// Builds a program that loads a 32-bit int from its one varying argument, adds 1,
// and stores the result back through the same argument.
static skvm::Program plus_one() {
    skvm::Builder builder;

    const skvm::Arg buf    = builder.varying<int>();
    const skvm::I32 loaded = builder.load32(buf);
    const skvm::I32 one    = builder.splat(1);
    builder.store32(buf, builder.add(loaded, one));

    return builder.done("plus_one");
}
|
||||
|
||||
// Builds a program that loads a 32-bit int from its one varying argument, multiplies it
// by itself, and stores the result back through the same argument.
static skvm::Program square() {
    skvm::Builder builder;

    const skvm::Arg buf     = builder.varying<int>();
    const skvm::I32 loaded  = builder.load32(buf);
    const skvm::I32 squared = builder.mul(loaded, loaded);
    builder.store32(buf, squared);

    return builder.done("square");
}
|
||||
|
||||
// Prints `val` to stdout as a 4-wide integer followed by an SI-style scale prefix
// ("", K, M, G, T) and `units`, e.g. "1234 Mpx/s".
//
// The value is divided by 1000 until it is at most 10000 or the largest available
// prefix ("T") has been reached.  Stopping at the last prefix keeps the scale index inside
// the table (very large values previously walked off its end) and guarantees the loop
// terminates for +infinity.
static void print(double val, const char* units) {
    static const char* const kScales[] = { "", "K", "M", "G", "T" };
    const int kLastScale = (int)(sizeof(kScales) / sizeof(kScales[0])) - 1;

    int scale = 0;
    while (val > 10000.0 && scale < kLastScale) {
        val *= 1/1000.0;
        scale++;
    }

    printf("%4d %s%s", (int)val, kScales[scale], units);
}
|
||||
|
||||
// Calls fn() repeatedly until at least 100ms of steady-clock time has passed (fn always
// runs at least once) and returns the observed rate in calls per second.
template <typename Fn>
static double measure(Fn&& fn) {
    using clock = std::chrono::steady_clock;
    const auto budget = std::chrono::milliseconds(100);

    int loops = 0;
    const auto start = clock::now();
    std::chrono::duration<double> elapsed;
    for (;;) {
        fn();
        loops++;
        elapsed = clock::now() - start;
        if (!(elapsed < budget)) {
            break;
        }
    }

    return loops / elapsed.count();
}
|
||||
|
||||
template <typename... Args>
|
||||
static void time(const char* name, const skvm::Program& program, Args... args) {
|
||||
printf("%20s", name);
|
||||
|
||||
for (int N : { 15, 255, 4095 }) {
|
||||
double loops_per_sec = measure([&]{
|
||||
program.eval(N, args...);
|
||||
});
|
||||
|
||||
printf("\t");
|
||||
print(N*loops_per_sec, "px/s");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
// Entry point of the benchmark tool: times a fixed set of skvm programs and prints one row
// of throughput numbers per program.
int main(int argc, char** argv) {
#if defined(__x86_64__)
    SkCpu::CacheRuntimeFeatures();
#endif

    // Zero-initialized: the programs below load from these buffers, and reading
    // indeterminate values would be undefined behavior.
    int src[4096] = {},
        dst[4096] = {};
    time("plus_one", plus_one(), dst);
    time(  "square",   square(), dst);

    time("srcover_f32"      , SrcoverBuilder_F32      ().done("srcover_f32"      ), src, dst);
    time("srcover_i32"      , SrcoverBuilder_I32      ().done("srcover_i32"      ), src, dst);
    time("srcover_i32_naive", SrcoverBuilder_I32_Naive().done("srcover_i32_naive"), src, dst);
    time("srcover_i32_SWAR" , SrcoverBuilder_I32_SWAR ().done("srcover_i32_SWAR" ), src, dst);

    return 0;
}
|
Loading…
Reference in New Issue
Block a user