pin down arg() stride (a.k.a. type) info sooner
Arg strides are the reason JIT happens lazily in Program::eval() today instead of proactively in Builder::done() or Program's constructor.

It also just really doesn't make sense to delay this information... it's not like you can change it up sanely between calls to eval().

The argument index now comes implicitly from the order of calling arg(). This may seem logically independent, but it prevents a weird situation where you could use the same argument index twice with different strides... not sure what that would mean.

Change-Id: I0f5d46e94a1ca112a72675c5492f17c0dd825ce0
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/227390
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
This commit is contained in:
parent
9d8cac831d
commit
2616efda08
@ -23,6 +23,7 @@ namespace skvm {
|
||||
fInstructions = std::move(other.fInstructions);
|
||||
fRegs = other.fRegs;
|
||||
fLoop = other.fLoop;
|
||||
fStrides = std::move(other.fStrides);
|
||||
// Don't bother trying to move other.fJIT*. We can just regenerate it.
|
||||
}
|
||||
|
||||
@ -30,6 +31,7 @@ namespace skvm {
|
||||
fInstructions = std::move(other.fInstructions);
|
||||
fRegs = other.fRegs;
|
||||
fLoop = other.fLoop;
|
||||
fStrides = std::move(other.fStrides);
|
||||
// Don't bother trying to move other.fJIT*. We can just regenerate it,
|
||||
// but we do need to invalidate anything we have cached ourselves.
|
||||
fJITLock.acquire();
|
||||
@ -38,10 +40,14 @@ namespace skvm {
|
||||
return *this;
|
||||
}
|
||||
|
||||
Program::Program(std::vector<Instruction> instructions, int regs, int loop)
|
||||
Program::Program(std::vector<Instruction> instructions,
|
||||
int regs,
|
||||
int loop,
|
||||
std::vector<int> strides)
|
||||
: fInstructions(std::move(instructions))
|
||||
, fRegs(regs)
|
||||
, fLoop(loop) {}
|
||||
, fLoop(loop)
|
||||
, fStrides(std::move(strides)) {}
|
||||
|
||||
|
||||
Program Builder::done() const {
|
||||
@ -182,7 +188,7 @@ namespace skvm {
|
||||
push_instruction(id, inst);
|
||||
}
|
||||
|
||||
return { std::move(program), /*register count = */next_reg, loop };
|
||||
return { std::move(program), /*register count = */next_reg, loop, fStrides };
|
||||
}
|
||||
|
||||
static bool operator==(const Builder::Instruction& a, const Builder::Instruction& b) {
|
||||
@ -215,7 +221,11 @@ namespace skvm {
|
||||
&& fProgram[id].imm == 0;
|
||||
}
|
||||
|
||||
Arg Builder::arg(int ix) { return {ix}; }
|
||||
// Declare the next argument and record its stride.
//
// The returned Arg's index is implicit: it is the number of arguments
// declared before this call, so indices follow the order of arg() calls.
// The stride is appended to fStrides, which done() hands to the Program.
Arg Builder::arg(int stride) {
|
||||
int ix = (int)fStrides.size();  // Index of this argument = count declared so far.
|
||||
fStrides.push_back(stride);     // fStrides[ix] now holds this argument's stride.
|
||||
return {ix};
|
||||
}
|
||||
|
||||
void Builder::store8 (Arg ptr, I32 val) { (void)this->push(Op::store8 , val.id,NA,NA, ptr.ix); }
|
||||
void Builder::store32(Arg ptr, I32 val) { (void)this->push(Op::store32, val.id,NA,NA, ptr.ix); }
|
||||
@ -869,7 +879,7 @@ namespace skvm {
|
||||
#if defined(__x86_64__)
|
||||
static void jit(Assembler& a, size_t* code,
|
||||
const std::vector<Program::Instruction>& instructions,
|
||||
int regs, int loop, size_t strides[], int nargs) {
|
||||
int regs, int loop, const int strides[], int nargs) {
|
||||
using A = Assembler;
|
||||
|
||||
SkASSERT(can_jit(regs,nargs));
|
||||
@ -1074,7 +1084,7 @@ namespace skvm {
|
||||
emit(instructions[i], /*scalar=*/false);
|
||||
}
|
||||
for (int i = 0; i < nargs; i++) {
|
||||
a.add(arg[i], K*(int)strides[i]);
|
||||
a.add(arg[i], K*strides[i]);
|
||||
}
|
||||
a.sub(N, K);
|
||||
a.jmp(&body);
|
||||
@ -1087,7 +1097,7 @@ namespace skvm {
|
||||
emit(instructions[i], /*scalar=*/true);
|
||||
}
|
||||
for (int i = 0; i < nargs; i++) {
|
||||
a.add(arg[i], 1*(int)strides[i]);
|
||||
a.add(arg[i], 1*strides[i]);
|
||||
}
|
||||
a.sub(N, 1);
|
||||
a.jmp(&tail);
|
||||
@ -1100,7 +1110,7 @@ namespace skvm {
|
||||
#elif defined(__aarch64__)
|
||||
static void jit(Assembler& a, size_t* code,
|
||||
const std::vector<Program::Instruction>& instructions,
|
||||
int regs, int loop, size_t strides[], int nargs) {
|
||||
int regs, int loop, const int strides[], int nargs) {
|
||||
using A = Assembler;
|
||||
SkASSERT(can_jit(regs,nargs));
|
||||
|
||||
@ -1272,7 +1282,7 @@ namespace skvm {
|
||||
emit(instructions[i], /*scalar=*/false);
|
||||
}
|
||||
for (int i = 0; i < nargs; i++) {
|
||||
a.add(arg[i], arg[i], K*(int)strides[i]);
|
||||
a.add(arg[i], arg[i], K*strides[i]);
|
||||
}
|
||||
a.sub(N, N, K);
|
||||
a.b(&body);
|
||||
@ -1284,7 +1294,7 @@ namespace skvm {
|
||||
emit(instructions[i], /*scalar=*/true);
|
||||
}
|
||||
for (int i = 0; i < nargs; i++) {
|
||||
a.add(arg[i], arg[i], 1*(int)strides[i]);
|
||||
a.add(arg[i], arg[i], 1*strides[i]);
|
||||
}
|
||||
a.sub(N, N, 1);
|
||||
a.b(&tail);
|
||||
@ -1303,8 +1313,9 @@ namespace skvm {
|
||||
Program::JIT::~JIT() { SkASSERT(buf == nullptr); }
|
||||
#endif // defined(SKVM_JIT)
|
||||
|
||||
void Program::eval(int n, void* args[], size_t strides[], int nargs) const {
|
||||
void Program::eval(int n, void* args[]) const {
|
||||
void (*entry)() = nullptr;
|
||||
int nargs = (int)fStrides.size();
|
||||
|
||||
#if defined(SKVM_JIT)
|
||||
// If we can't grab this lock, another thread is probably assembling the program.
|
||||
@ -1317,7 +1328,7 @@ namespace skvm {
|
||||
// First assemble without any buffer to see how much memory we need to mmap.
|
||||
size_t code;
|
||||
Assembler a{nullptr};
|
||||
jit(a, &code, fInstructions, fRegs, fLoop, strides, nargs);
|
||||
jit(a, &code, fInstructions, fRegs, fLoop, fStrides.data(), nargs);
|
||||
|
||||
// mprotect() can only change at a page level granularity, so round a.size() up.
|
||||
size_t page = sysconf(_SC_PAGESIZE), // Probably 4096.
|
||||
@ -1327,7 +1338,7 @@ namespace skvm {
|
||||
mmap(nullptr, size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1,0);
|
||||
|
||||
a = Assembler{buf};
|
||||
jit(a,&code, fInstructions, fRegs, fLoop, strides, nargs);
|
||||
jit(a, &code, fInstructions, fRegs, fLoop, fStrides.data(), nargs);
|
||||
|
||||
mprotect(buf,size, PROT_READ|PROT_EXEC);
|
||||
#if defined(__aarch64__)
|
||||
@ -1524,8 +1535,8 @@ namespace skvm {
|
||||
// Looping by marching pointers until *arg == nullptr helps the
|
||||
// compiler to keep this loop scalar. Otherwise it'd create a
|
||||
// rather large and useless autovectorized version.
|
||||
void** arg = args;
|
||||
const size_t* stride = strides;
|
||||
void** arg = args;
|
||||
const int* stride = fStrides.data();
|
||||
for (; *arg; arg++, stride++) {
|
||||
*arg = (void*)( (char*)*arg + times * *stride );
|
||||
}
|
||||
@ -1627,5 +1638,3 @@ namespace skvm {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: argument strides (more generally types) should come earlier, the pointers themselves later.
|
||||
|
@ -250,8 +250,8 @@ namespace skvm {
|
||||
union { Reg z; int imm; };
|
||||
};
|
||||
|
||||
Program(std::vector<Instruction>, int regs, int loop);
|
||||
Program() : Program({}, 0, 0) {}
|
||||
Program(std::vector<Instruction>, int regs, int loop, std::vector<int> strides);
|
||||
Program() : Program({}, 0, 0, {}) {}
|
||||
|
||||
~Program();
|
||||
Program(Program&&);
|
||||
@ -262,8 +262,7 @@ namespace skvm {
|
||||
template <typename... T>
|
||||
void eval(int n, T*... arg) const {
|
||||
void* args[] = { (void*)arg..., nullptr };
|
||||
size_t strides[] = { sizeof(*arg)... };
|
||||
this->eval(n, args, strides, (int)sizeof...(arg));
|
||||
this->eval(n, args);
|
||||
}
|
||||
|
||||
std::vector<Instruction> instructions() const { return fInstructions; }
|
||||
@ -279,11 +278,12 @@ namespace skvm {
|
||||
void (*entry)() = nullptr; // Entry point, offset into buf.
|
||||
};
|
||||
|
||||
void eval(int n, void* args[], size_t strides[], int nargs) const;
|
||||
void eval(int n, void* args[]) const;
|
||||
|
||||
std::vector<Instruction> fInstructions;
|
||||
int fRegs;
|
||||
int fLoop;
|
||||
std::vector<int> fStrides;
|
||||
mutable SkSpinlock fJITLock;
|
||||
mutable JIT fJIT;
|
||||
};
|
||||
@ -304,7 +304,12 @@ namespace skvm {
|
||||
|
||||
Program done() const;
|
||||
|
||||
Arg arg(int);
|
||||
// Declare a varying argument with given stride.
|
||||
Arg arg(int stride);
|
||||
|
||||
// Convenience arg() wrapper for most common stride, sizeof(T).
|
||||
template <typename T>
|
||||
Arg arg() { return this->arg(sizeof(T)); }
|
||||
|
||||
void store8 (Arg ptr, I32 val);
|
||||
void store32(Arg ptr, I32 val);
|
||||
@ -392,6 +397,7 @@ namespace skvm {
|
||||
|
||||
SkTHashMap<Instruction, Val, InstructionHash> fIndex;
|
||||
std::vector<Instruction> fProgram;
|
||||
std::vector<int> fStrides;
|
||||
};
|
||||
|
||||
// TODO: comparison operations, if_then_else
|
||||
|
@ -344,11 +344,14 @@ DEF_TEST(SkVM_LoopCounts, r) {
|
||||
buf[i] = i;
|
||||
}
|
||||
|
||||
|
||||
// buf[i] += 1
|
||||
skvm::Builder b;
|
||||
b.store32(b.arg(0),
|
||||
skvm::Arg arg = b.arg<int>();
|
||||
|
||||
b.store32(arg,
|
||||
b.add(b.splat(1),
|
||||
b.load32(b.arg(0))));
|
||||
b.load32(arg)));
|
||||
|
||||
skvm::Program program = b.done();
|
||||
program.eval(N, buf);
|
||||
|
@ -13,28 +13,28 @@
|
||||
// nesting calls to Builder routines.
|
||||
|
||||
SrcoverBuilder_F32::SrcoverBuilder_F32(Fmt srcFmt, Fmt dstFmt) {
|
||||
skvm::Arg src = arg(0),
|
||||
dst = arg(1);
|
||||
|
||||
auto byte_to_f32 = [&](skvm::I32 byte) {
|
||||
skvm::F32 _1_255 = splat(1/255.0f);
|
||||
return mul(_1_255, to_f32(byte));
|
||||
};
|
||||
|
||||
auto load = [&](skvm::Arg ptr, Fmt fmt,
|
||||
skvm::F32* r, skvm::F32* g, skvm::F32* b, skvm::F32* a) {
|
||||
auto load = [&](Fmt fmt, skvm::F32* r, skvm::F32* g, skvm::F32* b, skvm::F32* a) {
|
||||
skvm::Arg ptr;
|
||||
switch (fmt) {
|
||||
case Fmt::A8: {
|
||||
ptr = arg<uint8_t>();
|
||||
*r = *g = *b = splat(0.0f);
|
||||
*a = byte_to_f32(load8(ptr));
|
||||
} break;
|
||||
|
||||
case Fmt::G8: {
|
||||
ptr = arg<uint8_t>();
|
||||
*r = *g = *b = byte_to_f32(load8(ptr));
|
||||
*a = splat(1.0f);
|
||||
} break;
|
||||
|
||||
case Fmt::RGBA_8888: {
|
||||
ptr = arg<int>();
|
||||
skvm::I32 rgba = load32(ptr);
|
||||
*r = byte_to_f32(extract(rgba, 0, splat(0xff)));
|
||||
*g = byte_to_f32(extract(rgba, 8, splat(0xff)));
|
||||
@ -42,13 +42,14 @@ SrcoverBuilder_F32::SrcoverBuilder_F32(Fmt srcFmt, Fmt dstFmt) {
|
||||
*a = byte_to_f32(extract(rgba, 24, splat(0xff)));
|
||||
} break;
|
||||
}
|
||||
return ptr;
|
||||
};
|
||||
|
||||
skvm::F32 r,g,b,a;
|
||||
load(src, srcFmt, &r,&g,&b,&a);
|
||||
(void)load(srcFmt, &r,&g,&b,&a);
|
||||
|
||||
skvm::F32 dr,dg,db,da;
|
||||
load(dst, dstFmt, &dr,&dg,&db,&da);
|
||||
skvm::Arg dst = load(dstFmt, &dr,&dg,&db,&da);
|
||||
|
||||
skvm::F32 invA = sub(splat(1.0f), a);
|
||||
r = mad(dr, invA, r);
|
||||
@ -91,8 +92,8 @@ SrcoverBuilder_F32::SrcoverBuilder_F32(Fmt srcFmt, Fmt dstFmt) {
|
||||
}
|
||||
|
||||
SrcoverBuilder_I32_Naive::SrcoverBuilder_I32_Naive() {
|
||||
skvm::Arg src = arg(0),
|
||||
dst = arg(1);
|
||||
skvm::Arg src = arg<int>(),
|
||||
dst = arg<int>();
|
||||
|
||||
auto load = [&](skvm::Arg ptr,
|
||||
skvm::I32* r, skvm::I32* g, skvm::I32* b, skvm::I32* a) {
|
||||
@ -128,8 +129,8 @@ SrcoverBuilder_I32_Naive::SrcoverBuilder_I32_Naive() {
|
||||
}
|
||||
|
||||
SrcoverBuilder_I32::SrcoverBuilder_I32() {
|
||||
skvm::Arg src = arg(0),
|
||||
dst = arg(1);
|
||||
skvm::Arg src = arg<int>(),
|
||||
dst = arg<int>();
|
||||
|
||||
auto load = [&](skvm::Arg ptr,
|
||||
skvm::I32* r, skvm::I32* g, skvm::I32* b, skvm::I32* a) {
|
||||
@ -172,8 +173,8 @@ SrcoverBuilder_I32::SrcoverBuilder_I32() {
|
||||
}
|
||||
|
||||
SrcoverBuilder_I32_SWAR::SrcoverBuilder_I32_SWAR() {
|
||||
skvm::Arg src = arg(0),
|
||||
dst = arg(1);
|
||||
skvm::Arg src = arg<int>(),
|
||||
dst = arg<int>();
|
||||
|
||||
// The s += d*invA adds won't overflow,
|
||||
// so we don't have to unpack s beyond grabbing the alpha channel.
|
||||
|
Loading…
Reference in New Issue
Block a user