convert to phi nodes
Convert our n+args stack homes to phi nodes, essentially performing mem2reg ourselves, eliminating the need for it at runtime. Also, use b.getInt64(k) to create integer constants. Also, print verifyModule() errors to stdout (instead of nowhere). Also, update unit test to make sure we don't run off the end. Bitcode still looks good: define void @skvm-jit-211960346(i64, i8*) { enter: br label %testK testK: ; preds = %loopK, %enter %2 = phi i64 [ %0, %enter ], [ %6, %loopK ] %3 = phi i8* [ %1, %enter ], [ %7, %loopK ] %4 = icmp uge i64 %2, 16 br i1 %4, label %loopK, label %test1 loopK: ; preds = %testK %5 = bitcast i8* %3 to <16 x i32>* store <16 x i32> <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>, <16 x i32>* %5, align 1 %6 = sub i64 %2, 16 %7 = getelementptr i8, i8* %3, i64 64 br label %testK test1: ; preds = %loop1, %testK %8 = phi i64 [ %2, %testK ], [ %12, %loop1 ] %9 = phi i8* [ %3, %testK ], [ %13, %loop1 ] %10 = icmp uge i64 %8, 1 br i1 %10, label %loop1, label %leave loop1: ; preds = %test1 %11 = bitcast i8* %9 to i32* store i32 42, i32* %11, align 1 %12 = sub i64 %8, 1 %13 = getelementptr i8, i8* %9, i64 4 br label %test1 leave: ; preds = %test1 ret void } and the final assembly looks the same: 0x10a3f5000: movabsq $0x10a3f6000, %rax ; imm = 0x10A3F6000 0x10a3f500a: vbroadcastss (%rax), %zmm0 0x10a3f5010: cmpq $0xf, %rdi 0x10a3f5014: jbe 0x10a3f504d 0x10a3f5016: nopw %cs:(%rax,%rax) 0x10a3f5020: vmovups %zmm0, (%rsi) 0x10a3f5026: addq $-0x10, %rdi 0x10a3f502a: addq $0x40, %rsi 0x10a3f502e: cmpq $0xf, %rdi 0x10a3f5032: ja 0x10a3f5020 0x10a3f5034: jmp 0x10a3f504d 0x10a3f5036: nopw %cs:(%rax,%rax) 0x10a3f5040: movl $0x2a, (%rsi) 0x10a3f5046: decq %rdi 0x10a3f5049: addq $0x4, %rsi 0x10a3f504d: testq %rdi, %rdi 0x10a3f5050: jne 0x10a3f5040 0x10a3f5052: vzeroupper 0x10a3f5055: retq Change-Id: I12d11c7d5786c4c3df28a49bb3044be10f0770e0 Reviewed-on: 
https://skia-review.googlesource.com/c/skia/+/273753 Reviewed-by: Mike Klein <mtklein@google.com> Reviewed-by: Herb Derby <herb@google.com> Commit-Queue: Mike Klein <mtklein@google.com>
This commit is contained in:
parent
fb3f302bdf
commit
7b3999edcb
@ -20,11 +20,8 @@
|
||||
#include <llvm/Bitcode/BitcodeWriter.h>
|
||||
#include <llvm/ExecutionEngine/ExecutionEngine.h>
|
||||
#include <llvm/IR/IRBuilder.h>
|
||||
#include <llvm/IR/LegacyPassManager.h>
|
||||
#include <llvm/IR/Verifier.h>
|
||||
#include <llvm/Support/TargetSelect.h>
|
||||
#include <llvm/Transforms/IPO/PassManagerBuilder.h>
|
||||
#include <llvm/Transforms/Utils.h>
|
||||
#endif
|
||||
|
||||
bool gSkVMJITViaDylib{false};
|
||||
@ -1941,8 +1938,9 @@ namespace skvm {
|
||||
|
||||
using IRBuilder = llvm::IRBuilder<>;
|
||||
|
||||
llvm::Value* n;
|
||||
std::vector<llvm::Value*> args;
|
||||
// `n` won't be used in emit, but `args` will be and they're clearest kept together.
|
||||
llvm::PHINode* n;
|
||||
std::vector<llvm::PHINode*> args;
|
||||
std::vector<llvm::Value*> vals(instructions.size());
|
||||
|
||||
auto emit = [&](size_t i, bool scalar, IRBuilder* b) {
|
||||
@ -1957,7 +1955,7 @@ namespace skvm {
|
||||
if (scalar) {
|
||||
v = b->CreateExtractElement(v, (uint64_t)0);
|
||||
}
|
||||
llvm::Value* ptr = b->CreateBitCast(b->CreateLoad(args[immy]),
|
||||
llvm::Value* ptr = b->CreateBitCast(args[immy],
|
||||
v->getType()->getPointerTo());
|
||||
vals[i] = b->CreateAlignedStore(v, ptr, 1);
|
||||
} break;
|
||||
@ -1972,28 +1970,28 @@ namespace skvm {
|
||||
return true;
|
||||
};
|
||||
|
||||
// enter: set up stack homes `n` and `args` for loop counter and uniform/varying pointers.
|
||||
// TODO: manual PHI nodes for these instead of relying on load/store and mem2reg
|
||||
// We can't jump to the first basic block or this would be testK directly.
|
||||
{
|
||||
IRBuilder b(enter);
|
||||
|
||||
llvm::Argument* arg = fn->arg_begin();
|
||||
|
||||
n = b.CreateAlloca(arg->getType());
|
||||
b.CreateStore(arg++, n);
|
||||
|
||||
for (size_t i = 0; i < fStrides.size(); i++) {
|
||||
args.push_back(b.CreateAlloca(arg->getType()));
|
||||
b.CreateStore(arg++, args.back());
|
||||
}
|
||||
b.CreateBr(testK);
|
||||
}
|
||||
|
||||
// testK: if (N >= K) goto loopK; else goto test1;
|
||||
llvm::ConstantInt* i64_K = llvm::ConstantInt::get(i64, K);
|
||||
{
|
||||
IRBuilder b(testK);
|
||||
b.CreateCondBr(b.CreateICmpUGE(b.CreateLoad(n), i64_K), loopK, test1);
|
||||
|
||||
// Set up phi nodes for `n` and each pointer argument from enter; later we'll add loopK.
|
||||
llvm::Argument* arg = fn->arg_begin();
|
||||
|
||||
n = b.CreatePHI(arg->getType(), 2);
|
||||
n->addIncoming(arg++, enter);
|
||||
|
||||
for (size_t i = 0; i < fStrides.size(); i++) {
|
||||
args.push_back(b.CreatePHI(arg->getType(), 2));
|
||||
args.back()->addIncoming(arg++, enter);
|
||||
}
|
||||
|
||||
b.CreateCondBr(b.CreateICmpUGE(n, b.getInt64(K)), loopK, test1);
|
||||
}
|
||||
|
||||
// loopK: ... insts on K x T vectors; N -= K, args += K*stride; goto testK;
|
||||
@ -2004,19 +2002,36 @@ namespace skvm {
|
||||
return;
|
||||
}
|
||||
}
|
||||
b.CreateStore(b.CreateSub(b.CreateLoad(n), i64_K), n);
|
||||
|
||||
// n -= K
|
||||
llvm::Value* n_next = b.CreateSub(n, b.getInt64(K));
|
||||
n->addIncoming(n_next, loopK);
|
||||
|
||||
// Each arg ptr += K*stride
|
||||
for (size_t i = 0; i < fStrides.size(); i++) {
|
||||
b.CreateStore(b.CreateGEP(b.CreateLoad(args[i]),
|
||||
llvm::ConstantInt::get(i64, K * fStrides[i])), args[i]);
|
||||
llvm::Value* arg_next = b.CreateGEP(args[i], b.getInt64(K*fStrides[i]));
|
||||
args[i]->addIncoming(arg_next, loopK);
|
||||
}
|
||||
b.CreateBr(testK);
|
||||
}
|
||||
|
||||
// test1: if (N >= 1) goto loop1; else goto leave;
|
||||
llvm::ConstantInt* i64_1 = llvm::ConstantInt::get(i64, 1);
|
||||
{
|
||||
IRBuilder b(test1);
|
||||
b.CreateCondBr(b.CreateICmpUGE(b.CreateLoad(n), i64_1), loop1, leave);
|
||||
|
||||
// Set up new phi nodes for `n` and each pointer argument, now from testK and loop1.
|
||||
|
||||
llvm::PHINode* n_new = b.CreatePHI(n->getType(), 2);
|
||||
n_new->addIncoming(n, testK);
|
||||
n = n_new;
|
||||
|
||||
for (size_t i = 0; i < fStrides.size(); i++) {
|
||||
llvm::PHINode* arg_new = b.CreatePHI(args[i]->getType(), 2);
|
||||
arg_new->addIncoming(args[i], testK);
|
||||
args[i] = arg_new;
|
||||
}
|
||||
|
||||
b.CreateCondBr(b.CreateICmpUGE(n, b.getInt64(1)), loop1, leave);
|
||||
}
|
||||
|
||||
// loop1: ... insts on scalars; N -= 1, args += stride; goto test1;
|
||||
@ -2027,10 +2042,15 @@ namespace skvm {
|
||||
return;
|
||||
}
|
||||
}
|
||||
b.CreateStore(b.CreateSub(b.CreateLoad(n), i64_1), n);
|
||||
|
||||
// n -= 1
|
||||
llvm::Value* n_next = b.CreateSub(n, b.getInt64(1));
|
||||
n->addIncoming(n_next, loop1);
|
||||
|
||||
// Each arg ptr += stride
|
||||
for (size_t i = 0; i < fStrides.size(); i++) {
|
||||
b.CreateStore(b.CreateGEP(b.CreateLoad(args[i]),
|
||||
llvm::ConstantInt::get(i64, fStrides[i])), args[i]);
|
||||
llvm::Value* arg_next = b.CreateGEP(args[i], b.getInt64(fStrides[i]));
|
||||
args[i]->addIncoming(arg_next, loop1);
|
||||
}
|
||||
b.CreateBr(test1);
|
||||
}
|
||||
@ -2041,21 +2061,7 @@ namespace skvm {
|
||||
b.CreateRetVoid();
|
||||
}
|
||||
|
||||
SkASSERT(false == llvm::verifyModule(*mod));
|
||||
|
||||
llvm::legacy::FunctionPassManager fpm(mod.get());
|
||||
#if 0
|
||||
llvm::PassManagerBuilder pmb;
|
||||
pmb. OptLevel = 1;
|
||||
pmb.SizeLevel = 1;
|
||||
// TargetMachine::adjustPassManager(pmb)
|
||||
pmb.populateFunctionPassManager(fpm);
|
||||
while (fpm.run(*fn));
|
||||
#else
|
||||
fpm.add(llvm::createPromoteMemoryToRegisterPass());
|
||||
fpm.run(*fn);
|
||||
SkASSERT(!fpm.run(*fn));
|
||||
#endif
|
||||
SkASSERT(false == llvm::verifyModule(*mod, &llvm::outs()));
|
||||
|
||||
if (false) {
|
||||
SkString path = SkStringPrintf("/tmp/%s.bc", debug_name);
|
||||
|
@ -288,11 +288,14 @@ DEF_TEST(SkVM_LLVM, r) {
|
||||
skvm::Program p = b.done();
|
||||
REPORTER_ASSERT(r, p.hasJIT());
|
||||
|
||||
int buf[17];
|
||||
p.eval(SK_ARRAY_COUNT(buf), buf);
|
||||
for (int v : buf) {
|
||||
REPORTER_ASSERT(r, v == 42);
|
||||
int buf[18];
|
||||
buf[17] = 47;
|
||||
|
||||
p.eval(17, buf);
|
||||
for (int i = 0; i < 17; i++) {
|
||||
REPORTER_ASSERT(r, buf[i] == 42);
|
||||
}
|
||||
REPORTER_ASSERT(r, buf[17] == 47);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user