add(GP64 dst, int imm)

Change-Id: I6aaa1dc3ff3a9529f87ff58a2ae1fc5f59bc626b
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/221653
Commit-Queue: Mike Klein <mtklein@google.com>
Reviewed-by: Herb Derby <herb@google.com>
This commit is contained in:
Mike Klein 2019-06-18 15:26:08 -05:00 committed by Skia Commit-Bot
parent 948045d1a5
commit d3e75a7a1c
3 changed files with 47 additions and 22 deletions

View File

@ -496,10 +496,10 @@ namespace skvm {
void Assembler::vzeroupper() { this->byte(0xc5, 0xf8, 0x77); }
void Assembler::ret() { this->byte(0xc3); }
void Assembler::sub(GP64 dst, int imm) {
int opcode = 0b0000'0000; // opcode 0 does all sorts of things... add, sub, xor, cmp
opcode |= 0b0000'0001; // low bit set for 64-bit operands
opcode |= 0b1000'0000; // top bit set for instructions with any immediate
// Common instruction building for 64-bit opcodes with an immediate argument.
void Assembler::op(int opcode, int opcode_ext, GP64 dst, int imm) {
opcode |= 0b0000'0001; // low bit set for 64-bit operands
opcode |= 0b1000'0000; // top bit set for instructions with any immediate
int imm_bytes = 4;
if (SkTFitsIn<int8_t>(imm)) {
@ -507,14 +507,15 @@ namespace skvm {
opcode |= 0b0000'0010; // second bit set for 8-bit immediate
}
int opcode_ext = 5; // subtract
this->byte(REX{1,dst >= r8,0,0}.bits);
this->byte(opcode);
this->byte(ModRM{ModRM::Direct/*don't understand yet*/, opcode_ext, dst}.bits);
this->byte(&imm, imm_bytes);
}
void Assembler::add(GP64 dst, int imm) { this->op(0,0, dst,imm); }
void Assembler::sub(GP64 dst, int imm) { this->op(0,5, dst,imm); }
static bool can_jit(int regs, int nargs) {
return true
&& SkCpu::Supports(SkCpu::HSW) // TODO: SSE4.1 target?
@ -526,6 +527,8 @@ namespace skvm {
static int jit(Assembler& a,
const std::vector<Program::Instruction>& instructions,
int regs, int loop, size_t strides[], int nargs) {
using A = Assembler;
SkASSERT(can_jit(regs,nargs));
Xbyak::CodeGenerator& X = *a.X;
@ -537,8 +540,13 @@ namespace skvm {
#else
// These registers are used to pass the first 6 arguments,
// so if we stick to these we need not push, pop, spill, or move anything around.
Assembler::GP64 N = Assembler::rdi;
Xbyak::Reg arg[] = { X.rsi, X.rdx, X.rcx, X.r8, X.r9 };
A::GP64 N = A::rdi,
arg[] = { A::rsi, A::rdx, A::rcx, A::r8, A::r9 };
auto xarg = [&](int ix) {
// arg[] is Assembler::GP64, but they should match Xbyak's Reg64 value-wise.
return Xbyak::Reg64{arg[ix]};
};
// All 16 ymm registers are available as scratch, keeping 15 as a temporary for us.
@ -578,20 +586,20 @@ namespace skvm {
case Op::store8:
if (SkCpu::Supports(SkCpu::SKX)) {
// One stop shop! Pack 8x I32 -> U8 and store to ptr.
X.vpmovusdb(X.ptr[arg[y.imm]], r[x]);
X.vpmovusdb(X.ptr[xarg(y.imm)], r[x]);
} else {
X.vpackusdw(tmp, r[x], r[x]); // pack 32-bit -> 16-bit
X.vpermq (tmp, tmp, 0xd8); // u64 tmp[0,1,2,3] = tmp[0,2,1,3]
X.vpackuswb(tmp, tmp, tmp); // pack 16-bit -> 8-bit
X.vmovq(X.ptr[arg[y.imm]], // store low 8 bytes
X.vmovq(X.ptr[xarg(y.imm)], // store low 8 bytes
Xbyak::Xmm{tmp.getIdx()}); // (arg must be an xmm)
}
break;
case Op::store32: X.vmovups(X.ptr[arg[y.imm]], r[x]); break;
case Op::store32: X.vmovups(X.ptr[xarg(y.imm)], r[x]); break;
case Op::load8: X.vpmovzxbd(r[d], X.ptr[arg[y.imm]]); break;
case Op::load32: X.vmovups (r[d], X.ptr[arg[y.imm]]); break;
case Op::load8: X.vpmovzxbd(r[d], X.ptr[xarg(y.imm)]); break;
case Op::load32: X.vmovups (r[d], X.ptr[xarg(y.imm)]); break;
case Op::splat: data4.push_back(Data4{Xbyak::Label(), y.imm});
X.vbroadcastss(r[d], X.ptr[X.rip + data4.back().label]);
@ -685,7 +693,7 @@ namespace skvm {
}
for (int i = 0; i < nargs; i++) {
X.add(arg[i], K*(int)strides[i]);
a.add(arg[i], K*(int)strides[i]);
}
a.sub(N, K);
X.jne("loop");

View File

@ -39,20 +39,23 @@ namespace skvm {
ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15,
};
void byte(const void*, int);
void byte(uint8_t);
template <typename... Rest> void byte(uint8_t, Rest...);
void nop();
void align(int mod);
void vzeroupper();
void ret();
void add(GP64, int imm);
void sub(GP64, int imm);
//private:
std::unique_ptr<Xbyak::CodeGenerator> X;
private:
void byte(const void*, int);
void byte(uint8_t);
template <typename... Rest> void byte(uint8_t, Rest...);
void op(int opcode, int opcode_ext, GP64 dst, int imm);
};
enum class Op : uint8_t {

View File

@ -218,14 +218,28 @@ DEF_TEST(SkVM_Assembler, r) {
});
test_asm(r, [&](Assembler& a) {
a.sub(Assembler::rax, 32); // Always good to test rax.
a.sub(Assembler::rdi, 8); // Last 0x48 REX
a.sub(Assembler::r8 , 4); // First 0x4c REX
a.sub(Assembler::r8 , 1000000); // Requires 4 byte immediate.
a.add(Assembler::rax, 8); // Always good to test rax.
a.sub(Assembler::rax, 32);
a.add(Assembler::rdi, 12); // Last 0x48 REX
a.sub(Assembler::rdi, 8);
a.add(Assembler::r8 , 7); // First 0x4c REX
a.sub(Assembler::r8 , 4);
a.add(Assembler::rsi, 128); // Requires 4 byte immediate.
a.sub(Assembler::r8 , 1000000);
},{
0x48, 0x83, 0b11'000'000, 0x08,
0x48, 0x83, 0b11'101'000, 0x20,
0x48, 0x83, 0b11'000'111, 0x0c,
0x48, 0x83, 0b11'101'111, 0x08,
0x4c, 0x83, 0b11'000'000, 0x07,
0x4c, 0x83, 0b11'101'000, 0x04,
0x48, 0x81, 0b11'000'110, 0x80, 0x00, 0x00, 0x00,
0x4c, 0x81, 0b11'101'000, 0x40, 0x42, 0x0f, 0x00,
});
}