JIT today's new _imm ops

- Add YmmOrLabel struct to represent the concept that many
  x86 instructions can take a final argument as either a
  register or memory address, and that they all handle them
  the same way.
- Convert existing overloads like vmulps() to use YmmOrLabel.
- Upgrade some other instructions to take YmmOrLabel.
- Use the YmmOrLabel-taking instructions to implement today's new _imm ops.

This feels like a good spot for implicit constructors, no?

Change-Id: I435028acc3fbfcc16f634cfccc98fe38bbce9d19
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/263207
Reviewed-by: Mike Klein <mtklein@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
This commit is contained in:
Mike Klein 2020-01-08 15:49:47 -06:00 committed by Skia Commit-Bot
parent b45558dc33
commit 92ca3baba6
3 changed files with 55 additions and 36 deletions

View File

@ -1060,24 +1060,24 @@ namespace skvm {
this->byte(mod_rm(Mod::Direct, dst&7, y&7));
}
void Assembler::vpaddd (Ymm dst, Ymm x, Ymm y) { this->op(0x66, 0x0f,0xfe, dst,x,y); }
void Assembler::vpsubd (Ymm dst, Ymm x, Ymm y) { this->op(0x66, 0x0f,0xfa, dst,x,y); }
void Assembler::vpmulld(Ymm dst, Ymm x, Ymm y) { this->op(0x66,0x380f,0x40, dst,x,y); }
void Assembler::vpaddd (Ymm dst, Ymm x, YmmOrLabel y) { this->op(0x66, 0x0f,0xfe, dst,x,y); }
void Assembler::vpsubd (Ymm dst, Ymm x, YmmOrLabel y) { this->op(0x66, 0x0f,0xfa, dst,x,y); }
void Assembler::vpmulld(Ymm dst, Ymm x, Ymm y) { this->op(0x66,0x380f,0x40, dst,x,y); }
void Assembler::vpsubw (Ymm dst, Ymm x, Ymm y) { this->op(0x66,0x0f,0xf9, dst,x,y); }
void Assembler::vpmullw(Ymm dst, Ymm x, Ymm y) { this->op(0x66,0x0f,0xd5, dst,x,y); }
void Assembler::vpand (Ymm dst, Ymm x, Ymm y) { this->op(0x66,0x0f,0xdb, dst,x,y); }
void Assembler::vpor (Ymm dst, Ymm x, Ymm y) { this->op(0x66,0x0f,0xeb, dst,x,y); }
void Assembler::vpxor (Ymm dst, Ymm x, Ymm y) { this->op(0x66,0x0f,0xef, dst,x,y); }
void Assembler::vpandn(Ymm dst, Ymm x, Ymm y) { this->op(0x66,0x0f,0xdf, dst,x,y); }
void Assembler::vpand (Ymm dst, Ymm x, YmmOrLabel y) { this->op(0x66,0x0f,0xdb, dst,x,y); }
void Assembler::vpor (Ymm dst, Ymm x, YmmOrLabel y) { this->op(0x66,0x0f,0xeb, dst,x,y); }
void Assembler::vpxor (Ymm dst, Ymm x, YmmOrLabel y) { this->op(0x66,0x0f,0xef, dst,x,y); }
void Assembler::vpandn(Ymm dst, Ymm x, Ymm y) { this->op(0x66,0x0f,0xdf, dst,x,y); }
void Assembler::vaddps(Ymm dst, Ymm x, Ymm y) { this->op(0,0x0f,0x58, dst,x,y); }
void Assembler::vsubps(Ymm dst, Ymm x, Ymm y) { this->op(0,0x0f,0x5c, dst,x,y); }
void Assembler::vmulps(Ymm dst, Ymm x, Ymm y) { this->op(0,0x0f,0x59, dst,x,y); }
void Assembler::vdivps(Ymm dst, Ymm x, Ymm y) { this->op(0,0x0f,0x5e, dst,x,y); }
void Assembler::vminps(Ymm dst, Ymm x, Ymm y) { this->op(0,0x0f,0x5d, dst,x,y); }
void Assembler::vmaxps(Ymm dst, Ymm x, Ymm y) { this->op(0,0x0f,0x5f, dst,x,y); }
void Assembler::vaddps(Ymm dst, Ymm x, YmmOrLabel y) { this->op(0,0x0f,0x58, dst,x,y); }
void Assembler::vsubps(Ymm dst, Ymm x, YmmOrLabel y) { this->op(0,0x0f,0x5c, dst,x,y); }
void Assembler::vmulps(Ymm dst, Ymm x, YmmOrLabel y) { this->op(0,0x0f,0x59, dst,x,y); }
void Assembler::vdivps(Ymm dst, Ymm x, Ymm y) { this->op(0,0x0f,0x5e, dst,x,y); }
void Assembler::vminps(Ymm dst, Ymm x, YmmOrLabel y) { this->op(0,0x0f,0x5d, dst,x,y); }
void Assembler::vmaxps(Ymm dst, Ymm x, YmmOrLabel y) { this->op(0,0x0f,0x5f, dst,x,y); }
void Assembler::vfmadd132ps(Ymm dst, Ymm x, Ymm y) { this->op(0x66,0x380f,0x98, dst,x,y); }
void Assembler::vfmadd213ps(Ymm dst, Ymm x, Ymm y) { this->op(0x66,0x380f,0xa8, dst,x,y); }
@ -1168,11 +1168,12 @@ namespace skvm {
this->word(this->disp32(l));
}
void Assembler::vpshufb(Ymm dst, Ymm x, Label* l) { this->op(0x66,0x380f,0x00, dst,x,l); }
void Assembler::vpaddd (Ymm dst, Ymm x, Label* l) { this->op(0x66, 0x0f,0xfe, dst,x,l); }
void Assembler::vpsubd (Ymm dst, Ymm x, Label* l) { this->op(0x66, 0x0f,0xfa, dst,x,l); }
void Assembler::vmulps (Ymm dst, Ymm x, Label* l) { this->op( 0, 0x0f,0x59, dst,x,l); }
// dst = op(x, y), where the final operand y is either a Ymm register or a Label.
// Dispatches on y.label: a non-null label forwards to the Label* overload of op(),
// otherwise y.ymm is forwarded to the Ymm overload.
void Assembler::op(int prefix, int map, int opcode, Ymm dst, Ymm x, YmmOrLabel y) {
y.label ? this->op(prefix,map,opcode,dst,x, y.label)
: this->op(prefix,map,opcode,dst,x, y.ymm );
}
void Assembler::vpshufb(Ymm dst, Ymm x, Label* l) { this->op(0x66,0x380f,0x00, dst,x,l); }
void Assembler::vptest(Ymm dst, Label* l) { this->op(0x66, 0x380f, 0x17, dst, (Ymm)0, l); }
void Assembler::vbroadcastss(Ymm dst, Label* l) { this->op(0x66,0x380f,0x18, dst, (Ymm)0, l); }
@ -2296,7 +2297,11 @@ namespace skvm {
a->vfmadd132ps(dst(),r[z], r[y]); }
break;
case Op::add_f32_imm: a->vaddps(dst(), r[x], &constants[immy].label); break;
case Op::sub_f32_imm: a->vsubps(dst(), r[x], &constants[immy].label); break;
case Op::mul_f32_imm: a->vmulps(dst(), r[x], &constants[immy].label); break;
case Op::min_f32_imm: a->vminps(dst(), r[x], &constants[immy].label); break;
case Op::max_f32_imm: a->vmaxps(dst(), r[x], &constants[immy].label); break;
case Op::add_i32: a->vpaddd (dst(), r[x], r[y]); break;
case Op::sub_i32: a->vpsubd (dst(), r[x], r[y]); break;
@ -2312,6 +2317,10 @@ namespace skvm {
case Op::bit_clear: a->vpandn(dst(), r[y], r[x]); break; // N.B. Y then X.
case Op::select : a->vpblendvb(dst(), r[z], r[y], r[x]); break;
case Op::bit_and_imm: a->vpand (dst(), r[x], &constants[immy].label); break;
case Op::bit_or_imm : a->vpor (dst(), r[x], &constants[immy].label); break;
case Op::bit_xor_imm: a->vpxor (dst(), r[x], &constants[immy].label); break;
case Op::shl_i32: a->vpslld(dst(), r[x], immy); break;
case Op::shr_i32: a->vpsrld(dst(), r[x], immy); break;
case Op::sra_i32: a->vpsrad(dst(), r[x], immy); break;

View File

@ -66,16 +66,35 @@ namespace skvm {
void add(GP64, int imm);
void sub(GP64, int imm);
// A label that instructions refer to by pointer (e.g. as the final operand of op()).
// NOTE(review): the field notes below are inferred from names only; confirm against
// the Assembler methods that take a Label*.
struct Label {
// presumably the label's position in the assembled code; starts at 0 -- TODO confirm
int offset = 0;
// Which displacement kind this label is tied to; NotYetSet until one is assigned.
enum { NotYetSet, ARMDisp19, X86Disp32 } kind = NotYetSet;
// presumably positions of the instructions that reference this label -- TODO confirm
std::vector<int> references;
};
// The final argument of an instruction that accepts either a Ymm register or a Label.
// Built from a Ymm, `label` stays nullptr and `ymm` holds the register;
// built from a Label*, `label` is set and `ymm` keeps its ymm0 default.
// Consumers tell the two cases apart by checking whether `label` is non-null.
struct YmmOrLabel {
// Register operand; only meaningful when `label` is nullptr.
Ymm ymm = ymm0;
// Label operand; nullptr means "use `ymm` instead".
Label* label = nullptr;
// Intentionally implicit so callers can pass a Ymm directly where a YmmOrLabel is expected.
/*implicit*/ YmmOrLabel(Ymm y) : ymm (y) { SkASSERT(!label); }
// Intentionally implicit so callers can pass a Label* directly; asserts the pointer is non-null.
/*implicit*/ YmmOrLabel(Label* l) : label(l) { SkASSERT( label); }
};
// All dst = x op y.
using DstEqXOpY = void(Ymm dst, Ymm x, Ymm y);
DstEqXOpY vpand, vpor, vpxor, vpandn,
vpaddd, vpsubd, vpmulld,
vpsubw, vpmullw,
vaddps, vsubps, vmulps, vdivps, vminps, vmaxps,
DstEqXOpY vpandn,
vpmulld,
vpsubw, vpmullw,
vdivps,
vfmadd132ps, vfmadd213ps, vfmadd231ps,
vpackusdw, vpackuswb,
vpcmpeqd, vpcmpgtd;
using DstEqXOpYOrLabel = void(Ymm dst, Ymm x, YmmOrLabel y);
DstEqXOpYOrLabel vpand, vpor, vpxor,
vpaddd, vpsubd,
vaddps, vsubps, vmulps, vminps, vmaxps;
// Floating point comparisons are all the same instruction with varying imm.
void vcmpps(Ymm dst, Ymm x, Ymm y, int imm);
void vcmpeqps (Ymm dst, Ymm x, Ymm y) { this->vcmpps(dst,x,y,0); }
@ -93,12 +112,6 @@ namespace skvm {
void vpblendvb(Ymm dst, Ymm x, Ymm y, Ymm z);
struct Label {
int offset = 0;
enum { NotYetSet, ARMDisp19, X86Disp32 } kind = NotYetSet;
std::vector<int> references;
};
Label here();
void label(Label*);
@ -109,17 +122,13 @@ namespace skvm {
void jc (Label*);
void cmp(GP64, int imm);
void vpshufb(Ymm dst, Ymm x, Label*);
void vptest(Ymm dst, Label*);
void vbroadcastss(Ymm dst, Label*);
void vbroadcastss(Ymm dst, Xmm src);
void vbroadcastss(Ymm dst, GP64 ptr, int off); // dst = *(ptr+off)
void vpshufb(Ymm dst, Ymm x, Label*);
void vpaddd (Ymm dst, Ymm x, Label*);
void vpsubd (Ymm dst, Ymm x, Label*);
void vmulps (Ymm dst, Ymm x, Label*);
void vmovups (Ymm dst, GP64 ptr); // dst = *ptr, 256-bit
void vpmovzxwd(Ymm dst, GP64 ptr); // dst = *ptr, 128-bit, each uint16_t expanded to int
void vpmovzxbd(Ymm dst, GP64 ptr); // dst = *ptr, 64-bit, each uint8_t expanded to int
@ -229,6 +238,7 @@ namespace skvm {
// dst = op(x,label) or op(label)
void op(int prefix, int map, int opcode, Ymm dst, Ymm x, Label* l);
void op(int prefix, int map, int opcode, Ymm dst, Ymm x, YmmOrLabel);
// *ptr = ymm or ymm = *ptr, depending on opcode.
void load_store(int prefix, int map, int opcode, Ymm ymm, GP64 ptr);

View File

@ -188,7 +188,7 @@ DEF_TEST(SkVM, r) {
uint32_t src[9];
uint32_t dst[SK_ARRAY_COUNT(src)];
test_interpreter_only(r, std::move(program), [&](const skvm::Program& program) {
test_jit_and_interpreter(r, std::move(program), [&](const skvm::Program& program) {
for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
src[i] = 0xbb007733;
dst[i] = 0xffaaccee;
@ -220,7 +220,7 @@ DEF_TEST(SkVM, r) {
test_8888(SrcoverBuilder_I32{}.done("srcover_i32"));
test_8888(SrcoverBuilder_I32_SWAR{}.done("srcover_i32_SWAR"));
test_interpreter_only(r, SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::G8}.done(),
test_jit_and_interpreter(r, SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::G8}.done(),
[&](const skvm::Program& program) {
uint32_t src[9];
uint8_t dst[SK_ARRAY_COUNT(src)];
@ -387,7 +387,7 @@ DEF_TEST(SkVM_bitops, r) {
b.store32(ptr, x);
}
test_interpreter_only(r, b.done(), [&](const skvm::Program& program) {
test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
int x = 0x42;
program.eval(1, &x);
REPORTER_ASSERT(r, x == 0x7fff'ffff);
@ -472,7 +472,7 @@ DEF_TEST(SkVM_cmp_f32, r) {
b.store32(b.varying<int>(), m);
}
test_interpreter_only(r, b.done(), [&](const skvm::Program& program) {
test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
float in[] = { 0,1,2,3,4,5,6,7,8,9 };
int out[SK_ARRAY_COUNT(in)];