add vbroadcastss(Ymm, GP64, int)

vbroadcastss with a register argument and immediate offset implements
uniform32.  This also turns on the JIT for some new SkVMBlitter paths
that pass more arguments than I'd previously wired up, so add a few
more.

Change-Id: I66db1286dcdb2c4a4ba7c43f2dc2cd13564d4d34
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/232056
Reviewed-by: Mike Klein <mtklein@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
This commit is contained in:
Mike Klein 2019-08-02 10:15:51 -05:00 committed by Skia Commit-Bot
parent 3f4a2b3499
commit 788967eb37
3 changed files with 59 additions and 5 deletions

View File

@ -220,6 +220,22 @@ namespace skvm {
return _233((int)mod, reg, rm);
}
// Choose the ModRM addressing mode for a memory operand with displacement imm:
// no displacement when imm is zero, a one-byte displacement when imm fits in an int8_t,
// and a four-byte displacement otherwise.
static Mod mod(int imm) {
    return imm == 0               ? Mod::Indirect
         : SkTFitsIn<int8_t>(imm) ? Mod::OneByteImm
                                  : Mod::FourByteImm;
}
// Number of displacement bytes that follow the ModRM byte for a given addressing mode.
// Mod::Direct is not expected here, so it is treated as unreachable.
static int imm_bytes(Mod mod) {
    switch (mod) {
        case Mod::Direct:      break;      // falls out to SkUNREACHABLE below
        case Mod::Indirect:    return 0;
        case Mod::OneByteImm:  return 1;
        case Mod::FourByteImm: return 4;
    }
    SkUNREACHABLE;
}
#if 0
// SIB byte encodes a memory address, base + (index * scale).
enum class Scale { One, Two, Four, Eight };
@ -445,9 +461,21 @@ namespace skvm {
}
// vbroadcastss whose source operand is named by a Label.
// Forwards prefix 0x66, map 0x380f, opcode 0x18 to the shared op() encoder.
void Assembler::vbroadcastss(Ymm dst, Label* l) {
    this->op(0x66, 0x380f, 0x18, dst, l);
}
// vpshufb taking register x and an operand named by a Label.
// Forwards prefix 0x66, map 0x380f, opcode 0x00 to the shared op() encoder.
void Assembler::vpshufb(Ymm dst, Ymm x, Label* l) {
    this->op(0x66, 0x380f, 0x00, dst, x, l);
}
// vbroadcastss dst, [ptr + off]      (declared as: dst = *(ptr+off))
//
// Emits a VEX prefix, the opcode byte, a ModRM byte, and then 0, 1, or 4 bytes of displacement
// depending on how large off is.
//
// NOTE: this never emits an SIB byte, so base registers whose low three bits are 0b100
// (rsp, r12) are not encodable here; in ModRM that rm value means "SIB byte follows".
void Assembler::vbroadcastss(Ymm dst, GP64 ptr, int off) {
    const int prefix = 0x66,
              map    = 0x380f,
              opcode = 0x18;
    VEX v = vex(0, dst>>3, 0, ptr>>3,
                map, 0, /*ymm?*/1, prefix);
    this->bytes(v.bytes, v.len);
    this->byte(opcode);

    // With no displacement (mod == 0b00), rm == 0b101 does not mean [rbp] or [r13]:
    // it selects the disp32-only form (RIP-relative in 64-bit mode).  To address through
    // those bases with a zero offset we must emit an explicit zero displacement instead.
    // mod(1) is simply the one-byte-displacement mode; the byte written is still off (zero).
    const bool zero_off_needs_disp = off == 0 && (ptr&7) == 5;
    const auto m = zero_off_needs_disp ? mod(1) : mod(off);

    this->byte(mod_rm(m, dst&7, ptr&7));
    // Writes the low imm_bytes(m) bytes of off as stored in memory (low-order bytes first
    // on a little-endian host).
    this->bytes(&off, imm_bytes(m));
}
void Assembler::jump(uint8_t condition, Label* l) {
// These conditional jumps can be either 2 bytes (short) or 6 bytes (near):
// 7? one-byte-disp
@ -744,10 +772,14 @@ namespace skvm {
const int nargs = (int)fStrides.size();
if (fJITBuf) {
void** a = args;
const void* b = fJITBuf;
switch (nargs) {
case 0: return ((void(*)(int ))fJITBuf)(n );
case 1: return ((void(*)(int, void* ))fJITBuf)(n, args[0] );
case 2: return ((void(*)(int, void*, void*))fJITBuf)(n, args[0], args[1]);
case 0: return ((void(*)(int ))b)(n );
case 1: return ((void(*)(int,void* ))b)(n,a[0] );
case 2: return ((void(*)(int,void*,void* ))b)(n,a[0],a[1] );
case 3: return ((void(*)(int,void*,void*,void* ))b)(n,a[0],a[1],a[2] );
case 4: return ((void(*)(int,void*,void*,void*,void*))b)(n,a[0],a[1],a[2],a[3]);
default: SkUNREACHABLE; // TODO
}
}
@ -1311,7 +1343,11 @@ namespace skvm {
//
// Now let's actually assemble the instruction!
switch (op) {
default: return false; // TODO: many new ops
default:
#if 0
SkDEBUGFAILF("%d not yet implemented\n", op);
#endif
return false; // TODO: many new ops
#if defined(__x86_64__)
case Op::store8: if (scalar) { a->vpextrb (arg[imm], (A::Xmm)r[x], 0); }
@ -1339,6 +1375,9 @@ namespace skvm {
else { a->vmovups( dst(), arg[imm]); }
break;
case Op::uniform32: a->vbroadcastss(dst(), arg[imm&0xffff], imm>>16);
break;
case Op::splat: a->vbroadcastss(dst(), &splats.find(imm)->label);
break;
// TODO: many of these instructions have variants that

View File

@ -95,6 +95,8 @@ namespace skvm {
void cmp(GP64, int imm);
void vbroadcastss(Ymm dst, Label*);
void vbroadcastss(Ymm dst, GP64 ptr, int off); // dst = *(ptr+off)
void vpshufb(Ymm dst, Ymm x, Label*);
void vmovups (Ymm dst, GP64 ptr); // dst = *ptr, 256-bit

View File

@ -971,6 +971,19 @@ DEF_TEST(SkVM_Assembler, r) {
0xc4, 0xe2, 0x65, 0x00, 0b00'100'101, 0xcf,0xff,0xff,0xff, // 0xffffffcf == -49
});
test_asm(r, [&](A& a) {
a.vbroadcastss(A::ymm0, A::rdi, 0);
a.vbroadcastss(A::ymm13, A::r14, 7);
a.vbroadcastss(A::ymm8, A::rdx, -12);
a.vbroadcastss(A::ymm8, A::rdx, 400);
},{
/* VEX */ /*op*/ /*ModRM*/ /*offset*/
0xc4,0xe2,0x7d, 0x18, 0b00'000'111,
0xc4,0x42,0x7d, 0x18, 0b01'101'110, 0x07,
0xc4,0x62,0x7d, 0x18, 0b01'000'010, 0xf4,
0xc4,0x62,0x7d, 0x18, 0b10'000'010, 0x90,0x01,0x00,0x00,
});
test_asm(r, [&](A& a) {
A::Label l = a.here();
a.jne(&l);