implement assert_true on ARM

This all comes together as

    uminv tmp, condition
    fmov  gp, tmp
    cbnz  gp, all_true
    brk   0
  all_true:
    ...

The key idea is that uminv(vec) returns 0 if any of the inputs is 0,
and a non-zero value (namely 0xffffffff) if all of the inputs are non-zero.

fmov moves that minimum from a vector register to a general purpose
register where we can test it with cbnz, compare and branch if non-zero.
This jumps over the `brk 0` debug trap when all inputs are true.

Change-Id: If5deb77a77f52221d0649e537179743c45eb9cc5
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/254479
Auto-Submit: Mike Klein <mtklein@google.com>
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
This commit is contained in:
Mike Klein 2019-11-13 13:19:01 -06:00 committed by Skia Commit-Bot
parent fce27adfc5
commit 37be7715fd
3 changed files with 46 additions and 8 deletions

View File

@ -1151,6 +1151,13 @@ namespace skvm {
// uxtlb2h: u8 -> u16.  Emits one instruction word built from the 22-bit
// opcode below; note op() is handed the source (n) before the destination (d).
void Assembler::uxtlb2h(V d, V n) {
    constexpr auto kOpcode = 0b0'0'1'011110'0001'000'10100'1;
    this->op(kOpcode, n, d);
}
// uxtlh2s: u16 -> u32.  Emits one instruction word built from the 22-bit
// opcode below; note op() is handed the source (n) before the destination (d).
void Assembler::uxtlh2s(V d, V n) {
    constexpr auto kOpcode = 0b0'0'1'011110'0010'000'10100'1;
    this->op(kOpcode, n, d);
}
// uminv4s: d[0] = min(n[0], n[1], n[2], n[3]), treating the lanes of n as unsigned.
// Note op() is handed the source (n) before the destination (d).
void Assembler::uminv4s(V d, V n) {
    constexpr auto kOpcode = 0b0'1'1'01110'10'11000'1'1010'10;
    this->op(kOpcode, n, d);
}
// brk: emit a breakpoint instruction carrying a 16-bit immediate.
// Only the low 16 bits of imm16 are kept; they are placed at bit 5 of the word.
void Assembler::brk(int imm16) {
    constexpr auto kOpcode = 0b11010100'001'0000000000000000'000'00;
    const auto immBits = (imm16 & 16_mask) << 5;
    this->word(kOpcode | immBits);
}
void Assembler::ret(X n) {
this->word(0b1101011'0'0'10'11111'0000'0'0 << 10
| (n & 5_mask) << 5);
@ -1202,6 +1209,12 @@ namespace skvm {
// strs: 32-bit store, *dst = src.
// Note op() is handed the address register (dst) before the value register (src).
void Assembler::strs(V src, X dst) {
    constexpr auto kOpcode = 0b10'111'1'01'00'000000000000;
    this->op(kOpcode, dst, src);
}
// strb: 8-bit store, *dst = src.
// Note op() is handed the address register (dst) before the value register (src).
void Assembler::strb(V src, X dst) {
    constexpr auto kOpcode = 0b00'111'1'01'00'000000000000;
    this->op(kOpcode, dst, src);
}
// fmovs: dst = 32-bit src[0], i.e. copy the low lane of a vector register
// into a general purpose register (e.g. `fmov w3,s4`).
void Assembler::fmovs(X dst, V src) {
    // The 22-bit opcode fills the top of the word; each register index is
    // masked to 5 bits, with the source at bit 5 and the destination at bit 0.
    const auto opcode  = 0b0'0'0'11110'00'1'00'110'000000 << 10;
    const auto srcBits = (src & 5_mask) << 5;
    const auto dstBits = (dst & 5_mask);
    this->word(opcode | srcBits | dstBits);
}
void Assembler::ldrq(V dst, Label* l) {
const int imm19 = this->disp19(l);
this->word( 0b10'011'1'00 << 24
@ -1699,16 +1712,18 @@ namespace skvm {
if (!SkCpu::Supports(SkCpu::HSW)) {
return false;
}
A::GP64 N = A::rdi,
arg[] = { A::rsi, A::rdx, A::rcx, A::r8, A::r9 };
A::GP64 N = A::rdi,
scratch = A::rax,
arg[] = { A::rsi, A::rdx, A::rcx, A::r8, A::r9 };
// All 16 ymm registers are available to use.
using Reg = A::Ymm;
uint32_t avail = 0xffff;
#elif defined(__aarch64__)
A::X N = A::x0,
arg[] = { A::x1, A::x2, A::x3, A::x4, A::x5, A::x6, A::x7 };
A::X N = A::x0,
scratch = A::x8,
arg[] = { A::x1, A::x2, A::x3, A::x4, A::x5, A::x6, A::x7 };
// We can use v0-v7 and v16-v31 freely; we'd need to preserve v8-v15.
using Reg = A::V;
@ -1911,8 +1926,8 @@ namespace skvm {
else { a->vmovups( dst(), arg[imm]); }
break;
case Op::uniform8: a->movzbl(A::rax, arg[imm&0xffff], imm>>16);
a->vmovd_direct((A::Xmm)dst(), A::rax);
case Op::uniform8: a->movzbl(scratch, arg[imm&0xffff], imm>>16);
a->vmovd_direct((A::Xmm)dst(), scratch);
a->vbroadcastss(dst(), (A::Xmm)dst());
break;
@ -1989,7 +2004,14 @@ namespace skvm {
break;
#elif defined(__aarch64__)
case Op::assert_true: /*TODO somehow?*/ break;
case Op::assert_true: {
a->uminv4s(tmp(), r[x]); // uminv acts like an all() across the vector.
a->fmovs(scratch, tmp());
A::Label all_true;
a->cbnz(scratch, &all_true);
a->brk(0);
a->label(&all_true);
} break;
case Op::store8: a->xtns2h(tmp(), r[x]);
a->xtnh2b(tmp(), tmp());

View File

@ -174,8 +174,10 @@ namespace skvm {
xtns2h, // u32 -> u16
xtnh2b, // u16 -> u8
uxtlb2h, // u8 -> u16
uxtlh2s; // u16 -> u32
uxtlh2s, // u16 -> u32
uminv4s; // dst[0] = min(n[0],n[1],n[2],n[3]), n as unsigned
void brk (int imm16);
void ret (X);
void add (X d, X n, int imm12);
void sub (X d, X n, int imm12);
@ -207,6 +209,8 @@ namespace skvm {
void strs(V src, X dst); // 32-bit *dst = src
void strb(V src, X dst); // 8-bit *dst = src
void fmovs(X dst, V src); // dst = 32-bit src[0]
private:
// dst = op(dst, imm)
void op(int opcode, int opcode_ext, GP64 dst, int imm);

View File

@ -1199,6 +1199,9 @@ DEF_TEST(SkVM_Assembler, r) {
});
test_asm(r, [&](A& a) {
a.brk(0);
a.brk(65535);
a.ret(A::x30); // Conventional ret using link register.
a.ret(A::x13); // Can really return using any register if we like.
@ -1222,6 +1225,9 @@ DEF_TEST(SkVM_Assembler, r) {
a.cbnz(A::x2, &l);
a.cbz(A::x2, &l);
},{
0x00,0x00,0x20,0xd4,
0xe0,0xff,0x3f,0xd4,
0xc0,0x03,0x5f,0xd6,
0xa0,0x01,0x5f,0xd6,
@ -1314,6 +1320,9 @@ DEF_TEST(SkVM_Assembler, r) {
a.ldrs (A::v0, A::x0);
a.uxtlb2h(A::v0, A::v0);
a.uxtlh2s(A::v0, A::v0);
a.uminv4s(A::v3, A::v4);
a.fmovs (A::x3, A::v4); // fmov w3,s4
},{
0x00,0x28,0x61,0x0e,
0x00,0x28,0x21,0x0e,
@ -1322,6 +1331,9 @@ DEF_TEST(SkVM_Assembler, r) {
0x00,0x00,0x40,0xbd,
0x00,0xa4,0x08,0x2f,
0x00,0xa4,0x10,0x2f,
0x83,0xa8,0xb1,0x6e,
0x83,0x00,0x26,0x1e,
});
test_asm(r, [&](A& a) {