Summary of this patch (text before the first "diff --git" header is ignored by patch tools):
  * skvm::Assembler gains three AArch64 encoders: uminv4s, brk and fmovs.
  * The aarch64 JIT path implements Op::assert_true: uminv4s reduces the four
    lanes to their unsigned minimum, fmovs copies it to a scratch GP register,
    and cbnz skips a brk(0) when that minimum is non-zero.
  * The JIT names its GP scratch register (rax on x86-64, x8 on aarch64) and
    Op::uniform8 now uses that name instead of a hard-coded A::rax.
  * SkVM_Assembler tests pin the byte encodings of the new instructions.

diff --git a/src/core/SkVM.cpp b/src/core/SkVM.cpp
index f76bccb935..751fe376bb 100644
--- a/src/core/SkVM.cpp
+++ b/src/core/SkVM.cpp
@@ -1151,6 +1151,13 @@ namespace skvm {
     void Assembler::uxtlb2h(V d, V n) { this->op(0b0'0'1'011110'0001'000'10100'1, n,d); }
     void Assembler::uxtlh2s(V d, V n) { this->op(0b0'0'1'011110'0010'000'10100'1, n,d); }
 
+    void Assembler::uminv4s(V d, V n) { this->op(0b0'1'1'01110'10'11000'1'1010'10, n,d); }
+
+    void Assembler::brk(int imm16) {
+        this->word(0b11010100'001'0000000000000000'000'00
+                  | (imm16 & 16_mask) << 5);
+    }
+
     void Assembler::ret(X n) {
         this->word(0b1101011'0'0'10'11111'0000'0'0 << 10
                   | (n & 5_mask) << 5);
@@ -1202,6 +1209,12 @@ namespace skvm {
     void Assembler::strs(V src, X dst) { this->op(0b10'111'1'01'00'000000000000, dst, src); }
     void Assembler::strb(V src, X dst) { this->op(0b00'111'1'01'00'000000000000, dst, src); }
 
+    void Assembler::fmovs(X dst, V src) {
+        this->word(0b0'0'0'11110'00'1'00'110'000000 << 10
+                  | (src & 5_mask) << 5
+                  | (dst & 5_mask) << 0);
+    }
+
     void Assembler::ldrq(V dst, Label* l) {
         const int imm19 = this->disp19(l);
         this->word( 0b10'011'1'00 << 24
@@ -1699,16 +1712,18 @@ namespace skvm {
         if (!SkCpu::Supports(SkCpu::HSW)) {
             return false;
         }
-        A::GP64 N = A::rdi,
-                arg[] = { A::rsi, A::rdx, A::rcx, A::r8, A::r9 };
+        A::GP64 N = A::rdi,
+                scratch = A::rax,
+                arg[] = { A::rsi, A::rdx, A::rcx, A::r8, A::r9 };
 
         // All 16 ymm registers are available to use.
         using Reg = A::Ymm;
         uint32_t avail = 0xffff;
 
     #elif defined(__aarch64__)
-        A::X N = A::x0,
-             arg[] = { A::x1, A::x2, A::x3, A::x4, A::x5, A::x6, A::x7 };
+        A::X N = A::x0,
+             scratch = A::x8,
+             arg[] = { A::x1, A::x2, A::x3, A::x4, A::x5, A::x6, A::x7 };
 
         // We can use v0-v7 and v16-v31 freely; we'd need to preserve v8-v15.
         using Reg = A::V;
@@ -1911,8 +1926,8 @@ namespace skvm {
                                else { a->vmovups( dst(), arg[imm]); }
                                break;
 
-            case Op::uniform8: a->movzbl(A::rax, arg[imm&0xffff], imm>>16);
-                               a->vmovd_direct((A::Xmm)dst(), A::rax);
+            case Op::uniform8: a->movzbl(scratch, arg[imm&0xffff], imm>>16);
+                               a->vmovd_direct((A::Xmm)dst(), scratch);
                                a->vbroadcastss(dst(), (A::Xmm)dst());
                                break;
 
@@ -1989,7 +2004,14 @@ namespace skvm {
                                 break;
 
     #elif defined(__aarch64__)
-                case Op::assert_true: /*TODO somehow?*/ break;
+                case Op::assert_true: {
+                    a->uminv4s(tmp(), r[x]); // uminv acts like an all() across the vector.
+                    a->fmovs(scratch, tmp());
+                    A::Label all_true;
+                    a->cbnz(scratch, &all_true);
+                    a->brk(0);
+                    a->label(&all_true);
+                } break;
 
                 case Op::store8: a->xtns2h(tmp(), r[x]);
                                  a->xtnh2b(tmp(), tmp());
diff --git a/src/core/SkVM.h b/src/core/SkVM.h
index 9306cbc3f7..d06b14dada 100644
--- a/src/core/SkVM.h
+++ b/src/core/SkVM.h
@@ -174,8 +174,10 @@ namespace skvm {
              xtns2h, // u32 -> u16
              xtnh2b, // u16 -> u8
              uxtlb2h, // u8 -> u16
-             uxtlh2s; // u16 -> u32
+             uxtlh2s, // u16 -> u32
+             uminv4s; // dst[0] = min(n[0],n[1],n[2],n[3]), n as unsigned
 
+        void brk (int imm16);
         void ret (X);
         void add (X d, X n, int imm12);
         void sub (X d, X n, int imm12);
@@ -207,6 +209,8 @@ namespace skvm {
         void strs(V src, X dst); // 32-bit *dst = src
         void strb(V src, X dst); // 8-bit *dst = src
 
+        void fmovs(X dst, V src); // dst = 32-bit src[0]
+
     private:
         // dst = op(dst, imm)
         void op(int opcode, int opcode_ext, GP64 dst, int imm);
diff --git a/tests/SkVMTest.cpp b/tests/SkVMTest.cpp
index 7c31f465a8..5d0e7b9555 100644
--- a/tests/SkVMTest.cpp
+++ b/tests/SkVMTest.cpp
@@ -1199,6 +1199,9 @@ DEF_TEST(SkVM_Assembler, r) {
     });
 
     test_asm(r, [&](A& a) {
+        a.brk(0);
+        a.brk(65535);
+
         a.ret(A::x30); // Conventional ret using link register.
         a.ret(A::x13); // Can really return using any register if we like.
 
@@ -1222,6 +1225,9 @@ DEF_TEST(SkVM_Assembler, r) {
         a.cbnz(A::x2, &l);
         a.cbz(A::x2, &l);
     },{
+        0x00,0x00,0x20,0xd4,
+        0xe0,0xff,0x3f,0xd4,
+
         0xc0,0x03,0x5f,0xd6,
         0xa0,0x01,0x5f,0xd6,
 
@@ -1314,6 +1320,9 @@ DEF_TEST(SkVM_Assembler, r) {
         a.ldrs (A::v0, A::x0);
         a.uxtlb2h(A::v0, A::v0);
         a.uxtlh2s(A::v0, A::v0);
+
+        a.uminv4s(A::v3, A::v4);
+        a.fmovs (A::x3, A::v4); // fmov w3,s4
     },{
         0x00,0x28,0x61,0x0e,
         0x00,0x28,0x21,0x0e,
@@ -1322,6 +1331,9 @@ DEF_TEST(SkVM_Assembler, r) {
         0x00,0x00,0x40,0xbd,
         0x00,0xa4,0x08,0x2f,
         0x00,0xa4,0x10,0x2f,
+
+        0x83,0xa8,0xb1,0x6e,
+        0x83,0x00,0x26,0x1e,
     });
 
     test_asm(r, [&](A& a) {