add bsl.16b, cmeq.4s, cmgt.4s

These implement select, eq_i32, lt_i32, and gt_i32 on ARMv8.

Change-Id: Ic36dda1cc425ca91700f9b120594e420ea0f560a
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/248970
Auto-Submit: Mike Klein <mtklein@google.com>
Commit-Queue: Herb Derby <herb@google.com>
Reviewed-by: Herb Derby <herb@google.com>
This commit is contained in:
Mike Klein 2019-10-16 14:11:27 -05:00 committed by Skia Commit-Bot
parent f29cb70281
commit 97afd2e21c
3 changed files with 28 additions and 10 deletions

View File

@ -978,11 +978,15 @@ namespace skvm {
void Assembler::orr16b(V d, V n, V m) { this->op(0b0'1'0'01110'10'1, m, 0b00011'1, n, d); }  // ORR Vd.16B: d = n | m (bitwise); orr16b(d, n, n) doubles as a register move.
void Assembler::eor16b(V d, V n, V m) { this->op(0b0'1'1'01110'00'1, m, 0b00011'1, n, d); }  // EOR Vd.16B: d = n ^ m (bitwise).
void Assembler::bic16b(V d, V n, V m) { this->op(0b0'1'0'01110'01'1, m, 0b00011'1, n, d); }  // BIC Vd.16B: d = n & ~m (bitwise).
void Assembler::bsl16b(V d, V n, V m) { this->op(0b0'1'1'01110'01'1, m, 0b00011'1, n, d); }  // BSL Vd.16B: d = (d & n) | (~d & m); d is both the select mask and the result.
void Assembler::add4s(V d, V n, V m) { this->op(0b0'1'0'01110'10'1, m, 0b10000'1, n, d); }  // ADD Vd.4S: d = n + m per 32-bit lane.
void Assembler::sub4s(V d, V n, V m) { this->op(0b0'1'1'01110'10'1, m, 0b10000'1, n, d); }  // SUB Vd.4S: d = n - m per 32-bit lane.
void Assembler::mul4s(V d, V n, V m) { this->op(0b0'1'0'01110'10'1, m, 0b10011'1, n, d); }  // MUL Vd.4S: d = n * m per 32-bit lane (low 32 bits of each product).
void Assembler::cmeq4s(V d, V n, V m) { this->op(0b0'1'1'01110'10'1, m, 0b10001'1, n, d); }  // CMEQ Vd.4S: each 32-bit lane of d is all 1s where n == m, else 0.
void Assembler::cmgt4s(V d, V n, V m) { this->op(0b0'1'0'01110'10'1, m, 0b0011'0'1, n, d); }  // CMGT Vd.4S: each 32-bit lane of d is all 1s where n > m (signed), else 0; swap n and m for less-than.
void Assembler::sub8h(V d, V n, V m) { this->op(0b0'1'1'01110'01'1, m, 0b10000'1, n, d); }  // SUB Vd.8H: d = n - m per 16-bit lane.
void Assembler::mul8h(V d, V n, V m) { this->op(0b0'1'0'01110'01'1, m, 0b10011'1, n, d); }  // MUL Vd.8H: d = n * m per 16-bit lane (low 16 bits of each product).
@ -1885,9 +1889,9 @@ namespace skvm {
case Op::mul_f32: a->fmul4s(dst(), r[x], r[y]); break;
case Op::div_f32: a->fdiv4s(dst(), r[x], r[y]); break;
case Op::mad_f32:
case Op::mad_f32: // fmla4s is z += x*y
if (avail & (1<<r[z])) { set_dst(r[z]); a->fmla4s( r[z], r[x], r[y]); }
else { a->orr16b(tmp(), r[z], r[z]);
else { a->orr16b(tmp(), r[z], r[z]);
a->fmla4s(tmp(), r[x], r[y]);
if(dst() != tmp()) { a->orr16b(dst(), tmp(), tmp()); } }
break;
@ -1906,10 +1910,21 @@ namespace skvm {
case Op::bit_xor : a->eor16b(dst(), r[x], r[y]); break;
case Op::bit_clear: a->bic16b(dst(), r[x], r[y]); break;
case Op::select: // bsl16b is x = x ? y : z
if (avail & (1<<r[x])) { set_dst(r[x]); a->bsl16b( r[x], r[y], r[z]); }
else { a->orr16b(tmp(), r[x], r[x]);
a->bsl16b(tmp(), r[y], r[z]);
if(dst() != tmp()) { a->orr16b(dst(), tmp(), tmp()); } }
break;
case Op::shl_i32: a-> shl4s(dst(), r[x], imm); break;
case Op::shr_i32: a->ushr4s(dst(), r[x], imm); break;
case Op::sra_i32: a->sshr4s(dst(), r[x], imm); break;
case Op::eq_i32: a->cmeq4s(dst(), r[x], r[y]); break;
case Op::lt_i32: a->cmgt4s(dst(), r[y], r[x]); break;
case Op::gt_i32: a->cmgt4s(dst(), r[x], r[y]); break;
case Op::extract: if (imm) { a->ushr4s(tmp(), r[x], imm);
a->and16b(dst(), tmp(), r[y]); }
else { a->and16b(dst(), r[x], r[y]); }

View File

@ -132,8 +132,9 @@ namespace skvm {
// d = op(n,m)
using DOpNM = void(V d, V n, V m);
DOpNM and16b, orr16b, eor16b, bic16b,
DOpNM and16b, orr16b, eor16b, bic16b, bsl16b,
add4s, sub4s, mul4s,
cmeq4s, cmgt4s,
sub8h, mul8h,
fadd4s, fsub4s, fmul4s, fdiv4s,
tbl;

View File

@ -600,13 +600,7 @@ DEF_TEST(SkVM_select, r) {
b.store32(buf, x);
}
#if defined(SK_CPU_ARM64)
// TODO: missing Op::select for ARMv8?
test_interpreter_only
#else
test_jit_and_interpreter
#endif
(r, b.done(), [&](const skvm::Program& program) {
test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
int buf[] = { 0,1,2,3,4,5,6,7,8 };
program.eval(SK_ARRAY_COUNT(buf), buf);
for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
@ -990,11 +984,15 @@ DEF_TEST(SkVM_Assembler, r) {
a.orr16b(A::v4, A::v3, A::v1);
a.eor16b(A::v4, A::v3, A::v1);
a.bic16b(A::v4, A::v3, A::v1);
a.bsl16b(A::v4, A::v3, A::v1);
a.add4s(A::v4, A::v3, A::v1);
a.sub4s(A::v4, A::v3, A::v1);
a.mul4s(A::v4, A::v3, A::v1);
a.cmeq4s(A::v4, A::v3, A::v1);
a.cmgt4s(A::v4, A::v3, A::v1);
a.sub8h(A::v4, A::v3, A::v1);
a.mul8h(A::v4, A::v3, A::v1);
@ -1009,11 +1007,15 @@ DEF_TEST(SkVM_Assembler, r) {
0x64,0x1c,0xa1,0x4e,
0x64,0x1c,0x21,0x6e,
0x64,0x1c,0x61,0x4e,
0x64,0x1c,0x61,0x6e,
0x64,0x84,0xa1,0x4e,
0x64,0x84,0xa1,0x6e,
0x64,0x9c,0xa1,0x4e,
0x64,0x8c,0xa1,0x6e,
0x64,0x34,0xa1,0x4e,
0x64,0x84,0x61,0x6e,
0x64,0x9c,0x61,0x4e,