instructions for JIT tail support on ARM
This adds a bunch of instructions we'll need to handle the N < 4 tail within the JIT code on ARM:

  - ldrb/strb are 1-byte loads and stores
  - sub subtracts without setting flags
  - cmp just sets flags (actually just subs with an xzr destination)
  - add b and b.lt, just like b.ne
  - cbz and cbnz... we only need cbz but I accidentally did cbnz first

Once I add support for forward jumps, we'll be able to use these instructions to restructure the loop to

    entry:
        hoisted setup
    loop:
        if N < 4, jump tail      (cmp N,#4; b.lt tail)
        ... handle 4 values ...
        jump loop                (b loop)
    tail:
        if N == 0, jump end      (cbz N, end)
        ... handle 1 value ...
        jump tail                (b tail)
    end:
        ret

Change-Id: I62d2d190f670f758197a25d99dfde13362189993
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/226828
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
This commit is contained in:
parent
7825d4983f
commit
4cfe3ed0f2
@ -625,6 +625,12 @@ namespace skvm {
|
||||
| (n & 5_mask) << 5
|
||||
| (d & 5_mask) << 0);
|
||||
}
|
||||
void Assembler::sub(X d, X n, int imm12) {
|
||||
this->word( 0b1'1'0'10001'00 << 22
|
||||
| (imm12 & 12_mask) << 10
|
||||
| (n & 5_mask) << 5
|
||||
| (d & 5_mask) << 0);
|
||||
}
|
||||
void Assembler::subs(X d, X n, int imm12) {
|
||||
this->word( 0b1'1'1'10001'00 << 22
|
||||
| (imm12 & 12_mask) << 10
|
||||
@ -632,19 +638,33 @@ namespace skvm {
|
||||
| (d & 5_mask) << 0);
|
||||
}
|
||||
|
||||
void Assembler::bne(Label l) {
|
||||
void Assembler::b(Condition cond, Label l) {
|
||||
// Jump in insts from before this one.
|
||||
const int imm19 = (l.offset - here().offset) / 4;
|
||||
this->word( 0b0101010'0 << 24
|
||||
this->word( 0b0101010'0 << 24
|
||||
| (imm19 & 19_mask) << 5
|
||||
| ((int)cond & 4_mask) << 0);
|
||||
}
|
||||
void Assembler::cbz(X t, Label l) {
|
||||
const int imm19 = (l.offset - here().offset) / 4;
|
||||
this->word( 0b1'011010'0 << 24
|
||||
| (imm19 & 19_mask) << 5
|
||||
| 0b0'0001 << 0);
|
||||
| (t & 5_mask) << 0);
|
||||
}
|
||||
void Assembler::cbnz(X t, Label l) {
|
||||
const int imm19 = (l.offset - here().offset) / 4;
|
||||
this->word( 0b1'011010'1 << 24
|
||||
| (imm19 & 19_mask) << 5
|
||||
| (t & 5_mask) << 0);
|
||||
}
|
||||
|
||||
void Assembler::ldrq(V dst, X src) { this->op(0b00'111'1'01'11'000000000000, src, dst); }
|
||||
void Assembler::ldrs(V dst, X src) { this->op(0b10'111'1'01'01'000000000000, src, dst); }
|
||||
void Assembler::ldrb(V dst, X src) { this->op(0b00'111'1'01'01'000000000000, src, dst); }
|
||||
|
||||
void Assembler::strq(V src, X dst) { this->op(0b00'111'1'01'10'000000000000, dst, src); }
|
||||
void Assembler::strs(V src, X dst) { this->op(0b10'111'1'01'00'000000000000, dst, src); }
|
||||
void Assembler::strb(V src, X dst) { this->op(0b00'111'1'01'00'000000000000, dst, src); }
|
||||
|
||||
void Assembler::ldrq(V dst, Label l) {
|
||||
const int imm19 = (l.offset - here().offset) / 4;
|
||||
|
@ -128,14 +128,34 @@ namespace skvm {
|
||||
|
||||
void ret (X);
|
||||
void add (X d, X n, int imm12);
|
||||
void subs(X d, X n, int imm12);
|
||||
void bne (Label);
|
||||
void sub (X d, X n, int imm12);
|
||||
void subs(X d, X n, int imm12); // subtract setting condition flags
|
||||
|
||||
// There's another encoding for unconditional branches that can jump further,
|
||||
// but this one encoded as b.al is simple to implement and should be fine.
|
||||
void b (Label l) { this->b(Condition::al, l); }
|
||||
void bne(Label l) { this->b(Condition::ne, l); }
|
||||
void blt(Label l) { this->b(Condition::lt, l); }
|
||||
|
||||
// "cmp ..." is just an assembler mnemonic for "subs xzr, ..."!
|
||||
void cmp(X n, int imm12) { this->subs(xzr, n, imm12); }
|
||||
|
||||
// Compare and branch if zero/non-zero, as if
|
||||
// cmp(t,0)
|
||||
// beq/bne(l)
|
||||
// but without setting condition flags.
|
||||
void cbz (X t, Label l);
|
||||
void cbnz(X t, Label l);
|
||||
|
||||
void ldrq(V dst, Label); // 128-bit PC-relative load
|
||||
|
||||
void ldrq(V dst, X src); // 128-bit dst = *src
|
||||
void ldrs(V dst, X src); // 32-bit dst[0] = *src
|
||||
void ldrs(V dst, X src); // 32-bit dst = *src
|
||||
void ldrb(V dst, X src); // 8-bit dst = *src
|
||||
|
||||
void strq(V src, X dst); // 128-bit *dst = src
|
||||
void strs(V src, X dst); // 32-bit *dst = src[0]
|
||||
void strs(V src, X dst); // 32-bit *dst = src
|
||||
void strb(V src, X dst); // 8-bit *dst = src
|
||||
|
||||
private:
|
||||
// dst = op(dst, imm)
|
||||
@ -170,6 +190,10 @@ namespace skvm {
|
||||
void op(uint32_t op22, V n, V d) { this->op(op22,0,n,d); }
|
||||
void op(uint32_t op22, X x, V v) { this->op(op22,0,(V)x,v); }
|
||||
|
||||
// Order matters... value is 4-bit encoding for condition code.
|
||||
enum class Condition { eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,al };
|
||||
void b(Condition, Label);
|
||||
|
||||
uint8_t* fCode;
|
||||
size_t fSize;
|
||||
};
|
||||
|
@ -641,12 +641,22 @@ DEF_TEST(SkVM_Assembler, r) {
|
||||
a.add(A::x2, A::x2, 4);
|
||||
a.add(A::x3, A::x2, 32);
|
||||
|
||||
a.sub(A::x2, A::x2, 4);
|
||||
a.sub(A::x3, A::x2, 32);
|
||||
|
||||
a.subs(A::x2, A::x2, 4);
|
||||
a.subs(A::x3, A::x2, 32);
|
||||
|
||||
a.subs(A::xzr, A::x2, 4); // These are actually the same instruction!
|
||||
a.cmp(A::x2, 4);
|
||||
|
||||
A::Label l = a.here();
|
||||
a.bne(l);
|
||||
a.bne(l);
|
||||
a.blt(l);
|
||||
a.b(l);
|
||||
a.cbnz(A::x2, l);
|
||||
a.cbz(A::x2, l);
|
||||
},{
|
||||
0xc0,0x03,0x5f,0xd6,
|
||||
0xa0,0x01,0x5f,0xd6,
|
||||
@ -654,19 +664,29 @@ DEF_TEST(SkVM_Assembler, r) {
|
||||
0x42,0x10,0x00,0x91,
|
||||
0x43,0x80,0x00,0x91,
|
||||
|
||||
0x42,0x10,0x00,0xd1,
|
||||
0x43,0x80,0x00,0xd1,
|
||||
|
||||
0x42,0x10,0x00,0xf1,
|
||||
0x43,0x80,0x00,0xf1,
|
||||
|
||||
0x01,0x00,0x00,0x54,
|
||||
0xe1,0xff,0xff,0x54,
|
||||
0x5f,0x10,0x00,0xf1,
|
||||
0x5f,0x10,0x00,0xf1,
|
||||
|
||||
0x01,0x00,0x00,0x54, // b.ne #0
|
||||
0xe1,0xff,0xff,0x54, // b.ne #-4
|
||||
0xcb,0xff,0xff,0x54, // b.lt #-8
|
||||
0xae,0xff,0xff,0x54, // b.al #-12
|
||||
0x82,0xff,0xff,0xb5, // cbnz x2, #-16
|
||||
0x62,0xff,0xff,0xb4, // cbz x2, #-20
|
||||
});
|
||||
|
||||
test_asm(r, [&](A& a) {
|
||||
a.ldrq(A::v0, A::x8);
|
||||
a.strq(A::v0, A::x8);
|
||||
},{
|
||||
0x00, 0x01, 0xc0, 0x3d,
|
||||
0x00, 0x01, 0x80, 0x3d,
|
||||
0x00,0x01,0xc0,0x3d,
|
||||
0x00,0x01,0x80,0x3d,
|
||||
});
|
||||
|
||||
test_asm(r, [&](A& a) {
|
||||
@ -686,4 +706,12 @@ DEF_TEST(SkVM_Assembler, r) {
|
||||
0x00,0xa4,0x08,0x2f,
|
||||
0x00,0xa4,0x10,0x2f,
|
||||
});
|
||||
|
||||
test_asm(r, [&](A& a) {
|
||||
a.ldrb(A::v0, A::x8);
|
||||
a.strb(A::v0, A::x8);
|
||||
},{
|
||||
0x00,0x01,0x40,0x3d,
|
||||
0x00,0x01,0x00,0x3d,
|
||||
});
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user