instructions for JIT tail support on ARM

This adds a bunch of instructions we'll need to handle the N < 4 tail
within the JIT code on ARM.

   - ldrb/strb are 1-byte loads and stores
   - sub subtracts without setting flags
   - cmp just sets flags (actually just subs with an xzr destination)
   - add b and b.lt, just like b.ne
   - cbz and cbnz... we only need cbz but I accidentally did cbnz first

Once I add support for forward jumps, we'll be able to use these
instructions to restructure the loop to

    entry:
        hoisted setup
    loop:
        if N < 4, jump tail      (cmp N,#4; b.lt tail)
        ... handle 4 values ...
        jump loop                (b loop)
    tail:
        if N == 0, jump end      (cbz N, end)
        ... handle 1 value ...
        jump tail                (b tail)
    end:
        ret

Change-Id: I62d2d190f670f758197a25d99dfde13362189993
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/226828
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
This commit is contained in:
Mike Klein 2019-07-11 11:25:37 -05:00 committed by Skia Commit-Bot
parent 7825d4983f
commit 4cfe3ed0f2
3 changed files with 83 additions and 11 deletions

View File

@ -625,6 +625,12 @@ namespace skvm {
| (n & 5_mask) << 5
| (d & 5_mask) << 0);
}
// sub d, n, #imm12 -- d = n - imm12, without setting condition flags.
// (subs is the flag-setting variant.)
void Assembler::sub(X d, X n, int imm12) {
    // Build each field of the instruction word separately, then OR them together.
    const auto opcode = 0b1'1'0'10001'00 << 22;
    const auto imm    = (imm12 & 12_mask) << 10;
    const auto rn     = (n     &  5_mask) <<  5;
    const auto rd     = (d     &  5_mask) <<  0;
    this->word(opcode | imm | rn | rd);
}
void Assembler::subs(X d, X n, int imm12) {
this->word( 0b1'1'1'10001'00 << 22
| (imm12 & 12_mask) << 10
@ -632,19 +638,33 @@ namespace skvm {
| (d & 5_mask) << 0);
}
void Assembler::bne(Label l) {
void Assembler::b(Condition cond, Label l) {
// Jump in insts from before this one.
const int imm19 = (l.offset - here().offset) / 4;
this->word( 0b0101010'0 << 24
this->word( 0b0101010'0 << 24
| (imm19 & 19_mask) << 5
| ((int)cond & 4_mask) << 0);
}
// cbz t, l -- branch to l if register t is zero.
void Assembler::cbz(X t, Label l) {
    // The jump distance is encoded in instructions (4 bytes each),
    // measured from this instruction to the label.
    const int imm19 = (l.offset - here().offset) / 4;
    this->word( 0b1'011010'0 << 24
              | (imm19 & 19_mask) << 5
              | (t & 5_mask) << 0);
}
// cbnz t, l -- branch to l if register t is non-zero.
void Assembler::cbnz(X t, Label l) {
    // Distance to the label in units of 4 bytes (one instruction).
    const int delta = (l.offset - here().offset) / 4;

    const auto opcode = 0b1'011010'1 << 24;
    const auto target = (delta & 19_mask) << 5;
    const auto reg    = (t & 5_mask) << 0;
    this->word(opcode | target | reg);
}
// Register-addressed vector loads: dst = *src, at 128, 32 and 8 bits wide.
void Assembler::ldrq(V dst, X src) {
    const int kLoad128 = 0b00'111'1'01'11'000000000000;
    this->op(kLoad128, src, dst);
}
void Assembler::ldrs(V dst, X src) {
    const int kLoad32 = 0b10'111'1'01'01'000000000000;
    this->op(kLoad32, src, dst);
}
void Assembler::ldrb(V dst, X src) {
    const int kLoad8 = 0b00'111'1'01'01'000000000000;
    this->op(kLoad8, src, dst);
}

// Register-addressed vector stores: *dst = src, at 128, 32 and 8 bits wide.
void Assembler::strq(V src, X dst) {
    const int kStore128 = 0b00'111'1'01'10'000000000000;
    this->op(kStore128, dst, src);
}
void Assembler::strs(V src, X dst) {
    const int kStore32 = 0b10'111'1'01'00'000000000000;
    this->op(kStore32, dst, src);
}
void Assembler::strb(V src, X dst) {
    const int kStore8 = 0b00'111'1'01'00'000000000000;
    this->op(kStore8, dst, src);
}
void Assembler::ldrq(V dst, Label l) {
const int imm19 = (l.offset - here().offset) / 4;

View File

@ -128,14 +128,34 @@ namespace skvm {
void ret (X);

void add (X d, X n, int imm12);
void sub (X d, X n, int imm12);
void subs(X d, X n, int imm12);  // subtract setting condition flags

// There's another encoding for unconditional branches that can jump further,
// but this one encoded as b.al is simple to implement and should be fine.
void b  (Label l) { this->b(Condition::al, l); }
void bne(Label l) { this->b(Condition::ne, l); }
void blt(Label l) { this->b(Condition::lt, l); }

// "cmp ..." is just an assembler mnemonic for "subs xzr, ..."!
void cmp(X n, int imm12) { this->subs(xzr, n, imm12); }

// Compare and branch if zero/non-zero, as if
//      cmp(t,0)
//      beq/bne(l)
// but without setting condition flags.
void cbz (X t, Label l);
void cbnz(X t, Label l);

void ldrq(V dst, Label);  // 128-bit PC-relative load

void ldrq(V dst, X src);  // 128-bit dst = *src
void ldrs(V dst, X src);  //  32-bit dst = *src
void ldrb(V dst, X src);  //   8-bit dst = *src

void strq(V src, X dst);  // 128-bit *dst = src
void strs(V src, X dst);  //  32-bit *dst = src
void strb(V src, X dst);  //   8-bit *dst = src
private:
// dst = op(dst, imm)
@ -170,6 +190,10 @@ namespace skvm {
// Convenience overloads: forward to the four-argument op(), passing 0 as its second argument.
void op(uint32_t op22, V n, V d) { this->op(op22,0,n,d); }
// Same as above for an X register operand; x is cast to V before forwarding.
void op(uint32_t op22, X x, V v) { this->op(op22,0,(V)x,v); }
// Condition codes for conditional branches.
// Each enumerator's value is the 4-bit encoding placed in the instruction,
// so the numbering below must not change.
enum class Condition {
    eq = 0b0000,
    ne = 0b0001,
    cs = 0b0010,
    cc = 0b0011,
    mi = 0b0100,
    pl = 0b0101,
    vs = 0b0110,
    vc = 0b0111,
    hi = 0b1000,
    ls = 0b1001,
    ge = 0b1010,
    lt = 0b1011,
    gt = 0b1100,
    le = 0b1101,
    al = 0b1110,
};
void b(Condition, Label);
uint8_t* fCode;
size_t fSize;
};

View File

@ -641,12 +641,22 @@ DEF_TEST(SkVM_Assembler, r) {
a.add(A::x2, A::x2, 4);
a.add(A::x3, A::x2, 32);
a.sub(A::x2, A::x2, 4);
a.sub(A::x3, A::x2, 32);
a.subs(A::x2, A::x2, 4);
a.subs(A::x3, A::x2, 32);
a.subs(A::xzr, A::x2, 4); // These are actually the same instruction!
a.cmp(A::x2, 4);
A::Label l = a.here();
a.bne(l);
a.bne(l);
a.blt(l);
a.b(l);
a.cbnz(A::x2, l);
a.cbz(A::x2, l);
},{
0xc0,0x03,0x5f,0xd6,
0xa0,0x01,0x5f,0xd6,
@ -654,19 +664,29 @@ DEF_TEST(SkVM_Assembler, r) {
0x42,0x10,0x00,0x91,
0x43,0x80,0x00,0x91,
0x42,0x10,0x00,0xd1,
0x43,0x80,0x00,0xd1,
0x42,0x10,0x00,0xf1,
0x43,0x80,0x00,0xf1,
0x01,0x00,0x00,0x54,
0xe1,0xff,0xff,0x54,
0x5f,0x10,0x00,0xf1,
0x5f,0x10,0x00,0xf1,
0x01,0x00,0x00,0x54, // b.ne #0
0xe1,0xff,0xff,0x54, // b.ne #-4
0xcb,0xff,0xff,0x54, // b.lt #-8
0xae,0xff,0xff,0x54, // b.al #-12
0x82,0xff,0xff,0xb5, // cbnz x2, #-16
0x62,0xff,0xff,0xb4, // cbz x2, #-20
});
// 128-bit register-addressed load and store.
test_asm(r, [&](A& a) {
    a.ldrq(A::v0, A::x8);
    a.strq(A::v0, A::x8);
},{
    0x00,0x01,0xc0,0x3d,  // ldr q0, [x8]
    0x00,0x01,0x80,0x3d,  // str q0, [x8]
});
test_asm(r, [&](A& a) {
@ -686,4 +706,12 @@ DEF_TEST(SkVM_Assembler, r) {
0x00,0xa4,0x08,0x2f,
0x00,0xa4,0x10,0x2f,
});
// 8-bit register-addressed load and store.
test_asm(r, [&](A& a) {
a.ldrb(A::v0, A::x8);
a.strb(A::v0, A::x8);
},{
0x00,0x01,0x40,0x3d, // ldr b0, [x8]
0x00,0x01,0x00,0x3d, // str b0, [x8]
});
}