instructions for JIT tail support on ARM

This adds a bunch of instructions we'll need to handle the N < 4 tail within the JIT code on ARM:

  - ldrb/strb are 1-byte loads and stores
  - sub subtracts without setting flags
  - cmp just sets flags (actually just subs with an xzr destination)
  - add b and b.lt, just like b.ne
  - cbz and cbnz... we only need cbz, but I accidentally did cbnz first

Once I add support for forward jumps, we'll be able to use these instructions to restructure the loop to

    entry:
        hoisted setup
    loop:
        if N < 4, jump tail      (cmp N,#4; b.lt tail)
        ... handle 4 values ...
        jump loop                (b loop)
    tail:
        if N == 0, jump end      (cbz N, end)
        ... handle 1 value ...
        jump tail                (b tail)
    end:
        ret

Change-Id: I62d2d190f670f758197a25d99dfde13362189993
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/226828
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
2019-07-11 11:25:37 -05:00 · 2019-07-11 11:25:37 -05:00 · 4cfe3ed0f2
commit 4cfe3ed0f2
parent 7825d4983f
3 changed files with 83 additions and 11 deletions
--- a/src/core/SkVM.cpp
+++ b/src/core/SkVM.cpp
@ -625,6 +625,12 @@ namespace skvm {
                  | (n     &  5_mask) <<  5
                  | (d     &  5_mask) <<  0);
    }
+    void Assembler::sub(X d, X n, int imm12) {
+        this->word( 0b1'1'0'10001'00  << 22
+                  | (imm12 & 12_mask) << 10
+                  | (n     &  5_mask) <<  5
+                  | (d     &  5_mask) <<  0);
+    }
    void Assembler::subs(X d, X n, int imm12) {
        this->word( 0b1'1'1'10001'00  << 22
                  | (imm12 & 12_mask) << 10
@ -632,19 +638,33 @@ namespace skvm {
                  | (d     &  5_mask) <<  0);
    }

-    void Assembler::bne(Label l) {
+    void Assembler::b(Condition cond, Label l) {
        // Jump in insts from before this one.
        const int imm19 = (l.offset - here().offset) / 4;
-        this->word( 0b0101010'0       << 24
+        this->word( 0b0101010'0           << 24
+                  | (imm19     & 19_mask) <<  5
+                  | ((int)cond &  4_mask) <<  0);
+    }
+    void Assembler::cbz(X t, Label l) {
+        const int imm19 = (l.offset - here().offset) / 4;
+        this->word( 0b1'011010'0      << 24
                  | (imm19 & 19_mask) <<  5
-                  | 0b0'0001          <<  0);
+                  | (t     &  5_mask) <<  0);
+    }
+    void Assembler::cbnz(X t, Label l) {
+        const int imm19 = (l.offset - here().offset) / 4;
+        this->word( 0b1'011010'1      << 24
+                  | (imm19 & 19_mask) <<  5
+                  | (t     &  5_mask) <<  0);
    }

    void Assembler::ldrq(V dst, X src) { this->op(0b00'111'1'01'11'000000000000, src, dst); }
    void Assembler::ldrs(V dst, X src) { this->op(0b10'111'1'01'01'000000000000, src, dst); }
+    void Assembler::ldrb(V dst, X src) { this->op(0b00'111'1'01'01'000000000000, src, dst); }

    void Assembler::strq(V src, X dst) { this->op(0b00'111'1'01'10'000000000000, dst, src); }
    void Assembler::strs(V src, X dst) { this->op(0b10'111'1'01'00'000000000000, dst, src); }
+    void Assembler::strb(V src, X dst) { this->op(0b00'111'1'01'00'000000000000, dst, src); }

    void Assembler::ldrq(V dst, Label l) {
        const int imm19 = (l.offset - here().offset) / 4;
--- a/src/core/SkVM.h
+++ b/src/core/SkVM.h
@ -128,14 +128,34 @@ namespace skvm {

        void ret (X);
        void add (X d, X n, int imm12);
-        void subs(X d, X n, int imm12);
-        void bne (Label);
+        void sub (X d, X n, int imm12);
+        void subs(X d, X n, int imm12);  // subtract setting condition flags
+
+        // There's another encoding for unconditional branches that can jump further,
+        // but this one encoded as b.al is simple to implement and should be fine.
+        void b  (Label l) { this->b(Condition::al, l); }
+        void bne(Label l) { this->b(Condition::ne, l); }
+        void blt(Label l) { this->b(Condition::lt, l); }
+
+        // "cmp ..." is just an assembler mnemonic for "subs xzr, ..."!
+        void cmp(X n, int imm12) { this->subs(xzr, n, imm12); }
+
+        // Compare and branch if zero/non-zero, as if
+        //      cmp(t,0)
+        //      beq/bne(l)
+        // but without setting condition flags.
+        void cbz (X t, Label l);
+        void cbnz(X t, Label l);

        void ldrq(V dst, Label);  // 128-bit PC-relative load
+
        void ldrq(V dst, X src);  // 128-bit dst = *src
-        void ldrs(V dst, X src);  //  32-bit dst[0] = *src
+        void ldrs(V dst, X src);  //  32-bit dst = *src
+        void ldrb(V dst, X src);  //   8-bit dst = *src
+
        void strq(V src, X dst);  // 128-bit *dst = src
-        void strs(V src, X dst);  //  32-bit *dst = src[0]
+        void strs(V src, X dst);  //  32-bit *dst = src
+        void strb(V src, X dst);  //   8-bit *dst = src

    private:
        // dst = op(dst, imm)
@ -170,6 +190,10 @@ namespace skvm {
        void op(uint32_t op22, V n, V d) { this->op(op22,0,n,d); }
        void op(uint32_t op22, X x, V v) { this->op(op22,0,(V)x,v); }

+        // Order matters... value is 4-bit encoding for condition code.
+        enum class Condition { eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,al };
+        void b(Condition, Label);
+
        uint8_t* fCode;
        size_t   fSize;
    };
--- a/tests/SkVMTest.cpp
+++ b/tests/SkVMTest.cpp
@ -641,12 +641,22 @@ DEF_TEST(SkVM_Assembler, r) {
        a.add(A::x2, A::x2,  4);
        a.add(A::x3, A::x2, 32);

+        a.sub(A::x2, A::x2, 4);
+        a.sub(A::x3, A::x2, 32);
+
        a.subs(A::x2, A::x2,  4);
        a.subs(A::x3, A::x2, 32);

+        a.subs(A::xzr, A::x2, 4);  // These are actually the same instruction!
+        a.cmp(A::x2, 4);
+
        A::Label l = a.here();
        a.bne(l);
        a.bne(l);
+        a.blt(l);
+        a.b(l);
+        a.cbnz(A::x2, l);
+        a.cbz(A::x2, l);
    },{
        0xc0,0x03,0x5f,0xd6,
        0xa0,0x01,0x5f,0xd6,
@ -654,19 +664,29 @@ DEF_TEST(SkVM_Assembler, r) {
        0x42,0x10,0x00,0x91,
        0x43,0x80,0x00,0x91,

+        0x42,0x10,0x00,0xd1,
+        0x43,0x80,0x00,0xd1,
+
        0x42,0x10,0x00,0xf1,
        0x43,0x80,0x00,0xf1,

-        0x01,0x00,0x00,0x54,
-        0xe1,0xff,0xff,0x54,
+        0x5f,0x10,0x00,0xf1,
+        0x5f,0x10,0x00,0xf1,
+
+        0x01,0x00,0x00,0x54,   // b.ne #0
+        0xe1,0xff,0xff,0x54,   // b.ne #-4
+        0xcb,0xff,0xff,0x54,   // b.lt #-8
+        0xae,0xff,0xff,0x54,   // b.al #-12
+        0x82,0xff,0xff,0xb5,   // cbnz x2, #-16
+        0x62,0xff,0xff,0xb4,   // cbz x2, #-20
    });

    test_asm(r, [&](A& a) {
        a.ldrq(A::v0, A::x8);
        a.strq(A::v0, A::x8);
    },{
-        0x00, 0x01, 0xc0, 0x3d,
-        0x00, 0x01, 0x80, 0x3d,
+        0x00,0x01,0xc0,0x3d,
+        0x00,0x01,0x80,0x3d,
    });

    test_asm(r, [&](A& a) {
@ -686,4 +706,12 @@ DEF_TEST(SkVM_Assembler, r) {
        0x00,0xa4,0x08,0x2f,
        0x00,0xa4,0x10,0x2f,
    });
+
+    test_asm(r, [&](A& a) {
+        a.ldrb(A::v0, A::x8);
+        a.strb(A::v0, A::x8);
+    },{
+        0x00,0x01,0x40,0x3d,
+        0x00,0x01,0x00,0x3d,
+    });
 }