allow a fourth Val/Reg arg per op

This is a pure refactor, making room for a fourth argument per Op. I'll use this to make store128 work more naturally, taking all 128 bits of its input at once, rather than spreading across two strided 64-bit stores as done today. I've left this for a second CL because this refactor on its own seemed error-prone enough. Change-Id: I704e4f2e165b0bfd0276af921d97bf2cc01e8550 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/355659 Reviewed-by: Herb Derby <herb@google.com> Commit-Queue: Mike Klein <mtklein@google.com>
2021-01-19 11:36:25 -06:00 · 2021-01-19 11:36:25 -06:00 · f3087d8297
commit f3087d8297
parent ea27de5cb7
3 changed files with 57 additions and 44 deletions
--- a/src/core/SkVM.cpp
+++ b/src/core/SkVM.cpp
@ -230,7 +230,9 @@ namespace skvm {
        Op  op = inst.op;
        Val  x = inst.x,
             y = inst.y,
-             z = inst.z;
+             z = inst.z,
+             w = inst.w;
+        (void)w;  // TODO: use in store128
        int immA = inst.immA,
            immB = inst.immB;
        switch (op) {
@ -342,7 +344,9 @@ namespace skvm {
            Reg   d = inst.d,
                  x = inst.x,
                  y = inst.y,
-                  z = inst.z;
+                  z = inst.z,
+                  w = inst.w;
+            (void)w;  // TODO: use in store128
            int immA = inst.immA,
                immB = inst.immB;
            switch (op) {
@ -352,7 +356,7 @@ namespace skvm {
                case Op::store16:  write(o, op, Ptr{immA}, R{x}                 ); break;
                case Op::store32:  write(o, op, Ptr{immA}, R{x}                 ); break;
                case Op::store64:  write(o, op, Ptr{immA}, R{x}, R{y}           ); break;
-                case Op::store128: write(o, op, Ptr{immA}, R{x}, R{y}, Hex{immB}); break;
+                case Op::store128: write(o, op, Ptr{immA}, R{x}, R{y}, Hex{immB}); break;  // TODO

                case Op::index: write(o, R{d}, "=", op); break;

@ -425,7 +429,7 @@ namespace skvm {
            if (live[id] == false) {
                live[id] =  true;
                Instruction inst = program[id];
-                for (Val arg : {inst.x, inst.y, inst.z}) {
+                for (Val arg : {inst.x, inst.y, inst.z, inst.w}) {
                    if (arg != NA) { recurse(arg, recurse); }
                }
            }
@ -443,7 +447,7 @@ namespace skvm {
        for (Val id = 0, next = 0; id < (Val)program.size(); id++) {
            if (live[id]) {
                Instruction& inst = program[id];
-                for (Val* arg : {&inst.x, &inst.y, &inst.z}) {
+                for (Val* arg : {&inst.x, &inst.y, &inst.z, &inst.w}) {
                    if (*arg != NA) {
                        *arg = new_id[*arg];
                        SkASSERT(*arg != NA);
@ -465,14 +469,14 @@ namespace skvm {
        std::vector<OptimizedInstruction> optimized(program.size());
        for (Val id = 0; id < (Val)program.size(); id++) {
            Instruction inst = program[id];
-            optimized[id] = {inst.op, inst.x,inst.y,inst.z, inst.immA,inst.immB,
+            optimized[id] = {inst.op, inst.x,inst.y,inst.z,inst.w, inst.immA,inst.immB,
                             /*death=*/id, /*can_hoist=*/true};
        }

        // Each Instruction's inputs need to live at least until that Instruction issues.
        for (Val id = 0; id < (Val)optimized.size(); id++) {
            OptimizedInstruction& inst = optimized[id];
-            for (Val arg : {inst.x, inst.y, inst.z}) {
+            for (Val arg : {inst.x, inst.y, inst.z, inst.w}) {
                // (We're walking in order, so this is the same as max()ing with the existing Val.)
                if (arg != NA) { optimized[arg].death = id; }
            }
@ -487,7 +491,7 @@ namespace skvm {

            // If any of an instruction's inputs can't be hoisted, it can't be hoisted itself.
            if (inst.can_hoist) {
-                for (Val arg : {inst.x, inst.y, inst.z}) {
+                for (Val arg : {inst.x, inst.y, inst.z, inst.w}) {
                    if (arg != NA) { inst.can_hoist &= optimized[arg].can_hoist; }
                }
            }
@ -496,7 +500,7 @@ namespace skvm {
        // Extend the lifetime of any hoisted value that's used in the loop to infinity.
        for (OptimizedInstruction& inst : optimized) {
            if (!inst.can_hoist /*i.e. we're in the loop, so the arguments are used-in-loop*/) {
-                for (Val arg : {inst.x, inst.y, inst.z}) {
+                for (Val arg : {inst.x, inst.y, inst.z, inst.w}) {
                    if (arg != NA && optimized[arg].can_hoist) {
                        optimized[arg].death = (Val)program.size();
                    }
@ -534,6 +538,7 @@ namespace skvm {
            && a.x    == b.x
            && a.y    == b.y
            && a.z    == b.z
+            && a.w    == b.w
            && a.immA == b.immA
            && a.immB == b.immB;
    }
@ -585,47 +590,47 @@ namespace skvm {
    #ifdef SK_DEBUG
        int imm;
        if (this->allImm(cond.id,&imm)) { SkASSERT(imm); return; }
-        (void)push(Op::assert_true, cond.id,debug.id,NA);
+        (void)push(Op::assert_true, cond.id, debug.id);
    #endif
    }

-    void Builder::store8 (Ptr ptr, I32 val) { (void)push(Op::store8 , val.id,NA,NA, ptr.ix); }
-    void Builder::store16(Ptr ptr, I32 val) { (void)push(Op::store16, val.id,NA,NA, ptr.ix); }
-    void Builder::store32(Ptr ptr, I32 val) { (void)push(Op::store32, val.id,NA,NA, ptr.ix); }
+    void Builder::store8 (Ptr ptr, I32 val) { (void)push(Op::store8 , val.id,NA,NA,NA, ptr.ix); }
+    void Builder::store16(Ptr ptr, I32 val) { (void)push(Op::store16, val.id,NA,NA,NA, ptr.ix); }
+    void Builder::store32(Ptr ptr, I32 val) { (void)push(Op::store32, val.id,NA,NA,NA, ptr.ix); }
    void Builder::store64(Ptr ptr, I32 lo, I32 hi) {
-        (void)push(Op::store64, lo.id,hi.id,NA, ptr.ix);
+        (void)push(Op::store64, lo.id,hi.id,NA,NA, ptr.ix);
    }
    void Builder::store128(Ptr ptr, I32 lo, I32 hi, int lane) {
-        (void)push(Op::store128, lo.id,hi.id,NA, ptr.ix,lane);
+        (void)push(Op::store128, lo.id,hi.id,NA,NA, ptr.ix,lane);
    }

-    I32 Builder::index() { return {this, push(Op::index , NA,NA,NA,0) }; }
+    I32 Builder::index() { return {this, push(Op::index)}; }

-    I32 Builder::load8 (Ptr ptr) { return {this, push(Op::load8 , NA,NA,NA, ptr.ix) }; }
-    I32 Builder::load16(Ptr ptr) { return {this, push(Op::load16, NA,NA,NA, ptr.ix) }; }
-    I32 Builder::load32(Ptr ptr) { return {this, push(Op::load32, NA,NA,NA, ptr.ix) }; }
+    I32 Builder::load8 (Ptr ptr) { return {this, push(Op::load8 , NA,NA,NA,NA, ptr.ix) }; }
+    I32 Builder::load16(Ptr ptr) { return {this, push(Op::load16, NA,NA,NA,NA, ptr.ix) }; }
+    I32 Builder::load32(Ptr ptr) { return {this, push(Op::load32, NA,NA,NA,NA, ptr.ix) }; }
    I32 Builder::load64(Ptr ptr, int lane) {
-        return {this, push(Op::load64 , NA,NA,NA, ptr.ix,lane) };
+        return {this, push(Op::load64 , NA,NA,NA,NA, ptr.ix,lane) };
    }
    I32 Builder::load128(Ptr ptr, int lane) {
-        return {this, push(Op::load128, NA,NA,NA, ptr.ix,lane) };
+        return {this, push(Op::load128, NA,NA,NA,NA, ptr.ix,lane) };
    }

    I32 Builder::gather8 (Ptr ptr, int offset, I32 index) {
-        return {this, push(Op::gather8 , index.id,NA,NA, ptr.ix,offset)};
+        return {this, push(Op::gather8 , index.id,NA,NA,NA, ptr.ix,offset)};
    }
    I32 Builder::gather16(Ptr ptr, int offset, I32 index) {
-        return {this, push(Op::gather16, index.id,NA,NA, ptr.ix,offset)};
+        return {this, push(Op::gather16, index.id,NA,NA,NA, ptr.ix,offset)};
    }
    I32 Builder::gather32(Ptr ptr, int offset, I32 index) {
-        return {this, push(Op::gather32, index.id,NA,NA, ptr.ix,offset)};
+        return {this, push(Op::gather32, index.id,NA,NA,NA, ptr.ix,offset)};
    }

    I32 Builder::uniform32(Ptr ptr, int offset) {
-        return {this, push(Op::uniform32, NA,NA,NA, ptr.ix, offset)};
+        return {this, push(Op::uniform32, NA,NA,NA,NA, ptr.ix, offset)};
    }

-    I32 Builder::splat(int n) { return {this, push(Op::splat    , NA,NA,NA, n) }; }
+    I32 Builder::splat(int n) { return {this, push(Op::splat, NA,NA,NA,NA, n) }; }

    // Be careful peepholing float math!  Transformations you might expect to
    // be legal can fail in the face of NaN/Inf, e.g. 0*x is not always 0.
@ -687,7 +692,7 @@ namespace skvm {

    F32 Builder::sqrt(F32 x) {
        if (float X; this->allImm(x.id,&X)) { return splat(std::sqrt(X)); }
-        return {this, this->push(Op::sqrt_f32, x.id,NA,NA)};
+        return {this, this->push(Op::sqrt_f32, x.id)};
    }

    // See http://www.machinedlearnings.com/2011/06/fast-approximate-logarithm-exponential.html.
@ -880,17 +885,17 @@ namespace skvm {
    I32 Builder::shl(I32 x, int bits) {
        if (bits == 0) { return x; }
        if (int X; this->allImm(x.id,&X)) { return splat(X << bits); }
-        return {this, this->push(Op::shl_i32, x.id,NA,NA, bits)};
+        return {this, this->push(Op::shl_i32, x.id,NA,NA,NA, bits)};
    }
    I32 Builder::shr(I32 x, int bits) {
        if (bits == 0) { return x; }
        if (int X; this->allImm(x.id,&X)) { return splat(unsigned(X) >> bits); }
-        return {this, this->push(Op::shr_i32, x.id,NA,NA, bits)};
+        return {this, this->push(Op::shr_i32, x.id,NA,NA,NA, bits)};
    }
    I32 Builder::sra(I32 x, int bits) {
        if (bits == 0) { return x; }
        if (int X; this->allImm(x.id,&X)) { return splat(X >> bits); }
-        return {this, this->push(Op::sra_i32, x.id,NA,NA, bits)};
+        return {this, this->push(Op::sra_i32, x.id,NA,NA,NA, bits)};
    }

    I32 Builder:: eq(F32 x, F32 y) {
@ -2469,7 +2474,7 @@ namespace skvm {
        std::vector<llvm::Value*> vals(instructions.size());

        auto emit = [&](size_t i, bool scalar, IRBuilder* b) {
-            auto [op, x,y,z, immA,immB, death,can_hoist] = instructions[i];
+            auto [op, x,y,z,w, immA,immB, death,can_hoist] = instructions[i];

            llvm::Type *i1    = llvm::Type::getInt1Ty (*ctx),
                       *i8    = llvm::Type::getInt8Ty (*ctx),
@ -2968,6 +2973,7 @@ namespace skvm {
                lookup_register(inst.x),
                lookup_register(inst.y),
                lookup_register(inst.z),
+                lookup_register(inst.w),
                inst.immA,
                inst.immB,
            };
@ -3191,7 +3197,8 @@ namespace skvm {
            const Op op = inst.op;
            const Val x = inst.x,
                      y = inst.y,
-                      z = inst.z;
+                      z = inst.z,
+                      w = inst.w;
            const int immA = inst.immA,
                      immB = inst.immB;

@ -3241,17 +3248,19 @@ namespace skvm {
                regs[r] = NA;
            };

-            // Which register holds dst,x,y,z for this instruction?  NA if none does yet.
+            // Which register holds dst,x,y,z,w for this instruction?  NA if none does yet.
            int rd = NA,
                rx = NA,
                ry = NA,
-                rz = NA;
+                rz = NA,
+                rw = NA;

            auto update_regs = [&](Reg r, Val v) {
                if (v == id) { rd = r; }
                if (v ==  x) { rx = r; }
                if (v ==  y) { ry = r; }
                if (v ==  z) { rz = r; }
+                if (v ==  w) { rw = r; }
                return r;
            };

@ -3261,6 +3270,7 @@ namespace skvm {
                if (v ==  x && rx != NA) { return rx; }
                if (v ==  y && ry != NA) { return ry; }
                if (v ==  z && rz != NA) { return rz; }
+                if (v ==  w && rw != NA) { return rw; }

                // Search inter-instruction register map.
                for (auto [r,val] : SkMakeEnumerate(regs)) {
@ -3912,6 +3922,7 @@ namespace skvm {
            if (rx != NA && regs[rx] != NA && dies_here(regs[rx])) { regs[rx] = NA; }
            if (ry != NA && regs[ry] != NA && dies_here(regs[ry])) { regs[ry] = NA; }
            if (rz != NA && regs[rz] != NA && dies_here(regs[rz])) { regs[rz] = NA; }
+            if (rw != NA && regs[rw] != NA && dies_here(regs[rw])) { regs[rw] = NA; }
            return true;
        };

--- a/src/core/SkVM.h
+++ b/src/core/SkVM.h
@ -546,9 +546,9 @@ namespace skvm {

    SK_BEGIN_REQUIRE_DENSE
    struct Instruction {
-        Op  op;         // v* = op(x,y,z,imm), where * == index of this Instruction.
-        Val x,y,z;      // Enough arguments for mad().
-        int immA,immB;  // Immediate bit pattern, shift count, argument index, etc.
+        Op  op;         // v* = op(x,y,z,w,immA,immB), where * == index of this Instruction.
+        Val x,y,z,w;    // Enough arguments for Op::store128.
+        int immA,immB;  // Immediate bit pattern, shift count, pointer index, byte offset, etc.
    };
    SK_END_REQUIRE_DENSE

@ -559,7 +559,7 @@ namespace skvm {

    struct OptimizedInstruction {
        Op op;
-        Val x,y,z;
+        Val x,y,z,w;
        int immA,immB;

        Val  death;
@ -898,8 +898,8 @@ namespace skvm {

        Val push(Instruction);
    private:
-        Val push(Op op, Val x, Val y=NA, Val z=NA, int immA=0, int immB=0) {
-            return this->push(Instruction{op, x,y,z, immA,immB});
+        Val push(Op op, Val x=NA, Val y=NA, Val z=NA, Val w=NA, int immA=0, int immB=0) {
+            return this->push(Instruction{op, x,y,z,w, immA,immB});
        }

        bool allImm() const;
@ -926,10 +926,10 @@ namespace skvm {

    using Reg = int;

-    // d = op(x, y/imm, z/imm)
+    // d = op(x,y,z,w, immA,immB)
    struct InterpreterInstruction {
        Op  op;
-        Reg d,x,y,z;
+        Reg d,x,y,z,w;
        int immA,immB;
    };

--- a/src/opts/SkVM_opts.h
+++ b/src/opts/SkVM_opts.h
@ -86,11 +86,13 @@ namespace SK_OPTS_NS {
            for (int i = start; i < ninsts; i++) {
                InterpreterInstruction inst = insts[i];

-                // d = op(x,y/imm,z/imm)
+                // d = op(x,y,z,w, immA,immB)
                Reg   d = inst.d,
                      x = inst.x,
                      y = inst.y,
-                      z = inst.z;
+                      z = inst.z,
+                      w = inst.w;
+                (void)w;  // TODO: use in store128
                int immA = inst.immA,
                    immB = inst.immB;