remove i16x2 ops

These are neat but mostly just a distraction for now.
I've left all the assembly in place, still unit tested,
to make it easy to put these back when we want to.

Change-Id: Id2bd05eca363baf9c4e31125ee79e722ded54cb7
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/283307
Commit-Queue: Mike Klein <mtklein@google.com>
Reviewed-by: Herb Derby <herb@google.com>
This commit is contained in:
Mike Klein 2020-04-13 13:26:45 -05:00 committed by Skia Commit-Bot
parent cb5110443f
commit 45d9cc86b3
8 changed files with 8 additions and 394 deletions

View File

@ -12,8 +12,8 @@
namespace {
enum Mode {Opts, RP, F32, I32_Naive, I32, I32_SWAR};
static const char* kMode_name[] = { "Opts", "RP","F32", "I32_Naive", "I32", "I32_SWAR" };
enum Mode {Opts, RP, F32, I32_Naive};
static const char* kMode_name[] = { "Opts", "RP","F32", "I32_Naive" };
}
@ -36,8 +36,6 @@ private:
if (fMode == F32 ) { fProgram = SrcoverBuilder_F32 {}.done(); }
if (fMode == I32_Naive) { fProgram = SrcoverBuilder_I32_Naive{}.done(); }
if (fMode == I32 ) { fProgram = SrcoverBuilder_I32 {}.done(); }
if (fMode == I32_SWAR ) { fProgram = SrcoverBuilder_I32_SWAR {}.done(); }
if (fMode == RP) {
fSrcCtx = { fSrc.data(), 0 };
@ -111,22 +109,6 @@ DEF_BENCH(return (new SkVMBench{ 256, I32_Naive});)
DEF_BENCH(return (new SkVMBench{1024, I32_Naive});)
DEF_BENCH(return (new SkVMBench{4096, I32_Naive});)
DEF_BENCH(return (new SkVMBench{ 1, I32});)
DEF_BENCH(return (new SkVMBench{ 4, I32});)
DEF_BENCH(return (new SkVMBench{ 15, I32});)
DEF_BENCH(return (new SkVMBench{ 63, I32});)
DEF_BENCH(return (new SkVMBench{ 256, I32});)
DEF_BENCH(return (new SkVMBench{1024, I32});)
DEF_BENCH(return (new SkVMBench{4096, I32});)
DEF_BENCH(return (new SkVMBench{ 1, I32_SWAR});)
DEF_BENCH(return (new SkVMBench{ 4, I32_SWAR});)
DEF_BENCH(return (new SkVMBench{ 15, I32_SWAR});)
DEF_BENCH(return (new SkVMBench{ 63, I32_SWAR});)
DEF_BENCH(return (new SkVMBench{ 256, I32_SWAR});)
DEF_BENCH(return (new SkVMBench{1024, I32_SWAR});)
DEF_BENCH(return (new SkVMBench{4096, I32_SWAR});)
class SkVM_Overhead : public Benchmark {
public:
explicit SkVM_Overhead(bool rp) : fRP(rp) {}

View File

@ -606,122 +606,6 @@ loop:
31 r7 = pack r6 r7 16
32 store32 arg(1) r7
I32 8888 over 8888
33 values (originally 33):
v0 = load32 arg(0)
v1 = shr_i32 v0 24
↑ v2 = splat 100 (3.5873241e-43)
v3 = sub_i32 v2 v1
v4 = load32 arg(1)
v5 = shr_i32 v4 16
↑ v6 = splat FF (3.5733111e-43)
v7 = bit_and v6 v5
v8 = mul_i16x2 v7 v3
v9 = shr_i32 v8 8
v10 = shr_i32 v0 16
v11 = bit_and v6 v10
v12 = add_i32 v11 v9
v13 = shr_i32 v4 24
v14 = mul_i16x2 v13 v3
v15 = shr_i32 v14 8
v16 = add_i32 v1 v15
v17 = pack v12 v16 8
v18 = shr_i32 v4 8
v19 = bit_and v6 v18
v20 = mul_i16x2 v19 v3
v21 = shr_i32 v20 8
v22 = shr_i32 v0 8
v23 = bit_and v6 v22
v24 = add_i32 v23 v21
v25 = bit_and v6 v4
v26 = mul_i16x2 v25 v3
v27 = shr_i32 v26 8
v28 = bit_and v6 v0
v29 = add_i32 v28 v27
v30 = pack v29 v24 8
v31 = pack v30 v17 16
store32 arg(1) v31
8 registers, 33 instructions:
0 r0 = splat 100 (3.5873241e-43)
1 r1 = splat FF (3.5733111e-43)
loop:
2 r2 = load32 arg(0)
3 r3 = shr_i32 r2 24
4 r4 = sub_i32 r0 r3
5 r5 = load32 arg(1)
6 r6 = shr_i32 r5 16
7 r6 = bit_and r1 r6
8 r6 = mul_i16x2 r6 r4
9 r6 = shr_i32 r6 8
10 r7 = shr_i32 r2 16
11 r7 = bit_and r1 r7
12 r6 = add_i32 r7 r6
13 r7 = shr_i32 r5 24
14 r7 = mul_i16x2 r7 r4
15 r7 = shr_i32 r7 8
16 r7 = add_i32 r3 r7
17 r7 = pack r6 r7 8
18 r6 = shr_i32 r5 8
19 r6 = bit_and r1 r6
20 r6 = mul_i16x2 r6 r4
21 r6 = shr_i32 r6 8
22 r3 = shr_i32 r2 8
23 r3 = bit_and r1 r3
24 r6 = add_i32 r3 r6
25 r5 = bit_and r1 r5
26 r4 = mul_i16x2 r5 r4
27 r4 = shr_i32 r4 8
28 r2 = bit_and r1 r2
29 r4 = add_i32 r2 r4
30 r6 = pack r4 r6 8
31 r7 = pack r6 r7 16
32 store32 arg(1) r7
I32 (SWAR) 8888 over 8888
19 values (originally 20):
v0 = load32 arg(0)
v1 = shr_i32 v0 8
↑ v2 = splat FF0000 (2.3418052e-38)
v3 = bit_and v2 v1
v4 = shr_i32 v0 24
v5 = bit_or v4 v3
↑ v6 = splat 1000100 (2.3510604e-38)
v7 = sub_i16x2 v6 v5
v8 = load32 arg(1)
v9 = shr_i16x2 v8 8
v10 = mul_i16x2 v9 v7
↑ v11 = splat FF00FF (2.3418409e-38)
v12 = bit_clear v10 v11
v13 = bit_and v8 v11
v14 = mul_i16x2 v13 v7
v15 = shr_i16x2 v14 8
v16 = bit_or v15 v12
v17 = add_i32 v0 v16
store32 arg(1) v17
7 registers, 19 instructions:
0 r0 = splat FF0000 (2.3418052e-38)
1 r1 = splat 1000100 (2.3510604e-38)
2 r2 = splat FF00FF (2.3418409e-38)
loop:
3 r3 = load32 arg(0)
4 r4 = shr_i32 r3 8
5 r4 = bit_and r0 r4
6 r5 = shr_i32 r3 24
7 r4 = bit_or r5 r4
8 r4 = sub_i16x2 r1 r4
9 r5 = load32 arg(1)
10 r6 = shr_i16x2 r5 8
11 r6 = mul_i16x2 r6 r4
12 r6 = bit_clear r6 r2
13 r5 = bit_and r5 r2
14 r4 = mul_i16x2 r5 r4
15 r4 = shr_i16x2 r4 8
16 r6 = bit_or r4 r6
17 r6 = add_i32 r3 r6
18 store32 arg(1) r6
23 values (originally 23):
v0 = load32 arg(1)
v1 = shr_i32 v0 24

View File

@ -271,17 +271,6 @@ namespace skvm {
case Op:: eq_i32: write(o, V{id}, "=", op, V{x}, V{y}); break;
case Op:: gt_i32: write(o, V{id}, "=", op, V{x}, V{y}); break;
case Op::add_i16x2: write(o, V{id}, "=", op, V{x}, V{y}); break;
case Op::sub_i16x2: write(o, V{id}, "=", op, V{x}, V{y}); break;
case Op::mul_i16x2: write(o, V{id}, "=", op, V{x}, V{y}); break;
case Op::shl_i16x2: write(o, V{id}, "=", op, V{x}, Shift{immy}); break;
case Op::shr_i16x2: write(o, V{id}, "=", op, V{x}, Shift{immy}); break;
case Op::sra_i16x2: write(o, V{id}, "=", op, V{x}, Shift{immy}); break;
case Op:: eq_i16x2: write(o, V{id}, "=", op, V{x}, V{y}); break;
case Op:: gt_i16x2: write(o, V{id}, "=", op, V{x}, V{y}); break;
case Op::bit_and : write(o, V{id}, "=", op, V{x}, V{y} ); break;
case Op::bit_or : write(o, V{id}, "=", op, V{x}, V{y} ); break;
case Op::bit_xor : write(o, V{id}, "=", op, V{x}, V{y} ); break;
@ -384,18 +373,6 @@ namespace skvm {
case Op:: eq_i32: write(o, R{d}, "=", op, R{x}, R{y}); break;
case Op:: gt_i32: write(o, R{d}, "=", op, R{x}, R{y}); break;
case Op::add_i16x2: write(o, R{d}, "=", op, R{x}, R{y}); break;
case Op::sub_i16x2: write(o, R{d}, "=", op, R{x}, R{y}); break;
case Op::mul_i16x2: write(o, R{d}, "=", op, R{x}, R{y}); break;
case Op::shl_i16x2: write(o, R{d}, "=", op, R{x}, Shift{immy}); break;
case Op::shr_i16x2: write(o, R{d}, "=", op, R{x}, Shift{immy}); break;
case Op::sra_i16x2: write(o, R{d}, "=", op, R{x}, Shift{immy}); break;
case Op:: eq_i16x2: write(o, R{d}, "=", op, R{x}, R{y}); break;
case Op:: gt_i16x2: write(o, R{d}, "=", op, R{x}, R{y}); break;
case Op::bit_and : write(o, R{d}, "=", op, R{x}, R{y} ); break;
case Op::bit_or : write(o, R{d}, "=", op, R{x}, R{y} ); break;
case Op::bit_xor : write(o, R{d}, "=", op, R{x}, R{y} ); break;
@ -1028,10 +1005,6 @@ namespace skvm {
return {this, this->push(Op::mul_i32, x.id, y.id)};
}
I32 Builder::add_16x2(I32 x, I32 y) { return {this, this->push(Op::add_i16x2, x.id, y.id)}; }
I32 Builder::sub_16x2(I32 x, I32 y) { return {this, this->push(Op::sub_i16x2, x.id, y.id)}; }
I32 Builder::mul_16x2(I32 x, I32 y) { return {this, this->push(Op::mul_i16x2, x.id, y.id)}; }
I32 Builder::shl(I32 x, int bits) {
if (bits == 0) { return x; }
if (int X; this->allImm(x.id,&X)) { return splat(X << bits); }
@ -1048,10 +1021,6 @@ namespace skvm {
return {this, this->push(Op::sra_i32, x.id,NA,NA, bits)};
}
I32 Builder::shl_16x2(I32 x, int k) { return {this, this->push(Op::shl_i16x2, x.id,NA,NA, k)}; }
I32 Builder::shr_16x2(I32 x, int k) { return {this, this->push(Op::shr_i16x2, x.id,NA,NA, k)}; }
I32 Builder::sra_16x2(I32 x, int k) { return {this, this->push(Op::sra_i16x2, x.id,NA,NA, k)}; }
I32 Builder:: eq(F32 x, F32 y) {
if (float X,Y; this->allImm(x.id,&X, y.id,&Y)) { return splat(X==Y ? ~0 : 0); }
return {this, this->push(Op::eq_f32, x.id, y.id)};
@ -1094,15 +1063,6 @@ namespace skvm {
I32 Builder:: lt(I32 x, I32 y) { return y>x; }
I32 Builder::lte(I32 x, I32 y) { return y>=x; }
I32 Builder:: eq_16x2(I32 x, I32 y) { return {this, this->push(Op:: eq_i16x2, x.id, y.id)}; }
I32 Builder:: gt_16x2(I32 x, I32 y) { return {this, this->push(Op:: gt_i16x2, x.id, y.id)}; }
I32 Builder::neq_16x2(I32 x, I32 y) { return ~eq_16x2(x,y); }
I32 Builder::gte_16x2(I32 x, I32 y) { return ~lt_16x2(x,y); }
I32 Builder:: lt_16x2(I32 x, I32 y) { return gt_16x2(y,x); }
I32 Builder::lte_16x2(I32 x, I32 y) { return gte_16x2(y,x); }
I32 Builder::bit_and(I32 x, I32 y) {
if (x.id == y.id) { return x; }
if (int X,Y; this->allImm(x.id,&X, y.id,&Y)) { return splat(X&Y); }
@ -2461,18 +2421,15 @@ namespace skvm {
llvm::Type *i1 = llvm::Type::getInt1Ty (*ctx),
*i8 = llvm::Type::getInt8Ty (*ctx),
*i16 = llvm::Type::getInt16Ty(*ctx),
*i16x2 = llvm::VectorType::get(i16, 2),
*f32 = llvm::Type::getFloatTy(*ctx),
*I1 = scalar ? i1 : llvm::VectorType::get(i1 , K ),
*I8 = scalar ? i8 : llvm::VectorType::get(i8 , K ),
*I16 = scalar ? i16 : llvm::VectorType::get(i16, K ),
*I16x2 = scalar ? i16x2 : llvm::VectorType::get(i16, K*2),
*I32 = scalar ? i32 : llvm::VectorType::get(i32, K ),
*F32 = scalar ? f32 : llvm::VectorType::get(f32, K );
auto I = [&](llvm::Value* v) { return b->CreateBitCast(v, I32 ); };
auto F = [&](llvm::Value* v) { return b->CreateBitCast(v, F32 ); };
auto x2 = [&](llvm::Value* v) { return b->CreateBitCast(v, I16x2); };
auto S = [&](llvm::Type* dst, llvm::Value* v) { return b->CreateSExt(v, dst); };
@ -2639,20 +2596,6 @@ namespace skvm {
#endif
} break;
case Op::add_i16x2: vals[i] = I(b->CreateAdd(x2(vals[x]), x2(vals[y]))); break;
case Op::sub_i16x2: vals[i] = I(b->CreateSub(x2(vals[x]), x2(vals[y]))); break;
case Op::mul_i16x2: vals[i] = I(b->CreateMul(x2(vals[x]), x2(vals[y]))); break;
case Op::shl_i16x2: vals[i] = I(b->CreateShl (x2(vals[x]), immy)); break;
case Op::sra_i16x2: vals[i] = I(b->CreateAShr(x2(vals[x]), immy)); break;
case Op::shr_i16x2: vals[i] = I(b->CreateLShr(x2(vals[x]), immy)); break;
case Op:: eq_i16x2:
vals[i] = I(S(I16x2, b->CreateICmpEQ (x2(vals[x]), x2(vals[y]))));
break;
case Op:: gt_i16x2:
vals[i] = I(S(I16x2, b->CreateICmpSGT(x2(vals[x]), x2(vals[y]))));
break;
}
return true;
};
@ -3357,10 +3300,6 @@ namespace skvm {
case Op::sub_i32: a->vpsubd (dst(), r[x], r[y]); break;
case Op::mul_i32: a->vpmulld(dst(), r[x], r[y]); break;
case Op::sub_i16x2: a->vpsubw (dst(), r[x], r[y]); break;
case Op::mul_i16x2: a->vpmullw(dst(), r[x], r[y]); break;
case Op::shr_i16x2: a->vpsrlw (dst(), r[x], immy); break;
case Op::bit_and : a->vpand (dst(), r[x], r[y]); break;
case Op::bit_or : a->vpor (dst(), r[x], r[y]); break;
case Op::bit_xor : a->vpxor (dst(), r[x], r[y]); break;
@ -3480,10 +3419,6 @@ namespace skvm {
case Op::sub_i32: a->sub4s(dst(), r[x], r[y]); break;
case Op::mul_i32: a->mul4s(dst(), r[x], r[y]); break;
case Op::sub_i16x2: a->sub8h (dst(), r[x], r[y]); break;
case Op::mul_i16x2: a->mul8h (dst(), r[x], r[y]); break;
case Op::shr_i16x2: a->ushr8h(dst(), r[x], immy); break;
case Op::bit_and : a->and16b(dst(), r[x], r[y]); break;
case Op::bit_or : a->orr16b(dst(), r[x], r[y]); break;
case Op::bit_xor : a->eor16b(dst(), r[x], r[y]); break;

View File

@ -310,26 +310,24 @@ namespace skvm {
M(gather8) M(gather16) M(gather32) \
M(uniform8) M(uniform16) M(uniform32) \
M(splat) \
M(add_f32) M(add_i32) M(add_i16x2) \
M(sub_f32) M(sub_i32) M(sub_i16x2) \
M(mul_f32) M(mul_i32) M(mul_i16x2) \
M(add_f32) M(add_i32) \
M(sub_f32) M(sub_i32) \
M(mul_f32) M(mul_i32) \
M(div_f32) \
M(min_f32) \
M(max_f32) \
M(fma_f32) M(fms_f32) M(fnma_f32) \
M(sqrt_f32) \
M(shl_i32) M(shl_i16x2) \
M(shr_i32) M(shr_i16x2) \
M(sra_i32) M(sra_i16x2) \
M(shl_i32) M(shr_i32) M(sra_i32) \
M(add_f32_imm) \
M(sub_f32_imm) \
M(mul_f32_imm) \
M(min_f32_imm) \
M(max_f32_imm) \
M(floor) M(trunc) M(round) M(to_f32) \
M( eq_f32) M( eq_i32) M( eq_i16x2) \
M( eq_f32) M( eq_i32) \
M(neq_f32) \
M( gt_f32) M( gt_i32) M( gt_i16x2) \
M( gt_f32) M( gt_i32) \
M(gte_f32) \
M(bit_and) \
M(bit_or) \
@ -623,22 +621,6 @@ namespace skvm {
F32 to_f32(I32 x);
F32 bit_cast(I32 x) { return {x.builder, x.id}; }
// Treat each 32-bit lane as a pair of 16-bit ints.
I32 add_16x2(I32, I32); I32 add_16x2(I32a x, I32a y) { return add_16x2(_(x), _(y)); }
I32 sub_16x2(I32, I32); I32 sub_16x2(I32a x, I32a y) { return sub_16x2(_(x), _(y)); }
I32 mul_16x2(I32, I32); I32 mul_16x2(I32a x, I32a y) { return mul_16x2(_(x), _(y)); }
I32 shl_16x2(I32 x, int bits);
I32 shr_16x2(I32 x, int bits);
I32 sra_16x2(I32 x, int bits);
I32 eq_16x2(I32, I32); I32 eq_16x2(I32a x, I32a y) { return eq_16x2(_(x), _(y)); }
I32 neq_16x2(I32, I32); I32 neq_16x2(I32a x, I32a y) { return neq_16x2(_(x), _(y)); }
I32 lt_16x2(I32, I32); I32 lt_16x2(I32a x, I32a y) { return lt_16x2(_(x), _(y)); }
I32 lte_16x2(I32, I32); I32 lte_16x2(I32a x, I32a y) { return lte_16x2(_(x), _(y)); }
I32 gt_16x2(I32, I32); I32 gt_16x2(I32a x, I32a y) { return gt_16x2(_(x), _(y)); }
I32 gte_16x2(I32, I32); I32 gte_16x2(I32a x, I32a y) { return gte_16x2(_(x), _(y)); }
// Bitwise operations.
I32 bit_and (I32, I32); I32 bit_and (I32a x, I32a y) { return bit_and (_(x), _(y)); }
I32 bit_or (I32, I32); I32 bit_or (I32a x, I32a y) { return bit_or (_(x), _(y)); }

View File

@ -28,15 +28,10 @@ namespace SK_OPTS_NS {
using U16 = skvx::Vec<K, uint16_t>;
using U8 = skvx::Vec<K, uint8_t>;
using I16x2 = skvx::Vec<2*K, int16_t>;
using U16x2 = skvx::Vec<2*K, uint16_t>;
union Slot {
F32 f32;
I32 i32;
U32 u32;
I16x2 i16x2;
U16x2 u16x2;
};
Slot few_regs[16];
@ -222,18 +217,10 @@ namespace SK_OPTS_NS {
CASE(Op::sub_i32): r(d).i32 = r(x).i32 - r(y).i32; break;
CASE(Op::mul_i32): r(d).i32 = r(x).i32 * r(y).i32; break;
CASE(Op::add_i16x2): r(d).i16x2 = r(x).i16x2 + r(y).i16x2; break;
CASE(Op::sub_i16x2): r(d).i16x2 = r(x).i16x2 - r(y).i16x2; break;
CASE(Op::mul_i16x2): r(d).i16x2 = r(x).i16x2 * r(y).i16x2; break;
CASE(Op::shl_i32): r(d).i32 = r(x).i32 << immy; break;
CASE(Op::sra_i32): r(d).i32 = r(x).i32 >> immy; break;
CASE(Op::shr_i32): r(d).u32 = r(x).u32 >> immy; break;
CASE(Op::shl_i16x2): r(d).i16x2 = r(x).i16x2 << immy; break;
CASE(Op::sra_i16x2): r(d).i16x2 = r(x).i16x2 >> immy; break;
CASE(Op::shr_i16x2): r(d).u16x2 = r(x).u16x2 >> immy; break;
CASE(Op:: eq_f32): r(d).i32 = r(x).f32 == r(y).f32; break;
CASE(Op::neq_f32): r(d).i32 = r(x).f32 != r(y).f32; break;
CASE(Op:: gt_f32): r(d).i32 = r(x).f32 > r(y).f32; break;
@ -242,9 +229,6 @@ namespace SK_OPTS_NS {
CASE(Op:: eq_i32): r(d).i32 = r(x).i32 == r(y).i32; break;
CASE(Op:: gt_i32): r(d).i32 = r(x).i32 > r(y).i32; break;
CASE(Op:: eq_i16x2): r(d).i16x2 = r(x).i16x2 == r(y).i16x2; break;
CASE(Op:: gt_i16x2): r(d).i16x2 = r(x).i16x2 > r(y).i16x2; break;
CASE(Op::bit_and ): r(d).i32 = r(x).i32 & r(y).i32; break;
CASE(Op::bit_or ): r(d).i32 = r(x).i32 | r(y).i32; break;
CASE(Op::bit_xor ): r(d).i32 = r(x).i32 ^ r(y).i32; break;

View File

@ -75,16 +75,6 @@ DEF_TEST(SkVM, r) {
buf.writeText("I32 (Naive) 8888 over 8888\n");
dump(builder, &buf);
}
{
SrcoverBuilder_I32 builder;
buf.writeText("I32 8888 over 8888\n");
dump(builder, &buf);
}
{
SrcoverBuilder_I32_SWAR builder;
buf.writeText("I32 (SWAR) 8888 over 8888\n");
dump(builder, &buf);
}
{
// Demonstrate the value of program reordering.
@ -177,8 +167,6 @@ DEF_TEST(SkVM, r) {
test_8888(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::RGBA_8888}.done("srcover_f32"));
test_8888(SrcoverBuilder_I32_Naive{}.done("srcover_i32_naive"));
test_8888(SrcoverBuilder_I32{}.done("srcover_i32"));
test_8888(SrcoverBuilder_I32_SWAR{}.done("srcover_i32_SWAR"));
test_jit_and_interpreter(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::G8}.done(),
[&](const skvm::Program& program) {
@ -616,70 +604,6 @@ DEF_TEST(SkVM_index, r) {
});
}
DEF_TEST(SkVM_i16x2, r) {
skvm::Builder b;
{
skvm::Arg buf = b.varying<int>();
skvm::I32 x = b.load32(buf),
y = b.add_16x2(x,x), // y = 2x
z = b.mul_16x2(x,y), // z = 2x^2
w = b.sub_16x2(z,x), // w = x(2x-1)
v = b.shl_16x2(w,7), // These shifts will be a no-op
u = b.sra_16x2(v,7); // for all but x=12 and x=13.
b.store32(buf, u);
}
test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
uint16_t buf[] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13 };
program.eval(SK_ARRAY_COUNT(buf)/2, buf);
for (int i = 0; i < 12; i++) {
REPORTER_ASSERT(r, buf[i] == i*(2*i-1));
}
REPORTER_ASSERT(r, buf[12] == 0xff14); // 12*23 = 0x114
REPORTER_ASSERT(r, buf[13] == 0xff45); // 13*25 = 0x145
});
}
DEF_TEST(SkVM_cmp_i16, r) {
skvm::Builder b;
{
skvm::Arg buf = b.varying<int>();
skvm::I32 x = b.load32(buf);
auto to_bit = [&](int shift, skvm::I32 mask) {
return b.shl_16x2(b.bit_and(mask, b.splat(0x0001'0001)), shift);
};
skvm::I32 m = b.splat(0);
m = b.bit_or(m, to_bit(0, b. eq_16x2(x, b.splat(0x0000'0000))));
m = b.bit_or(m, to_bit(1, b.neq_16x2(x, b.splat(0x0001'0001))));
m = b.bit_or(m, to_bit(2, b. lt_16x2(x, b.splat(0x0002'0002))));
m = b.bit_or(m, to_bit(3, b.lte_16x2(x, b.splat(0x0003'0003))));
m = b.bit_or(m, to_bit(4, b. gt_16x2(x, b.splat(0x0004'0004))));
m = b.bit_or(m, to_bit(5, b.gte_16x2(x, b.splat(0x0005'0005))));
b.store32(buf, m);
}
test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
int16_t buf[] = { 0,1, 2,3, 4,5, 6,7, 8,9 };
program.eval(SK_ARRAY_COUNT(buf)/2, buf);
REPORTER_ASSERT(r, buf[0] == 0b001111);
REPORTER_ASSERT(r, buf[1] == 0b001100);
REPORTER_ASSERT(r, buf[2] == 0b001010);
REPORTER_ASSERT(r, buf[3] == 0b001010);
REPORTER_ASSERT(r, buf[4] == 0b000010);
for (int i = 5; i < (int)SK_ARRAY_COUNT(buf); i++) {
REPORTER_ASSERT(r, buf[i] == 0b110010);
}
});
}
DEF_TEST(SkVM_mad, r) {
// This program is designed to exercise the tricky corners of instruction
// and register selection for Op::mad_f32.

View File

@ -117,72 +117,3 @@ SrcoverBuilder_I32_Naive::SrcoverBuilder_I32_Naive() {
r = pack(r, b, 16);
store32(dst, r);
}
SrcoverBuilder_I32::SrcoverBuilder_I32() {
skvm::Arg src = varying<int>(),
dst = varying<int>();
auto load = [&](skvm::Arg ptr,
skvm::I32* r, skvm::I32* g, skvm::I32* b, skvm::I32* a) {
skvm::I32 rgba = load32(ptr);
*r = extract(rgba, 0, splat(0xff));
*g = extract(rgba, 8, splat(0xff));
*b = extract(rgba, 16, splat(0xff));
*a = extract(rgba, 24, splat(0xff));
};
skvm::I32 r,g,b,a;
load(src, &r,&g,&b,&a);
skvm::I32 dr,dg,db,da;
load(dst, &dr,&dg,&db,&da);
// (xy + x)/256 is a good approximation of (xy + 127)/255
//
// == (d*(255-a) + d)/256
// == (d*(255-a+1) )/256
// == (d*(256-a ) )/256
// We're doing 8x8 bit multiplies in 32-bit lanes.
// Since the inputs and results both fit in 16 bits,
// we can use mul_16x2, which tends to be faster than mul.
//
// (The top 2 zero bytes of the inputs will also multiply
// with each other to produce zero... perfect.)
skvm::I32 invA = sub(splat(256), a);
r = add(r, shr(mul_16x2(dr, invA), 8));
g = add(g, shr(mul_16x2(dg, invA), 8));
b = add(b, shr(mul_16x2(db, invA), 8));
a = add(a, shr(mul_16x2(da, invA), 8));
r = pack(r, g, 8);
b = pack(b, a, 8);
r = pack(r, b, 16);
store32(dst, r);
}
SrcoverBuilder_I32_SWAR::SrcoverBuilder_I32_SWAR() {
skvm::Arg src = varying<int>(),
dst = varying<int>();
// The s += d*invA adds won't overflow,
// so we don't have to unpack s beyond grabbing the alpha channel.
skvm::I32 s = load32(src),
ax2 = extract(s, 24, splat(0x000000ff))
| extract(s, 8, splat(0x00ff0000));
// We'll use the same approximation math as above, this time making sure to
// use both i16 multiplies to our benefit, one for r/g, the other for b/a.
skvm::I32 invAx2 = sub_16x2(splat(0x01000100), ax2);
skvm::I32 d = load32(dst),
rb = bit_and (d, splat(0x00ff00ff)),
ga = shr_16x2(d, 8);
rb = shr_16x2(mul_16x2(rb, invAx2), 8); // Put the high 8 bits back in the low lane.
ga = mul_16x2(ga, invAx2); // Keep the high 8 bits up high...
ga = bit_clear(ga, splat(0x00ff00ff)); // ...and mask off the low bits.
store32(dst, add(s, bit_or(rb, ga)));
}

View File

@ -22,12 +22,4 @@ struct SrcoverBuilder_I32_Naive : public skvm::Builder {
SrcoverBuilder_I32_Naive(); // 8888 over 8888
};
struct SrcoverBuilder_I32 : public skvm::Builder {
SrcoverBuilder_I32(); // 8888 over 8888
};
struct SrcoverBuilder_I32_SWAR : public skvm::Builder {
SrcoverBuilder_I32_SWAR(); // 8888 over 8888
};
#endif//SkVMBuilders_DEFINED