remove i16x2 ops
These are neat but mostly just a distraction for now. I've left all the
assembly in place and unit tested to make putting these back easy when we
want to.

Change-Id: Id2bd05eca363baf9c4e31125ee79e722ded54cb7
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/283307
Commit-Queue: Mike Klein <mtklein@google.com>
Reviewed-by: Herb Derby <herb@google.com>
parent cb5110443f
commit 45d9cc86b3
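
For context (this sketch is not part of the commit): the i16x2 ops being removed treat each 32-bit lane as a pair of independent 16-bit integers, a small SWAR trick. The helpers below are hypothetical scalar stand-ins for the per-lane semantics the interpreter and JIT backends gave these ops (vpmullw/vpsubw/vpsrlw on x86, mul8h/sub8h/ushr8h on ARM, per the diff below).

#include <cstdint>

// Hypothetical scalar models (not Skia code): each uint32_t holds two
// independent 16-bit lanes, and each lane wraps on its own.
static inline uint32_t mul_i16x2(uint32_t x, uint32_t y) {
    uint32_t lo = (uint16_t)((uint16_t) x        * (uint16_t) y);         // keep the low 16 bits, like vpmullw
    uint32_t hi = (uint16_t)((uint16_t)(x >> 16) * (uint16_t)(y >> 16));
    return (hi << 16) | lo;
}

static inline uint32_t sub_i16x2(uint32_t x, uint32_t y) {
    uint32_t lo = (uint16_t)((uint16_t) x        - (uint16_t) y);
    uint32_t hi = (uint16_t)((uint16_t)(x >> 16) - (uint16_t)(y >> 16));
    return (hi << 16) | lo;
}

static inline uint32_t shr_i16x2(uint32_t x, int k) {
    uint32_t lo = (x & 0xffffu) >> k;   // each lane shifts in zeros independently
    uint32_t hi = (x >> 16)     >> k;
    return (hi << 16) | lo;
}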
@@ -12,8 +12,8 @@

namespace {

enum Mode {Opts, RP, F32, I32_Naive, I32, I32_SWAR};
static const char* kMode_name[] = { "Opts", "RP","F32", "I32_Naive", "I32", "I32_SWAR" };
enum Mode {Opts, RP, F32, I32_Naive};
static const char* kMode_name[] = { "Opts", "RP","F32", "I32_Naive" };

}

@@ -36,8 +36,6 @@ private:

if (fMode == F32 ) { fProgram = SrcoverBuilder_F32 {}.done(); }
if (fMode == I32_Naive) { fProgram = SrcoverBuilder_I32_Naive{}.done(); }
if (fMode == I32 ) { fProgram = SrcoverBuilder_I32 {}.done(); }
if (fMode == I32_SWAR ) { fProgram = SrcoverBuilder_I32_SWAR {}.done(); }

if (fMode == RP) {
fSrcCtx = { fSrc.data(), 0 };
@@ -111,22 +109,6 @@ DEF_BENCH(return (new SkVMBench{ 256, I32_Naive});)
DEF_BENCH(return (new SkVMBench{1024, I32_Naive});)
DEF_BENCH(return (new SkVMBench{4096, I32_Naive});)

DEF_BENCH(return (new SkVMBench{ 1, I32});)
DEF_BENCH(return (new SkVMBench{ 4, I32});)
DEF_BENCH(return (new SkVMBench{ 15, I32});)
DEF_BENCH(return (new SkVMBench{ 63, I32});)
DEF_BENCH(return (new SkVMBench{ 256, I32});)
DEF_BENCH(return (new SkVMBench{1024, I32});)
DEF_BENCH(return (new SkVMBench{4096, I32});)

DEF_BENCH(return (new SkVMBench{ 1, I32_SWAR});)
DEF_BENCH(return (new SkVMBench{ 4, I32_SWAR});)
DEF_BENCH(return (new SkVMBench{ 15, I32_SWAR});)
DEF_BENCH(return (new SkVMBench{ 63, I32_SWAR});)
DEF_BENCH(return (new SkVMBench{ 256, I32_SWAR});)
DEF_BENCH(return (new SkVMBench{1024, I32_SWAR});)
DEF_BENCH(return (new SkVMBench{4096, I32_SWAR});)

class SkVM_Overhead : public Benchmark {
public:
explicit SkVM_Overhead(bool rp) : fRP(rp) {}

@@ -606,122 +606,6 @@ loop:
31 r7 = pack r6 r7 16
32 store32 arg(1) r7

I32 8888 over 8888
33 values (originally 33):
v0 = load32 arg(0)
v1 = shr_i32 v0 24
↑ v2 = splat 100 (3.5873241e-43)
v3 = sub_i32 v2 v1
v4 = load32 arg(1)
v5 = shr_i32 v4 16
↑ v6 = splat FF (3.5733111e-43)
v7 = bit_and v6 v5
v8 = mul_i16x2 v7 v3
v9 = shr_i32 v8 8
v10 = shr_i32 v0 16
v11 = bit_and v6 v10
v12 = add_i32 v11 v9
v13 = shr_i32 v4 24
v14 = mul_i16x2 v13 v3
v15 = shr_i32 v14 8
v16 = add_i32 v1 v15
v17 = pack v12 v16 8
v18 = shr_i32 v4 8
v19 = bit_and v6 v18
v20 = mul_i16x2 v19 v3
v21 = shr_i32 v20 8
v22 = shr_i32 v0 8
v23 = bit_and v6 v22
v24 = add_i32 v23 v21
v25 = bit_and v6 v4
v26 = mul_i16x2 v25 v3
v27 = shr_i32 v26 8
v28 = bit_and v6 v0
v29 = add_i32 v28 v27
v30 = pack v29 v24 8
v31 = pack v30 v17 16
store32 arg(1) v31

8 registers, 33 instructions:
0 r0 = splat 100 (3.5873241e-43)
1 r1 = splat FF (3.5733111e-43)
loop:
2 r2 = load32 arg(0)
3 r3 = shr_i32 r2 24
4 r4 = sub_i32 r0 r3
5 r5 = load32 arg(1)
6 r6 = shr_i32 r5 16
7 r6 = bit_and r1 r6
8 r6 = mul_i16x2 r6 r4
9 r6 = shr_i32 r6 8
10 r7 = shr_i32 r2 16
11 r7 = bit_and r1 r7
12 r6 = add_i32 r7 r6
13 r7 = shr_i32 r5 24
14 r7 = mul_i16x2 r7 r4
15 r7 = shr_i32 r7 8
16 r7 = add_i32 r3 r7
17 r7 = pack r6 r7 8
18 r6 = shr_i32 r5 8
19 r6 = bit_and r1 r6
20 r6 = mul_i16x2 r6 r4
21 r6 = shr_i32 r6 8
22 r3 = shr_i32 r2 8
23 r3 = bit_and r1 r3
24 r6 = add_i32 r3 r6
25 r5 = bit_and r1 r5
26 r4 = mul_i16x2 r5 r4
27 r4 = shr_i32 r4 8
28 r2 = bit_and r1 r2
29 r4 = add_i32 r2 r4
30 r6 = pack r4 r6 8
31 r7 = pack r6 r7 16
32 store32 arg(1) r7

I32 (SWAR) 8888 over 8888
19 values (originally 20):
v0 = load32 arg(0)
v1 = shr_i32 v0 8
↑ v2 = splat FF0000 (2.3418052e-38)
v3 = bit_and v2 v1
v4 = shr_i32 v0 24
v5 = bit_or v4 v3
↑ v6 = splat 1000100 (2.3510604e-38)
v7 = sub_i16x2 v6 v5
v8 = load32 arg(1)
v9 = shr_i16x2 v8 8
v10 = mul_i16x2 v9 v7
↑ v11 = splat FF00FF (2.3418409e-38)
v12 = bit_clear v10 v11
v13 = bit_and v8 v11
v14 = mul_i16x2 v13 v7
v15 = shr_i16x2 v14 8
v16 = bit_or v15 v12
v17 = add_i32 v0 v16
store32 arg(1) v17

7 registers, 19 instructions:
0 r0 = splat FF0000 (2.3418052e-38)
1 r1 = splat 1000100 (2.3510604e-38)
2 r2 = splat FF00FF (2.3418409e-38)
loop:
3 r3 = load32 arg(0)
4 r4 = shr_i32 r3 8
5 r4 = bit_and r0 r4
6 r5 = shr_i32 r3 24
7 r4 = bit_or r5 r4
8 r4 = sub_i16x2 r1 r4
9 r5 = load32 arg(1)
10 r6 = shr_i16x2 r5 8
11 r6 = mul_i16x2 r6 r4
12 r6 = bit_clear r6 r2
13 r5 = bit_and r5 r2
14 r4 = mul_i16x2 r5 r4
15 r4 = shr_i16x2 r4 8
16 r6 = bit_or r4 r6
17 r6 = add_i32 r3 r6
18 store32 arg(1) r6

23 values (originally 23):
v0 = load32 arg(1)
v1 = shr_i32 v0 24

@@ -271,17 +271,6 @@ namespace skvm {
case Op:: eq_i32: write(o, V{id}, "=", op, V{x}, V{y}); break;
case Op:: gt_i32: write(o, V{id}, "=", op, V{x}, V{y}); break;

case Op::add_i16x2: write(o, V{id}, "=", op, V{x}, V{y}); break;
case Op::sub_i16x2: write(o, V{id}, "=", op, V{x}, V{y}); break;
case Op::mul_i16x2: write(o, V{id}, "=", op, V{x}, V{y}); break;

case Op::shl_i16x2: write(o, V{id}, "=", op, V{x}, Shift{immy}); break;
case Op::shr_i16x2: write(o, V{id}, "=", op, V{x}, Shift{immy}); break;
case Op::sra_i16x2: write(o, V{id}, "=", op, V{x}, Shift{immy}); break;

case Op:: eq_i16x2: write(o, V{id}, "=", op, V{x}, V{y}); break;
case Op:: gt_i16x2: write(o, V{id}, "=", op, V{x}, V{y}); break;

case Op::bit_and : write(o, V{id}, "=", op, V{x}, V{y} ); break;
case Op::bit_or : write(o, V{id}, "=", op, V{x}, V{y} ); break;
case Op::bit_xor : write(o, V{id}, "=", op, V{x}, V{y} ); break;
@@ -384,18 +373,6 @@ namespace skvm {
case Op:: eq_i32: write(o, R{d}, "=", op, R{x}, R{y}); break;
case Op:: gt_i32: write(o, R{d}, "=", op, R{x}, R{y}); break;

case Op::add_i16x2: write(o, R{d}, "=", op, R{x}, R{y}); break;
case Op::sub_i16x2: write(o, R{d}, "=", op, R{x}, R{y}); break;
case Op::mul_i16x2: write(o, R{d}, "=", op, R{x}, R{y}); break;

case Op::shl_i16x2: write(o, R{d}, "=", op, R{x}, Shift{immy}); break;
case Op::shr_i16x2: write(o, R{d}, "=", op, R{x}, Shift{immy}); break;
case Op::sra_i16x2: write(o, R{d}, "=", op, R{x}, Shift{immy}); break;

case Op:: eq_i16x2: write(o, R{d}, "=", op, R{x}, R{y}); break;
case Op:: gt_i16x2: write(o, R{d}, "=", op, R{x}, R{y}); break;

case Op::bit_and : write(o, R{d}, "=", op, R{x}, R{y} ); break;
case Op::bit_or : write(o, R{d}, "=", op, R{x}, R{y} ); break;
case Op::bit_xor : write(o, R{d}, "=", op, R{x}, R{y} ); break;
@@ -1028,10 +1005,6 @@ namespace skvm {
return {this, this->push(Op::mul_i32, x.id, y.id)};
}

I32 Builder::add_16x2(I32 x, I32 y) { return {this, this->push(Op::add_i16x2, x.id, y.id)}; }
I32 Builder::sub_16x2(I32 x, I32 y) { return {this, this->push(Op::sub_i16x2, x.id, y.id)}; }
I32 Builder::mul_16x2(I32 x, I32 y) { return {this, this->push(Op::mul_i16x2, x.id, y.id)}; }

I32 Builder::shl(I32 x, int bits) {
if (bits == 0) { return x; }
if (int X; this->allImm(x.id,&X)) { return splat(X << bits); }
@@ -1048,10 +1021,6 @@ namespace skvm {
return {this, this->push(Op::sra_i32, x.id,NA,NA, bits)};
}

I32 Builder::shl_16x2(I32 x, int k) { return {this, this->push(Op::shl_i16x2, x.id,NA,NA, k)}; }
I32 Builder::shr_16x2(I32 x, int k) { return {this, this->push(Op::shr_i16x2, x.id,NA,NA, k)}; }
I32 Builder::sra_16x2(I32 x, int k) { return {this, this->push(Op::sra_i16x2, x.id,NA,NA, k)}; }

I32 Builder:: eq(F32 x, F32 y) {
if (float X,Y; this->allImm(x.id,&X, y.id,&Y)) { return splat(X==Y ? ~0 : 0); }
return {this, this->push(Op::eq_f32, x.id, y.id)};
@@ -1094,15 +1063,6 @@ namespace skvm {
I32 Builder:: lt(I32 x, I32 y) { return y>x; }
I32 Builder::lte(I32 x, I32 y) { return y>=x; }

I32 Builder:: eq_16x2(I32 x, I32 y) { return {this, this->push(Op:: eq_i16x2, x.id, y.id)}; }
I32 Builder:: gt_16x2(I32 x, I32 y) { return {this, this->push(Op:: gt_i16x2, x.id, y.id)}; }

I32 Builder::neq_16x2(I32 x, I32 y) { return ~eq_16x2(x,y); }
I32 Builder::gte_16x2(I32 x, I32 y) { return ~lt_16x2(x,y); }

I32 Builder:: lt_16x2(I32 x, I32 y) { return gt_16x2(y,x); }
I32 Builder::lte_16x2(I32 x, I32 y) { return gte_16x2(y,x); }

I32 Builder::bit_and(I32 x, I32 y) {
if (x.id == y.id) { return x; }
if (int X,Y; this->allImm(x.id,&X, y.id,&Y)) { return splat(X&Y); }
@@ -2461,18 +2421,15 @@ namespace skvm {
llvm::Type *i1 = llvm::Type::getInt1Ty (*ctx),
*i8 = llvm::Type::getInt8Ty (*ctx),
*i16 = llvm::Type::getInt16Ty(*ctx),
*i16x2 = llvm::VectorType::get(i16, 2),
*f32 = llvm::Type::getFloatTy(*ctx),
*I1 = scalar ? i1 : llvm::VectorType::get(i1 , K ),
*I8 = scalar ? i8 : llvm::VectorType::get(i8 , K ),
*I16 = scalar ? i16 : llvm::VectorType::get(i16, K ),
*I16x2 = scalar ? i16x2 : llvm::VectorType::get(i16, K*2),
*I32 = scalar ? i32 : llvm::VectorType::get(i32, K ),
*F32 = scalar ? f32 : llvm::VectorType::get(f32, K );

auto I = [&](llvm::Value* v) { return b->CreateBitCast(v, I32 ); };
auto F = [&](llvm::Value* v) { return b->CreateBitCast(v, F32 ); };
auto x2 = [&](llvm::Value* v) { return b->CreateBitCast(v, I16x2); };

auto S = [&](llvm::Type* dst, llvm::Value* v) { return b->CreateSExt(v, dst); };

@@ -2639,20 +2596,6 @@ namespace skvm {
#endif
} break;

case Op::add_i16x2: vals[i] = I(b->CreateAdd(x2(vals[x]), x2(vals[y]))); break;
case Op::sub_i16x2: vals[i] = I(b->CreateSub(x2(vals[x]), x2(vals[y]))); break;
case Op::mul_i16x2: vals[i] = I(b->CreateMul(x2(vals[x]), x2(vals[y]))); break;

case Op::shl_i16x2: vals[i] = I(b->CreateShl (x2(vals[x]), immy)); break;
case Op::sra_i16x2: vals[i] = I(b->CreateAShr(x2(vals[x]), immy)); break;
case Op::shr_i16x2: vals[i] = I(b->CreateLShr(x2(vals[x]), immy)); break;

case Op:: eq_i16x2:
vals[i] = I(S(I16x2, b->CreateICmpEQ (x2(vals[x]), x2(vals[y]))));
break;
case Op:: gt_i16x2:
vals[i] = I(S(I16x2, b->CreateICmpSGT(x2(vals[x]), x2(vals[y]))));
break;
}
return true;
};
@@ -3357,10 +3300,6 @@ namespace skvm {
case Op::sub_i32: a->vpsubd (dst(), r[x], r[y]); break;
case Op::mul_i32: a->vpmulld(dst(), r[x], r[y]); break;

case Op::sub_i16x2: a->vpsubw (dst(), r[x], r[y]); break;
case Op::mul_i16x2: a->vpmullw(dst(), r[x], r[y]); break;
case Op::shr_i16x2: a->vpsrlw (dst(), r[x], immy); break;

case Op::bit_and : a->vpand (dst(), r[x], r[y]); break;
case Op::bit_or : a->vpor (dst(), r[x], r[y]); break;
case Op::bit_xor : a->vpxor (dst(), r[x], r[y]); break;
@@ -3480,10 +3419,6 @@ namespace skvm {
case Op::sub_i32: a->sub4s(dst(), r[x], r[y]); break;
case Op::mul_i32: a->mul4s(dst(), r[x], r[y]); break;

case Op::sub_i16x2: a->sub8h (dst(), r[x], r[y]); break;
case Op::mul_i16x2: a->mul8h (dst(), r[x], r[y]); break;
case Op::shr_i16x2: a->ushr8h(dst(), r[x], immy); break;

case Op::bit_and : a->and16b(dst(), r[x], r[y]); break;
case Op::bit_or : a->orr16b(dst(), r[x], r[y]); break;
case Op::bit_xor : a->eor16b(dst(), r[x], r[y]); break;

@@ -310,26 +310,24 @@ namespace skvm {
M(gather8) M(gather16) M(gather32) \
M(uniform8) M(uniform16) M(uniform32) \
M(splat) \
M(add_f32) M(add_i32) M(add_i16x2) \
M(sub_f32) M(sub_i32) M(sub_i16x2) \
M(mul_f32) M(mul_i32) M(mul_i16x2) \
M(add_f32) M(add_i32) \
M(sub_f32) M(sub_i32) \
M(mul_f32) M(mul_i32) \
M(div_f32) \
M(min_f32) \
M(max_f32) \
M(fma_f32) M(fms_f32) M(fnma_f32) \
M(sqrt_f32) \
M(shl_i32) M(shl_i16x2) \
M(shr_i32) M(shr_i16x2) \
M(sra_i32) M(sra_i16x2) \
M(shl_i32) M(shr_i32) M(sra_i32) \
M(add_f32_imm) \
M(sub_f32_imm) \
M(mul_f32_imm) \
M(min_f32_imm) \
M(max_f32_imm) \
M(floor) M(trunc) M(round) M(to_f32) \
M( eq_f32) M( eq_i32) M( eq_i16x2) \
M( eq_f32) M( eq_i32) \
M(neq_f32) \
M( gt_f32) M( gt_i32) M( gt_i16x2) \
M( gt_f32) M( gt_i32) \
M(gte_f32) \
M(bit_and) \
M(bit_or) \
@@ -623,22 +621,6 @@ namespace skvm {
F32 to_f32(I32 x);
F32 bit_cast(I32 x) { return {x.builder, x.id}; }

// Treat each 32-bit lane as a pair of 16-bit ints.
I32 add_16x2(I32, I32); I32 add_16x2(I32a x, I32a y) { return add_16x2(_(x), _(y)); }
I32 sub_16x2(I32, I32); I32 sub_16x2(I32a x, I32a y) { return sub_16x2(_(x), _(y)); }
I32 mul_16x2(I32, I32); I32 mul_16x2(I32a x, I32a y) { return mul_16x2(_(x), _(y)); }

I32 shl_16x2(I32 x, int bits);
I32 shr_16x2(I32 x, int bits);
I32 sra_16x2(I32 x, int bits);

I32 eq_16x2(I32, I32); I32 eq_16x2(I32a x, I32a y) { return eq_16x2(_(x), _(y)); }
I32 neq_16x2(I32, I32); I32 neq_16x2(I32a x, I32a y) { return neq_16x2(_(x), _(y)); }
I32 lt_16x2(I32, I32); I32 lt_16x2(I32a x, I32a y) { return lt_16x2(_(x), _(y)); }
I32 lte_16x2(I32, I32); I32 lte_16x2(I32a x, I32a y) { return lte_16x2(_(x), _(y)); }
I32 gt_16x2(I32, I32); I32 gt_16x2(I32a x, I32a y) { return gt_16x2(_(x), _(y)); }
I32 gte_16x2(I32, I32); I32 gte_16x2(I32a x, I32a y) { return gte_16x2(_(x), _(y)); }

// Bitwise operations.
I32 bit_and (I32, I32); I32 bit_and (I32a x, I32a y) { return bit_and (_(x), _(y)); }
I32 bit_or (I32, I32); I32 bit_or (I32a x, I32a y) { return bit_or (_(x), _(y)); }
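
(A usage sketch, not from this change: with the pre-removal headers, the 16x2 Builder API above could be exercised as shown below. Every call used here appears in the diff, but the snippet itself is illustrative.)

#include "src/core/SkVM.h"

skvm::Program double_both_halves() {
    skvm::Builder b;
    {
        skvm::Arg buf = b.varying<int>();
        skvm::I32 x   = b.load32(buf);
        // mul_16x2 multiplies the two 16-bit halves of each 32-bit lane
        // independently, so multiplying by 0x0002'0002 doubles both halves.
        b.store32(buf, b.mul_16x2(x, b.splat(0x0002'0002)));
    }
    return b.done();
}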

@@ -28,15 +28,10 @@ namespace SK_OPTS_NS {
using U16 = skvx::Vec<K, uint16_t>;
using U8 = skvx::Vec<K, uint8_t>;

using I16x2 = skvx::Vec<2*K, int16_t>;
using U16x2 = skvx::Vec<2*K, uint16_t>;

union Slot {
F32 f32;
I32 i32;
U32 u32;
I16x2 i16x2;
U16x2 u16x2;
};

Slot few_regs[16];
@ -222,18 +217,10 @@ namespace SK_OPTS_NS {
CASE(Op::sub_i32): r(d).i32 = r(x).i32 - r(y).i32; break;
CASE(Op::mul_i32): r(d).i32 = r(x).i32 * r(y).i32; break;

CASE(Op::add_i16x2): r(d).i16x2 = r(x).i16x2 + r(y).i16x2; break;
CASE(Op::sub_i16x2): r(d).i16x2 = r(x).i16x2 - r(y).i16x2; break;
CASE(Op::mul_i16x2): r(d).i16x2 = r(x).i16x2 * r(y).i16x2; break;

CASE(Op::shl_i32): r(d).i32 = r(x).i32 << immy; break;
CASE(Op::sra_i32): r(d).i32 = r(x).i32 >> immy; break;
CASE(Op::shr_i32): r(d).u32 = r(x).u32 >> immy; break;

CASE(Op::shl_i16x2): r(d).i16x2 = r(x).i16x2 << immy; break;
CASE(Op::sra_i16x2): r(d).i16x2 = r(x).i16x2 >> immy; break;
CASE(Op::shr_i16x2): r(d).u16x2 = r(x).u16x2 >> immy; break;

CASE(Op:: eq_f32): r(d).i32 = r(x).f32 == r(y).f32; break;
CASE(Op::neq_f32): r(d).i32 = r(x).f32 != r(y).f32; break;
CASE(Op:: gt_f32): r(d).i32 = r(x).f32 > r(y).f32; break;
@ -242,9 +229,6 @@ namespace SK_OPTS_NS {
CASE(Op:: eq_i32): r(d).i32 = r(x).i32 == r(y).i32; break;
CASE(Op:: gt_i32): r(d).i32 = r(x).i32 > r(y).i32; break;

CASE(Op:: eq_i16x2): r(d).i16x2 = r(x).i16x2 == r(y).i16x2; break;
CASE(Op:: gt_i16x2): r(d).i16x2 = r(x).i16x2 > r(y).i16x2; break;

CASE(Op::bit_and ): r(d).i32 = r(x).i32 & r(y).i32; break;
CASE(Op::bit_or ): r(d).i32 = r(x).i32 | r(y).i32; break;
CASE(Op::bit_xor ): r(d).i32 = r(x).i32 ^ r(y).i32; break;

@@ -75,16 +75,6 @@ DEF_TEST(SkVM, r) {
buf.writeText("I32 (Naive) 8888 over 8888\n");
dump(builder, &buf);
}
{
SrcoverBuilder_I32 builder;
buf.writeText("I32 8888 over 8888\n");
dump(builder, &buf);
}
{
SrcoverBuilder_I32_SWAR builder;
buf.writeText("I32 (SWAR) 8888 over 8888\n");
dump(builder, &buf);
}

{
// Demonstrate the value of program reordering.
@@ -177,8 +167,6 @@ DEF_TEST(SkVM, r) {

test_8888(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::RGBA_8888}.done("srcover_f32"));
test_8888(SrcoverBuilder_I32_Naive{}.done("srcover_i32_naive"));
test_8888(SrcoverBuilder_I32{}.done("srcover_i32"));
test_8888(SrcoverBuilder_I32_SWAR{}.done("srcover_i32_SWAR"));

test_jit_and_interpreter(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::G8}.done(),
[&](const skvm::Program& program) {
@@ -616,70 +604,6 @@ DEF_TEST(SkVM_index, r) {
});
}

DEF_TEST(SkVM_i16x2, r) {
skvm::Builder b;
{
skvm::Arg buf = b.varying<int>();

skvm::I32 x = b.load32(buf),
y = b.add_16x2(x,x), // y = 2x
z = b.mul_16x2(x,y), // z = 2x^2
w = b.sub_16x2(z,x), // w = x(2x-1)
v = b.shl_16x2(w,7), // These shifts will be a no-op
u = b.sra_16x2(v,7); // for all but x=12 and x=13.
b.store32(buf, u);
}

test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
uint16_t buf[] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13 };

program.eval(SK_ARRAY_COUNT(buf)/2, buf);
for (int i = 0; i < 12; i++) {
REPORTER_ASSERT(r, buf[i] == i*(2*i-1));
}
REPORTER_ASSERT(r, buf[12] == 0xff14); // 12*23 = 0x114
REPORTER_ASSERT(r, buf[13] == 0xff45); // 13*25 = 0x145
});
}
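
(Aside, not part of the test file: the 0xff14 and 0xff45 expectations fall out of the shl_16x2/sra_16x2 pair sign-extending bit 8 of a lane once x*(2x-1) no longer fits in 8 bits. A hypothetical standalone check of just that arithmetic, assuming ordinary two's-complement wrapping:)

#include <cassert>
#include <cstdint>

// One 16-bit lane pushed through shl 7 then arithmetic shr 7:
// the right shift drags the lane's bit 8 across all the upper bits.
static int16_t shl7_sra7(int16_t w) {
    return (int16_t)((int16_t)(w << 7) >> 7);
}

int main() {
    assert((uint16_t)shl7_sra7(11 * 21) == 11 * 21);  // 0x0E7 still fits in 8 bits: a no-op
    assert((uint16_t)shl7_sra7(12 * 23) == 0xff14);   // 0x114 has bit 8 set, so it sign-extends
    assert((uint16_t)shl7_sra7(13 * 25) == 0xff45);   // likewise for 0x145
    return 0;
}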

DEF_TEST(SkVM_cmp_i16, r) {
skvm::Builder b;
{
skvm::Arg buf = b.varying<int>();
skvm::I32 x = b.load32(buf);

auto to_bit = [&](int shift, skvm::I32 mask) {
return b.shl_16x2(b.bit_and(mask, b.splat(0x0001'0001)), shift);
};

skvm::I32 m = b.splat(0);
m = b.bit_or(m, to_bit(0, b. eq_16x2(x, b.splat(0x0000'0000))));
m = b.bit_or(m, to_bit(1, b.neq_16x2(x, b.splat(0x0001'0001))));
m = b.bit_or(m, to_bit(2, b. lt_16x2(x, b.splat(0x0002'0002))));
m = b.bit_or(m, to_bit(3, b.lte_16x2(x, b.splat(0x0003'0003))));
m = b.bit_or(m, to_bit(4, b. gt_16x2(x, b.splat(0x0004'0004))));
m = b.bit_or(m, to_bit(5, b.gte_16x2(x, b.splat(0x0005'0005))));

b.store32(buf, m);
}

test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
int16_t buf[] = { 0,1, 2,3, 4,5, 6,7, 8,9 };

program.eval(SK_ARRAY_COUNT(buf)/2, buf);

REPORTER_ASSERT(r, buf[0] == 0b001111);
REPORTER_ASSERT(r, buf[1] == 0b001100);
REPORTER_ASSERT(r, buf[2] == 0b001010);
REPORTER_ASSERT(r, buf[3] == 0b001010);
REPORTER_ASSERT(r, buf[4] == 0b000010);
for (int i = 5; i < (int)SK_ARRAY_COUNT(buf); i++) {
REPORTER_ASSERT(r, buf[i] == 0b110010);
}
});
}

DEF_TEST(SkVM_mad, r) {
// This program is designed to exercise the tricky corners of instruction
// and register selection for Op::mad_f32.

@@ -117,72 +117,3 @@ SrcoverBuilder_I32_Naive::SrcoverBuilder_I32_Naive() {
r = pack(r, b, 16);
store32(dst, r);
}

SrcoverBuilder_I32::SrcoverBuilder_I32() {
skvm::Arg src = varying<int>(),
dst = varying<int>();

auto load = [&](skvm::Arg ptr,
skvm::I32* r, skvm::I32* g, skvm::I32* b, skvm::I32* a) {
skvm::I32 rgba = load32(ptr);
*r = extract(rgba, 0, splat(0xff));
*g = extract(rgba, 8, splat(0xff));
*b = extract(rgba, 16, splat(0xff));
*a = extract(rgba, 24, splat(0xff));
};

skvm::I32 r,g,b,a;
load(src, &r,&g,&b,&a);

skvm::I32 dr,dg,db,da;
load(dst, &dr,&dg,&db,&da);

// (xy + x)/256 is a good approximation of (xy + 127)/255
//
// == (d*(255-a) + d)/256
// == (d*(255-a+1) )/256
// == (d*(256-a ) )/256

// We're doing 8x8 bit multiplies in 32-bit lanes.
// Since the inputs and results both fit in 16 bits,
// we can use mul_16x2, which tends to be faster than mul.
//
// (The top 2 zero bytes of the inputs will also multiply
// with each other to produce zero... perfect.)

skvm::I32 invA = sub(splat(256), a);
r = add(r, shr(mul_16x2(dr, invA), 8));
g = add(g, shr(mul_16x2(dg, invA), 8));
b = add(b, shr(mul_16x2(db, invA), 8));
a = add(a, shr(mul_16x2(da, invA), 8));

r = pack(r, g, 8);
b = pack(b, a, 8);
r = pack(r, b, 16);
store32(dst, r);
}
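
(Side note, not part of the file: the "(xy + x)/256" shortcut used above can be checked exhaustively against the exact rounded divide by 255. A hypothetical brute-force comparison over all 8-bit inputs:)

#include <algorithm>
#include <cstdio>
#include <cstdlib>

int main() {
    int worst = 0;
    for (int x = 0; x < 256; x++)
    for (int y = 0; y < 256; y++) {
        int approx = (x*y + x) / 256,      // what the builder computes: x*(y+1)/256
            exact  = (x*y + 127) / 255;    // round-to-nearest divide by 255
        worst = std::max(worst, std::abs(approx - exact));
    }
    std::printf("max |approx - exact| = %d\n", worst);   // prints 1: never off by more than one
    return 0;
}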

SrcoverBuilder_I32_SWAR::SrcoverBuilder_I32_SWAR() {
skvm::Arg src = varying<int>(),
dst = varying<int>();

// The s += d*invA adds won't overflow,
// so we don't have to unpack s beyond grabbing the alpha channel.
skvm::I32 s = load32(src),
ax2 = extract(s, 24, splat(0x000000ff))
| extract(s, 8, splat(0x00ff0000));

// We'll use the same approximation math as above, this time making sure to
// use both i16 multiplies to our benefit, one for r/g, the other for b/a.
skvm::I32 invAx2 = sub_16x2(splat(0x01000100), ax2);

skvm::I32 d = load32(dst),
rb = bit_and (d, splat(0x00ff00ff)),
ga = shr_16x2(d, 8);

rb = shr_16x2(mul_16x2(rb, invAx2), 8); // Put the high 8 bits back in the low lane.
ga = mul_16x2(ga, invAx2); // Keep the high 8 bits up high...
ga = bit_clear(ga, splat(0x00ff00ff)); // ...and mask off the low bits.

store32(dst, add(s, bit_or(rb, ga)));
}
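
(For readers skimming the diff, here is a hypothetical scalar, one-pixel model of the SWAR kernel above; it is not part of the change, and the helper simply mirrors what mul_16x2 does on each pair of 16-bit lanes.)

#include <cstdint>

// Per-lane 16-bit multiply keeping the low 16 bits of each product
// (a scalar stand-in for the removed mul_i16x2 op).
static inline uint32_t mul_16x2(uint32_t x, uint32_t y) {
    uint32_t lo = (uint16_t)((uint16_t) x        * (uint16_t) y);
    uint32_t hi = (uint16_t)((uint16_t)(x >> 16) * (uint16_t)(y >> 16));
    return (hi << 16) | lo;
}

// One RGBA_8888 pixel of srcover using the same SWAR layout as above:
// r/b ride in the low bytes of each 16-bit lane, g/a in the high bytes.
static inline uint32_t srcover_swar(uint32_t s, uint32_t d) {
    uint32_t ax2    = ((s >> 24) & 0x000000ff) | ((s >> 8) & 0x00ff0000); // alpha in both lanes
    uint32_t invAx2 = 0x01000100 - ax2;        // 256-a per lane; no borrow crosses lanes since a <= 255
    uint32_t rb     =  d       & 0x00ff00ff,   // r and b in the low bytes
             ga     = (d >> 8) & 0x00ff00ff;   // g and a in the low bytes
    rb = (mul_16x2(rb, invAx2) >> 8) & 0x00ff00ff;  // high byte of each product, moved back down low
    ga =  mul_16x2(ga, invAx2)       & 0xff00ff00;  // high byte of each product, kept up high
    return s + (rb | ga);                           // per-byte adds don't overflow, as noted above
}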

@@ -22,12 +22,4 @@ struct SrcoverBuilder_I32_Naive : public skvm::Builder {
SrcoverBuilder_I32_Naive(); // 8888 over 8888
};

struct SrcoverBuilder_I32 : public skvm::Builder {
SrcoverBuilder_I32(); // 8888 over 8888
};

struct SrcoverBuilder_I32_SWAR : public skvm::Builder {
SrcoverBuilder_I32_SWAR(); // 8888 over 8888
};

#endif//SkVMBuilders_DEFINED