refactor bit ops

- Remove extract... it's not going to have any special impl.
  I've left it on skvm::Builder as an inline compound method.
- Add no-op shift short circuits.
- Add immediate ops for bit_{and,or,xor,clear}.

This comes from me noticing that the masks for extract today are always
immediates, and then when I started converting it to be (I32, int shift,
int mask), I realized it might be even better to break it up into its
component pieces.  There's no backend that can do extract any better
than shift-then-mask, so might as well leave it that way so we can
dedup, reorder, and specialize those micro ops.

Will follow up soon to get this all JITing again,
and these can-we-JIT test changes will be reverted.

Change-Id: I0835bcd825e417104ccc7efc79e9a0f2f4897841
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/263217
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
This commit is contained in:
Mike Klein 2020-01-08 14:06:52 -06:00 committed by Skia Commit-Bot
parent 4a6e849207
commit a6434a5ef5
4 changed files with 555 additions and 500 deletions

File diff suppressed because it is too large Load Diff

View File

@ -217,11 +217,14 @@ namespace skvm {
case Op::bit_or : write(o, V{id}, "= bit_or" , V{x}, V{y} ); break;
case Op::bit_xor : write(o, V{id}, "= bit_xor" , V{x}, V{y} ); break;
case Op::bit_clear: write(o, V{id}, "= bit_clear", V{x}, V{y} ); break;
case Op::select : write(o, V{id}, "= select" , V{x}, V{y}, V{z}); break;
case Op::bytes: write(o, V{id}, "= bytes", V{x}, Hex{immy}); break;
case Op::extract: write(o, V{id}, "= extract", V{x}, Shift{immy}, V{z}); break;
case Op::pack: write(o, V{id}, "= pack", V{x}, V{y}, Shift{immz}); break;
case Op::bit_and_imm: write(o, V{id}, "= bit_and" , V{x}, Hex{immy}); break;
case Op::bit_or_imm : write(o, V{id}, "= bit_or" , V{x}, Hex{immy}); break;
case Op::bit_xor_imm: write(o, V{id}, "= bit_xor" , V{x}, Hex{immy}); break;
case Op::select: write(o, V{id}, "= select", V{x}, V{y}, V{z}); break;
case Op::bytes: write(o, V{id}, "= bytes", V{x}, Hex{immy}); break;
case Op::pack: write(o, V{id}, "= pack", V{x}, V{y}, Shift{immz}); break;
case Op::floor: write(o, V{id}, "= floor", V{x}); break;
case Op::to_f32: write(o, V{id}, "= to_f32", V{x}); break;
@ -339,11 +342,14 @@ namespace skvm {
case Op::bit_or : write(o, R{d}, "= bit_or" , R{x}, R{y} ); break;
case Op::bit_xor : write(o, R{d}, "= bit_xor" , R{x}, R{y} ); break;
case Op::bit_clear: write(o, R{d}, "= bit_clear", R{x}, R{y} ); break;
case Op::select : write(o, R{d}, "= select" , R{x}, R{y}, R{z}); break;
case Op::bytes: write(o, R{d}, "= bytes", R{x}, Hex{immy}); break;
case Op::extract: write(o, R{d}, "= extract", R{x}, Shift{immy}, R{z}); break;
case Op::pack: write(o, R{d}, "= pack", R{x}, R{y}, Shift{immz}); break;
case Op::bit_and_imm: write(o, R{d}, "= bit_and" , R{x}, Hex{immy}); break;
case Op::bit_or_imm : write(o, R{d}, "= bit_or" , R{x}, Hex{immy}); break;
case Op::bit_xor_imm: write(o, R{d}, "= bit_xor" , R{x}, Hex{immy}); break;
case Op::select: write(o, R{d}, "= select", R{x}, R{y}, R{z}); break;
case Op::bytes: write(o, R{d}, "= bytes", R{x}, Hex{immy}); break;
case Op::pack: write(o, R{d}, "= pack", R{x}, R{y}, Shift{immz}); break;
case Op::floor: write(o, R{d}, "= floor", R{x}); break;
case Op::to_f32: write(o, R{d}, "= to_f32", R{x}); break;
@ -672,16 +678,19 @@ namespace skvm {
I32 Builder::mul_16x2(I32 x, I32 y) { return {this->push(Op::mul_i16x2, x.id, y.id)}; }
// Left shift of x by `bits`.
//  - A shift count of zero returns x itself; no op is pushed.
//  - When allImm() succeeds for x (presumably: x is a known constant -- confirm against
//    allImm's definition), the shift is folded here and the result is splatted.
//    The fold shifts the unsigned bit pattern: left-shifting a negative signed int is
//    undefined behavior before C++20, while the unsigned shift is well defined for any
//    in-range shift count and produces the same bits.
//  - Otherwise an Op::shl_i32 is pushed carrying `bits` as its immediate.
I32 Builder::shl(I32 x, int bits) {
    if (bits == 0) { return x; }
    int X;
    if (this->allImm(x.id,&X)) {
        return this->splat(static_cast<int>(static_cast<unsigned>(X) << bits));
    }
    return {this->push(Op::shl_i32, x.id,NA,NA, bits)};
}
// Right shift of x by `bits`; the constant-folded path shifts the unsigned bit pattern,
// so vacated high bits are filled with zeros.
I32 Builder::shr(I32 x, int bits) {
    // Shifting by nothing yields the input unchanged, so no op is pushed.
    if (bits == 0) {
        return x;
    }

    int constant;
    if (!this->allImm(x.id, &constant)) {
        return {this->push(Op::shr_i32, x.id,NA,NA, bits)};
    }
    // allImm() succeeded: fold the shift now and splat the result.
    return this->splat(unsigned(constant) >> bits);
}
I32 Builder::sra(I32 x, int bits) {
if (bits == 0) { return x; }
int X;
if (this->allImm(x.id,&X)) { return this->splat(X >> bits); }
return {this->push(Op::sra_i32, x.id,NA,NA, bits)};
@ -739,36 +748,49 @@ namespace skvm {
// Bitwise AND of x and y.
I32 Builder::bit_and(I32 x, I32 y) {
    // If allImm() succeeds for both operands, fold the AND now and splat the result.
    int lhs, rhs;
    if (this->allImm(x.id,&lhs, y.id,&rhs)) {
        return this->splat(lhs & rhs);
    }

#if defined(SK_CPU_X86)
    // On x86 builds, when allImm() succeeds for just one operand, push the immediate
    // form of the op.  y is checked before x.
    int mask;
    if (this->allImm(y.id, &mask)) {
        return {this->push(Op::bit_and_imm, x.id,NA,NA, mask)};
    }
    if (this->allImm(x.id, &mask)) {
        return {this->push(Op::bit_and_imm, y.id,NA,NA, mask)};
    }
#endif

    // General case: a two-operand op.
    return {this->push(Op::bit_and, x.id, y.id)};
}
// Bitwise OR of x and y.
I32 Builder::bit_or(I32 x, I32 y) {
    // If allImm() succeeds for both operands, fold the OR now and splat the result.
    int lhs, rhs;
    if (this->allImm(x.id,&lhs, y.id,&rhs)) {
        return this->splat(lhs | rhs);
    }

#if defined(SK_CPU_X86)
    // On x86 builds, when allImm() succeeds for just one operand, push the immediate
    // form of the op.  y is checked before x.
    int bitsToSet;
    if (this->allImm(y.id, &bitsToSet)) {
        return {this->push(Op::bit_or_imm, x.id,NA,NA, bitsToSet)};
    }
    if (this->allImm(x.id, &bitsToSet)) {
        return {this->push(Op::bit_or_imm, y.id,NA,NA, bitsToSet)};
    }
#endif

    // General case: a two-operand op.
    return {this->push(Op::bit_or, x.id, y.id)};
}
// Bitwise XOR of x and y.
I32 Builder::bit_xor(I32 x, I32 y) {
    // If allImm() succeeds for both operands, fold the XOR now and splat the result.
    int lhs, rhs;
    if (this->allImm(x.id,&lhs, y.id,&rhs)) {
        return this->splat(lhs ^ rhs);
    }

#if defined(SK_CPU_X86)
    // On x86 builds, when allImm() succeeds for just one operand, push the immediate
    // form of the op.  y is checked before x.
    int bitsToFlip;
    if (this->allImm(y.id, &bitsToFlip)) {
        return {this->push(Op::bit_xor_imm, x.id,NA,NA, bitsToFlip)};
    }
    if (this->allImm(x.id, &bitsToFlip)) {
        return {this->push(Op::bit_xor_imm, y.id,NA,NA, bitsToFlip)};
    }
#endif

    // General case: a two-operand op.
    return {this->push(Op::bit_xor, x.id, y.id)};
}
// x with every bit that is set in y cleared, i.e. x & ~y.
I32 Builder::bit_clear(I32 x, I32 y) {
    // If allImm() succeeds for both operands, fold the result now and splat it.
    int keep, drop;
    if (this->allImm(x.id,&keep, y.id,&drop)) {
        return this->splat(keep & ~drop);
    }

#if defined(SK_CPU_X86)
    // On x86 builds, when allImm() succeeds for y, invert it here and hand off to
    // bit_and() with the inverted value splatted.
    int bitsToClear;
    if (this->allImm(y.id, &bitsToClear)) {
        I32 inverted = this->splat(~bitsToClear);
        return this->bit_and(x, inverted);
    }
#endif

    // General case: a two-operand op.
    return {this->push(Op::bit_clear, x.id, y.id)};
}
// Choose between y and z according to x.
I32 Builder::select(I32 x, I32 y, I32 z) {
    int cond, ifTrue, ifFalse;
    if (!this->allImm(x.id,&cond, y.id,&ifTrue, z.id,&ifFalse)) {
        // allImm() did not succeed for all three operands: push the op.
        return {this->push(Op::select, x.id, y.id, z.id)};
    }
    // All three are available to allImm(): pick now, treating any non-zero cond as true.
    return this->splat(cond ? ifTrue : ifFalse);
}
// (x >> bits) & z, with the shift performed on the unsigned bit pattern.
I32 Builder::extract(I32 x, int bits, I32 z) {
    int value, mask;
    if (this->allImm(x.id,&value, z.id,&mask)) {
        // allImm() succeeded for both x and z: fold shift-then-mask now.
        const unsigned shifted = unsigned(value) >> bits;
        return this->splat(shifted & mask);
    }
    // Otherwise push the op with x and z as operands and `bits` as an immediate.
    return {this->push(Op::extract, x.id,NA,z.id, bits,0)};
}
I32 Builder::pack(I32 x, I32 y, int bits) {
int X,Y;
if (this->allImm(x.id,&X, y.id,&Y)) { return this->splat(X|(Y<<bits)); }
@ -1773,11 +1795,13 @@ namespace skvm {
CASE(Op::bit_xor ): r(d).i32 = r(x).i32 ^ r(y).i32; break;
CASE(Op::bit_clear): r(d).i32 = r(x).i32 & ~r(y).i32; break;
CASE(Op::bit_and_imm): r(d).i32 = r(x).i32 & immy; break;
CASE(Op::bit_or_imm ): r(d).i32 = r(x).i32 | immy; break;
CASE(Op::bit_xor_imm): r(d).i32 = r(x).i32 ^ immy; break;
CASE(Op::select): r(d).i32 = skvx::if_then_else(r(x).i32, r(y).i32, r(z).i32);
break;
CASE(Op::extract): r(d).u32 = (r(x).u32 >> immy) & r(z).u32; break;
CASE(Op::pack): r(d).u32 = r(x).u32 | (r(y).u32 << immz); break;
CASE(Op::bytes): {
@ -2294,11 +2318,6 @@ namespace skvm {
case Op:: gt_f32: a->vcmpltps (dst(), r[y], r[x]); break;
case Op::gte_f32: a->vcmpleps (dst(), r[y], r[x]); break;
case Op::extract: if (immy == 0) { a->vpand (dst(), r[x], r[z]); }
else { a->vpsrld(tmp(), r[x], immy);
a->vpand (dst(), tmp(), r[z]); }
break;
case Op::pack: a->vpslld(tmp(), r[y], immz);
a->vpor (dst(), tmp(), r[x]);
break;
@ -2399,11 +2418,6 @@ namespace skvm {
case Op::eq_i32: a->cmeq4s(dst(), r[x], r[y]); break;
case Op::gt_i32: a->cmgt4s(dst(), r[x], r[y]); break;
case Op::extract: if (immy) { a->ushr4s(tmp(), r[x], immy);
a->and16b(dst(), tmp(), r[z]); }
else { a->and16b(dst(), r[x], r[z]); }
break;
case Op::pack:
if (avail & (1<<r[x])) { set_dst(r[x]); a->sli4s ( r[x], r[y], immz); }
else { a->shl4s (tmp(), r[y], immz);

View File

@ -296,9 +296,12 @@ namespace skvm {
bit_or,
bit_xor,
bit_clear,
select,
bytes, extract, pack,
bit_and_imm,
bit_or_imm,
bit_xor_imm,
select, bytes, pack,
};
using Val = int;
@ -498,7 +501,7 @@ namespace skvm {
// - bytes(x, 0x0404) transforms an RGBA pixel into an A0A0 bit pattern.
I32 bytes (I32 x, int control);
I32 extract(I32 x, int bits, I32 z); // (x >> bits) & z
// (x >> bits) & z, built from shr() followed by bit_and() rather than a dedicated op.
I32 extract(I32 x, int bits, I32 z) {
    I32 shifted = shr(x, bits);
    return bit_and(z, shifted);
}
I32 pack (I32 x, I32 y, int bits); // x | (y << bits), assuming (x & (y << bits)) == 0
// Common idioms used in several places, worth centralizing for consistency.

View File

@ -188,7 +188,7 @@ DEF_TEST(SkVM, r) {
uint32_t src[9];
uint32_t dst[SK_ARRAY_COUNT(src)];
test_jit_and_interpreter(r, std::move(program), [&](const skvm::Program& program) {
test_interpreter_only(r, std::move(program), [&](const skvm::Program& program) {
for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
src[i] = 0xbb007733;
dst[i] = 0xffaaccee;
@ -220,7 +220,7 @@ DEF_TEST(SkVM, r) {
test_8888(SrcoverBuilder_I32{}.done("srcover_i32"));
test_8888(SrcoverBuilder_I32_SWAR{}.done("srcover_i32_SWAR"));
test_jit_and_interpreter(r, SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::G8}.done(),
test_interpreter_only(r, SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::G8}.done(),
[&](const skvm::Program& program) {
uint32_t src[9];
uint8_t dst[SK_ARRAY_COUNT(src)];
@ -387,7 +387,7 @@ DEF_TEST(SkVM_bitops, r) {
b.store32(ptr, x);
}
test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
test_interpreter_only(r, b.done(), [&](const skvm::Program& program) {
int x = 0x42;
program.eval(1, &x);
REPORTER_ASSERT(r, x == 0x7fff'ffff);
@ -472,7 +472,7 @@ DEF_TEST(SkVM_cmp_f32, r) {
b.store32(b.varying<int>(), m);
}
test_jit_and_interpreter(r, b.done(), [&](const skvm::Program& program) {
test_interpreter_only(r, b.done(), [&](const skvm::Program& program) {
float in[] = { 0,1,2,3,4,5,6,7,8,9 };
int out[SK_ARRAY_COUNT(in)];