add 64-bit load/store ops

This adds load/store ops for 64-bit values: two load instructions, load64_lo and load64_hi, return
the low and high 32 bits respectively, and store64 takes both halves.

These are implemented in the interpreter and tested, but not yet JIT'd or hooked
up for loading and storing 64-bit PixelFormats.  Those two CLs should follow
shortly.

Change-Id: I7e5fc3f0ee5a421adc9fb355d0b6b661f424b505
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/303380
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
Mike Klein 2020-07-16 13:03:18 -05:00 committed by Skia Commit-Bot
parent bde4bb7efe
commit 6732da0b21
4 changed files with 127 additions and 44 deletions
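For context, a minimal usage sketch (not part of this CL) of how the new Builder ops compose; the half-swapping program is an illustrative assumption, but the calls mirror the declarations added to SkVM.h and the test added below:

    // Hypothetical sketch: swap the low and high 32-bit halves of every 64-bit lane.
    skvm::Builder b;
    {
        skvm::Arg wide = b.varying<uint64_t>();
        skvm::I32 lo = b.load64_lo(wide),       // low  32 bits of each 64-bit value
                  hi = b.load64_hi(wide);       // high 32 bits of each 64-bit value
        b.store64(wide, /*lo=*/hi, /*hi=*/lo);  // write the two halves back swapped
    }
    skvm::Program p = b.done();  // interpreter-only for now; the JIT bails out on these ops
    // p.eval(n, ptr_to_uint64_buffer);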

src/core/SkVM.cpp

@@ -266,12 +266,15 @@ namespace skvm {
case Op::store8: write(o, op, Arg{immy}, V{x}, fs(id)...); break;
case Op::store16: write(o, op, Arg{immy}, V{x}, fs(id)...); break;
case Op::store32: write(o, op, Arg{immy}, V{x}, fs(id)...); break;
case Op::store64: write(o, op, Arg{immz}, V{x}, V{y}, fs(id)...); break;
case Op::index: write(o, V{id}, "=", op, fs(id)...); break;
case Op::load8: write(o, V{id}, "=", op, Arg{immy}, fs(id)...); break;
case Op::load16: write(o, V{id}, "=", op, Arg{immy}, fs(id)...); break;
case Op::load32: write(o, V{id}, "=", op, Arg{immy}, fs(id)...); break;
case Op::load64_lo: write(o, V{id}, "=", op, Arg{immy}, fs(id)...); break;
case Op::load64_hi: write(o, V{id}, "=", op, Arg{immy}, fs(id)...); break;
case Op::gather8: write(o, V{id}, "=", op, Arg{immy}, Hex{immz}, V{x}, fs(id)...); break;
case Op::gather16: write(o, V{id}, "=", op, Arg{immy}, Hex{immz}, V{x}, fs(id)...); break;
@@ -388,12 +391,15 @@ namespace skvm {
case Op::store8: write(o, op, Arg{immy}, R{x}); break;
case Op::store16: write(o, op, Arg{immy}, R{x}); break;
case Op::store32: write(o, op, Arg{immy}, R{x}); break;
case Op::store64: write(o, op, Arg{immz}, R{x}, R{y}); break;
case Op::index: write(o, R{d}, "=", op); break;
case Op::load8: write(o, R{d}, "=", op, Arg{immy}); break;
case Op::load16: write(o, R{d}, "=", op, Arg{immy}); break;
case Op::load32: write(o, R{d}, "=", op, Arg{immy}); break;
case Op::load64_lo: write(o, R{d}, "=", op, Arg{immy}); break;
case Op::load64_hi: write(o, R{d}, "=", op, Arg{immy}); break;
case Op::gather8: write(o, R{d}, "=", op, Arg{immy}, Hex{immz}, R{x}); break;
case Op::gather16: write(o, R{d}, "=", op, Arg{immy}, Hex{immz}, R{x}); break;
@@ -680,12 +686,17 @@ namespace skvm {
void Builder::store8 (Arg ptr, I32 val) { (void)push(Op::store8 , val.id,NA,NA, ptr.ix); }
void Builder::store16(Arg ptr, I32 val) { (void)push(Op::store16, val.id,NA,NA, ptr.ix); }
void Builder::store32(Arg ptr, I32 val) { (void)push(Op::store32, val.id,NA,NA, ptr.ix); }
void Builder::store64(Arg ptr, I32 lo, I32 hi) {
(void)push(Op::store64, lo.id,hi.id,NA, NA,ptr.ix);
}
I32 Builder::index() { return {this, push(Op::index , NA,NA,NA,0) }; }
I32 Builder::load8 (Arg ptr) { return {this, push(Op::load8 , NA,NA,NA, ptr.ix) }; }
I32 Builder::load16(Arg ptr) { return {this, push(Op::load16, NA,NA,NA, ptr.ix) }; }
I32 Builder::load32(Arg ptr) { return {this, push(Op::load32, NA,NA,NA, ptr.ix) }; }
I32 Builder::load16 (Arg ptr) { return {this, push(Op::load16 , NA,NA,NA, ptr.ix) }; }
I32 Builder::load32 (Arg ptr) { return {this, push(Op::load32 , NA,NA,NA, ptr.ix) }; }
I32 Builder::load64_lo(Arg ptr) { return {this, push(Op::load64_lo, NA,NA,NA, ptr.ix) }; }
I32 Builder::load64_hi(Arg ptr) { return {this, push(Op::load64_hi, NA,NA,NA, ptr.ix) }; }
I32 Builder::gather8 (Arg ptr, int offset, I32 index) {
return {this, push(Op::gather8 , index.id,NA,NA, ptr.ix,offset)};
@@ -3305,6 +3316,12 @@ namespace skvm {
(void)constants[immy];
break;
case Op::load64_lo:
case Op::load64_hi:
case Op::store64:
// TODO
return false;
#if defined(__x86_64__) || defined(_M_X64)
case Op::assert_true: {
a->vptest (r(x), &constants[0xffffffff]);

src/core/SkVM.h

@@ -366,12 +366,12 @@ namespace skvm {
int disp19(Label*);
};
// Order matters a little: Ops <=store32 are treated as having side effects.
// Order matters a little: Ops <=store64 are treated as having side effects.
#define SKVM_OPS(M) \
M(assert_true) \
M(store8) M(store16) M(store32) \
M(store8) M(store16) M(store32) M(store64) \
M(index) \
M(load8) M(load16) M(load32) \
M(load8) M(load16) M(load32) M(load64_lo) M(load64_hi) \
M(gather8) M(gather16) M(gather32) \
M(uniform8) M(uniform16) M(uniform32) \
M(splat) \
@@ -405,7 +405,7 @@ namespace skvm {
};
static inline bool has_side_effect(Op op) {
return op <= Op::store32;
return op <= Op::store64;
}
static inline bool is_always_varying(Op op) {
return op <= Op::gather32 && op != Op::assert_true;
@@ -573,6 +573,7 @@ namespace skvm {
void store16(Arg ptr, I32 val);
void store32(Arg ptr, I32 val);
void storeF (Arg ptr, F32 val) { store32(ptr, bit_cast(val)); }
void store64(Arg ptr, I32 lo, I32 hi);
// Returns varying {n, n-1, n-2, ..., 1}, where n is the argument to Program::eval().
I32 index();
@@ -582,6 +583,8 @@ namespace skvm {
I32 load16(Arg ptr);
I32 load32(Arg ptr);
F32 loadF (Arg ptr) { return bit_cast(load32(ptr)); }
I32 load64_lo(Arg ptr);
I32 load64_hi(Arg ptr);
// Load u8,u16,i32 uniform with byte-count offset.
I32 uniform8 (Arg ptr, int offset);
@@ -968,6 +971,7 @@ namespace skvm {
static inline void store16(Arg ptr, I32 val) { val->store16(ptr, val); }
static inline void store32(Arg ptr, I32 val) { val->store32(ptr, val); }
static inline void storeF (Arg ptr, F32 val) { val->storeF (ptr, val); }
static inline void store64(Arg ptr, I32 lo, I32 hi) { lo ->store64(ptr, lo,hi); }
static inline I32 gather8 (Arg ptr, int off, I32 ix) { return ix->gather8 (ptr, off, ix); }
static inline I32 gather16(Arg ptr, int off, I32 ix) { return ix->gather16(ptr, off, ix); }

src/opts/SkVM_opts.h

@@ -24,6 +24,7 @@ namespace SK_OPTS_NS {
#endif
using I32 = skvx::Vec<K, int>;
using F32 = skvx::Vec<K, float>;
using U64 = skvx::Vec<K, uint64_t>;
using U32 = skvx::Vec<K, uint32_t>;
using U16 = skvx::Vec<K, uint16_t>;
using U8 = skvx::Vec<K, uint8_t>;
@@ -86,18 +87,31 @@ namespace SK_OPTS_NS {
STRIDE_1(Op::store8 ): memcpy(args[immy], &r[x].i32, 1); break;
STRIDE_1(Op::store16): memcpy(args[immy], &r[x].i32, 2); break;
STRIDE_1(Op::store32): memcpy(args[immy], &r[x].i32, 4); break;
STRIDE_1(Op::store64): memcpy((char*)args[immz]+0, &r[x].i32, 4);
memcpy((char*)args[immz]+4, &r[y].i32, 4); break;
STRIDE_K(Op::store8 ): skvx::cast<uint8_t> (r[x].i32).store(args[immy]); break;
STRIDE_K(Op::store16): skvx::cast<uint16_t>(r[x].i32).store(args[immy]); break;
STRIDE_K(Op::store32): (r[x].i32).store(args[immy]); break;
STRIDE_K(Op::store64): (skvx::cast<uint64_t>(r[x].u32) << 0 |
skvx::cast<uint64_t>(r[y].u32) << 32).store(args[immz]);
break;
STRIDE_1(Op::load8 ): r[d].i32 = 0; memcpy(&r[d].i32, args[immy], 1); break;
STRIDE_1(Op::load16): r[d].i32 = 0; memcpy(&r[d].i32, args[immy], 2); break;
STRIDE_1(Op::load32): r[d].i32 = 0; memcpy(&r[d].i32, args[immy], 4); break;
STRIDE_1(Op::load64_lo):
r[d].i32 = 0; memcpy(&r[d].i32, (char*)args[immy] + 0, 4); break;
STRIDE_1(Op::load64_hi):
r[d].i32 = 0; memcpy(&r[d].i32, (char*)args[immy] + 4, 4); break;
STRIDE_K(Op::load8 ): r[d].i32= skvx::cast<int>(U8 ::Load(args[immy])); break;
STRIDE_K(Op::load16): r[d].i32= skvx::cast<int>(U16::Load(args[immy])); break;
STRIDE_K(Op::load32): r[d].i32= I32::Load(args[immy]) ; break;
STRIDE_K(Op::load64_lo):
r[d].i32 = skvx::cast<int>(U64::Load(args[immy]) & 0xffff'ffff); break;
STRIDE_K(Op::load64_hi):
r[d].i32 = skvx::cast<int>(U64::Load(args[immy]) >> 32); break;
// The pointer we base our gather on is loaded indirectly from a uniform:
// - args[immy] is the uniform holding our gather base pointer somewhere;

tests/SkVMTest.cpp

@@ -2212,3 +2212,51 @@ DEF_TEST(SkVM_halfs, r) {
});
}
}
DEF_TEST(SkVM_64bit, r) {
uint32_t lo[65],
hi[65];
uint64_t wide[65];
for (int i = 0; i < 65; i++) {
lo[i] = 2*i+0;
hi[i] = 2*i+1;
wide[i] = ((uint64_t)lo[i] << 0)
| ((uint64_t)hi[i] << 32);
}
{
skvm::Builder b;
{
skvm::Arg wide = b.varying<uint64_t>(),
lo = b.varying<int>(),
hi = b.varying<int>();
b.store32(lo, b.load64_lo(wide));
b.store32(hi, b.load64_hi(wide));
}
test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
uint32_t l[65], h[65];
program.eval(65, wide,l,h);
for (int i = 0; i < 65; i++) {
REPORTER_ASSERT(r, l[i] == lo[i]);
REPORTER_ASSERT(r, h[i] == hi[i]);
}
});
}
{
skvm::Builder b;
{
skvm::Arg wide = b.varying<uint64_t>(),
lo = b.varying<int>(),
hi = b.varying<int>();
b.store64(wide, b.load32(lo), b.load32(hi));
}
test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
uint64_t w[65];
program.eval(65, w,lo,hi);
for (int i = 0; i < 65; i++) {
REPORTER_ASSERT(r, w[i] == wide[i]);
}
});
}
}