add 64-bit load/store ops
This adds load/store ops for 64-bit values: two load instructions, load64_lo and load64_hi, return the low and high 32 bits respectively, and store64 takes both halves. These are implemented in the interpreter and tested, but are not yet JIT'd or hooked up for loading and storing 64-bit PixelFormats. Hopefully those two CLs will follow shortly. Change-Id: I7e5fc3f0ee5a421adc9fb355d0b6b661f424b505 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/303380 Reviewed-by: Herb Derby <herb@google.com> Commit-Queue: Mike Klein <mtklein@google.com>
This commit is contained in:
parent
bde4bb7efe
commit
6732da0b21
@ -266,12 +266,15 @@ namespace skvm {
|
||||
case Op::store8: write(o, op, Arg{immy}, V{x}, fs(id)...); break;
|
||||
case Op::store16: write(o, op, Arg{immy}, V{x}, fs(id)...); break;
|
||||
case Op::store32: write(o, op, Arg{immy}, V{x}, fs(id)...); break;
|
||||
case Op::store64: write(o, op, Arg{immz}, V{x}, V{y}, fs(id)...); break;
|
||||
|
||||
case Op::index: write(o, V{id}, "=", op, fs(id)...); break;
|
||||
|
||||
case Op::load8: write(o, V{id}, "=", op, Arg{immy}, fs(id)...); break;
|
||||
case Op::load16: write(o, V{id}, "=", op, Arg{immy}, fs(id)...); break;
|
||||
case Op::load32: write(o, V{id}, "=", op, Arg{immy}, fs(id)...); break;
|
||||
case Op::load8: write(o, V{id}, "=", op, Arg{immy}, fs(id)...); break;
|
||||
case Op::load16: write(o, V{id}, "=", op, Arg{immy}, fs(id)...); break;
|
||||
case Op::load32: write(o, V{id}, "=", op, Arg{immy}, fs(id)...); break;
|
||||
case Op::load64_lo: write(o, V{id}, "=", op, Arg{immy}, fs(id)...); break;
|
||||
case Op::load64_hi: write(o, V{id}, "=", op, Arg{immy}, fs(id)...); break;
|
||||
|
||||
case Op::gather8: write(o, V{id}, "=", op, Arg{immy}, Hex{immz}, V{x}, fs(id)...); break;
|
||||
case Op::gather16: write(o, V{id}, "=", op, Arg{immy}, Hex{immz}, V{x}, fs(id)...); break;
|
||||
@ -388,12 +391,15 @@ namespace skvm {
|
||||
case Op::store8: write(o, op, Arg{immy}, R{x}); break;
|
||||
case Op::store16: write(o, op, Arg{immy}, R{x}); break;
|
||||
case Op::store32: write(o, op, Arg{immy}, R{x}); break;
|
||||
case Op::store64: write(o, op, Arg{immz}, R{x}, R{y}); break;
|
||||
|
||||
case Op::index: write(o, R{d}, "=", op); break;
|
||||
|
||||
case Op::load8: write(o, R{d}, "=", op, Arg{immy}); break;
|
||||
case Op::load16: write(o, R{d}, "=", op, Arg{immy}); break;
|
||||
case Op::load32: write(o, R{d}, "=", op, Arg{immy}); break;
|
||||
case Op::load8: write(o, R{d}, "=", op, Arg{immy}); break;
|
||||
case Op::load16: write(o, R{d}, "=", op, Arg{immy}); break;
|
||||
case Op::load32: write(o, R{d}, "=", op, Arg{immy}); break;
|
||||
case Op::load64_lo: write(o, R{d}, "=", op, Arg{immy}); break;
|
||||
case Op::load64_hi: write(o, R{d}, "=", op, Arg{immy}); break;
|
||||
|
||||
case Op::gather8: write(o, R{d}, "=", op, Arg{immy}, Hex{immz}, R{x}); break;
|
||||
case Op::gather16: write(o, R{d}, "=", op, Arg{immy}, Hex{immz}, R{x}); break;
|
||||
@ -680,12 +686,17 @@ namespace skvm {
|
||||
void Builder::store8 (Arg ptr, I32 val) { (void)push(Op::store8 , val.id,NA,NA, ptr.ix); }
|
||||
void Builder::store16(Arg ptr, I32 val) { (void)push(Op::store16, val.id,NA,NA, ptr.ix); }
|
||||
void Builder::store32(Arg ptr, I32 val) { (void)push(Op::store32, val.id,NA,NA, ptr.ix); }
|
||||
// Appends an Op::store64 instruction that stores a 64-bit value assembled from
// two 32-bit halves.
//   ptr: the varying argument to store through.
//   lo:  value passed as the first operand (x).
//   hi:  value passed as the second operand (y).
// Unlike store8/16/32 in this file, which pass ptr.ix in the first immediate
// slot, store64 leaves that slot NA and passes ptr.ix in the second one.
// The result of push() is deliberately discarded.
void Builder::store64(Arg ptr, I32 lo, I32 hi) {
|
||||
(void)push(Op::store64, lo.id,hi.id,NA, NA,ptr.ix);
|
||||
}
|
||||
|
||||
I32 Builder::index() { return {this, push(Op::index , NA,NA,NA,0) }; }
|
||||
|
||||
I32 Builder::load8 (Arg ptr) { return {this, push(Op::load8 , NA,NA,NA, ptr.ix) }; }
|
||||
I32 Builder::load16(Arg ptr) { return {this, push(Op::load16, NA,NA,NA, ptr.ix) }; }
|
||||
I32 Builder::load32(Arg ptr) { return {this, push(Op::load32, NA,NA,NA, ptr.ix) }; }
|
||||
I32 Builder::load8 (Arg ptr) { return {this, push(Op::load8 , NA,NA,NA, ptr.ix) }; }
|
||||
I32 Builder::load16 (Arg ptr) { return {this, push(Op::load16 , NA,NA,NA, ptr.ix) }; }
|
||||
I32 Builder::load32 (Arg ptr) { return {this, push(Op::load32 , NA,NA,NA, ptr.ix) }; }
|
||||
I32 Builder::load64_lo(Arg ptr) { return {this, push(Op::load64_lo, NA,NA,NA, ptr.ix) }; }
|
||||
I32 Builder::load64_hi(Arg ptr) { return {this, push(Op::load64_hi, NA,NA,NA, ptr.ix) }; }
|
||||
|
||||
I32 Builder::gather8 (Arg ptr, int offset, I32 index) {
|
||||
return {this, push(Op::gather8 , index.id,NA,NA, ptr.ix,offset)};
|
||||
@ -3305,6 +3316,12 @@ namespace skvm {
|
||||
(void)constants[immy];
|
||||
break;
|
||||
|
||||
case Op::load64_lo:
|
||||
case Op::load64_hi:
|
||||
case Op::store64:
|
||||
// TODO: JIT load64_lo, load64_hi and store64 (interpreter-only for now).
|
||||
return false;
|
||||
|
||||
#if defined(__x86_64__) || defined(_M_X64)
|
||||
case Op::assert_true: {
|
||||
a->vptest (r(x), &constants[0xffffffff]);
|
||||
|
@ -366,36 +366,36 @@ namespace skvm {
|
||||
int disp19(Label*);
|
||||
};
|
||||
|
||||
// Order matters a little: Ops <=store32 are treated as having side effects.
|
||||
#define SKVM_OPS(M) \
|
||||
M(assert_true) \
|
||||
M(store8) M(store16) M(store32) \
|
||||
M(index) \
|
||||
M(load8) M(load16) M(load32) \
|
||||
M(gather8) M(gather16) M(gather32) \
|
||||
M(uniform8) M(uniform16) M(uniform32) \
|
||||
M(splat) \
|
||||
M(add_f32) M(add_i32) \
|
||||
M(sub_f32) M(sub_i32) \
|
||||
M(mul_f32) M(mul_i32) \
|
||||
M(div_f32) \
|
||||
M(min_f32) \
|
||||
M(max_f32) \
|
||||
M(fma_f32) M(fms_f32) M(fnma_f32) \
|
||||
M(sqrt_f32) \
|
||||
M(shl_i32) M(shr_i32) M(sra_i32) \
|
||||
M(ceil) M(floor) \
|
||||
M(trunc) M(round) M(to_half) M(from_half) \
|
||||
M(to_f32) \
|
||||
M( eq_f32) M( eq_i32) \
|
||||
M(neq_f32) \
|
||||
M( gt_f32) M( gt_i32) \
|
||||
M(gte_f32) \
|
||||
M(bit_and) \
|
||||
M(bit_or) \
|
||||
M(bit_xor) \
|
||||
M(bit_clear) \
|
||||
M(select) M(pack) \
|
||||
// Order matters a little: Ops <=store64 are treated as having side effects.
|
||||
#define SKVM_OPS(M) \
|
||||
M(assert_true) \
|
||||
M(store8) M(store16) M(store32) M(store64) \
|
||||
M(index) \
|
||||
M(load8) M(load16) M(load32) M(load64_lo) M(load64_hi) \
|
||||
M(gather8) M(gather16) M(gather32) \
|
||||
M(uniform8) M(uniform16) M(uniform32) \
|
||||
M(splat) \
|
||||
M(add_f32) M(add_i32) \
|
||||
M(sub_f32) M(sub_i32) \
|
||||
M(mul_f32) M(mul_i32) \
|
||||
M(div_f32) \
|
||||
M(min_f32) \
|
||||
M(max_f32) \
|
||||
M(fma_f32) M(fms_f32) M(fnma_f32) \
|
||||
M(sqrt_f32) \
|
||||
M(shl_i32) M(shr_i32) M(sra_i32) \
|
||||
M(ceil) M(floor) \
|
||||
M(trunc) M(round) M(to_half) M(from_half) \
|
||||
M(to_f32) \
|
||||
M( eq_f32) M( eq_i32) \
|
||||
M(neq_f32) \
|
||||
M( gt_f32) M( gt_i32) \
|
||||
M(gte_f32) \
|
||||
M(bit_and) \
|
||||
M(bit_or) \
|
||||
M(bit_xor) \
|
||||
M(bit_clear) \
|
||||
M(select) M(pack) \
|
||||
// End of SKVM_OPS
|
||||
|
||||
enum class Op : int {
|
||||
@ -405,7 +405,7 @@ namespace skvm {
|
||||
};
|
||||
|
||||
static inline bool has_side_effect(Op op) {
|
||||
return op <= Op::store32;
|
||||
return op <= Op::store64;
|
||||
}
|
||||
static inline bool is_always_varying(Op op) {
|
||||
return op <= Op::gather32 && op != Op::assert_true;
|
||||
@ -573,6 +573,7 @@ namespace skvm {
|
||||
void store16(Arg ptr, I32 val);
|
||||
void store32(Arg ptr, I32 val);
|
||||
void storeF (Arg ptr, F32 val) { store32(ptr, bit_cast(val)); }
|
||||
void store64(Arg ptr, I32 lo, I32 hi);
|
||||
|
||||
// Returns varying {n, n-1, n-2, ..., 1}, where n is the argument to Program::eval().
|
||||
I32 index();
|
||||
@ -582,6 +583,8 @@ namespace skvm {
|
||||
I32 load16(Arg ptr);
|
||||
I32 load32(Arg ptr);
|
||||
F32 loadF (Arg ptr) { return bit_cast(load32(ptr)); }
|
||||
I32 load64_lo(Arg ptr);
|
||||
I32 load64_hi(Arg ptr);
|
||||
|
||||
// Load u8,u16,i32 uniform with byte-count offset.
|
||||
I32 uniform8 (Arg ptr, int offset);
|
||||
@ -964,10 +967,11 @@ namespace skvm {
|
||||
static inline void assert_true(I32 cond, F32 debug) { cond->assert_true(cond,debug); }
|
||||
static inline void assert_true(I32 cond) { cond->assert_true(cond); }
|
||||
|
||||
static inline void store8 (Arg ptr, I32 val) { val->store8 (ptr, val); }
|
||||
static inline void store16(Arg ptr, I32 val) { val->store16(ptr, val); }
|
||||
static inline void store32(Arg ptr, I32 val) { val->store32(ptr, val); }
|
||||
static inline void storeF (Arg ptr, F32 val) { val->storeF (ptr, val); }
|
||||
static inline void store8 (Arg ptr, I32 val) { val->store8 (ptr, val); }
|
||||
static inline void store16(Arg ptr, I32 val) { val->store16(ptr, val); }
|
||||
static inline void store32(Arg ptr, I32 val) { val->store32(ptr, val); }
|
||||
static inline void storeF (Arg ptr, F32 val) { val->storeF (ptr, val); }
|
||||
static inline void store64(Arg ptr, I32 lo, I32 hi) { lo ->store64(ptr, lo,hi); }
|
||||
|
||||
static inline I32 gather8 (Arg ptr, int off, I32 ix) { return ix->gather8 (ptr, off, ix); }
|
||||
static inline I32 gather16(Arg ptr, int off, I32 ix) { return ix->gather16(ptr, off, ix); }
|
||||
|
@ -24,6 +24,7 @@ namespace SK_OPTS_NS {
|
||||
#endif
|
||||
using I32 = skvx::Vec<K, int>;
|
||||
using F32 = skvx::Vec<K, float>;
|
||||
using U64 = skvx::Vec<K, uint64_t>;
|
||||
using U32 = skvx::Vec<K, uint32_t>;
|
||||
using U16 = skvx::Vec<K, uint16_t>;
|
||||
using U8 = skvx::Vec<K, uint8_t>;
|
||||
@ -86,18 +87,31 @@ namespace SK_OPTS_NS {
|
||||
STRIDE_1(Op::store8 ): memcpy(args[immy], &r[x].i32, 1); break;
|
||||
STRIDE_1(Op::store16): memcpy(args[immy], &r[x].i32, 2); break;
|
||||
STRIDE_1(Op::store32): memcpy(args[immy], &r[x].i32, 4); break;
|
||||
STRIDE_1(Op::store64): memcpy((char*)args[immz]+0, &r[x].i32, 4);
|
||||
memcpy((char*)args[immz]+4, &r[y].i32, 4); break;
|
||||
|
||||
STRIDE_K(Op::store8 ): skvx::cast<uint8_t> (r[x].i32).store(args[immy]); break;
|
||||
STRIDE_K(Op::store16): skvx::cast<uint16_t>(r[x].i32).store(args[immy]); break;
|
||||
STRIDE_K(Op::store32): (r[x].i32).store(args[immy]); break;
|
||||
STRIDE_K(Op::store64): (skvx::cast<uint64_t>(r[x].u32) << 0 |
|
||||
skvx::cast<uint64_t>(r[y].u32) << 32).store(args[immz]);
|
||||
break;
|
||||
|
||||
STRIDE_1(Op::load8 ): r[d].i32 = 0; memcpy(&r[d].i32, args[immy], 1); break;
|
||||
STRIDE_1(Op::load16): r[d].i32 = 0; memcpy(&r[d].i32, args[immy], 2); break;
|
||||
STRIDE_1(Op::load32): r[d].i32 = 0; memcpy(&r[d].i32, args[immy], 4); break;
|
||||
STRIDE_1(Op::load64_lo):
|
||||
r[d].i32 = 0; memcpy(&r[d].i32, (char*)args[immy] + 0, 4); break;
|
||||
STRIDE_1(Op::load64_hi):
|
||||
r[d].i32 = 0; memcpy(&r[d].i32, (char*)args[immy] + 4, 4); break;
|
||||
|
||||
STRIDE_K(Op::load8 ): r[d].i32= skvx::cast<int>(U8 ::Load(args[immy])); break;
|
||||
STRIDE_K(Op::load16): r[d].i32= skvx::cast<int>(U16::Load(args[immy])); break;
|
||||
STRIDE_K(Op::load32): r[d].i32= I32::Load(args[immy]) ; break;
|
||||
STRIDE_K(Op::load64_lo):
|
||||
r[d].i32 = skvx::cast<int>(U64::Load(args[immy]) & 0xffff'ffff); break;
|
||||
STRIDE_K(Op::load64_hi):
|
||||
r[d].i32 = skvx::cast<int>(U64::Load(args[immy]) >> 32); break;
|
||||
|
||||
// The pointer we base our gather on is loaded indirectly from a uniform:
|
||||
// - args[immy] is the uniform holding our gather base pointer somewhere;
|
||||
|
@ -2212,3 +2212,51 @@ DEF_TEST(SkVM_halfs, r) {
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Exercises the 64-bit load/store ops in both directions:
//   1) load64_lo / load64_hi split each 64-bit input into two 32-bit outputs;
//   2) store64 combines two 32-bit inputs into one 64-bit output.
// Reference data is built so that wide[i] == lo[i] | (hi[i] << 32).
DEF_TEST(SkVM_64bit, r) {
|
||||
uint32_t lo[65],
|
||||
hi[65];
|
||||
uint64_t wide[65];
|
||||
// NOTE(review): 65 elements is presumably chosen so the count is not a
// multiple of the vector width, covering both the vectorized body and the
// one-at-a-time tail — confirm.
for (int i = 0; i < 65; i++) {
|
||||
// lo gets the even values and hi the odd ones, so the two halves of every
// element differ and a lo/hi swap would be detected.
lo[i] = 2*i+0;
|
||||
hi[i] = 2*i+1;
|
||||
wide[i] = ((uint64_t)lo[i] << 0)
|
||||
| ((uint64_t)hi[i] << 32);
|
||||
}
|
||||
|
||||
// Part 1: 64-bit loads, split into low and high 32-bit stores.
{
|
||||
skvm::Builder b;
|
||||
// Inner scope: these Arg names shadow the arrays above only while the
// program is being built; the lambda below sees the arrays again.
{
|
||||
skvm::Arg wide = b.varying<uint64_t>(),
|
||||
lo = b.varying<int>(),
|
||||
hi = b.varying<int>();
|
||||
b.store32(lo, b.load64_lo(wide));
|
||||
b.store32(hi, b.load64_hi(wide));
|
||||
}
|
||||
// NOTE(review): test_jit_and_interpreter is defined outside this view;
// presumably it invokes the callback for each available backend — confirm.
test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
|
||||
// Fresh output buffers; `wide` here is the uint64_t reference array.
uint32_t l[65], h[65];
|
||||
program.eval(65, wide,l,h);
|
||||
for (int i = 0; i < 65; i++) {
|
||||
REPORTER_ASSERT(r, l[i] == lo[i]);
|
||||
REPORTER_ASSERT(r, h[i] == hi[i]);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Part 2: two 32-bit loads combined into one 64-bit store.
{
|
||||
skvm::Builder b;
|
||||
// Same shadowing arrangement as in part 1.
{
|
||||
skvm::Arg wide = b.varying<uint64_t>(),
|
||||
lo = b.varying<int>(),
|
||||
hi = b.varying<int>();
|
||||
b.store64(wide, b.load32(lo), b.load32(hi));
|
||||
}
|
||||
test_jit_and_interpreter(b.done(), [&](const skvm::Program& program){
|
||||
// Fresh output buffer; `lo` and `hi` here are the uint32_t reference arrays.
uint64_t w[65];
|
||||
program.eval(65, w,lo,hi);
|
||||
for (int i = 0; i < 65; i++) {
|
||||
REPORTER_ASSERT(r, w[i] == wide[i]);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user