remove intermediate lambdas in skvm interpreter
I was poking through the skvm interpreter and happened to be in Debug -O1 mode, and noticed that the assembly was filled with the asserts that these lambdas exist to introduce. First, I noticed they could be cut down to one condition to check, SkASSERT((unsigned)thing < (unsigned)limit); but then I realized, hey, this is what ASAN is for... just drop them! This cleans up the assembly and emphasizes the more important asserts.

Change-Id: I1e0e69035775887bbf6fb62ca78b4a5721c24504
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/302032
Reviewed-by: Brian Osman <brianosman@google.com>
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
This commit is contained in:
parent 8c7c709aaa
commit 4284f75ed7
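The bounds-check collapsing the message mentions relies on unsigned wraparound: a negative index cast to unsigned becomes enormous, so one comparison covers both ends of the range. A minimal sketch of the idea, using a hypothetical check_register helper rather than Skia's SkASSERT:

    #include <cassert>

    // Hypothetical helper illustrating the trick; not Skia code.
    // A negative id cast to unsigned wraps to at least 2^31, while a
    // nonnegative int nregs stays below 2^31 as unsigned, so this single
    // comparison is equivalent to: assert(0 <= id && id < nregs);
    inline void check_register(int id, int nregs) {
        assert((unsigned)id < (unsigned)nregs);
    }

    int main() {
        check_register(3, 16);       // in range: passes
        // check_register(-1, 16);   // would fire: (unsigned)-1 wraps huge
        // check_register(16, 16);   // would fire: one past the end
    }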
@@ -37,7 +37,7 @@ namespace SK_OPTS_NS {
     Slot few_regs[16];
     std::unique_ptr<char[]> many_regs;

-    Slot* regs = few_regs;
+    Slot* r = few_regs;

     if (nregs > (int)SK_ARRAY_COUNT(few_regs)) {
         // Annoyingly we can't trust that malloc() or new will work with Slot because
@@ -49,19 +49,10 @@ namespace SK_OPTS_NS {
         addr += alignof(Slot) -
                 (addr & (alignof(Slot) - 1));
         SkASSERT((addr & (alignof(Slot) - 1)) == 0);
-        regs = (Slot*)addr;
+        r = (Slot*)addr;
     }

-    auto r = [&](Reg id) -> Slot& {
-        SkASSERT(0 <= id && id < nregs);
-        return regs[id];
-    };
-    auto arg = [&](int ix) {
-        SkASSERT(0 <= ix && ix < nargs);
-        return args[ix];
-    };
-
     // Step each argument pointer ahead by its stride a number of times.
     auto step_args = [&](int times) {
         for (int i = 0; i < nargs; i++) {
@@ -92,64 +83,64 @@ namespace SK_OPTS_NS {

 #define STRIDE_1(op) case 2*(int)op
 #define STRIDE_K(op) case 2*(int)op + 1
-            STRIDE_1(Op::store8 ): memcpy(arg(immy), &r(x).i32, 1); break;
-            STRIDE_1(Op::store16): memcpy(arg(immy), &r(x).i32, 2); break;
-            STRIDE_1(Op::store32): memcpy(arg(immy), &r(x).i32, 4); break;
+            STRIDE_1(Op::store8 ): memcpy(args[immy], &r[x].i32, 1); break;
+            STRIDE_1(Op::store16): memcpy(args[immy], &r[x].i32, 2); break;
+            STRIDE_1(Op::store32): memcpy(args[immy], &r[x].i32, 4); break;

-            STRIDE_K(Op::store8 ): skvx::cast<uint8_t> (r(x).i32).store(arg(immy)); break;
-            STRIDE_K(Op::store16): skvx::cast<uint16_t>(r(x).i32).store(arg(immy)); break;
-            STRIDE_K(Op::store32):                     (r(x).i32).store(arg(immy)); break;
+            STRIDE_K(Op::store8 ): skvx::cast<uint8_t> (r[x].i32).store(args[immy]); break;
+            STRIDE_K(Op::store16): skvx::cast<uint16_t>(r[x].i32).store(args[immy]); break;
+            STRIDE_K(Op::store32):                     (r[x].i32).store(args[immy]); break;

-            STRIDE_1(Op::load8 ): r(d).i32 = 0; memcpy(&r(d).i32, arg(immy), 1); break;
-            STRIDE_1(Op::load16): r(d).i32 = 0; memcpy(&r(d).i32, arg(immy), 2); break;
-            STRIDE_1(Op::load32): r(d).i32 = 0; memcpy(&r(d).i32, arg(immy), 4); break;
+            STRIDE_1(Op::load8 ): r[d].i32 = 0; memcpy(&r[d].i32, args[immy], 1); break;
+            STRIDE_1(Op::load16): r[d].i32 = 0; memcpy(&r[d].i32, args[immy], 2); break;
+            STRIDE_1(Op::load32): r[d].i32 = 0; memcpy(&r[d].i32, args[immy], 4); break;

-            STRIDE_K(Op::load8 ): r(d).i32= skvx::cast<int>(U8 ::Load(arg(immy))); break;
-            STRIDE_K(Op::load16): r(d).i32= skvx::cast<int>(U16::Load(arg(immy))); break;
-            STRIDE_K(Op::load32): r(d).i32=                 I32::Load(arg(immy)) ; break;
+            STRIDE_K(Op::load8 ): r[d].i32= skvx::cast<int>(U8 ::Load(args[immy])); break;
+            STRIDE_K(Op::load16): r[d].i32= skvx::cast<int>(U16::Load(args[immy])); break;
+            STRIDE_K(Op::load32): r[d].i32=                 I32::Load(args[immy]) ; break;

             // The pointer we base our gather on is loaded indirectly from a uniform:
-            //     - arg(immy) is the uniform holding our gather base pointer somewhere;
-            //     - (const uint8_t*)arg(immy) + immz points to the gather base pointer;
+            //     - args[immy] is the uniform holding our gather base pointer somewhere;
+            //     - (const uint8_t*)args[immy] + immz points to the gather base pointer;
             //     - memcpy() loads the gather base and into a pointer of the right type.
             // After all that we have an ordinary (uniform) pointer `ptr` to load from,
-            // and we then gather from it using the varying indices in r(x).
+            // and we then gather from it using the varying indices in r[x].
             STRIDE_1(Op::gather8):
                 for (int i = 0; i < K; i++) {
                     const uint8_t* ptr;
-                    memcpy(&ptr, (const uint8_t*)arg(immy) + immz, sizeof(ptr));
-                    r(d).i32[i] = (i==0) ? ptr[ r(x).i32[i] ] : 0;
+                    memcpy(&ptr, (const uint8_t*)args[immy] + immz, sizeof(ptr));
+                    r[d].i32[i] = (i==0) ? ptr[ r[x].i32[i] ] : 0;
                 } break;
             STRIDE_1(Op::gather16):
                 for (int i = 0; i < K; i++) {
                     const uint16_t* ptr;
-                    memcpy(&ptr, (const uint8_t*)arg(immy) + immz, sizeof(ptr));
-                    r(d).i32[i] = (i==0) ? ptr[ r(x).i32[i] ] : 0;
+                    memcpy(&ptr, (const uint8_t*)args[immy] + immz, sizeof(ptr));
+                    r[d].i32[i] = (i==0) ? ptr[ r[x].i32[i] ] : 0;
                 } break;
             STRIDE_1(Op::gather32):
                 for (int i = 0; i < K; i++) {
                     const int* ptr;
-                    memcpy(&ptr, (const uint8_t*)arg(immy) + immz, sizeof(ptr));
-                    r(d).i32[i] = (i==0) ? ptr[ r(x).i32[i] ] : 0;
+                    memcpy(&ptr, (const uint8_t*)args[immy] + immz, sizeof(ptr));
+                    r[d].i32[i] = (i==0) ? ptr[ r[x].i32[i] ] : 0;
                 } break;

             STRIDE_K(Op::gather8):
                 for (int i = 0; i < K; i++) {
                     const uint8_t* ptr;
-                    memcpy(&ptr, (const uint8_t*)arg(immy) + immz, sizeof(ptr));
-                    r(d).i32[i] = ptr[ r(x).i32[i] ];
+                    memcpy(&ptr, (const uint8_t*)args[immy] + immz, sizeof(ptr));
+                    r[d].i32[i] = ptr[ r[x].i32[i] ];
                 } break;
             STRIDE_K(Op::gather16):
                 for (int i = 0; i < K; i++) {
                     const uint16_t* ptr;
-                    memcpy(&ptr, (const uint8_t*)arg(immy) + immz, sizeof(ptr));
-                    r(d).i32[i] = ptr[ r(x).i32[i] ];
+                    memcpy(&ptr, (const uint8_t*)args[immy] + immz, sizeof(ptr));
+                    r[d].i32[i] = ptr[ r[x].i32[i] ];
                 } break;
             STRIDE_K(Op::gather32):
                 for (int i = 0; i < K; i++) {
                     const int* ptr;
-                    memcpy(&ptr, (const uint8_t*)arg(immy) + immz, sizeof(ptr));
-                    r(d).i32[i] = ptr[ r(x).i32[i] ];
+                    memcpy(&ptr, (const uint8_t*)args[immy] + immz, sizeof(ptr));
+                    r[d].i32[i] = ptr[ r[x].i32[i] ];
                 } break;

 #undef STRIDE_1
@@ -160,13 +151,13 @@ namespace SK_OPTS_NS {

             CASE(Op::assert_true):
             #ifdef SK_DEBUG
-                if (!all(r(x).i32)) {
+                if (!all(r[x].i32)) {
                     SkDebugf("inst %d, register %d\n", i, y);
                     for (int i = 0; i < K; i++) {
-                        SkDebugf("\t%2d: %08x (%g)\n", i, r(y).i32[i], r(y).f32[i]);
+                        SkDebugf("\t%2d: %08x (%g)\n", i, r[y].i32[i], r[y].f32[i]);
                     }
                 }
-                SkASSERT(all(r(x).i32));
+                SkASSERT(all(r[x].i32));
             #endif
                 break;
@@ -175,65 +166,65 @@ namespace SK_OPTS_NS {
                                        16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
                 static_assert(K <= SK_ARRAY_COUNT(iota), "");

-                r(d).i32 = n - I32::Load(iota);
+                r[d].i32 = n - I32::Load(iota);
             } break;

             CASE(Op::uniform8):
-                r(d).i32 = *(const uint8_t* )( (const char*)arg(immy) + immz );
+                r[d].i32 = *(const uint8_t* )( (const char*)args[immy] + immz );
                 break;
             CASE(Op::uniform16):
-                r(d).i32 = *(const uint16_t*)( (const char*)arg(immy) + immz );
+                r[d].i32 = *(const uint16_t*)( (const char*)args[immy] + immz );
                 break;
             CASE(Op::uniform32):
-                r(d).i32 = *(const int*     )( (const char*)arg(immy) + immz );
+                r[d].i32 = *(const int*     )( (const char*)args[immy] + immz );
                 break;

-            CASE(Op::splat): r(d).i32 = immy; break;
+            CASE(Op::splat): r[d].i32 = immy; break;

-            CASE(Op::add_f32): r(d).f32 = r(x).f32 + r(y).f32; break;
-            CASE(Op::sub_f32): r(d).f32 = r(x).f32 - r(y).f32; break;
-            CASE(Op::mul_f32): r(d).f32 = r(x).f32 * r(y).f32; break;
-            CASE(Op::div_f32): r(d).f32 = r(x).f32 / r(y).f32; break;
-            CASE(Op::min_f32): r(d).f32 = min(r(x).f32, r(y).f32); break;
-            CASE(Op::max_f32): r(d).f32 = max(r(x).f32, r(y).f32); break;
+            CASE(Op::add_f32): r[d].f32 = r[x].f32 + r[y].f32; break;
+            CASE(Op::sub_f32): r[d].f32 = r[x].f32 - r[y].f32; break;
+            CASE(Op::mul_f32): r[d].f32 = r[x].f32 * r[y].f32; break;
+            CASE(Op::div_f32): r[d].f32 = r[x].f32 / r[y].f32; break;
+            CASE(Op::min_f32): r[d].f32 = min(r[x].f32, r[y].f32); break;
+            CASE(Op::max_f32): r[d].f32 = max(r[x].f32, r[y].f32); break;

-            CASE(Op::fma_f32):  r(d).f32 = fma( r(x).f32, r(y).f32,  r(z).f32); break;
-            CASE(Op::fms_f32):  r(d).f32 = fma( r(x).f32, r(y).f32, -r(z).f32); break;
-            CASE(Op::fnma_f32): r(d).f32 = fma(-r(x).f32, r(y).f32,  r(z).f32); break;
+            CASE(Op::fma_f32):  r[d].f32 = fma( r[x].f32, r[y].f32,  r[z].f32); break;
+            CASE(Op::fms_f32):  r[d].f32 = fma( r[x].f32, r[y].f32, -r[z].f32); break;
+            CASE(Op::fnma_f32): r[d].f32 = fma(-r[x].f32, r[y].f32,  r[z].f32); break;

-            CASE(Op::sqrt_f32): r(d).f32 = sqrt(r(x).f32); break;
+            CASE(Op::sqrt_f32): r[d].f32 = sqrt(r[x].f32); break;

-            CASE(Op::add_i32): r(d).i32 = r(x).i32 + r(y).i32; break;
-            CASE(Op::sub_i32): r(d).i32 = r(x).i32 - r(y).i32; break;
-            CASE(Op::mul_i32): r(d).i32 = r(x).i32 * r(y).i32; break;
+            CASE(Op::add_i32): r[d].i32 = r[x].i32 + r[y].i32; break;
+            CASE(Op::sub_i32): r[d].i32 = r[x].i32 - r[y].i32; break;
+            CASE(Op::mul_i32): r[d].i32 = r[x].i32 * r[y].i32; break;

-            CASE(Op::shl_i32): r(d).i32 = r(x).i32 << immy; break;
-            CASE(Op::sra_i32): r(d).i32 = r(x).i32 >> immy; break;
-            CASE(Op::shr_i32): r(d).u32 = r(x).u32 >> immy; break;
+            CASE(Op::shl_i32): r[d].i32 = r[x].i32 << immy; break;
+            CASE(Op::sra_i32): r[d].i32 = r[x].i32 >> immy; break;
+            CASE(Op::shr_i32): r[d].u32 = r[x].u32 >> immy; break;

-            CASE(Op:: eq_f32): r(d).i32 = r(x).f32 == r(y).f32; break;
-            CASE(Op::neq_f32): r(d).i32 = r(x).f32 != r(y).f32; break;
-            CASE(Op:: gt_f32): r(d).i32 = r(x).f32 >  r(y).f32; break;
-            CASE(Op::gte_f32): r(d).i32 = r(x).f32 >= r(y).f32; break;
+            CASE(Op:: eq_f32): r[d].i32 = r[x].f32 == r[y].f32; break;
+            CASE(Op::neq_f32): r[d].i32 = r[x].f32 != r[y].f32; break;
+            CASE(Op:: gt_f32): r[d].i32 = r[x].f32 >  r[y].f32; break;
+            CASE(Op::gte_f32): r[d].i32 = r[x].f32 >= r[y].f32; break;

-            CASE(Op:: eq_i32): r(d).i32 = r(x).i32 == r(y).i32; break;
-            CASE(Op:: gt_i32): r(d).i32 = r(x).i32 >  r(y).i32; break;
+            CASE(Op:: eq_i32): r[d].i32 = r[x].i32 == r[y].i32; break;
+            CASE(Op:: gt_i32): r[d].i32 = r[x].i32 >  r[y].i32; break;

-            CASE(Op::bit_and  ): r(d).i32 = r(x).i32 &  r(y).i32; break;
-            CASE(Op::bit_or   ): r(d).i32 = r(x).i32 |  r(y).i32; break;
-            CASE(Op::bit_xor  ): r(d).i32 = r(x).i32 ^  r(y).i32; break;
-            CASE(Op::bit_clear): r(d).i32 = r(x).i32 & ~r(y).i32; break;
+            CASE(Op::bit_and  ): r[d].i32 = r[x].i32 &  r[y].i32; break;
+            CASE(Op::bit_or   ): r[d].i32 = r[x].i32 |  r[y].i32; break;
+            CASE(Op::bit_xor  ): r[d].i32 = r[x].i32 ^  r[y].i32; break;
+            CASE(Op::bit_clear): r[d].i32 = r[x].i32 & ~r[y].i32; break;

-            CASE(Op::select): r(d).i32 = skvx::if_then_else(r(x).i32, r(y).i32, r(z).i32);
+            CASE(Op::select): r[d].i32 = skvx::if_then_else(r[x].i32, r[y].i32, r[z].i32);
                               break;

-            CASE(Op::pack):   r(d).u32 = r(x).u32 | (r(y).u32 << immz); break;
+            CASE(Op::pack):   r[d].u32 = r[x].u32 | (r[y].u32 << immz); break;

-            CASE(Op::ceil):   r(d).f32 = skvx::ceil(r(x).f32)                    ; break;
-            CASE(Op::floor):  r(d).f32 = skvx::floor(r(x).f32)                   ; break;
-            CASE(Op::to_f32): r(d).f32 = skvx::cast<float>( r(x).i32 )           ; break;
-            CASE(Op::trunc):  r(d).i32 = skvx::cast<int>  ( r(x).f32 )           ; break;
-            CASE(Op::round):  r(d).i32 = skvx::cast<int>  (skvx::lrint(r(x).f32)); break;
+            CASE(Op::ceil):   r[d].f32 = skvx::ceil(r[x].f32)                    ; break;
+            CASE(Op::floor):  r[d].f32 = skvx::floor(r[x].f32)                   ; break;
+            CASE(Op::to_f32): r[d].f32 = skvx::cast<float>( r[x].i32 )           ; break;
+            CASE(Op::trunc):  r[d].i32 = skvx::cast<int>  ( r[x].f32 )           ; break;
+            CASE(Op::round):  r[d].i32 = skvx::cast<int>  (skvx::lrint(r[x].f32)); break;
 #undef CASE
         }
     }
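One pattern worth calling out from the gather cases above: the gather base pointer is itself stored inside a uniform buffer, so it is memcpy'd out before any lanes are loaded. A minimal standalone sketch of that indirection, with hypothetical names (gather8_sketch, uniforms) rather than Skia's actual types:

    #include <cstdint>
    #include <cstring>

    // Hypothetical illustration of the uniform-held base pointer pattern.
    // `uniforms + offset` points at bytes that *contain* a pointer; memcpy
    // extracts it safely regardless of alignment, then we index per lane.
    static void gather8_sketch(const char* uniforms, int offset,
                               const int ix[], int dst[], int lanes) {
        const uint8_t* base;
        std::memcpy(&base, uniforms + offset, sizeof(base));
        for (int i = 0; i < lanes; i++) {
            dst[i] = base[ix[i]];   // one independent load per lane
        }
    }

    int main() {
        uint8_t table[4] = {10, 20, 30, 40};
        const uint8_t* ptr = table;
        char uniforms[sizeof(ptr)];
        std::memcpy(uniforms, &ptr, sizeof(ptr));  // the uniform stores the pointer
        int ix[4] = {3, 0, 2, 1}, dst[4];
        gather8_sketch(uniforms, 0, ix, dst, 4);   // dst = {40, 10, 30, 20}
        return 0;
    }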