5b952d2cbf
We were passing arrays by value, which the compiler fails to optimize, causing abysmal performance. To fix this, we need to consider that descriptors can be in the constant or const device address spaces. Also, lone descriptors are passed by value, so we explicitly remove address space qualifiers. One failure case is when a shader passes a texture/sampler array as an argument. It's all UniformConstant in SPIR-V, but in MSL it might be thread, const device or constant, so that won't work. Global variable use works fine, though, and that should cover 99.9999999% of use cases.
23 lines
668 B
Plaintext
23 lines
668 B
Plaintext
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
|
|
|
#include <metal_stdlib>
|
|
#include <simd/simd.h>
|
|
|
|
using namespace metal;
|
|
|
|
static inline __attribute__((always_inline))
|
|
void _main(thread const uint3& id, texture2d<float, access::read_write> TargetTexture)
|
|
{
|
|
float2 loaded = TargetTexture.read(uint2(id.xy)).xy;
|
|
float2 storeTemp = loaded + float2(1.0);
|
|
TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u))));
|
|
}
|
|
|
|
kernel void main0(texture2d<float, access::read_write> TargetTexture [[texture(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
|
{
|
|
uint3 id = gl_WorkGroupID;
|
|
uint3 param = id;
|
|
_main(param, TargetTexture);
|
|
}
|
|
|