5b952d2cbf
We were passing arrays by value, which the compiler fails to optimize, causing abysmal performance. To fix this, we need to consider that descriptors can be in constant or const device address spaces. Also, lone descriptors are passed by value, so we explicitly remove address space qualifiers. One failure case is when a shader passes a texture/sampler array as an argument. It's all UniformConstant in SPIR-V, but in MSL it might be thread, const device or constant, so that won't work ... Global variable use works fine though, and that should cover 99.9999999% of use cases.
51 lines
1.3 KiB
GLSL
51 lines
1.3 KiB
GLSL
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
|
|
|
#include <metal_stdlib>
|
|
#include <simd/simd.h>
|
|
|
|
using namespace metal;
|
|
|
|
// Layout of the buffers that main0 binds through `device` pointers and that
// doWork writes to.
struct storage_block {
    uint4 baz;  // assigned by doWork from constant_block::foo
    int2 quux;  // assigned by doWork from a texel read
};
|
|
|
|
// Layout of the buffers that main0 binds through `constant` pointers and that
// doWork only reads from.
struct constant_block {
    float4 foo;  // converted to uint4 and stored into storage_block::baz
    int bar;     // used for both components of the texel coordinate in doWork
};
|
|
|
|
// SPIRV_CROSS_CONSTANT_ID_0 falls back to 3 unless it was already defined
// before this point; arraySize then takes whatever value the macro expands to.
#if !defined(SPIRV_CROSS_CONSTANT_ID_0)
#define SPIRV_CROSS_CONSTANT_ID_0 3
#endif
constant int arraySize = SPIRV_CROSS_CONSTANT_ID_0;
|
|
|
|
// Helper invoked by main0. All three resource arrays are taken by reference,
// so the call does not copy them.
//
//   storage   - two device buffer pointers; elements [0] and [1] are written.
//   constants - four constant buffer pointers; elements [3] and [1] are read.
//   images    - three integer 2D textures; element [2] is read.
static inline __attribute__((always_inline))
void doWork(device storage_block* (&storage)[2], constant constant_block* (&constants)[4], thread const array<texture2d<int>, 3>& images)
{
    // storage[0].baz <- constants[3].foo, converted component-wise to unsigned.
    const float4 fooValue = constants[3]->foo;
    storage[0]->baz = uint4(fooValue);

    // storage[1].quux <- .xy of the texel of images[2] at (bar, bar), where
    // bar comes from constants[1].
    const int barValue = constants[1]->bar;
    const uint2 texelCoord = uint2(int2(barValue));
    const int4 texel = images[2].read(texelCoord);
    storage[1]->quux = texel.xy;
}
|
|
|
|
// Vertex entry point. The buffers arrive as individual pointer arguments
// (buffer slots 0-1 for storage, 2-5 for constants) and the textures as one
// array starting at texture slot 0. The pointers are packed into local arrays
// so the whole set can be handed to doWork by reference.
vertex void main0(device storage_block* storage_0 [[buffer(0)]], device storage_block* storage_1 [[buffer(1)]], constant constant_block* constants_0 [[buffer(2)]], constant constant_block* constants_1 [[buffer(3)]], constant constant_block* constants_2 [[buffer(4)]], constant constant_block* constants_3 [[buffer(5)]], array<texture2d<int>, 3> images [[texture(0)]])
{
    // Extents are spelled out to match the array-reference parameters of doWork.
    device storage_block* storage[2] = { storage_0, storage_1 };
    constant constant_block* constants[4] = { constants_0, constants_1, constants_2, constants_3 };

    doWork(storage, constants, images);
}
|
|
|