e5fa7edfd6
Relaxed block layout relaxed the restrictions on vector alignment, allowing vectors to be aligned on scalar boundaries. Scalar block layout relaxes this further, allowing *any* member to be aligned on a scalar boundary. The requirement that a vector not improperly straddle a 16-byte boundary is also relaxed. I've also added a test showing that `std430` layout works with UBOs. I'm troubled by the dual meaning of the `Packed` extended decoration. In some instances (struct, `float[]`, and `vec2[]` members), it actually means the exact opposite: that the member needs extra padding. This is especially problematic for `vec2[]`, because now we need to distinguish the two cases by checking the array stride. I wonder if this should actually be split into two decorations.
157 lines
3.0 KiB
Plaintext
157 lines
3.0 KiB
Plaintext
#include <metal_stdlib>
|
|
#include <simd/simd.h>
|
|
|
|
using namespace metal;
|
|
|
|
// Alias: `packed_float2x3` is simply another name for float3x2. It is the
// declared type of SSBO1::m2 and SSBO1::m6, and main0 converts such a value
// with float2x3(...) before multiplying.
// NOTE(review): the name suggests a 2x3 matrix held in a transposed, packed
// representation -- confirm against the tool that generated this shader.
typedef float3x2 packed_float2x3;
|
|
|
|
// Two-member record: a one-element array of packed_float2 followed by a float.
// Content uses it for both `m0s[1]` and `m0`. In main0, `a[0]` is filled from
// the .xy of the matching S0_1 element and `b` is copied across unchanged.
// NOTE(review): presumably the tightly packed counterpart of S0_1 (whose `a`
// is float4[1]) -- confirm against the shader generator.
struct S0
{
    packed_float2 a[1];
    float b;
};
|
|
|
|
// Two-member record: a packed_float3 followed by a float.
// Content uses it for both `m1s[1]` and `m1`. main0 assigns `a` from a float3
// built out of the matching S1_1 member, and its final statement overwrites
// `content.m1.a` with a matrix-vector product.
struct S1
{
    packed_float3 a;
    float b;
};
|
|
|
|
// Two-member record: a one-element array of packed_float3 followed by a float.
// Content uses it for both `m2s[1]` and `m2`; main0 assigns `a[0]` directly
// from the float3 element of the matching S2_1 value.
struct S2
{
    packed_float3 a[1];
    float b;
};
|
|
|
|
// Two-member record: a packed_float2 followed by a float.
// Used once, as Content::m3; main0 copies both members from the matching
// S3_1 value (whose `a` is a plain float2).
struct S3
{
    packed_float2 a;
    float b;
};
|
|
|
|
// Single-member record wrapping one float2.
// Content holds eight of these in `m3s`; main0 copies `c` element by element
// from the corresponding Content_1::m3s entries.
struct S4
{
    float2 c;
};
|
|
|
|
// Aggregate of the S0..S4 records as stored inside SSBO1.
// Members, in declaration order: one-element arrays of S0/S1/S2, then single
// S0/S1/S2/S3 values, a lone float, and an eight-element array of S4.
// Member order and types determine the buffer layout, so they must not be
// rearranged. Note that `m3` is an S3 while `m3s` is an array of S4.
struct Content
{
    S0 m0s[1];
    S1 m1s[1];
    S2 m2s[1];
    S0 m0;
    S1 m1;
    S2 m2;
    S3 m3;
    float m4;
    S4 m3s[8];
};
|
|
|
|
// Layout of the device buffer that main0 binds at [[buffer(0)]] under the name
// `ssbo_430`; it is the destination of every store in the kernel.
// Holds three Content regions (single, two-element array, single) followed by
// a run of matrices and a trailing one-element float array. `m2` and `m6` use
// the packed_float2x3 alias; no explicit padding members are declared.
// NOTE(review): the binding name suggests a std430-style layout -- confirm
// against the original shader source.
struct SSBO1
{
    Content content;
    Content content1[2];
    Content content2;
    float2x2 m0;
    float2x2 m1;
    packed_float2x3 m2[4];
    float3x2 m3;
    float2x2 m4;
    float2x2 m5[9];
    packed_float2x3 m6[4][2];
    float3x2 m7;
    float array[1];
};
|
|
|
|
// Two-member record: a one-element array of float4 followed by a float.
// Content_1 uses it for both `m0s[1]` and `m0`. main0 reads only the .xy of
// `a[0]` when copying into the matching S0 value.
// NOTE(review): the float4 element presumably stands in for a narrower vector
// with a widened array stride -- confirm against the shader generator.
struct S0_1
{
    float4 a[1];
    float b;
};
|
|
|
|
// Two-member record: a packed_float3 followed by a float -- the same member
// list as S1. Content_1 uses it for both `m1s[1]` and `m1`; main0 wraps `a`
// in float3(...) before storing it into the matching S1 value.
struct S1_1
{
    packed_float3 a;
    float b;
};
|
|
|
|
// Two-member record: a one-element array of float3 followed by a float.
// Content_1 uses it for both `m2s[1]` and `m2`; main0 copies `a[0]` and `b`
// into the matching S2 value (whose `a` is packed_float3[1]).
struct S2_1
{
    float3 a[1];
    float b;
};
|
|
|
|
// Two-member record: a float2 followed by a float.
// Used once, as Content_1::m3; main0 copies both members into the matching
// S3 value (whose `a` is packed_float2).
struct S3_1
{
    float2 a;
    float b;
};
|
|
|
|
// Single-member record wrapping one float2 -- the same member list as S4.
// Content_1 holds eight of these in `m3s`.
// NOTE(review): the declaration of Content_1::m3s carries a generator FIXME
// saying a padded struct is needed there, so this type may be smaller than the
// array stride the buffer actually uses. Layout left untouched; verify against
// the source shader before relying on `m3s[i]` offsets.
struct S4_1
{
    float2 c;
};
|
|
|
|
// Aggregate of the S0_1..S4_1 records as stored inside SSBO0.
// Same member names and order as Content, except that an explicit 12-byte
// char array (`_m8_pad`) is inserted between `m4` and `m3s`.
// Member order, types and the padding array determine the buffer layout, so
// they must not be rearranged.
struct Content_1
{
    S0_1 m0s[1];
    S1_1 m1s[1];
    S2_1 m2s[1];
    S0_1 m0;
    S1_1 m1;
    S2_1 m2;
    S3_1 m3;
    float m4;
    // Explicit filler bytes ahead of the S4_1 array.
    char _m8_pad[12];
    // The FIXME below was emitted with the declaration and flags an unresolved
    // layout problem for this member; it is preserved verbatim.
    /* FIXME: A padded struct is needed here. If you see this message, file a bug! */ S4_1 m3s[8];
};
|
|
|
|
// Layout of the device buffer that main0 binds at [[buffer(1)]] under the name
// `ssbo_140`; the kernel only reads from it.
// Holds three Content_1 regions (single, two-element array, single) followed
// by a run of matrices and a trailing one-element float4 array. Explicit char
// arrays insert filler between some members: 16 bytes after `m0`, 16 after
// `m1`, 24 after `m3` and 16 after `m4`.
// NOTE(review): the binding name suggests a std140-style layout -- confirm
// against the original shader source.
struct SSBO0
{
    Content_1 content;
    Content_1 content1[2];
    Content_1 content2;
    float2x2 m0;
    char _m4_pad[16];
    float2x2 m1;
    char _m5_pad[16];
    float2x3 m2[4];
    float3x2 m3;
    char _m7_pad[24];
    float2x2 m4;
    char _m8_pad[16];
    float2x2 m5[9];
    float2x3 m6[4][2];
    float3x2 m7;
    float4 array[1];
};
|
|
|
|
// Compute kernel: copies the `content` region of `ssbo_140` (buffer 1) into
// the `content` region of `ssbo_430` (buffer 0) member by member, then
// overwrites one destination field with a matrix-vector product.
//
// Parameters:
//   ssbo_430 - destination buffer, bound at [[buffer(0)]]; written (and, in
//              the final statement, also read).
//   ssbo_140 - source buffer, bound at [[buffer(1)]]; only read.
//
// Only `content` is touched; `content1`, `content2` and the top-level matrix
// members are left alone, apart from the read of `ssbo_430.m2[1]` at the end.
kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [[buffer(1)]])
{
    // One-element struct arrays m0s / m1s / m2s.
    // The source S0_1::a element is a float4; only its .xy is kept.
    ssbo_430.content.m0s[0].a[0] = ssbo_140.content.m0s[0].a[0].xy;
    ssbo_430.content.m0s[0].b = ssbo_140.content.m0s[0].b;
    ssbo_430.content.m1s[0].a = float3(ssbo_140.content.m1s[0].a);
    ssbo_430.content.m1s[0].b = ssbo_140.content.m1s[0].b;
    ssbo_430.content.m2s[0].a[0] = ssbo_140.content.m2s[0].a[0];
    ssbo_430.content.m2s[0].b = ssbo_140.content.m2s[0].b;

    // Single struct members m0 / m1 / m2 / m3 and the scalar m4.
    ssbo_430.content.m0.a[0] = ssbo_140.content.m0.a[0].xy;
    ssbo_430.content.m0.b = ssbo_140.content.m0.b;
    ssbo_430.content.m1.a = float3(ssbo_140.content.m1.a);
    ssbo_430.content.m1.b = ssbo_140.content.m1.b;
    ssbo_430.content.m2.a[0] = ssbo_140.content.m2.a[0];
    ssbo_430.content.m2.b = ssbo_140.content.m2.b;
    ssbo_430.content.m3.a = ssbo_140.content.m3.a;
    ssbo_430.content.m3.b = ssbo_140.content.m3.b;
    ssbo_430.content.m4 = ssbo_140.content.m4;

    // All eight elements of the m3s array, copied one by one.
    ssbo_430.content.m3s[0].c = ssbo_140.content.m3s[0].c;
    ssbo_430.content.m3s[1].c = ssbo_140.content.m3s[1].c;
    ssbo_430.content.m3s[2].c = ssbo_140.content.m3s[2].c;
    ssbo_430.content.m3s[3].c = ssbo_140.content.m3s[3].c;
    ssbo_430.content.m3s[4].c = ssbo_140.content.m3s[4].c;
    ssbo_430.content.m3s[5].c = ssbo_140.content.m3s[5].c;
    ssbo_430.content.m3s[6].c = ssbo_140.content.m3s[6].c;
    ssbo_430.content.m3s[7].c = ssbo_140.content.m3s[7].c;

    // Must stay last: it replaces the m1.a value stored above and reads the
    // m0.a[0] value that was written earlier in this kernel.
    ssbo_430.content.m1.a = float2x3(ssbo_430.m2[1]) * float2(ssbo_430.content.m0.a[0]);
}
|
|
|