28454facbb
The old method of using a different unpacked matrix type doesn't work for scalar alignment. It certainly wouldn't have any effect for a square matrix, since the number of columns and rows are the same. So now we'll store them as arrays of packed vectors.
172 lines
3.7 KiB
Plaintext
172 lines
3.7 KiB
Plaintext
#include <metal_stdlib>
|
|
#include <simd/simd.h>
|
|
|
|
using namespace metal;
|
|
|
|
typedef packed_float2 packed_float2x2[2];
|
|
typedef packed_float2 packed_rm_float2x3[3];
|
|
typedef packed_float3 packed_float2x3[2];
|
|
typedef packed_float3 packed_rm_float3x2[2];
|
|
|
|
struct S0
|
|
{
|
|
packed_float2 a[1];
|
|
float b;
|
|
};
|
|
|
|
struct S1
|
|
{
|
|
packed_float3 a;
|
|
float b;
|
|
};
|
|
|
|
struct S2
|
|
{
|
|
packed_float3 a[1];
|
|
float b;
|
|
};
|
|
|
|
struct S3
|
|
{
|
|
packed_float2 a;
|
|
float b;
|
|
};
|
|
|
|
struct S4
|
|
{
|
|
float2 c;
|
|
};
|
|
|
|
struct Content
|
|
{
|
|
S0 m0s[1];
|
|
S1 m1s[1];
|
|
S2 m2s[1];
|
|
S0 m0;
|
|
S1 m1;
|
|
S2 m2;
|
|
S3 m3;
|
|
float m4;
|
|
S4 m3s[8];
|
|
};
|
|
|
|
struct SSBO1
|
|
{
|
|
Content content;
|
|
Content content1[2];
|
|
Content content2;
|
|
float2x2 m0;
|
|
float2x2 m1;
|
|
packed_float2x3 m2[4];
|
|
float3x2 m3;
|
|
float2x2 m4;
|
|
float2x2 m5[9];
|
|
packed_rm_float2x3 m6[4][2];
|
|
float3x2 m7;
|
|
float array[1];
|
|
};
|
|
|
|
struct S0_1
|
|
{
|
|
float4 a[1];
|
|
float b;
|
|
};
|
|
|
|
struct S1_1
|
|
{
|
|
packed_float3 a;
|
|
float b;
|
|
};
|
|
|
|
struct S2_1
|
|
{
|
|
float3 a[1];
|
|
float b;
|
|
};
|
|
|
|
struct S3_1
|
|
{
|
|
float2 a;
|
|
float b;
|
|
};
|
|
|
|
struct S4_1
|
|
{
|
|
float2 c;
|
|
};
|
|
|
|
struct Content_1
|
|
{
|
|
S0_1 m0s[1];
|
|
S1_1 m1s[1];
|
|
S2_1 m2s[1];
|
|
S0_1 m0;
|
|
S1_1 m1;
|
|
S2_1 m2;
|
|
S3_1 m3;
|
|
float m4;
|
|
char _m8_pad[12];
|
|
/* FIXME: A padded struct is needed here. If you see this message, file a bug! */ S4_1 m3s[8];
|
|
};
|
|
|
|
struct SSBO0
|
|
{
|
|
Content_1 content;
|
|
Content_1 content1[2];
|
|
Content_1 content2;
|
|
float2x2 m0;
|
|
char _m4_pad[16];
|
|
float2x2 m1;
|
|
char _m5_pad[16];
|
|
float2x3 m2[4];
|
|
float3x2 m3;
|
|
char _m7_pad[24];
|
|
float2x2 m4;
|
|
char _m8_pad[16];
|
|
float2x2 m5[9];
|
|
float2x3 m6[4][2];
|
|
float3x2 m7;
|
|
float4 array[1];
|
|
};
|
|
|
|
struct SSBO2
|
|
{
|
|
float m0;
|
|
packed_float2x2 m1;
|
|
packed_rm_float3x2 m2;
|
|
};
|
|
|
|
kernel void main0(device SSBO1& ssbo_scalar [[buffer(0)]], device SSBO0& ssbo_140 [[buffer(1)]], device SSBO2& ssbo_scalar2 [[buffer(2)]])
|
|
{
|
|
ssbo_scalar.content.m0s[0].a[0] = ssbo_140.content.m0s[0].a[0].xy;
|
|
ssbo_scalar.content.m0s[0].b = ssbo_140.content.m0s[0].b;
|
|
ssbo_scalar.content.m1s[0].a = float3(ssbo_140.content.m1s[0].a);
|
|
ssbo_scalar.content.m1s[0].b = ssbo_140.content.m1s[0].b;
|
|
ssbo_scalar.content.m2s[0].a[0] = ssbo_140.content.m2s[0].a[0];
|
|
ssbo_scalar.content.m2s[0].b = ssbo_140.content.m2s[0].b;
|
|
ssbo_scalar.content.m0.a[0] = ssbo_140.content.m0.a[0].xy;
|
|
ssbo_scalar.content.m0.b = ssbo_140.content.m0.b;
|
|
ssbo_scalar.content.m1.a = float3(ssbo_140.content.m1.a);
|
|
ssbo_scalar.content.m1.b = ssbo_140.content.m1.b;
|
|
ssbo_scalar.content.m2.a[0] = ssbo_140.content.m2.a[0];
|
|
ssbo_scalar.content.m2.b = ssbo_140.content.m2.b;
|
|
ssbo_scalar.content.m3.a = ssbo_140.content.m3.a;
|
|
ssbo_scalar.content.m3.b = ssbo_140.content.m3.b;
|
|
ssbo_scalar.content.m4 = ssbo_140.content.m4;
|
|
ssbo_scalar.content.m3s[0].c = ssbo_140.content.m3s[0].c;
|
|
ssbo_scalar.content.m3s[1].c = ssbo_140.content.m3s[1].c;
|
|
ssbo_scalar.content.m3s[2].c = ssbo_140.content.m3s[2].c;
|
|
ssbo_scalar.content.m3s[3].c = ssbo_140.content.m3s[3].c;
|
|
ssbo_scalar.content.m3s[4].c = ssbo_140.content.m3s[4].c;
|
|
ssbo_scalar.content.m3s[5].c = ssbo_140.content.m3s[5].c;
|
|
ssbo_scalar.content.m3s[6].c = ssbo_140.content.m3s[6].c;
|
|
ssbo_scalar.content.m3s[7].c = ssbo_140.content.m3s[7].c;
|
|
ssbo_scalar.content.m1.a = float2x3(float3(ssbo_scalar.m2[1][0]), float3(ssbo_scalar.m2[1][1])) * float2(ssbo_scalar.content.m0.a[0]);
|
|
ssbo_scalar.m0 = float2x2(float2(ssbo_scalar2.m1[0]), float2(ssbo_scalar2.m1[1]));
|
|
ssbo_scalar2.m1[0] = transpose(ssbo_scalar.m4)[0];
|
|
ssbo_scalar2.m1[1] = transpose(ssbo_scalar.m4)[1];
|
|
ssbo_scalar2.m2[0] = spvConvertFromRowMajor3x2(ssbo_scalar.m3)[0];
|
|
ssbo_scalar2.m2[1] = spvConvertFromRowMajor3x2(ssbo_scalar.m3)[1];
|
|
}
|
|
|