SPIRV-Cross/reference/shaders-msl-no-opt/packing/matrix-3x3-scalar.comp
Hans-Kristian Arntzen 711300baed MSL: Do not emit swizzled writes in packing fixups.
Similar to scalar access chain fix, this causes a read-modify-write on
memory we're not supposed to write to.
2020-07-06 10:03:46 +02:00

103 lines
5.4 KiB
Plaintext

#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
typedef packed_float3 packed_float3x3[3];
typedef packed_float3 packed_rm_float3x3[3];
struct SSBOCol
{
packed_float3x3 col_major0;
packed_float3x3 col_major1;
};
struct SSBORow
{
packed_rm_float3x3 row_major0;
packed_rm_float3x3 row_major1;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
static inline __attribute__((always_inline))
void load_store_to_variable_col_major(device SSBOCol& v_29)
{
float3x3 loaded = float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]));
v_29.col_major1[0] = loaded[0];
v_29.col_major1[1] = loaded[1];
v_29.col_major1[2] = loaded[2];
}
static inline __attribute__((always_inline))
void load_store_to_variable_row_major(device SSBORow& v_41)
{
float3x3 loaded = transpose(float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2])));
v_41.row_major0[0] = float3(loaded[0][0], loaded[1][0], loaded[2][0]);
v_41.row_major0[1] = float3(loaded[0][1], loaded[1][1], loaded[2][1]);
v_41.row_major0[2] = float3(loaded[0][2], loaded[1][2], loaded[2][2]);
}
static inline __attribute__((always_inline))
void copy_col_major_to_col_major(device SSBOCol& v_29)
{
v_29.col_major0[0] = float3x3(float3(v_29.col_major1[0]), float3(v_29.col_major1[1]), float3(v_29.col_major1[2]))[0];
v_29.col_major0[1] = float3x3(float3(v_29.col_major1[0]), float3(v_29.col_major1[1]), float3(v_29.col_major1[2]))[1];
v_29.col_major0[2] = float3x3(float3(v_29.col_major1[0]), float3(v_29.col_major1[1]), float3(v_29.col_major1[2]))[2];
}
static inline __attribute__((always_inline))
void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41)
{
v_41.row_major0[0] = float3(float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[0][0], float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[1][0], float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[2][0]);
v_41.row_major0[1] = float3(float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[0][1], float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[1][1], float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[2][1]);
v_41.row_major0[2] = float3(float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[0][2], float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[1][2], float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[2][2]);
}
static inline __attribute__((always_inline))
void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0] = float3(float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[0][0], float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[1][0], float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[2][0]);
v_29.col_major0[1] = float3(float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[0][1], float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[1][1], float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[2][1]);
v_29.col_major0[2] = float3(float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[0][2], float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[1][2], float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[2][2]);
}
static inline __attribute__((always_inline))
void copy_row_major_to_row_major(device SSBORow& v_41)
{
v_41.row_major0[0] = float3x3(float3(v_41.row_major1[0]), float3(v_41.row_major1[1]), float3(v_41.row_major1[2]))[0];
v_41.row_major0[1] = float3x3(float3(v_41.row_major1[0]), float3(v_41.row_major1[1]), float3(v_41.row_major1[2]))[1];
v_41.row_major0[2] = float3x3(float3(v_41.row_major1[0]), float3(v_41.row_major1[1]), float3(v_41.row_major1[2]))[2];
}
static inline __attribute__((always_inline))
void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[1] = float3(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1]);
((device float*)&v_41.row_major0[0])[1] = v_29.col_major0[1][0];
((device float*)&v_41.row_major0[1])[1] = v_29.col_major0[1][1];
((device float*)&v_41.row_major0[2])[1] = v_29.col_major0[1][2];
}
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0][1u] = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0][1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
{
load_store_to_variable_col_major(v_29);
load_store_to_variable_row_major(v_41);
copy_col_major_to_col_major(v_29);
copy_col_major_to_row_major(v_29, v_41);
copy_row_major_to_col_major(v_29, v_41);
copy_row_major_to_row_major(v_41);
copy_columns(v_29, v_41);
copy_elements(v_29, v_41);
}