MSL: Workaround broken vector -> scalar access chain in MSL.

On MSL, the compiler refuses to allow access chains into a normal vector type.
What happens in practice instead is a read-modify-write where a vector type is
loaded, modified and written back.

The workaround is to convert a vector into a pointer-to-scalar before
the access chain continues to add the scalar index.
This commit is contained in:
Hans-Kristian Arntzen 2020-07-01 13:02:11 +02:00
parent fab75792a9
commit fa5b206d97
39 changed files with 221 additions and 121 deletions

View File

@ -23,6 +23,6 @@ kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_GlobalInvocationID [[
float4 _52 = _50;
_52.w = 90.0;
_27.out_data[gl_GlobalInvocationID.x] = _52;
_27.out_data[gl_GlobalInvocationID.x].y = 20.0;
((device float*)&_27.out_data[gl_GlobalInvocationID.x])[1u] = 20.0;
}

View File

@ -11,6 +11,6 @@ struct SSBORow
kernel void main0(device SSBORow& _4 [[buffer(0)]])
{
_4.v = _4.row_major0[2u][1];
_4.v = ((device float*)&_4.row_major0[2u])[1];
}

View File

@ -19,8 +19,8 @@ kernel void main0(device SSBO0& _25 [[buffer(0)]], device SSBO1& _39 [[buffer(1)
{
uint ident = gl_GlobalInvocationID.x;
half2 a = as_type<half2>(_25.inputs[ident].xy);
_39.outputs[ident].x = int(as_type<uint>(a + half2(half(1.0))));
_39.outputs[ident].y = as_type<int>(_25.inputs[ident].zw);
_39.outputs[ident].z = int(as_type<uint>(ushort2(_25.inputs[ident].xy)));
((device int*)&_39.outputs[ident])[0u] = int(as_type<uint>(a + half2(half(1.0))));
((device int*)&_39.outputs[ident])[1u] = as_type<int>(_25.inputs[ident].zw);
((device int*)&_39.outputs[ident])[2u] = int(as_type<uint>(ushort2(_25.inputs[ident].xy)));
}

View File

@ -23,9 +23,9 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
kernel void main0(device SSBO1& _21 [[buffer(0)]], device SSBO0& _29 [[buffer(1)]], constant UBO& _40 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
uint ident = gl_GlobalInvocationID.x;
short2 _47 = as_type<short2>(_29.inputs[ident].x) + as_type<short2>(_40.const0.xy);
short2 _47 = as_type<short2>(((device int*)&_29.inputs[ident])[0u]) + as_type<short2>(_40.const0.xy);
_21.outputs[ident] = short4(_47.x, _47.y, _21.outputs[ident].z, _21.outputs[ident].w);
short2 _66 = short2(as_type<ushort2>(uint(_29.inputs[ident].y)) - as_type<ushort2>(_40.const0.zw));
short2 _66 = short2(as_type<ushort2>(uint(((device int*)&_29.inputs[ident])[1u])) - as_type<ushort2>(_40.const0.zw));
_21.outputs[ident] = short4(_21.outputs[ident].x, _21.outputs[ident].y, _66.x, _66.y);
}

View File

@ -199,50 +199,50 @@ inline T spvFaceForward(T n, T i, T nref)
kernel void main0(device SSBO& _19 [[buffer(0)]])
{
_19.res = round(_19.f32.x);
_19.res = rint(_19.f32.x);
_19.res = trunc(_19.f32.x);
_19.res = abs(_19.f32.x);
_19.ires = abs(_19.s32.x);
_19.res = sign(_19.f32.x);
_19.ires = sign(_19.s32.x);
_19.res = floor(_19.f32.x);
_19.res = ceil(_19.f32.x);
_19.res = fract(_19.f32.x);
_19.res = radians(_19.f32.x);
_19.res = degrees(_19.f32.x);
_19.res = sin(_19.f32.x);
_19.res = cos(_19.f32.x);
_19.res = tan(_19.f32.x);
_19.res = asin(_19.f32.x);
_19.res = acos(_19.f32.x);
_19.res = atan(_19.f32.x);
_19.res = sinh(_19.f32.x);
_19.res = cosh(_19.f32.x);
_19.res = tanh(_19.f32.x);
_19.res = asinh(_19.f32.x);
_19.res = acosh(_19.f32.x);
_19.res = atanh(_19.f32.x);
_19.res = atan2(_19.f32.x, _19.f32.y);
_19.res = pow(_19.f32.x, _19.f32.y);
_19.res = exp(_19.f32.x);
_19.res = log(_19.f32.x);
_19.res = exp2(_19.f32.x);
_19.res = log2(_19.f32.x);
_19.res = sqrt(_19.f32.x);
_19.res = rsqrt(_19.f32.x);
_19.res = abs(_19.f32.x);
_19.res = abs(_19.f32.x - _19.f32.y);
_19.res = sign(_19.f32.x);
_19.res = spvFaceForward(_19.f32.x, _19.f32.y, _19.f32.z);
_19.res = spvReflect(_19.f32.x, _19.f32.y);
_19.res = spvRefract(_19.f32.x, _19.f32.y, _19.f32.z);
_19.res = round(((device float*)&_19.f32)[0u]);
_19.res = rint(((device float*)&_19.f32)[0u]);
_19.res = trunc(((device float*)&_19.f32)[0u]);
_19.res = abs(((device float*)&_19.f32)[0u]);
_19.ires = abs(((device int*)&_19.s32)[0u]);
_19.res = sign(((device float*)&_19.f32)[0u]);
_19.ires = sign(((device int*)&_19.s32)[0u]);
_19.res = floor(((device float*)&_19.f32)[0u]);
_19.res = ceil(((device float*)&_19.f32)[0u]);
_19.res = fract(((device float*)&_19.f32)[0u]);
_19.res = radians(((device float*)&_19.f32)[0u]);
_19.res = degrees(((device float*)&_19.f32)[0u]);
_19.res = sin(((device float*)&_19.f32)[0u]);
_19.res = cos(((device float*)&_19.f32)[0u]);
_19.res = tan(((device float*)&_19.f32)[0u]);
_19.res = asin(((device float*)&_19.f32)[0u]);
_19.res = acos(((device float*)&_19.f32)[0u]);
_19.res = atan(((device float*)&_19.f32)[0u]);
_19.res = sinh(((device float*)&_19.f32)[0u]);
_19.res = cosh(((device float*)&_19.f32)[0u]);
_19.res = tanh(((device float*)&_19.f32)[0u]);
_19.res = asinh(((device float*)&_19.f32)[0u]);
_19.res = acosh(((device float*)&_19.f32)[0u]);
_19.res = atanh(((device float*)&_19.f32)[0u]);
_19.res = atan2(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]);
_19.res = pow(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]);
_19.res = exp(((device float*)&_19.f32)[0u]);
_19.res = log(((device float*)&_19.f32)[0u]);
_19.res = exp2(((device float*)&_19.f32)[0u]);
_19.res = log2(((device float*)&_19.f32)[0u]);
_19.res = sqrt(((device float*)&_19.f32)[0u]);
_19.res = rsqrt(((device float*)&_19.f32)[0u]);
_19.res = abs(((device float*)&_19.f32)[0u]);
_19.res = abs(((device float*)&_19.f32)[0u] - ((device float*)&_19.f32)[1u]);
_19.res = sign(((device float*)&_19.f32)[0u]);
_19.res = spvFaceForward(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]);
_19.res = spvReflect(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]);
_19.res = spvRefract(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]);
_19.res = length(_19.f32.xy);
_19.res = distance(_19.f32.xy, _19.f32.zw);
float2 v2 = normalize(_19.f32.xy);
v2 = faceforward(_19.f32.xy, _19.f32.yz, _19.f32.zw);
v2 = reflect(_19.f32.xy, _19.f32.zw);
v2 = refract(_19.f32.xy, _19.f32.yz, _19.f32.w);
v2 = refract(_19.f32.xy, _19.f32.yz, ((device float*)&_19.f32)[3u]);
float3 v3 = cross(_19.f32.xyz, _19.f32.yzw);
_19.res = determinant(_19.m2);
_19.res = determinant(_19.m3);
@ -251,36 +251,36 @@ kernel void main0(device SSBO& _19 [[buffer(0)]])
_19.m3 = spvInverse3x3(_19.m3);
_19.m4 = spvInverse4x4(_19.m4);
float tmp;
float _287 = modf(_19.f32.x, tmp);
float _287 = modf(((device float*)&_19.f32)[0u], tmp);
_19.res = _287;
_19.res = fast::min(_19.f32.x, _19.f32.y);
_19.ures = min(_19.u32.x, _19.u32.y);
_19.ires = min(_19.s32.x, _19.s32.y);
_19.res = fast::max(_19.f32.x, _19.f32.y);
_19.ures = max(_19.u32.x, _19.u32.y);
_19.ires = max(_19.s32.x, _19.s32.y);
_19.res = fast::clamp(_19.f32.x, _19.f32.y, _19.f32.z);
_19.ures = clamp(_19.u32.x, _19.u32.y, _19.u32.z);
_19.ires = clamp(_19.s32.x, _19.s32.y, _19.s32.z);
_19.res = mix(_19.f32.x, _19.f32.y, _19.f32.z);
_19.res = step(_19.f32.x, _19.f32.y);
_19.res = smoothstep(_19.f32.x, _19.f32.y, _19.f32.z);
_19.res = fma(_19.f32.x, _19.f32.y, _19.f32.z);
_19.res = fast::min(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]);
_19.ures = min(((device uint*)&_19.u32)[0u], ((device uint*)&_19.u32)[1u]);
_19.ires = min(((device int*)&_19.s32)[0u], ((device int*)&_19.s32)[1u]);
_19.res = fast::max(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]);
_19.ures = max(((device uint*)&_19.u32)[0u], ((device uint*)&_19.u32)[1u]);
_19.ires = max(((device int*)&_19.s32)[0u], ((device int*)&_19.s32)[1u]);
_19.res = fast::clamp(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]);
_19.ures = clamp(((device uint*)&_19.u32)[0u], ((device uint*)&_19.u32)[1u], ((device uint*)&_19.u32)[2u]);
_19.ires = clamp(((device int*)&_19.s32)[0u], ((device int*)&_19.s32)[1u], ((device int*)&_19.s32)[2u]);
_19.res = mix(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]);
_19.res = step(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]);
_19.res = smoothstep(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]);
_19.res = fma(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]);
ResType _387;
_387._m0 = frexp(_19.f32.x, _387._m1);
_387._m0 = frexp(((device float*)&_19.f32)[0u], _387._m1);
int itmp = _387._m1;
_19.res = _387._m0;
_19.res = ldexp(_19.f32.x, itmp);
_19.res = ldexp(((device float*)&_19.f32)[0u], itmp);
_19.ures = pack_float_to_snorm4x8(_19.f32);
_19.ures = pack_float_to_unorm4x8(_19.f32);
_19.ures = pack_float_to_snorm2x16(_19.f32.xy);
_19.ures = pack_float_to_unorm2x16(_19.f32.xy);
_19.ures = as_type<uint>(half2(_19.f32.xy));
v2 = unpack_snorm2x16_to_float(_19.u32.x);
v2 = unpack_unorm2x16_to_float(_19.u32.x);
v2 = float2(as_type<half2>(_19.u32.x));
float4 v4 = unpack_snorm4x8_to_float(_19.u32.x);
v4 = unpack_unorm4x8_to_float(_19.u32.x);
v2 = unpack_snorm2x16_to_float(((device uint*)&_19.u32)[0u]);
v2 = unpack_unorm2x16_to_float(((device uint*)&_19.u32)[0u]);
v2 = float2(as_type<half2>(((device uint*)&_19.u32)[0u]));
float4 v4 = unpack_snorm4x8_to_float(((device uint*)&_19.u32)[0u]);
v4 = unpack_unorm4x8_to_float(((device uint*)&_19.u32)[0u]);
_19.s32 = spvFindLSB(_19.s32);
_19.s32 = int4(spvFindLSB(_19.u32));
_19.s32 = spvFindSMSB(_19.s32);

View File

@ -0,0 +1,26 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct SSBO
{
float4 v;
float4x4 cm;
float4x4 rm;
packed_float3 v3;
float f;
};
kernel void main0(device SSBO& _12 [[buffer(0)]])
{
threadgroup float4 shared_vec4;
threadgroup float3 shared_vec3;
((device float*)&_12.v)[0u] = 10.0;
_12.v3[1u] = 40.0;
((device float*)&_12.cm[1])[2u] = 20.0;
((device float*)&_12.rm[1u])[3] = 30.0;
((threadgroup float*)&shared_vec4)[2u] = 40.0;
((threadgroup float*)&shared_vec3)[1u] = 1.0;
}

View File

@ -68,8 +68,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -74,8 +74,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -68,8 +68,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -74,8 +74,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0][1u] = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0][1u];
v_29.col_major0[0][1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = v_29.col_major0[0][1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -75,8 +75,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -69,8 +69,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -70,8 +70,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -79,8 +79,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -70,8 +70,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -73,8 +73,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -74,8 +74,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -68,8 +68,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -69,8 +69,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -69,8 +69,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -81,8 +81,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -70,8 +70,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -70,8 +70,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -68,8 +68,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -77,8 +77,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -68,8 +68,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -80,8 +80,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0][1u] = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0][1u];
v_29.col_major0[0][1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = v_29.col_major0[0][1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -69,8 +69,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -69,8 +69,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -70,8 +70,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -70,8 +70,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -70,8 +70,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
static inline __attribute__((always_inline))
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
{
v_29.col_major0[0].y = v_41.row_major0[1u][0];
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
}
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])

View File

@ -21,7 +21,7 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
kernel void main0(device SSBO& _12 [[buffer(0)]])
{
_12.a.x = 10.0;
((device float*)&_12.a)[0u] = 10.0;
_12.b = 20.0;
}

View File

@ -18,6 +18,6 @@ kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_GlobalInvocationID [[
v.z = 70.0;
v.w = 90.0;
_27.out_data[gl_GlobalInvocationID.x] = v;
_27.out_data[gl_GlobalInvocationID.x].y = 20.0;
((device float*)&_27.out_data[gl_GlobalInvocationID.x])[1u] = 20.0;
}

View File

@ -0,0 +1,25 @@
#version 450
layout(set = 0, binding = 0) buffer SSBO
{
vec4 v;
mat4 cm;
layout(row_major) mat4 rm;
vec3 v3;
float f;
};
shared vec4 shared_vec4;
shared vec3 shared_vec3;
void main()
{
v.x = 10.0;
v3.y = 40.0;
cm[1][2] = 20.0;
rm[3][1] = 30.0;
shared_vec4.z = 40.0;
shared_vec3.y = 1.0;
}

View File

@ -7691,6 +7691,23 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
}
}
// Internally, access chain implementation can also be used on composites,
// ignore scalar access workarounds in this case.
StorageClass effective_storage;
if (expression_type(base).pointer)
effective_storage = get_expression_effective_storage_class(base);
else
effective_storage = StorageClassGeneric;
if (!row_major_matrix_needs_conversion)
{
// On some backends, we might not be able to safely access individual scalars in a vector.
// To work around this, we might have to cast the access chain reference to something which can,
// like a pointer to scalar, which we can then index into.
prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
is_packed);
}
if (is_literal && !is_packed && !row_major_matrix_needs_conversion)
{
expr += ".";
@ -7722,6 +7739,12 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
expr += "]";
}
if (row_major_matrix_needs_conversion)
{
prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
is_packed);
}
expr += deferred_index;
row_major_matrix_needs_conversion = false;
@ -7752,6 +7775,10 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
return expr;
}
void CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &)
{
}
string CompilerGLSL::to_flattened_struct_member(const string &basename, const SPIRType &type, uint32_t index)
{
return sanitize_underscores(join(basename, "_", to_member_name(type, index)));

View File

@ -565,6 +565,9 @@ protected:
std::string access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, AccessChainFlags flags,
AccessChainMeta *meta);
virtual void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type,
spv::StorageClass storage, bool &is_packed);
std::string access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
AccessChainMeta *meta = nullptr, bool ptr_chain = false);

View File

@ -5859,6 +5859,23 @@ bool CompilerMSL::is_out_of_bounds_tessellation_level(uint32_t id_lhs)
(builtin == BuiltInTessLevelOuter && c->scalar() == 3);
}
void CompilerMSL::prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type,
spv::StorageClass storage, bool &is_packed)
{
// If there is any risk of writes happening with the access chain in question,
// and there is a risk of concurrent write access to other components,
// we must cast the access chain to a plain pointer to ensure we only access the exact scalars we expect.
// The MSL compiler refuses to allow component-level access for any non-packed vector types.
if (!is_packed && (storage == StorageClassStorageBuffer || storage == StorageClassWorkgroup))
{
const char *addr_space = storage == StorageClassWorkgroup ? "threadgroup" : "device";
expr = join("((", addr_space, " ", type_to_glsl(type), "*)&", enclose_expression(expr), ")");
// Further indexing should happen with packed rules (array index, not swizzle).
is_packed = true;
}
}
// Override for MSL-specific syntax instructions
void CompilerMSL::emit_instruction(const Instruction &instruction)
{

View File

@ -793,6 +793,8 @@ protected:
void analyze_sampled_image_usage();
void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage,
bool &is_packed) override;
bool emit_tessellation_access_chain(const uint32_t *ops, uint32_t length);
bool emit_tessellation_io_load(uint32_t result_type, uint32_t id, uint32_t ptr);
bool is_out_of_bounds_tessellation_level(uint32_t id_lhs);