mirror of
https://github.com/KhronosGroup/SPIRV-Cross.git
synced 2024-11-10 06:00:07 +00:00
MSL: Workaround broken vector -> scalar access chain in MSL.
On MSL, the compiler refuses to allow access chains into a normal vector type. What happens in practice instead is a read-modify-write where a vector type is loaded, modified and written back. The workaround is to convert a vector into a pointer-to-scalar before the access chain continues to add the scalar index.
This commit is contained in:
parent
fab75792a9
commit
fa5b206d97
@ -23,6 +23,6 @@ kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_GlobalInvocationID [[
|
||||
float4 _52 = _50;
|
||||
_52.w = 90.0;
|
||||
_27.out_data[gl_GlobalInvocationID.x] = _52;
|
||||
_27.out_data[gl_GlobalInvocationID.x].y = 20.0;
|
||||
((device float*)&_27.out_data[gl_GlobalInvocationID.x])[1u] = 20.0;
|
||||
}
|
||||
|
||||
|
@ -11,6 +11,6 @@ struct SSBORow
|
||||
|
||||
kernel void main0(device SSBORow& _4 [[buffer(0)]])
|
||||
{
|
||||
_4.v = _4.row_major0[2u][1];
|
||||
_4.v = ((device float*)&_4.row_major0[2u])[1];
|
||||
}
|
||||
|
||||
|
@ -19,8 +19,8 @@ kernel void main0(device SSBO0& _25 [[buffer(0)]], device SSBO1& _39 [[buffer(1)
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
half2 a = as_type<half2>(_25.inputs[ident].xy);
|
||||
_39.outputs[ident].x = int(as_type<uint>(a + half2(half(1.0))));
|
||||
_39.outputs[ident].y = as_type<int>(_25.inputs[ident].zw);
|
||||
_39.outputs[ident].z = int(as_type<uint>(ushort2(_25.inputs[ident].xy)));
|
||||
((device int*)&_39.outputs[ident])[0u] = int(as_type<uint>(a + half2(half(1.0))));
|
||||
((device int*)&_39.outputs[ident])[1u] = as_type<int>(_25.inputs[ident].zw);
|
||||
((device int*)&_39.outputs[ident])[2u] = int(as_type<uint>(ushort2(_25.inputs[ident].xy)));
|
||||
}
|
||||
|
||||
|
@ -23,9 +23,9 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
|
||||
kernel void main0(device SSBO1& _21 [[buffer(0)]], device SSBO0& _29 [[buffer(1)]], constant UBO& _40 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
short2 _47 = as_type<short2>(_29.inputs[ident].x) + as_type<short2>(_40.const0.xy);
|
||||
short2 _47 = as_type<short2>(((device int*)&_29.inputs[ident])[0u]) + as_type<short2>(_40.const0.xy);
|
||||
_21.outputs[ident] = short4(_47.x, _47.y, _21.outputs[ident].z, _21.outputs[ident].w);
|
||||
short2 _66 = short2(as_type<ushort2>(uint(_29.inputs[ident].y)) - as_type<ushort2>(_40.const0.zw));
|
||||
short2 _66 = short2(as_type<ushort2>(uint(((device int*)&_29.inputs[ident])[1u])) - as_type<ushort2>(_40.const0.zw));
|
||||
_21.outputs[ident] = short4(_21.outputs[ident].x, _21.outputs[ident].y, _66.x, _66.y);
|
||||
}
|
||||
|
||||
|
@ -199,50 +199,50 @@ inline T spvFaceForward(T n, T i, T nref)
|
||||
|
||||
kernel void main0(device SSBO& _19 [[buffer(0)]])
|
||||
{
|
||||
_19.res = round(_19.f32.x);
|
||||
_19.res = rint(_19.f32.x);
|
||||
_19.res = trunc(_19.f32.x);
|
||||
_19.res = abs(_19.f32.x);
|
||||
_19.ires = abs(_19.s32.x);
|
||||
_19.res = sign(_19.f32.x);
|
||||
_19.ires = sign(_19.s32.x);
|
||||
_19.res = floor(_19.f32.x);
|
||||
_19.res = ceil(_19.f32.x);
|
||||
_19.res = fract(_19.f32.x);
|
||||
_19.res = radians(_19.f32.x);
|
||||
_19.res = degrees(_19.f32.x);
|
||||
_19.res = sin(_19.f32.x);
|
||||
_19.res = cos(_19.f32.x);
|
||||
_19.res = tan(_19.f32.x);
|
||||
_19.res = asin(_19.f32.x);
|
||||
_19.res = acos(_19.f32.x);
|
||||
_19.res = atan(_19.f32.x);
|
||||
_19.res = sinh(_19.f32.x);
|
||||
_19.res = cosh(_19.f32.x);
|
||||
_19.res = tanh(_19.f32.x);
|
||||
_19.res = asinh(_19.f32.x);
|
||||
_19.res = acosh(_19.f32.x);
|
||||
_19.res = atanh(_19.f32.x);
|
||||
_19.res = atan2(_19.f32.x, _19.f32.y);
|
||||
_19.res = pow(_19.f32.x, _19.f32.y);
|
||||
_19.res = exp(_19.f32.x);
|
||||
_19.res = log(_19.f32.x);
|
||||
_19.res = exp2(_19.f32.x);
|
||||
_19.res = log2(_19.f32.x);
|
||||
_19.res = sqrt(_19.f32.x);
|
||||
_19.res = rsqrt(_19.f32.x);
|
||||
_19.res = abs(_19.f32.x);
|
||||
_19.res = abs(_19.f32.x - _19.f32.y);
|
||||
_19.res = sign(_19.f32.x);
|
||||
_19.res = spvFaceForward(_19.f32.x, _19.f32.y, _19.f32.z);
|
||||
_19.res = spvReflect(_19.f32.x, _19.f32.y);
|
||||
_19.res = spvRefract(_19.f32.x, _19.f32.y, _19.f32.z);
|
||||
_19.res = round(((device float*)&_19.f32)[0u]);
|
||||
_19.res = rint(((device float*)&_19.f32)[0u]);
|
||||
_19.res = trunc(((device float*)&_19.f32)[0u]);
|
||||
_19.res = abs(((device float*)&_19.f32)[0u]);
|
||||
_19.ires = abs(((device int*)&_19.s32)[0u]);
|
||||
_19.res = sign(((device float*)&_19.f32)[0u]);
|
||||
_19.ires = sign(((device int*)&_19.s32)[0u]);
|
||||
_19.res = floor(((device float*)&_19.f32)[0u]);
|
||||
_19.res = ceil(((device float*)&_19.f32)[0u]);
|
||||
_19.res = fract(((device float*)&_19.f32)[0u]);
|
||||
_19.res = radians(((device float*)&_19.f32)[0u]);
|
||||
_19.res = degrees(((device float*)&_19.f32)[0u]);
|
||||
_19.res = sin(((device float*)&_19.f32)[0u]);
|
||||
_19.res = cos(((device float*)&_19.f32)[0u]);
|
||||
_19.res = tan(((device float*)&_19.f32)[0u]);
|
||||
_19.res = asin(((device float*)&_19.f32)[0u]);
|
||||
_19.res = acos(((device float*)&_19.f32)[0u]);
|
||||
_19.res = atan(((device float*)&_19.f32)[0u]);
|
||||
_19.res = sinh(((device float*)&_19.f32)[0u]);
|
||||
_19.res = cosh(((device float*)&_19.f32)[0u]);
|
||||
_19.res = tanh(((device float*)&_19.f32)[0u]);
|
||||
_19.res = asinh(((device float*)&_19.f32)[0u]);
|
||||
_19.res = acosh(((device float*)&_19.f32)[0u]);
|
||||
_19.res = atanh(((device float*)&_19.f32)[0u]);
|
||||
_19.res = atan2(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]);
|
||||
_19.res = pow(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]);
|
||||
_19.res = exp(((device float*)&_19.f32)[0u]);
|
||||
_19.res = log(((device float*)&_19.f32)[0u]);
|
||||
_19.res = exp2(((device float*)&_19.f32)[0u]);
|
||||
_19.res = log2(((device float*)&_19.f32)[0u]);
|
||||
_19.res = sqrt(((device float*)&_19.f32)[0u]);
|
||||
_19.res = rsqrt(((device float*)&_19.f32)[0u]);
|
||||
_19.res = abs(((device float*)&_19.f32)[0u]);
|
||||
_19.res = abs(((device float*)&_19.f32)[0u] - ((device float*)&_19.f32)[1u]);
|
||||
_19.res = sign(((device float*)&_19.f32)[0u]);
|
||||
_19.res = spvFaceForward(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]);
|
||||
_19.res = spvReflect(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]);
|
||||
_19.res = spvRefract(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]);
|
||||
_19.res = length(_19.f32.xy);
|
||||
_19.res = distance(_19.f32.xy, _19.f32.zw);
|
||||
float2 v2 = normalize(_19.f32.xy);
|
||||
v2 = faceforward(_19.f32.xy, _19.f32.yz, _19.f32.zw);
|
||||
v2 = reflect(_19.f32.xy, _19.f32.zw);
|
||||
v2 = refract(_19.f32.xy, _19.f32.yz, _19.f32.w);
|
||||
v2 = refract(_19.f32.xy, _19.f32.yz, ((device float*)&_19.f32)[3u]);
|
||||
float3 v3 = cross(_19.f32.xyz, _19.f32.yzw);
|
||||
_19.res = determinant(_19.m2);
|
||||
_19.res = determinant(_19.m3);
|
||||
@ -251,36 +251,36 @@ kernel void main0(device SSBO& _19 [[buffer(0)]])
|
||||
_19.m3 = spvInverse3x3(_19.m3);
|
||||
_19.m4 = spvInverse4x4(_19.m4);
|
||||
float tmp;
|
||||
float _287 = modf(_19.f32.x, tmp);
|
||||
float _287 = modf(((device float*)&_19.f32)[0u], tmp);
|
||||
_19.res = _287;
|
||||
_19.res = fast::min(_19.f32.x, _19.f32.y);
|
||||
_19.ures = min(_19.u32.x, _19.u32.y);
|
||||
_19.ires = min(_19.s32.x, _19.s32.y);
|
||||
_19.res = fast::max(_19.f32.x, _19.f32.y);
|
||||
_19.ures = max(_19.u32.x, _19.u32.y);
|
||||
_19.ires = max(_19.s32.x, _19.s32.y);
|
||||
_19.res = fast::clamp(_19.f32.x, _19.f32.y, _19.f32.z);
|
||||
_19.ures = clamp(_19.u32.x, _19.u32.y, _19.u32.z);
|
||||
_19.ires = clamp(_19.s32.x, _19.s32.y, _19.s32.z);
|
||||
_19.res = mix(_19.f32.x, _19.f32.y, _19.f32.z);
|
||||
_19.res = step(_19.f32.x, _19.f32.y);
|
||||
_19.res = smoothstep(_19.f32.x, _19.f32.y, _19.f32.z);
|
||||
_19.res = fma(_19.f32.x, _19.f32.y, _19.f32.z);
|
||||
_19.res = fast::min(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]);
|
||||
_19.ures = min(((device uint*)&_19.u32)[0u], ((device uint*)&_19.u32)[1u]);
|
||||
_19.ires = min(((device int*)&_19.s32)[0u], ((device int*)&_19.s32)[1u]);
|
||||
_19.res = fast::max(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]);
|
||||
_19.ures = max(((device uint*)&_19.u32)[0u], ((device uint*)&_19.u32)[1u]);
|
||||
_19.ires = max(((device int*)&_19.s32)[0u], ((device int*)&_19.s32)[1u]);
|
||||
_19.res = fast::clamp(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]);
|
||||
_19.ures = clamp(((device uint*)&_19.u32)[0u], ((device uint*)&_19.u32)[1u], ((device uint*)&_19.u32)[2u]);
|
||||
_19.ires = clamp(((device int*)&_19.s32)[0u], ((device int*)&_19.s32)[1u], ((device int*)&_19.s32)[2u]);
|
||||
_19.res = mix(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]);
|
||||
_19.res = step(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]);
|
||||
_19.res = smoothstep(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]);
|
||||
_19.res = fma(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]);
|
||||
ResType _387;
|
||||
_387._m0 = frexp(_19.f32.x, _387._m1);
|
||||
_387._m0 = frexp(((device float*)&_19.f32)[0u], _387._m1);
|
||||
int itmp = _387._m1;
|
||||
_19.res = _387._m0;
|
||||
_19.res = ldexp(_19.f32.x, itmp);
|
||||
_19.res = ldexp(((device float*)&_19.f32)[0u], itmp);
|
||||
_19.ures = pack_float_to_snorm4x8(_19.f32);
|
||||
_19.ures = pack_float_to_unorm4x8(_19.f32);
|
||||
_19.ures = pack_float_to_snorm2x16(_19.f32.xy);
|
||||
_19.ures = pack_float_to_unorm2x16(_19.f32.xy);
|
||||
_19.ures = as_type<uint>(half2(_19.f32.xy));
|
||||
v2 = unpack_snorm2x16_to_float(_19.u32.x);
|
||||
v2 = unpack_unorm2x16_to_float(_19.u32.x);
|
||||
v2 = float2(as_type<half2>(_19.u32.x));
|
||||
float4 v4 = unpack_snorm4x8_to_float(_19.u32.x);
|
||||
v4 = unpack_unorm4x8_to_float(_19.u32.x);
|
||||
v2 = unpack_snorm2x16_to_float(((device uint*)&_19.u32)[0u]);
|
||||
v2 = unpack_unorm2x16_to_float(((device uint*)&_19.u32)[0u]);
|
||||
v2 = float2(as_type<half2>(((device uint*)&_19.u32)[0u]));
|
||||
float4 v4 = unpack_snorm4x8_to_float(((device uint*)&_19.u32)[0u]);
|
||||
v4 = unpack_unorm4x8_to_float(((device uint*)&_19.u32)[0u]);
|
||||
_19.s32 = spvFindLSB(_19.s32);
|
||||
_19.s32 = int4(spvFindLSB(_19.u32));
|
||||
_19.s32 = spvFindSMSB(_19.s32);
|
||||
|
@ -0,0 +1,26 @@
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
float4 v;
|
||||
float4x4 cm;
|
||||
float4x4 rm;
|
||||
packed_float3 v3;
|
||||
float f;
|
||||
};
|
||||
|
||||
kernel void main0(device SSBO& _12 [[buffer(0)]])
|
||||
{
|
||||
threadgroup float4 shared_vec4;
|
||||
threadgroup float3 shared_vec3;
|
||||
((device float*)&_12.v)[0u] = 10.0;
|
||||
_12.v3[1u] = 40.0;
|
||||
((device float*)&_12.cm[1])[2u] = 20.0;
|
||||
((device float*)&_12.rm[1u])[3] = 30.0;
|
||||
((threadgroup float*)&shared_vec4)[2u] = 40.0;
|
||||
((threadgroup float*)&shared_vec3)[1u] = 1.0;
|
||||
}
|
||||
|
@ -68,8 +68,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -74,8 +74,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -68,8 +68,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -74,8 +74,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0][1u] = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0][1u];
|
||||
v_29.col_major0[0][1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = v_29.col_major0[0][1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -75,8 +75,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -69,8 +69,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -70,8 +70,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -79,8 +79,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -70,8 +70,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -73,8 +73,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -74,8 +74,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -68,8 +68,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -69,8 +69,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -69,8 +69,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -81,8 +81,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -70,8 +70,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -70,8 +70,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -68,8 +68,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -77,8 +77,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -68,8 +68,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -80,8 +80,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0][1u] = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0][1u];
|
||||
v_29.col_major0[0][1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = v_29.col_major0[0][1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -69,8 +69,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -69,8 +69,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -70,8 +70,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -70,8 +70,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -70,8 +70,8 @@ void copy_columns(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
static inline __attribute__((always_inline))
|
||||
void copy_elements(device SSBOCol& v_29, device SSBORow& v_41)
|
||||
{
|
||||
v_29.col_major0[0].y = v_41.row_major0[1u][0];
|
||||
v_41.row_major0[1u][0] = v_29.col_major0[0].y;
|
||||
((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0];
|
||||
((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u];
|
||||
}
|
||||
|
||||
kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]])
|
||||
|
@ -21,7 +21,7 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
|
||||
|
||||
kernel void main0(device SSBO& _12 [[buffer(0)]])
|
||||
{
|
||||
_12.a.x = 10.0;
|
||||
((device float*)&_12.a)[0u] = 10.0;
|
||||
_12.b = 20.0;
|
||||
}
|
||||
|
||||
|
@ -18,6 +18,6 @@ kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_GlobalInvocationID [[
|
||||
v.z = 70.0;
|
||||
v.w = 90.0;
|
||||
_27.out_data[gl_GlobalInvocationID.x] = v;
|
||||
_27.out_data[gl_GlobalInvocationID.x].y = 20.0;
|
||||
((device float*)&_27.out_data[gl_GlobalInvocationID.x])[1u] = 20.0;
|
||||
}
|
||||
|
||||
|
25
shaders-msl-no-opt/packing/isolated-scalar-access.comp
Normal file
25
shaders-msl-no-opt/packing/isolated-scalar-access.comp
Normal file
@ -0,0 +1,25 @@
|
||||
#version 450
|
||||
|
||||
layout(set = 0, binding = 0) buffer SSBO
|
||||
{
|
||||
vec4 v;
|
||||
mat4 cm;
|
||||
layout(row_major) mat4 rm;
|
||||
|
||||
vec3 v3;
|
||||
float f;
|
||||
};
|
||||
|
||||
shared vec4 shared_vec4;
|
||||
shared vec3 shared_vec3;
|
||||
|
||||
void main()
|
||||
{
|
||||
v.x = 10.0;
|
||||
v3.y = 40.0;
|
||||
cm[1][2] = 20.0;
|
||||
rm[3][1] = 30.0;
|
||||
|
||||
shared_vec4.z = 40.0;
|
||||
shared_vec3.y = 1.0;
|
||||
}
|
@ -7691,6 +7691,23 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
|
||||
}
|
||||
}
|
||||
|
||||
// Internally, access chain implementation can also be used on composites,
|
||||
// ignore scalar access workarounds in this case.
|
||||
StorageClass effective_storage;
|
||||
if (expression_type(base).pointer)
|
||||
effective_storage = get_expression_effective_storage_class(base);
|
||||
else
|
||||
effective_storage = StorageClassGeneric;
|
||||
|
||||
if (!row_major_matrix_needs_conversion)
|
||||
{
|
||||
// On some backends, we might not be able to safely access individual scalars in a vector.
|
||||
// To work around this, we might have to cast the access chain reference to something which can,
|
||||
// like a pointer to scalar, which we can then index into.
|
||||
prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
|
||||
is_packed);
|
||||
}
|
||||
|
||||
if (is_literal && !is_packed && !row_major_matrix_needs_conversion)
|
||||
{
|
||||
expr += ".";
|
||||
@ -7722,6 +7739,12 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
|
||||
expr += "]";
|
||||
}
|
||||
|
||||
if (row_major_matrix_needs_conversion)
|
||||
{
|
||||
prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
|
||||
is_packed);
|
||||
}
|
||||
|
||||
expr += deferred_index;
|
||||
row_major_matrix_needs_conversion = false;
|
||||
|
||||
@ -7752,6 +7775,10 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
|
||||
return expr;
|
||||
}
|
||||
|
||||
void CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &)
|
||||
{
|
||||
}
|
||||
|
||||
string CompilerGLSL::to_flattened_struct_member(const string &basename, const SPIRType &type, uint32_t index)
|
||||
{
|
||||
return sanitize_underscores(join(basename, "_", to_member_name(type, index)));
|
||||
|
@ -565,6 +565,9 @@ protected:
|
||||
std::string access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, AccessChainFlags flags,
|
||||
AccessChainMeta *meta);
|
||||
|
||||
virtual void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type,
|
||||
spv::StorageClass storage, bool &is_packed);
|
||||
|
||||
std::string access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
|
||||
AccessChainMeta *meta = nullptr, bool ptr_chain = false);
|
||||
|
||||
|
@ -5859,6 +5859,23 @@ bool CompilerMSL::is_out_of_bounds_tessellation_level(uint32_t id_lhs)
|
||||
(builtin == BuiltInTessLevelOuter && c->scalar() == 3);
|
||||
}
|
||||
|
||||
void CompilerMSL::prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type,
|
||||
spv::StorageClass storage, bool &is_packed)
|
||||
{
|
||||
// If there is any risk of writes happening with the access chain in question,
|
||||
// and there is a risk of concurrent write access to other components,
|
||||
// we must cast the access chain to a plain pointer to ensure we only access the exact scalars we expect.
|
||||
// The MSL compiler refuses to allow component-level access for any non-packed vector types.
|
||||
if (!is_packed && (storage == StorageClassStorageBuffer || storage == StorageClassWorkgroup))
|
||||
{
|
||||
const char *addr_space = storage == StorageClassWorkgroup ? "threadgroup" : "device";
|
||||
expr = join("((", addr_space, " ", type_to_glsl(type), "*)&", enclose_expression(expr), ")");
|
||||
|
||||
// Further indexing should happen with packed rules (array index, not swizzle).
|
||||
is_packed = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Override for MSL-specific syntax instructions
|
||||
void CompilerMSL::emit_instruction(const Instruction &instruction)
|
||||
{
|
||||
|
@ -793,6 +793,8 @@ protected:
|
||||
|
||||
void analyze_sampled_image_usage();
|
||||
|
||||
void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage,
|
||||
bool &is_packed) override;
|
||||
bool emit_tessellation_access_chain(const uint32_t *ops, uint32_t length);
|
||||
bool emit_tessellation_io_load(uint32_t result_type, uint32_t id, uint32_t ptr);
|
||||
bool is_out_of_bounds_tessellation_level(uint32_t id_lhs);
|
||||
|
Loading…
Reference in New Issue
Block a user