diff --git a/reference/opt/shaders-hlsl/comp/access-chains.comp b/reference/opt/shaders-hlsl/comp/access-chains.comp index 924e9191..c748200b 100644 --- a/reference/opt/shaders-hlsl/comp/access-chains.comp +++ b/reference/opt/shaders-hlsl/comp/access-chains.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer wo : register(u1); ByteAddressBuffer ro : register(t0); diff --git a/reference/opt/shaders-hlsl/comp/address-buffers.comp b/reference/opt/shaders-hlsl/comp/address-buffers.comp index a252fc8a..7f1c7975 100644 --- a/reference/opt/shaders-hlsl/comp/address-buffers.comp +++ b/reference/opt/shaders-hlsl/comp/address-buffers.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer WriteOnly : register(u2); ByteAddressBuffer ReadOnly : register(t0); RWByteAddressBuffer ReadWrite : register(u1); diff --git a/reference/opt/shaders-hlsl/comp/atomic.comp b/reference/opt/shaders-hlsl/comp/atomic.comp index 72e15bf7..e6ff891e 100644 --- a/reference/opt/shaders-hlsl/comp/atomic.comp +++ b/reference/opt/shaders-hlsl/comp/atomic.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer ssbo : register(u2); RWTexture2D uImage : register(u0); RWTexture2D iImage : register(u1); diff --git a/reference/opt/shaders-hlsl/comp/globallycoherent.comp b/reference/opt/shaders-hlsl/comp/globallycoherent.comp index 1637727d..b5f1e377 100644 --- a/reference/opt/shaders-hlsl/comp/globallycoherent.comp +++ b/reference/opt/shaders-hlsl/comp/globallycoherent.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + globallycoherent RWByteAddressBuffer _29 : register(u3); ByteAddressBuffer _33 : register(t2); RWTexture2D uImageIn : register(u0); diff --git a/reference/opt/shaders-hlsl/comp/image.comp b/reference/opt/shaders-hlsl/comp/image.comp index 6c2b58cd..e2f6b0a3 100644 --- a/reference/opt/shaders-hlsl/comp/image.comp +++ b/reference/opt/shaders-hlsl/comp/image.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWTexture2D uImageInF : register(u0); RWTexture2D uImageOutF : register(u1); RWTexture2D uImageInI : register(u2); diff --git a/reference/opt/shaders-hlsl/comp/inverse.comp b/reference/opt/shaders-hlsl/comp/inverse.comp index 3be954a6..f9ec89aa 100644 --- a/reference/opt/shaders-hlsl/comp/inverse.comp +++ b/reference/opt/shaders-hlsl/comp/inverse.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _15 : register(u0); ByteAddressBuffer _20 : register(t1); diff --git a/reference/opt/shaders-hlsl/comp/num-workgroups-alone.comp b/reference/opt/shaders-hlsl/comp/num-workgroups-alone.comp index dee39e3d..dc87dc84 100644 --- a/reference/opt/shaders-hlsl/comp/num-workgroups-alone.comp +++ b/reference/opt/shaders-hlsl/comp/num-workgroups-alone.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _10 : register(u0); cbuffer SPIRV_Cross_NumWorkgroups : register(b0) { diff --git a/reference/opt/shaders-hlsl/comp/num-workgroups-with-builtins.comp b/reference/opt/shaders-hlsl/comp/num-workgroups-with-builtins.comp index 1c98e5e5..2e2ad55f 100644 --- a/reference/opt/shaders-hlsl/comp/num-workgroups-with-builtins.comp +++ b/reference/opt/shaders-hlsl/comp/num-workgroups-with-builtins.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _10 : register(u0); cbuffer SPIRV_Cross_NumWorkgroups : register(b0) { diff --git a/reference/opt/shaders-hlsl/comp/outer-product.comp b/reference/opt/shaders-hlsl/comp/outer-product.comp index 71613d4f..e58c02fe 100644 --- a/reference/opt/shaders-hlsl/comp/outer-product.comp +++ b/reference/opt/shaders-hlsl/comp/outer-product.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _21 : register(u0); ByteAddressBuffer _26 : register(t1); diff --git a/reference/opt/shaders-hlsl/comp/rmw-matrix.comp b/reference/opt/shaders-hlsl/comp/rmw-matrix.comp index ed666693..30ac03f8 100644 --- a/reference/opt/shaders-hlsl/comp/rmw-matrix.comp +++ b/reference/opt/shaders-hlsl/comp/rmw-matrix.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _11 : register(u0); void comp_main() diff --git a/reference/opt/shaders-hlsl/comp/rwbuffer-matrix.comp b/reference/opt/shaders-hlsl/comp/rwbuffer-matrix.comp index 42103c2b..a6621c97 100644 --- a/reference/opt/shaders-hlsl/comp/rwbuffer-matrix.comp +++ b/reference/opt/shaders-hlsl/comp/rwbuffer-matrix.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _28 : register(u0); cbuffer UBO : register(b1) { @@ -8,57 +10,57 @@ cbuffer UBO : register(b1) void comp_main() { - float4x4 _253 = asfloat(uint4x4(_28.Load(64), _28.Load(80), _28.Load(96), _28.Load(112), _28.Load(68), _28.Load(84), _28.Load(100), _28.Load(116), _28.Load(72), _28.Load(88), _28.Load(104), _28.Load(120), _28.Load(76), _28.Load(92), _28.Load(108), _28.Load(124))); - _28.Store4(0, asuint(_253[0])); - _28.Store4(16, asuint(_253[1])); - _28.Store4(32, asuint(_253[2])); - _28.Store4(48, asuint(_253[3])); - float2x2 _256 = asfloat(uint2x2(_28.Load(144), _28.Load(152), _28.Load(148), _28.Load(156))); - _28.Store2(128, asuint(_256[0])); - _28.Store2(136, asuint(_256[1])); - float2x3 _259 = asfloat(uint2x3(_28.Load(192), _28.Load(200), _28.Load(208), _28.Load(196), _28.Load(204), _28.Load(212))); - _28.Store3(160, asuint(_259[0])); - _28.Store3(176, asuint(_259[1])); - float3x2 _262 = asfloat(uint3x2(_28.Load(240), _28.Load(256), _28.Load(244), _28.Load(260), _28.Load(248), _28.Load(264))); - _28.Store2(216, asuint(_262[0])); - _28.Store2(224, asuint(_262[1])); - _28.Store2(232, asuint(_262[2])); - float4x4 _265 = asfloat(uint4x4(_28.Load4(0), _28.Load4(16), _28.Load4(32), _28.Load4(48))); - _28.Store(64, asuint(_265[0].x)); - _28.Store(68, asuint(_265[1].x)); - _28.Store(72, asuint(_265[2].x)); - _28.Store(76, asuint(_265[3].x)); - _28.Store(80, asuint(_265[0].y)); - _28.Store(84, asuint(_265[1].y)); - _28.Store(88, asuint(_265[2].y)); - _28.Store(92, asuint(_265[3].y)); - _28.Store(96, asuint(_265[0].z)); - _28.Store(100, asuint(_265[1].z)); - _28.Store(104, asuint(_265[2].z)); - _28.Store(108, asuint(_265[3].z)); - _28.Store(112, asuint(_265[0].w)); - _28.Store(116, asuint(_265[1].w)); - _28.Store(120, asuint(_265[2].w)); - _28.Store(124, asuint(_265[3].w)); - float2x2 _268 = asfloat(uint2x2(_28.Load2(128), _28.Load2(136))); - _28.Store(144, asuint(_268[0].x)); - _28.Store(148, asuint(_268[1].x)); - _28.Store(152, asuint(_268[0].y)); - _28.Store(156, asuint(_268[1].y)); - float2x3 _271 = asfloat(uint2x3(_28.Load3(160), _28.Load3(176))); - _28.Store(192, asuint(_271[0].x)); - _28.Store(196, asuint(_271[1].x)); - _28.Store(200, asuint(_271[0].y)); - _28.Store(204, asuint(_271[1].y)); - _28.Store(208, asuint(_271[0].z)); - _28.Store(212, asuint(_271[1].z)); - float3x2 _274 = asfloat(uint3x2(_28.Load2(216), _28.Load2(224), _28.Load2(232))); - _28.Store(240, asuint(_274[0].x)); - _28.Store(244, asuint(_274[1].x)); - _28.Store(248, asuint(_274[2].x)); - _28.Store(256, asuint(_274[0].y)); - _28.Store(260, asuint(_274[1].y)); - _28.Store(264, asuint(_274[2].y)); + float4x4 _257 = asfloat(uint4x4(_28.Load(64), _28.Load(80), _28.Load(96), _28.Load(112), _28.Load(68), _28.Load(84), _28.Load(100), _28.Load(116), _28.Load(72), _28.Load(88), _28.Load(104), _28.Load(120), _28.Load(76), _28.Load(92), _28.Load(108), _28.Load(124))); + _28.Store4(0, asuint(_257[0])); + _28.Store4(16, asuint(_257[1])); + _28.Store4(32, asuint(_257[2])); + _28.Store4(48, asuint(_257[3])); + float2x2 _260 = asfloat(uint2x2(_28.Load(144), _28.Load(152), _28.Load(148), _28.Load(156))); + _28.Store2(128, asuint(_260[0])); + _28.Store2(136, asuint(_260[1])); + float2x3 _263 = asfloat(uint2x3(_28.Load(192), _28.Load(200), _28.Load(208), _28.Load(196), _28.Load(204), _28.Load(212))); + _28.Store3(160, asuint(_263[0])); + _28.Store3(176, asuint(_263[1])); + float3x2 _266 = asfloat(uint3x2(_28.Load(240), _28.Load(256), _28.Load(244), _28.Load(260), _28.Load(248), _28.Load(264))); + _28.Store2(216, asuint(_266[0])); + _28.Store2(224, asuint(_266[1])); + _28.Store2(232, asuint(_266[2])); + float4x4 _269 = asfloat(uint4x4(_28.Load4(0), _28.Load4(16), _28.Load4(32), _28.Load4(48))); + _28.Store(64, asuint(_269[0].x)); + _28.Store(68, asuint(_269[1].x)); + _28.Store(72, asuint(_269[2].x)); + _28.Store(76, asuint(_269[3].x)); + _28.Store(80, asuint(_269[0].y)); + _28.Store(84, asuint(_269[1].y)); + _28.Store(88, asuint(_269[2].y)); + _28.Store(92, asuint(_269[3].y)); + _28.Store(96, asuint(_269[0].z)); + _28.Store(100, asuint(_269[1].z)); + _28.Store(104, asuint(_269[2].z)); + _28.Store(108, asuint(_269[3].z)); + _28.Store(112, asuint(_269[0].w)); + _28.Store(116, asuint(_269[1].w)); + _28.Store(120, asuint(_269[2].w)); + _28.Store(124, asuint(_269[3].w)); + float2x2 _272 = asfloat(uint2x2(_28.Load2(128), _28.Load2(136))); + _28.Store(144, asuint(_272[0].x)); + _28.Store(148, asuint(_272[1].x)); + _28.Store(152, asuint(_272[0].y)); + _28.Store(156, asuint(_272[1].y)); + float2x3 _275 = asfloat(uint2x3(_28.Load3(160), _28.Load3(176))); + _28.Store(192, asuint(_275[0].x)); + _28.Store(196, asuint(_275[1].x)); + _28.Store(200, asuint(_275[0].y)); + _28.Store(204, asuint(_275[1].y)); + _28.Store(208, asuint(_275[0].z)); + _28.Store(212, asuint(_275[1].z)); + float3x2 _278 = asfloat(uint3x2(_28.Load2(216), _28.Load2(224), _28.Load2(232))); + _28.Store(240, asuint(_278[0].x)); + _28.Store(244, asuint(_278[1].x)); + _28.Store(248, asuint(_278[2].x)); + _28.Store(256, asuint(_278[0].y)); + _28.Store(260, asuint(_278[1].y)); + _28.Store(264, asuint(_278[2].y)); _28.Store(_68_index0 * 4 + _68_index1 * 16 + 64, asuint(1.0f)); _28.Store(_68_index0 * 4 + _68_index1 * 8 + 144, asuint(2.0f)); _28.Store(_68_index0 * 4 + _68_index1 * 8 + 192, asuint(3.0f)); diff --git a/reference/opt/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp b/reference/opt/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp index 47f2fe41..db2bbe96 100644 --- a/reference/opt/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp +++ b/reference/opt/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _9 : register(u0); void comp_main() diff --git a/reference/opt/shaders-hlsl/comp/spec-constant-op-member-array.comp b/reference/opt/shaders-hlsl/comp/spec-constant-op-member-array.comp index c4537db0..4e7c5e61 100644 --- a/reference/opt/shaders-hlsl/comp/spec-constant-op-member-array.comp +++ b/reference/opt/shaders-hlsl/comp/spec-constant-op-member-array.comp @@ -28,6 +28,7 @@ static const int d = (c + 50); #define SPIRV_CROSS_CONSTANT_ID_3 400 #endif static const int e = SPIRV_CROSS_CONSTANT_ID_3; +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); RWByteAddressBuffer _22 : register(u0); diff --git a/reference/opt/shaders-hlsl/comp/ssbo-array-length.comp b/reference/opt/shaders-hlsl/comp/ssbo-array-length.comp index 2e3df626..82657cac 100644 --- a/reference/opt/shaders-hlsl/comp/ssbo-array-length.comp +++ b/reference/opt/shaders-hlsl/comp/ssbo-array-length.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _11 : register(u1); void comp_main() diff --git a/reference/opt/shaders-hlsl/comp/ssbo-array.comp b/reference/opt/shaders-hlsl/comp/ssbo-array.comp index d8bce8d5..ee202a22 100644 --- a/reference/opt/shaders-hlsl/comp/ssbo-array.comp +++ b/reference/opt/shaders-hlsl/comp/ssbo-array.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + void comp_main() { } diff --git a/reference/opt/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp b/reference/opt/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp index dabc7df9..d89e3abc 100644 --- a/reference/opt/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp +++ b/reference/opt/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _9 : register(u0, space0); static uint4 gl_SubgroupEqMask; diff --git a/reference/opt/shaders-msl/asm/frag/accesschain-invalid-expression.asm.frag b/reference/opt/shaders-msl/asm/frag/accesschain-invalid-expression.asm.frag index 92825795..5255db96 100644 --- a/reference/opt/shaders-msl/asm/frag/accesschain-invalid-expression.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/accesschain-invalid-expression.asm.frag @@ -359,7 +359,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu } float3 _484 = float3(_276); float3 _488; - _488 = ((float3(_423 * fast::max(0.0, dot(_206, MobileDirectionalLight.MobileDirectionalLight_DirectionalLightDirectionAndShadowTransition.xyz))) * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightColor.xyz) * (_270 + float3(_276 * (_439 * fast::min(_446 * _446, 65504.0))))) + ((_481 * float3(fast::clamp(1.0, 0.0, 1.0))) * _484); + _488 = ((float3(_423 * fast::max(0.0, dot(_206, MobileDirectionalLight.MobileDirectionalLight_DirectionalLightDirectionAndShadowTransition.xyz))) * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightColor.xyz) * (_270 + float3(_276 * (_439 * fast::min(_446 * _446, 65504.0))))) + (_481 * _484); float _537; int _491 = 0; for (;;) @@ -393,7 +393,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu break; } } - float3 _567 = (mix(_488 + fast::max(float3(0.0), float3(0.0)), _270 + _484, float3(View.View_UnlitViewmodeMask)) * float3(in.in_var_TEXCOORD7.w)) + in.in_var_TEXCOORD7.xyz; + float3 _567 = (mix(_488, _270 + _484, float3(View.View_UnlitViewmodeMask)) * float3(in.in_var_TEXCOORD7.w)) + in.in_var_TEXCOORD7.xyz; float4 _571 = float4(_567.x, _567.y, _567.z, _137.w); _571.w = fast::min(in.in_var_TEXCOORD8.w, 65500.0); out.out_var_SV_Target0 = _571; diff --git a/reference/opt/shaders-msl/asm/frag/array-copy-error.asm.frag b/reference/opt/shaders-msl/asm/frag/array-copy-error.asm.frag index cf91115f..575a2e28 100644 --- a/reference/opt/shaders-msl/asm/frag/array-copy-error.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/array-copy-error.asm.frag @@ -261,6 +261,12 @@ struct main0_in float4 in_var_TEXCOORD0_0 [[user(locn4)]]; }; +static inline __attribute__((always_inline)) +void _353() +{ + discard_fragment(); +} + fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_PrimitiveDither& PrimitiveDither [[buffer(1)]], constant type_PrimitiveFade& PrimitiveFade [[buffer(2)]], constant type_Material& Material [[buffer(3)]], texture2d Material_Texture2D_0 [[texture(0)]], texture2d Material_Texture2D_3 [[texture(1)]], sampler Material_Texture2D_0Sampler [[sampler(0)]], sampler Material_Texture2D_3Sampler [[sampler(1)]], float4 gl_FragCoord [[position]]) { main0_out out = {}; @@ -334,13 +340,13 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu float _317 = fract(cos(dot(floor(gl_FragCoord.xy), float2(347.834503173828125, 3343.28369140625))) * 1000.0); if ((float((PrimitiveDither.PrimitiveDither_LODFactor < 0.0) ? ((PrimitiveDither.PrimitiveDither_LODFactor + 1.0) > _317) : (PrimitiveDither.PrimitiveDither_LODFactor < _317)) - 0.001000000047497451305389404296875) < 0.0) { - discard_fragment(); + _353(); } } } if ((((_218.z + ((fast::min(fast::max(1.0 - (_218.x * Material.Material_ScalarExpressions[2].y), 0.0), 1.0) + ((_237 + (_242.x * Material.Material_ScalarExpressions[2].z)) * 0.16666667163372039794921875)) + (-0.5))) * ((fast::clamp((View.View_RealTime * PrimitiveFade.PrimitiveFade_FadeTimeScaleBias.x) + PrimitiveFade.PrimitiveFade_FadeTimeScaleBias.y, 0.0, 1.0) + ((_237 + _254.x) * 0.16666667163372039794921875)) + (-0.5))) - 0.33329999446868896484375) < 0.0) { - discard_fragment(); + _353(); } float2 _351 = ((((in.in_var_TEXCOORD6.xy / float2(in.in_var_TEXCOORD6.w)) - View.View_TemporalAAJitter.xy) - ((in.in_var_TEXCOORD7.xy / float2(in.in_var_TEXCOORD7.w)) - View.View_TemporalAAJitter.zw)) * float2(0.2495000064373016357421875)) + float2(0.49999237060546875); out.gl_FragDepth = fast::min(_140.z / (_140.w + (sqrt(dot(_272, _272)) / (fast::max(sqrt(dot(_276, _276)) / sqrt(dot(_279, _279)), sqrt(dot(_284, _284)) / sqrt(dot(_287, _287))) / abs(dot(float3x3(View.View_ViewToTranslatedWorld[0].xyz, View.View_ViewToTranslatedWorld[1].xyz, View.View_ViewToTranslatedWorld[2].xyz) * float3(0.0, 0.0, 1.0), _151))))), gl_FragCoord.z); diff --git a/reference/opt/shaders-msl/asm/frag/global-constant-arrays.asm.frag b/reference/opt/shaders-msl/asm/frag/global-constant-arrays.asm.frag index 404a52cd..b5378b33 100644 --- a/reference/opt/shaders-msl/asm/frag/global-constant-arrays.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/global-constant-arrays.asm.frag @@ -190,13 +190,12 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa _686.y = 1.0; float3 _690 = _686; _690.z = ((1.0 - _681) - _682) / _683; - float _691 = fast::max(0.328999996185302734375, 1.0000000133514319600180897396058e-10); float3 _693 = _391; - _693.x = 0.3127000033855438232421875 / _691; + _693.x = 0.950455963611602783203125; float3 _694 = _693; _694.y = 1.0; float3 _696 = _694; - _696.z = 0.3582999706268310546875 / _691; + _696.z = 1.0890576839447021484375; float3 _697 = _690 * float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); float3 _698 = _696 * float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); float3 _717 = (_599 * ((float3x3(float3(0.41245639324188232421875, 0.3575761020183563232421875, 0.180437505245208740234375), float3(0.21267290413379669189453125, 0.715152204036712646484375, 0.072175003588199615478515625), float3(0.01933390088379383087158203125, 0.119191996753215789794921875, 0.950304090976715087890625)) * ((float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)) * float3x3(float3(_698.x / _697.x, 0.0, 0.0), float3(0.0, _698.y / _697.y, 0.0), float3(0.0, 0.0, _698.z / _697.z))) * float3x3(float3(0.986992895603179931640625, -0.14705429971218109130859375, 0.15996269881725311279296875), float3(0.4323053061962127685546875, 0.518360316753387451171875, 0.049291200935840606689453125), float3(-0.00852870009839534759521484375, 0.0400427989661693572998046875, 0.968486726284027099609375)))) * float3x3(float3(3.2409698963165283203125, -1.53738319873809814453125, -0.4986107647418975830078125), float3(-0.96924364566802978515625, 1.875967502593994140625, 0.0415550582110881805419921875), float3(0.055630080401897430419921875, -0.2039769589900970458984375, 1.05697154998779296875)))) * _547; diff --git a/reference/opt/shaders-msl/asm/frag/padded-float-array-member-defef.asm.frag b/reference/opt/shaders-msl/asm/frag/padded-float-array-member-defef.asm.frag index db493e4e..cdbc7a10 100644 --- a/reference/opt/shaders-msl/asm/frag/padded-float-array-member-defef.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/padded-float-array-member-defef.asm.frag @@ -102,7 +102,7 @@ constant spvUnsafeArray _506 = spvUnsafeArray({ -2.3010299 constant spvUnsafeArray _507 = spvUnsafeArray({ 0.801995217800140380859375, 1.19800484180450439453125, 1.5943000316619873046875, 1.99730002880096435546875, 2.3782999515533447265625, 2.7683999538421630859375, 3.0515000820159912109375, 3.2746293544769287109375, 3.32743072509765625, 3.32743072509765625 }); constant float3 _523 = {}; -constant float3 _3121 = {}; +constant float3 _3123 = {}; struct main0_out { @@ -192,13 +192,12 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa _712.y = 1.0; float3 _716 = _712; _716.z = ((1.0 - _707) - _708) / _709; - float _717 = fast::max(0.328999996185302734375, 1.0000000133514319600180897396058e-10); float3 _719 = _523; - _719.x = 0.3127000033855438232421875 / _717; + _719.x = 0.950455963611602783203125; float3 _720 = _719; _720.y = 1.0; float3 _722 = _720; - _722.z = 0.3582999706268310546875 / _717; + _722.z = 1.0890576839447021484375; float3 _723 = _716 * float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); float3 _724 = _722 * float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); float3 _743 = (_625 * ((float3x3(float3(0.41245639324188232421875, 0.3575761020183563232421875, 0.180437505245208740234375), float3(0.21267290413379669189453125, 0.715152204036712646484375, 0.072175003588199615478515625), float3(0.01933390088379383087158203125, 0.119191996753215789794921875, 0.950304090976715087890625)) * ((float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)) * float3x3(float3(_724.x / _723.x, 0.0, 0.0), float3(0.0, _724.y / _723.y, 0.0), float3(0.0, 0.0, _724.z / _723.z))) * float3x3(float3(0.986992895603179931640625, -0.14705429971218109130859375, 0.15996269881725311279296875), float3(0.4323053061962127685546875, 0.518360316753387451171875, 0.049291200935840606689453125), float3(-0.00852870009839534759521484375, 0.0400427989661693572998046875, 0.968486726284027099609375)))) * float3x3(float3(3.2409698963165283203125, -1.53738319873809814453125, -0.4986107647418975830078125), float3(-0.96924364566802978515625, 1.875967502593994140625, 0.0415550582110881805419921875), float3(0.055630080401897430419921875, -0.2039769589900970458984375, 1.05697154998779296875)))) * _573; @@ -328,7 +327,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa float3 _1215; if (_Globals.ColorShadow_Tint2.w == 0.0) { - float3 _1157 = _3121; + float3 _1157 = _3123; _1157.x = dot(_932, _Globals.ColorMatrixR_ColorCurveCd1.xyz); float3 _1162 = _1157; _1162.y = dot(_932, _Globals.ColorMatrixG_ColorCurveCd3Cm3.xyz); diff --git a/reference/opt/shaders-msl/asm/frag/phi-variable-declaration.asm.frag b/reference/opt/shaders-msl/asm/frag/phi-variable-declaration.asm.frag index cf91115f..575a2e28 100644 --- a/reference/opt/shaders-msl/asm/frag/phi-variable-declaration.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/phi-variable-declaration.asm.frag @@ -261,6 +261,12 @@ struct main0_in float4 in_var_TEXCOORD0_0 [[user(locn4)]]; }; +static inline __attribute__((always_inline)) +void _353() +{ + discard_fragment(); +} + fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_PrimitiveDither& PrimitiveDither [[buffer(1)]], constant type_PrimitiveFade& PrimitiveFade [[buffer(2)]], constant type_Material& Material [[buffer(3)]], texture2d Material_Texture2D_0 [[texture(0)]], texture2d Material_Texture2D_3 [[texture(1)]], sampler Material_Texture2D_0Sampler [[sampler(0)]], sampler Material_Texture2D_3Sampler [[sampler(1)]], float4 gl_FragCoord [[position]]) { main0_out out = {}; @@ -334,13 +340,13 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu float _317 = fract(cos(dot(floor(gl_FragCoord.xy), float2(347.834503173828125, 3343.28369140625))) * 1000.0); if ((float((PrimitiveDither.PrimitiveDither_LODFactor < 0.0) ? ((PrimitiveDither.PrimitiveDither_LODFactor + 1.0) > _317) : (PrimitiveDither.PrimitiveDither_LODFactor < _317)) - 0.001000000047497451305389404296875) < 0.0) { - discard_fragment(); + _353(); } } } if ((((_218.z + ((fast::min(fast::max(1.0 - (_218.x * Material.Material_ScalarExpressions[2].y), 0.0), 1.0) + ((_237 + (_242.x * Material.Material_ScalarExpressions[2].z)) * 0.16666667163372039794921875)) + (-0.5))) * ((fast::clamp((View.View_RealTime * PrimitiveFade.PrimitiveFade_FadeTimeScaleBias.x) + PrimitiveFade.PrimitiveFade_FadeTimeScaleBias.y, 0.0, 1.0) + ((_237 + _254.x) * 0.16666667163372039794921875)) + (-0.5))) - 0.33329999446868896484375) < 0.0) { - discard_fragment(); + _353(); } float2 _351 = ((((in.in_var_TEXCOORD6.xy / float2(in.in_var_TEXCOORD6.w)) - View.View_TemporalAAJitter.xy) - ((in.in_var_TEXCOORD7.xy / float2(in.in_var_TEXCOORD7.w)) - View.View_TemporalAAJitter.zw)) * float2(0.2495000064373016357421875)) + float2(0.49999237060546875); out.gl_FragDepth = fast::min(_140.z / (_140.w + (sqrt(dot(_272, _272)) / (fast::max(sqrt(dot(_276, _276)) / sqrt(dot(_279, _279)), sqrt(dot(_284, _284)) / sqrt(dot(_287, _287))) / abs(dot(float3x3(View.View_ViewToTranslatedWorld[0].xyz, View.View_ViewToTranslatedWorld[1].xyz, View.View_ViewToTranslatedWorld[2].xyz) * float3(0.0, 0.0, 1.0), _151))))), gl_FragCoord.z); diff --git a/reference/opt/shaders-msl/asm/frag/texture-atomics.asm.frag b/reference/opt/shaders-msl/asm/frag/texture-atomics.asm.frag index 036a330e..bedab01a 100644 --- a/reference/opt/shaders-msl/asm/frag/texture-atomics.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/texture-atomics.asm.frag @@ -71,7 +71,7 @@ struct main0_in // Returns buffer coords clamped to storage buffer size #define spvStorageBufferCoords(idx, sizes, type, coord) metal::min((coord), (sizes[(idx)*2] / sizeof(type)) - 1) -fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d RWShadowTileNumCulledObjects [[texture(0)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]]) +fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d RWShadowTileNumCulledObjects [[texture(2)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]]) { main0_out out = {}; uint2 _77 = uint2(gl_FragCoord.xy); diff --git a/reference/opt/shaders-msl/comp/access-private-workgroup-in-function.comp b/reference/opt/shaders-msl/comp/access-private-workgroup-in-function.comp index 59fc03a7..e57b2ea1 100644 --- a/reference/opt/shaders-msl/comp/access-private-workgroup-in-function.comp +++ b/reference/opt/shaders-msl/comp/access-private-workgroup-in-function.comp @@ -3,6 +3,8 @@ using namespace metal; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0() { } diff --git a/reference/opt/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp b/reference/opt/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp index f7757cd1..18cfd68c 100644 --- a/reference/opt/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp +++ b/reference/opt/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp @@ -23,6 +23,8 @@ struct SSBO2 float4 v; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + struct spvDescriptorSetBuffer0 { const device SSBO0* ssbo0 [[id(0)]]; diff --git a/reference/opt/shaders-msl/comp/array-length.comp b/reference/opt/shaders-msl/comp/array-length.comp index 4a408f36..f58cebca 100644 --- a/reference/opt/shaders-msl/comp/array-length.comp +++ b/reference/opt/shaders-msl/comp/array-length.comp @@ -55,7 +55,9 @@ struct SSBO1 spvUnsafeArray bz; }; -kernel void main0(constant uint* spvBufferSizeConstants [[buffer(3)]], device SSBO& _14 [[buffer(0)]], device SSBO1* ssbos_0 [[buffer(1)]], device SSBO1* ssbos_1 [[buffer(2)]]) +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device SSBO& _14 [[buffer(0)]], device SSBO1* ssbos_0 [[buffer(1)]], device SSBO1* ssbos_1 [[buffer(2)]]) { spvUnsafeArray ssbos = { @@ -63,8 +65,8 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(3)]], device SS ssbos_1, }; - constant uint& _14BufferSize = spvBufferSizeConstants[4]; - constant uint* ssbosBufferSize = &spvBufferSizeConstants[5]; + constant uint& _14BufferSize = spvBufferSizeConstants[0]; + constant uint* ssbosBufferSize = &spvBufferSizeConstants[1]; _14.size = uint(int((_14BufferSize - 16) / 16) + int((ssbosBufferSize[1] - 0) / 4)); } diff --git a/reference/opt/shaders-msl/comp/array-length.msl2.argument.discrete.comp b/reference/opt/shaders-msl/comp/array-length.msl2.argument.discrete.comp index 11782d11..f57b8683 100644 --- a/reference/opt/shaders-msl/comp/array-length.msl2.argument.discrete.comp +++ b/reference/opt/shaders-msl/comp/array-length.msl2.argument.discrete.comp @@ -66,6 +66,8 @@ struct SSBO3 spvUnsafeArray bz; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + struct spvDescriptorSetBuffer0 { device SSBO* m_16 [[id(0)]]; @@ -78,7 +80,7 @@ struct spvDescriptorSetBuffer1 constant uint* spvBufferSizeConstants [[id(2)]]; }; -kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], constant uint* spvBufferSizeConstants [[buffer(5)]], device SSBO2& _38 [[buffer(2)]], device SSBO3* ssbos2_0 [[buffer(3)]], device SSBO3* ssbos2_1 [[buffer(4)]]) +kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], constant uint* spvBufferSizeConstants [[buffer(25)]], device SSBO2& _38 [[buffer(2)]], device SSBO3* ssbos2_0 [[buffer(3)]], device SSBO3* ssbos2_1 [[buffer(4)]]) { spvUnsafeArray ssbos2 = { @@ -88,8 +90,8 @@ kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0 constant uint& spvDescriptorSet0_m_16BufferSize = spvDescriptorSet0.spvBufferSizeConstants[0]; constant uint* spvDescriptorSet1_ssbosBufferSize = &spvDescriptorSet1.spvBufferSizeConstants[0]; - constant uint& _38BufferSize = spvBufferSizeConstants[6]; - constant uint* ssbos2BufferSize = &spvBufferSizeConstants[7]; + constant uint& _38BufferSize = spvBufferSizeConstants[2]; + constant uint* ssbos2BufferSize = &spvBufferSizeConstants[3]; (*spvDescriptorSet0.m_16).size = ((uint(int((spvDescriptorSet0_m_16BufferSize - 16) / 16)) + uint(int((spvDescriptorSet1_ssbosBufferSize[1] - 0) / 4))) + uint(int((_38BufferSize - 16) / 16))) + uint(int((ssbos2BufferSize[0] - 0) / 4)); } diff --git a/reference/opt/shaders-msl/comp/atomic.comp b/reference/opt/shaders-msl/comp/atomic.comp index 04721502..fca72bfc 100644 --- a/reference/opt/shaders-msl/comp/atomic.comp +++ b/reference/opt/shaders-msl/comp/atomic.comp @@ -12,6 +12,8 @@ struct SSBO int i32; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& ssbo [[buffer(0)]]) { threadgroup uint shared_u32; diff --git a/reference/opt/shaders-msl/comp/basic.comp b/reference/opt/shaders-msl/comp/basic.comp index 6f1c7971..35cce333 100644 --- a/reference/opt/shaders-msl/comp/basic.comp +++ b/reference/opt/shaders-msl/comp/basic.comp @@ -61,6 +61,8 @@ struct SSBO3 uint counter; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buffer(1)]], device SSBO3& _48 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { float4 _29 = _23.in_data[gl_GlobalInvocationID.x]; diff --git a/reference/opt/shaders-msl/comp/basic.dispatchbase.msl11.comp b/reference/opt/shaders-msl/comp/basic.dispatchbase.msl11.comp index e094e018..f30b382f 100644 --- a/reference/opt/shaders-msl/comp/basic.dispatchbase.msl11.comp +++ b/reference/opt/shaders-msl/comp/basic.dispatchbase.msl11.comp @@ -61,9 +61,11 @@ struct SSBO3 uint counter; }; -kernel void main0(constant uint3& spvDispatchBase [[buffer(3)]], const device SSBO& _27 [[buffer(0)]], device SSBO2& _49 [[buffer(1)]], device SSBO3& _52 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(constant uint3& spvDispatchBase [[buffer(29)]], const device SSBO& _27 [[buffer(0)]], device SSBO2& _49 [[buffer(1)]], device SSBO3& _52 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - gl_GlobalInvocationID += spvDispatchBase * uint3(1, 1, 1); + gl_GlobalInvocationID += spvDispatchBase * gl_WorkGroupSize; float4 _33 = _27.in_data[gl_GlobalInvocationID.x]; if (dot(_33, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875) { diff --git a/reference/opt/shaders-msl/comp/basic.dynamic-buffer.msl2.invalid.comp b/reference/opt/shaders-msl/comp/basic.dynamic-buffer.msl2.invalid.comp index 5fd1a261..20d10220 100644 --- a/reference/opt/shaders-msl/comp/basic.dynamic-buffer.msl2.invalid.comp +++ b/reference/opt/shaders-msl/comp/basic.dynamic-buffer.msl2.invalid.comp @@ -75,7 +75,7 @@ struct spvDescriptorSetBuffer1 spvUnsafeArray, 2>*, 3>, 3>, 2> baz [[id(0)]]; }; -kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], constant uint* spvDynamicOffsets [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], constant uint* spvDynamicOffsets [[buffer(23)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { constant auto& _34 = *(constant Foo* )((constant char* )spvDescriptorSet0.m_34 + spvDynamicOffsets[0]); device spvUnsafeArray, 3>, 2>* baz = diff --git a/reference/opt/shaders-msl/comp/bitcast-16bit-1.invalid.comp b/reference/opt/shaders-msl/comp/bitcast-16bit-1.invalid.comp index 9df6529e..903cb32d 100644 --- a/reference/opt/shaders-msl/comp/bitcast-16bit-1.invalid.comp +++ b/reference/opt/shaders-msl/comp/bitcast-16bit-1.invalid.comp @@ -54,6 +54,8 @@ struct SSBO1 spvUnsafeArray outputs; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO0& _25 [[buffer(0)]], device SSBO1& _39 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { _39.outputs[gl_GlobalInvocationID.x].x = int(as_type(as_type(_25.inputs[gl_GlobalInvocationID.x].xy) + half2(half(1.0)))); diff --git a/reference/opt/shaders-msl/comp/bitcast-16bit-2.invalid.comp b/reference/opt/shaders-msl/comp/bitcast-16bit-2.invalid.comp index 7c09aa61..7c91542e 100644 --- a/reference/opt/shaders-msl/comp/bitcast-16bit-2.invalid.comp +++ b/reference/opt/shaders-msl/comp/bitcast-16bit-2.invalid.comp @@ -59,6 +59,8 @@ struct UBO half4 const0; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO1& _21 [[buffer(0)]], device SSBO0& _29 [[buffer(1)]], constant UBO& _40 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { short2 _47 = as_type(_29.inputs[gl_GlobalInvocationID.x].x) + as_type(_40.const0.xy); diff --git a/reference/opt/shaders-msl/comp/coherent-block.comp b/reference/opt/shaders-msl/comp/coherent-block.comp index 580b9e3e..58bbacb7 100644 --- a/reference/opt/shaders-msl/comp/coherent-block.comp +++ b/reference/opt/shaders-msl/comp/coherent-block.comp @@ -8,6 +8,8 @@ struct SSBO float4 value; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(volatile device SSBO& _10 [[buffer(0)]]) { _10.value = float4(20.0); diff --git a/reference/opt/shaders-msl/comp/coherent-image.comp b/reference/opt/shaders-msl/comp/coherent-image.comp index c6af46b5..50904844 100644 --- a/reference/opt/shaders-msl/comp/coherent-image.comp +++ b/reference/opt/shaders-msl/comp/coherent-image.comp @@ -8,6 +8,8 @@ struct SSBO int4 value; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(volatile device SSBO& _10 [[buffer(0)]], texture2d uImage [[texture(0)]]) { _10.value = uImage.read(uint2(int2(10))); diff --git a/reference/opt/shaders-msl/comp/composite-construct.comp b/reference/opt/shaders-msl/comp/composite-construct.comp index d3541af9..c8da329c 100644 --- a/reference/opt/shaders-msl/comp/composite-construct.comp +++ b/reference/opt/shaders-msl/comp/composite-construct.comp @@ -54,6 +54,8 @@ struct SSBO1 spvUnsafeArray bs; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO0& _16 [[buffer(0)]], device SSBO1& _32 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]) { spvUnsafeArray _37 = { _16.as[gl_GlobalInvocationID.x], _32.bs[gl_GlobalInvocationID.x] }; diff --git a/reference/opt/shaders-msl/comp/copy-array-of-arrays.comp b/reference/opt/shaders-msl/comp/copy-array-of-arrays.comp index ea9693ce..cb396cff 100644 --- a/reference/opt/shaders-msl/comp/copy-array-of-arrays.comp +++ b/reference/opt/shaders-msl/comp/copy-array-of-arrays.comp @@ -10,6 +10,8 @@ struct BUF float c; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device BUF& o [[buffer(0)]]) { o.a = 4; diff --git a/reference/opt/shaders-msl/comp/defer-parens.comp b/reference/opt/shaders-msl/comp/defer-parens.comp index 69a8aab9..8c130e3a 100644 --- a/reference/opt/shaders-msl/comp/defer-parens.comp +++ b/reference/opt/shaders-msl/comp/defer-parens.comp @@ -9,6 +9,8 @@ struct SSBO int index; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _13 [[buffer(0)]]) { float4 _17 = _13.data; diff --git a/reference/opt/shaders-msl/comp/dowhile.comp b/reference/opt/shaders-msl/comp/dowhile.comp index 33b4ec92..d0bc32aa 100644 --- a/reference/opt/shaders-msl/comp/dowhile.comp +++ b/reference/opt/shaders-msl/comp/dowhile.comp @@ -55,21 +55,23 @@ struct SSBO2 spvUnsafeArray out_data; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _28 [[buffer(0)]], device SSBO2& _52 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - float4 _57; - int _58; - _58 = 0; - _57 = _28.in_data[gl_GlobalInvocationID.x]; + float4 _59; + int _60; + _60 = 0; + _59 = _28.in_data[gl_GlobalInvocationID.x]; float4 _42; for (;;) { - _42 = _28.mvp * _57; - int _44 = _58 + 1; + _42 = _28.mvp * _59; + int _44 = _60 + 1; if (_44 < 16) { - _58 = _44; - _57 = _42; + _60 = _44; + _59 = _42; } else { diff --git a/reference/opt/shaders-msl/comp/force-recompile-hooks.swizzle.comp b/reference/opt/shaders-msl/comp/force-recompile-hooks.swizzle.comp index 8c40d5a3..fbf4c4f7 100644 --- a/reference/opt/shaders-msl/comp/force-recompile-hooks.swizzle.comp +++ b/reference/opt/shaders-msl/comp/force-recompile-hooks.swizzle.comp @@ -8,13 +8,11 @@ using namespace metal; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) { return static_cast(x); } -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) { return static_cast(x); } @@ -67,9 +65,9 @@ inline T spvTextureSwizzle(T x, uint s) return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; } -kernel void main0(constant uint* spvSwizzleConstants [[buffer(0)]], texture2d foo [[texture(0)]], texture2d bar [[texture(1)]], sampler fooSmplr [[sampler(0)]]) +kernel void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture2d foo [[texture(0)]], texture2d bar [[texture(1)]], sampler fooSmplr [[sampler(0)]]) { - constant uint& fooSwzl = spvSwizzleConstants[2]; - bar.write(foo.sample(fooSmplr, float2(1.0), level(0.0)), uint2(int2(0))); + constant uint& fooSwzl = spvSwizzleConstants[0]; + bar.write(spvTextureSwizzle(foo.sample(fooSmplr, float2(1.0), level(0.0)), fooSwzl), uint2(int2(0))); } diff --git a/reference/opt/shaders-msl/comp/image-cube-array-load-store.comp b/reference/opt/shaders-msl/comp/image-cube-array-load-store.comp index 1eeaf87c..41c4dfc1 100644 --- a/reference/opt/shaders-msl/comp/image-cube-array-load-store.comp +++ b/reference/opt/shaders-msl/comp/image-cube-array-load-store.comp @@ -3,6 +3,8 @@ using namespace metal; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(texturecube_array uImageIn [[texture(0)]], texturecube_array uImageOut [[texture(1)]]) { uImageOut.write(uImageIn.read(uint2(int3(9, 7, 11).xy), uint(int3(9, 7, 11).z) % 6u, uint(int3(9, 7, 11).z) / 6u), uint2(int3(9, 7, 11).xy), uint(int3(9, 7, 11).z) % 6u, uint(int3(9, 7, 11).z) / 6u); diff --git a/reference/opt/shaders-msl/comp/image.comp b/reference/opt/shaders-msl/comp/image.comp index 447732dd..c875e78d 100644 --- a/reference/opt/shaders-msl/comp/image.comp +++ b/reference/opt/shaders-msl/comp/image.comp @@ -3,6 +3,8 @@ using namespace metal; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(texture2d uImageIn [[texture(0)]], texture2d uImageOut [[texture(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { int2 _23 = int2(gl_GlobalInvocationID.xy); diff --git a/reference/opt/shaders-msl/comp/insert.comp b/reference/opt/shaders-msl/comp/insert.comp index 3c79cfd7..ac4d3d6a 100644 --- a/reference/opt/shaders-msl/comp/insert.comp +++ b/reference/opt/shaders-msl/comp/insert.comp @@ -49,19 +49,21 @@ struct SSBO spvUnsafeArray out_data; }; -constant float4 _52 = {}; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +constant float4 _53 = {}; kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - float4 _45 = _52; - _45.x = 10.0; - float4 _47 = _45; - _47.y = 30.0; - float4 _49 = _47; - _49.z = 70.0; - float4 _51 = _49; - _51.w = 90.0; - _27.out_data[gl_GlobalInvocationID.x] = _51; + float4 _46 = _53; + _46.x = 10.0; + float4 _48 = _46; + _48.y = 30.0; + float4 _50 = _48; + _50.z = 70.0; + float4 _52 = _50; + _52.w = 90.0; + _27.out_data[gl_GlobalInvocationID.x] = _52; _27.out_data[gl_GlobalInvocationID.x].y = 20.0; } diff --git a/reference/opt/shaders-msl/comp/int64.invalid.msl22.comp b/reference/opt/shaders-msl/comp/int64.invalid.msl22.comp index 13304bd0..8a9beaf9 100644 --- a/reference/opt/shaders-msl/comp/int64.invalid.msl22.comp +++ b/reference/opt/shaders-msl/comp/int64.invalid.msl22.comp @@ -9,16 +9,18 @@ struct SSBO uint u32; }; -constant long _162 = {}; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +constant long _164 = {}; kernel void main0(device SSBO& _96 [[buffer(0)]]) { - long4 _137; - ulong4 _141; - _137 = abs((_137 + long4(30l, 40l, 50l, 60l)) + long4(_141 + ulong4(999999999999999999ul, 8888888888888888ul, 77777777777777777ul, 6666666666666666ul))); - _141 += ulong4(long4(999999999999999999l, 8888888888888888l, 77777777777777777l, 6666666666666666l)); - ulong _109 = ulong(_162); - _96.s32 = int(uint(((ulong(_137.x) + _141.y) + _109) + _109)); - _96.u32 = uint(((ulong(_137.y) + _141.z) + ulong(_162 + 1l)) + _109); + long4 _139; + ulong4 _143; + _139 = abs((_139 + long4(30l, 40l, 50l, 60l)) + long4(_143 + ulong4(999999999999999999ul, 8888888888888888ul, 77777777777777777ul, 6666666666666666ul))); + _143 += ulong4(long4(999999999999999999l, 8888888888888888l, 77777777777777777l, 6666666666666666l)); + ulong _109 = ulong(_164); + _96.s32 = int(uint(((ulong(_139.x) + _143.y) + _109) + _109)); + _96.u32 = uint(((ulong(_139.y) + _143.z) + ulong(_164 + 1l)) + _109); } diff --git a/reference/opt/shaders-msl/comp/inverse.comp b/reference/opt/shaders-msl/comp/inverse.comp index 04cfacee..33aed468 100644 --- a/reference/opt/shaders-msl/comp/inverse.comp +++ b/reference/opt/shaders-msl/comp/inverse.comp @@ -19,6 +19,8 @@ struct MatrixIn float4x4 m4in; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + // Returns the determinant of a 2x2 matrix. static inline __attribute__((always_inline)) float spvDet2x2(float a1, float a2, float b1, float b2) diff --git a/reference/opt/shaders-msl/comp/mat3.comp b/reference/opt/shaders-msl/comp/mat3.comp index 42fa871f..aa521dbd 100644 --- a/reference/opt/shaders-msl/comp/mat3.comp +++ b/reference/opt/shaders-msl/comp/mat3.comp @@ -49,6 +49,8 @@ struct SSBO2 spvUnsafeArray out_data; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _22 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { _22.out_data[gl_GlobalInvocationID.x] = float3x3(float3(10.0), float3(20.0), float3(40.0)); diff --git a/reference/opt/shaders-msl/comp/mod.comp b/reference/opt/shaders-msl/comp/mod.comp index eb0f4a5b..30c01a09 100644 --- a/reference/opt/shaders-msl/comp/mod.comp +++ b/reference/opt/shaders-msl/comp/mod.comp @@ -54,6 +54,8 @@ struct SSBO2 spvUnsafeArray out_data; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template inline Tx mod(Tx x, Ty y) diff --git a/reference/opt/shaders-msl/comp/modf.comp b/reference/opt/shaders-msl/comp/modf.comp index a5e8ec1c..fe31b0de 100644 --- a/reference/opt/shaders-msl/comp/modf.comp +++ b/reference/opt/shaders-msl/comp/modf.comp @@ -54,6 +54,8 @@ struct SSBO2 spvUnsafeArray out_data; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _35 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { float4 i; diff --git a/reference/opt/shaders-msl/comp/outer-product.comp b/reference/opt/shaders-msl/comp/outer-product.comp index 8e32db39..e589642d 100644 --- a/reference/opt/shaders-msl/comp/outer-product.comp +++ b/reference/opt/shaders-msl/comp/outer-product.comp @@ -23,6 +23,8 @@ struct ReadSSBO float4 v4; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _21 [[buffer(0)]], const device ReadSSBO& _26 [[buffer(1)]]) { _21.m22 = float2x2(_26.v2 * _26.v2.x, _26.v2 * _26.v2.y); diff --git a/reference/opt/shaders-msl/comp/read-write-only.comp b/reference/opt/shaders-msl/comp/read-write-only.comp index 7547b417..0cf8d8e3 100644 --- a/reference/opt/shaders-msl/comp/read-write-only.comp +++ b/reference/opt/shaders-msl/comp/read-write-only.comp @@ -21,6 +21,8 @@ struct SSBO1 float4 data3; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _10 [[buffer(0)]], const device SSBO0& _15 [[buffer(1)]], device SSBO1& _21 [[buffer(2)]]) { _10.data4 = _15.data0 + _21.data2; diff --git a/reference/opt/shaders-msl/comp/rmw-matrix.comp b/reference/opt/shaders-msl/comp/rmw-matrix.comp index 150db7ed..b53a3a75 100644 --- a/reference/opt/shaders-msl/comp/rmw-matrix.comp +++ b/reference/opt/shaders-msl/comp/rmw-matrix.comp @@ -13,6 +13,8 @@ struct SSBO float4x4 c1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _11 [[buffer(0)]]) { _11.a *= _11.a1; diff --git a/reference/opt/shaders-msl/comp/rmw-opt.comp b/reference/opt/shaders-msl/comp/rmw-opt.comp index 05e1f6f2..f93967da 100644 --- a/reference/opt/shaders-msl/comp/rmw-opt.comp +++ b/reference/opt/shaders-msl/comp/rmw-opt.comp @@ -8,6 +8,8 @@ struct SSBO int a; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _9 [[buffer(0)]]) { _9.a += 10; diff --git a/reference/opt/shaders-msl/comp/scalar-std450-distance-length-normalize.comp b/reference/opt/shaders-msl/comp/scalar-std450-distance-length-normalize.comp index 312a6f94..0ae6e556 100644 --- a/reference/opt/shaders-msl/comp/scalar-std450-distance-length-normalize.comp +++ b/reference/opt/shaders-msl/comp/scalar-std450-distance-length-normalize.comp @@ -12,6 +12,8 @@ struct SSBO float e; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _9 [[buffer(0)]]) { _9.c = abs(_9.a - _9.b); diff --git a/reference/opt/shaders-msl/comp/spec-constant-op-member-array.comp b/reference/opt/shaders-msl/comp/spec-constant-op-member-array.comp index a708edcd..74984fbc 100644 --- a/reference/opt/shaders-msl/comp/spec-constant-op-member-array.comp +++ b/reference/opt/shaders-msl/comp/spec-constant-op-member-array.comp @@ -81,6 +81,7 @@ struct SSBO constant int e_tmp [[function_constant(3)]]; constant int e = is_function_constant_defined(e_tmp) ? e_tmp : 400; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); kernel void main0(device SSBO& _22 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { diff --git a/reference/opt/shaders-msl/comp/storage-buffer-std140-vector-array.comp b/reference/opt/shaders-msl/comp/storage-buffer-std140-vector-array.comp index f495e766..0c8f21d0 100644 --- a/reference/opt/shaders-msl/comp/storage-buffer-std140-vector-array.comp +++ b/reference/opt/shaders-msl/comp/storage-buffer-std140-vector-array.comp @@ -57,32 +57,34 @@ struct SSBO spvUnsafeArray sub; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - spvUnsafeArray _153; - _153[0] = _27.sub[gl_WorkGroupID.x].f[0].x; - _153[1] = _27.sub[gl_WorkGroupID.x].f[1].x; - spvUnsafeArray _154; - _154[0] = _27.sub[gl_WorkGroupID.x].f2[0].xy; - _154[1] = _27.sub[gl_WorkGroupID.x].f2[1].xy; - spvUnsafeArray _155; - _155[0] = _27.sub[gl_WorkGroupID.x].f3[0]; - _155[1] = _27.sub[gl_WorkGroupID.x].f3[1]; - spvUnsafeArray _156; - _156[0] = _27.sub[gl_WorkGroupID.x].f4[0]; - _156[1] = _27.sub[gl_WorkGroupID.x].f4[1]; - _153[gl_GlobalInvocationID.x] += 1.0; - _154[gl_GlobalInvocationID.x] += float2(2.0); - _155[gl_GlobalInvocationID.x] += float3(3.0); - _156[gl_GlobalInvocationID.x] += float4(4.0); - _27.sub[gl_WorkGroupID.x].f[0].x = _153[0]; - _27.sub[gl_WorkGroupID.x].f[1].x = _153[1]; - _27.sub[gl_WorkGroupID.x].f2[0].xy = _154[0]; - _27.sub[gl_WorkGroupID.x].f2[1].xy = _154[1]; - _27.sub[gl_WorkGroupID.x].f3[0] = _155[0]; - _27.sub[gl_WorkGroupID.x].f3[1] = _155[1]; - _27.sub[gl_WorkGroupID.x].f4[0] = _156[0]; - _27.sub[gl_WorkGroupID.x].f4[1] = _156[1]; + spvUnsafeArray _155; + _155[0] = _27.sub[gl_WorkGroupID.x].f[0].x; + _155[1] = _27.sub[gl_WorkGroupID.x].f[1].x; + spvUnsafeArray _156; + _156[0] = _27.sub[gl_WorkGroupID.x].f2[0].xy; + _156[1] = _27.sub[gl_WorkGroupID.x].f2[1].xy; + spvUnsafeArray _157; + _157[0] = _27.sub[gl_WorkGroupID.x].f3[0]; + _157[1] = _27.sub[gl_WorkGroupID.x].f3[1]; + spvUnsafeArray _158; + _158[0] = _27.sub[gl_WorkGroupID.x].f4[0]; + _158[1] = _27.sub[gl_WorkGroupID.x].f4[1]; + _155[gl_GlobalInvocationID.x] += 1.0; + _156[gl_GlobalInvocationID.x] += float2(2.0); + _157[gl_GlobalInvocationID.x] += float3(3.0); + _158[gl_GlobalInvocationID.x] += float4(4.0); + _27.sub[gl_WorkGroupID.x].f[0].x = _155[0]; + _27.sub[gl_WorkGroupID.x].f[1].x = _155[1]; + _27.sub[gl_WorkGroupID.x].f2[0].xy = _156[0]; + _27.sub[gl_WorkGroupID.x].f2[1].xy = _156[1]; + _27.sub[gl_WorkGroupID.x].f3[0] = _157[0]; + _27.sub[gl_WorkGroupID.x].f3[1] = _157[1]; + _27.sub[gl_WorkGroupID.x].f4[0] = _158[0]; + _27.sub[gl_WorkGroupID.x].f4[1] = _158[1]; _27.sub[0].f[0].x += 5.0; _27.sub[0].f2[1].xy += float2(5.0); } diff --git a/reference/opt/shaders-msl/comp/struct-layout.comp b/reference/opt/shaders-msl/comp/struct-layout.comp index 5b2ca82b..53c06911 100644 --- a/reference/opt/shaders-msl/comp/struct-layout.comp +++ b/reference/opt/shaders-msl/comp/struct-layout.comp @@ -59,6 +59,8 @@ struct SSBO spvUnsafeArray in_data; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _23 [[buffer(0)]], const device SSBO& _30 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { _23.out_data[gl_GlobalInvocationID.x].m = _30.in_data[gl_GlobalInvocationID.x].m * _30.in_data[gl_GlobalInvocationID.x].m; diff --git a/reference/opt/shaders-msl/comp/struct-nested.comp b/reference/opt/shaders-msl/comp/struct-nested.comp index 6e3e0631..dabd4058 100644 --- a/reference/opt/shaders-msl/comp/struct-nested.comp +++ b/reference/opt/shaders-msl/comp/struct-nested.comp @@ -59,6 +59,8 @@ struct dstbuffer spvUnsafeArray test; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device dstbuffer& _19 [[buffer(0)]]) { _19.test[0].b.a = 0; diff --git a/reference/opt/shaders-msl/comp/struct-packing.comp b/reference/opt/shaders-msl/comp/struct-packing.comp index 286a1c81..bb991dd5 100644 --- a/reference/opt/shaders-msl/comp/struct-packing.comp +++ b/reference/opt/shaders-msl/comp/struct-packing.comp @@ -159,6 +159,8 @@ struct SSBO0 spvUnsafeArray array; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [[buffer(1)]]) { Content_1 _60 = ssbo_140.content; diff --git a/reference/opt/shaders-msl/comp/torture-loop.comp b/reference/opt/shaders-msl/comp/torture-loop.comp index 463e934b..ffa358c8 100644 --- a/reference/opt/shaders-msl/comp/torture-loop.comp +++ b/reference/opt/shaders-msl/comp/torture-loop.comp @@ -55,29 +55,31 @@ struct SSBO2 spvUnsafeArray out_data; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _24 [[buffer(0)]], device SSBO2& _89 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - float4 _99; - _99 = _24.in_data[gl_GlobalInvocationID.x]; - for (int _93 = 0; (_93 + 1) < 10; ) + float4 _101; + _101 = _24.in_data[gl_GlobalInvocationID.x]; + for (int _95 = 0; (_95 + 1) < 10; ) { - _99 *= 2.0; - _93 += 2; + _101 *= 2.0; + _95 += 2; continue; } - float4 _98; - _98 = _99; - float4 _103; - for (uint _94 = 0u; _94 < 16u; _98 = _103, _94++) + float4 _100; + _100 = _101; + float4 _105; + for (uint _96 = 0u; _96 < 16u; _100 = _105, _96++) { - _103 = _98; - for (uint _100 = 0u; _100 < 30u; ) + _105 = _100; + for (uint _102 = 0u; _102 < 30u; ) { - _103 = _24.mvp * _103; - _100++; + _105 = _24.mvp * _105; + _102++; continue; } } - _89.out_data[gl_GlobalInvocationID.x] = _98; + _89.out_data[gl_GlobalInvocationID.x] = _100; } diff --git a/reference/opt/shaders-msl/comp/type-alias.comp b/reference/opt/shaders-msl/comp/type-alias.comp index aff21aaf..eed6d606 100644 --- a/reference/opt/shaders-msl/comp/type-alias.comp +++ b/reference/opt/shaders-msl/comp/type-alias.comp @@ -69,6 +69,8 @@ struct SSBO2 spvUnsafeArray outputs; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO0& _36 [[buffer(0)]], device SSBO1& _55 [[buffer(1)]], device SSBO2& _66 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { _66.outputs[gl_GlobalInvocationID.x] = _36.s0s[gl_GlobalInvocationID.x].a + _55.s1s[gl_GlobalInvocationID.x].a; diff --git a/reference/opt/shaders-msl/comp/udiv.comp b/reference/opt/shaders-msl/comp/udiv.comp index d7f4034f..0b5cc6a6 100644 --- a/reference/opt/shaders-msl/comp/udiv.comp +++ b/reference/opt/shaders-msl/comp/udiv.comp @@ -54,6 +54,8 @@ struct SSBO spvUnsafeArray inputs; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _10 [[buffer(0)]], device SSBO& _23 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { _10.outputs[gl_GlobalInvocationID.x] = _23.inputs[gl_GlobalInvocationID.x] / 29u; diff --git a/reference/opt/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp b/reference/opt/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp index a37fe519..cea12980 100644 --- a/reference/opt/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp +++ b/reference/opt/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp @@ -91,6 +91,8 @@ struct ResType_7 int4 _m1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBOUint& u [[buffer(0)]], device SSBOInt& i [[buffer(1)]]) { ResType _25; diff --git a/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag b/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag index 25b4c31b..bf919533 100644 --- a/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag +++ b/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag @@ -25,13 +25,11 @@ struct main0_in template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) { return static_cast(x); } -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) { return static_cast(x); } @@ -84,15 +82,15 @@ inline T spvTextureSwizzle(T x, uint s) return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; } -fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant uint* spvSwizzleConstants [[buffer(1)]], texture2d uSampler1 [[texture(0)]], sampler uSampler1Smplr [[sampler(0)]]) +fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant uint* spvSwizzleConstants [[buffer(30)]], texture2d uSampler1 [[texture(0)]], sampler uSampler1Smplr [[sampler(0)]]) { main0_out out = {}; constant uint* spvDescriptorSet0_uSampler0Swzl = &spvDescriptorSet0.spvSwizzleConstants[0]; - constant uint& uSampler1Swzl = spvSwizzleConstants[1]; - out.FragColor = spvDescriptorSet0.uSampler0[2].sample(spvDescriptorSet0.uSampler0Smplr[2], in.vUV); - out.FragColor += uSampler1.sample(uSampler1Smplr, in.vUV); - out.FragColor += spvDescriptorSet0.uSampler0[1].sample(spvDescriptorSet0.uSampler0Smplr[1], in.vUV); - out.FragColor += uSampler1.sample(uSampler1Smplr, in.vUV); + constant uint& uSampler1Swzl = spvSwizzleConstants[0]; + out.FragColor = spvTextureSwizzle(spvDescriptorSet0.uSampler0[2].sample(spvDescriptorSet0.uSampler0Smplr[2], in.vUV), spvDescriptorSet0_uSampler0Swzl[2]); + out.FragColor += spvTextureSwizzle(uSampler1.sample(uSampler1Smplr, in.vUV), uSampler1Swzl); + out.FragColor += spvTextureSwizzle(spvDescriptorSet0.uSampler0[1].sample(spvDescriptorSet0.uSampler0Smplr[1], in.vUV), spvDescriptorSet0_uSampler0Swzl[1]); + out.FragColor += spvTextureSwizzle(uSampler1.sample(uSampler1Smplr, in.vUV), uSampler1Swzl); return out; } diff --git a/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag b/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag index a46a51fd..2f9aef13 100644 --- a/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag +++ b/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag @@ -18,13 +18,11 @@ struct main0_in template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) { return static_cast(x); } -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) { return static_cast(x); } @@ -77,12 +75,12 @@ inline T spvTextureSwizzle(T x, uint s) return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; } -fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvSwizzleConstants [[buffer(0)]], array, 4> uSampler [[texture(0)]], array uSamplerSmplr [[sampler(0)]]) +fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvSwizzleConstants [[buffer(30)]], array, 4> uSampler [[texture(0)]], array uSamplerSmplr [[sampler(0)]]) { main0_out out = {}; - constant uint* uSamplerSwzl = &spvSwizzleConstants[4]; - out.FragColor = uSampler[2].sample(uSamplerSmplr[2], in.vUV); - out.FragColor += uSampler[1].sample(uSamplerSmplr[1], in.vUV); + constant uint* uSamplerSwzl = &spvSwizzleConstants[0]; + out.FragColor = spvTextureSwizzle(uSampler[2].sample(uSamplerSmplr[2], in.vUV), uSamplerSwzl[2]); + out.FragColor += spvTextureSwizzle(uSampler[1].sample(uSamplerSmplr[1], in.vUV), uSamplerSwzl[1]); return out; } diff --git a/reference/opt/shaders-msl/frag/shader-arithmetic-8bit.frag b/reference/opt/shaders-msl/frag/shader-arithmetic-8bit.frag index 451d1a64..8ab83539 100644 --- a/reference/opt/shaders-msl/frag/shader-arithmetic-8bit.frag +++ b/reference/opt/shaders-msl/frag/shader-arithmetic-8bit.frag @@ -78,9 +78,10 @@ fragment main0_out main0(main0_in in [[stage_in]], device SSBO& ssbo [[buffer(0) main0_out out = {}; short _196 = short(10); int _197 = 20; - char2 _198 = as_type(_196); - char4 _199 = as_type(_197); - _196 = as_type(_198); + char2 _201 = as_type(short(10)); + char2 _198 = _201; + char4 _199 = as_type(20); + _196 = as_type(_201); _197 = as_type(_199); ssbo.i8[0] = _199.x; ssbo.i8[1] = _199.y; @@ -88,9 +89,10 @@ fragment main0_out main0(main0_in in [[stage_in]], device SSBO& ssbo [[buffer(0) ssbo.i8[3] = _199.w; ushort _220 = ushort(10); uint _221 = 20u; - uchar2 _222 = as_type(_220); - uchar4 _223 = as_type(_221); - _220 = as_type(_222); + uchar2 _225 = as_type(ushort(10)); + uchar2 _222 = _225; + uchar4 _223 = as_type(20u); + _220 = as_type(_225); _221 = as_type(_223); ssbo.u8[0] = _223.x; ssbo.u8[1] = _223.y; @@ -98,21 +100,34 @@ fragment main0_out main0(main0_in in [[stage_in]], device SSBO& ssbo [[buffer(0) ssbo.u8[3] = _223.w; char4 _246 = char4(in.vColor); char4 _244 = _246; - _244 += char4(registers.i8); - _244 += char4(-40); - _244 += char4(-50); - _244 += char4(char(10), char(20), char(30), char(40)); - _244 += char4(ssbo.i8[4]); - _244 += char4(ubo.i8); - out.FragColorInt = int4(_244); - uchar4 _271 = uchar4(_246); - _271 += uchar4(registers.u8); - _271 += uchar4(216); - _271 += uchar4(206); - _271 += uchar4(uchar(10), uchar(20), uchar(30), uchar(40)); - _271 += uchar4(ssbo.u8[4]); - _271 += uchar4(ubo.u8); - out.FragColorUint = uint4(_271); + char4 _251 = _246 + char4(registers.i8); + _244 = _251; + char4 _254 = _251 + char4(-40); + _244 = _254; + char4 _256 = _254 + char4(-50); + _244 = _256; + char4 _258 = _256 + char4(char(10), char(20), char(30), char(40)); + _244 = _258; + char4 _263 = _258 + char4(ssbo.i8[4]); + _244 = _263; + char4 _268 = _263 + char4(ubo.i8); + _244 = _268; + out.FragColorInt = int4(_268); + uchar4 _274 = uchar4(_246); + uchar4 _271 = _274; + uchar4 _279 = _274 + uchar4(registers.u8); + _271 = _279; + uchar4 _282 = _279 + uchar4(216); + _271 = _282; + uchar4 _284 = _282 + uchar4(206); + _271 = _284; + uchar4 _286 = _284 + uchar4(uchar(10), uchar(20), uchar(30), uchar(40)); + _271 = _286; + uchar4 _291 = _286 + uchar4(ssbo.u8[4]); + _271 = _291; + uchar4 _296 = _291 + uchar4(ubo.u8); + _271 = _296; + out.FragColorUint = uint4(_296); return out; } diff --git a/reference/opt/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp b/reference/opt/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp index 10ced4f3..76b575aa 100644 --- a/reference/opt/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp +++ b/reference/opt/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp @@ -162,6 +162,8 @@ struct SSBO2 packed_rm_float3x2 m2; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO1& ssbo_scalar [[buffer(0)]], device SSBO0& ssbo_140 [[buffer(1)]], device SSBO2& ssbo_scalar2 [[buffer(2)]]) { spvUnsafeArray(ssbo_scalar.content.m0s[0].a[0]) = ssbo_140.content.m0s[0].a[0].xy; diff --git a/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp b/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp index 948806db..71bce630 100644 --- a/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp +++ b/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp @@ -10,6 +10,8 @@ struct SSBO float FragColor; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + inline uint4 spvSubgroupBallot(bool value) { simd_vote vote = simd_ballot(value); diff --git a/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp b/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp index 6d32de69..e5c83eb6 100644 --- a/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp +++ b/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp @@ -8,6 +8,8 @@ struct SSBO float FragColor; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[quadgroups_per_threadgroup]], uint gl_SubgroupID [[quadgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_quadgroup]]) { _9.FragColor = float(gl_NumSubgroups); diff --git a/reference/opt/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag b/reference/opt/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag index d5f48ccd..67895e3e 100644 --- a/reference/opt/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag +++ b/reference/opt/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag @@ -58,7 +58,7 @@ struct main0_in float2 vTex_3 [[user(locn4)]]; }; -fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(0)]], texture2d uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]], uint gl_ViewIndex [[render_target_array_index]]) +fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], texture2d uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]], uint gl_ViewIndex [[render_target_array_index]]) { main0_out out = {}; spvUnsafeArray vTex = {}; diff --git a/reference/opt/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl21.invalid.frag b/reference/opt/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl21.invalid.frag index 429da476..0e0348bf 100644 --- a/reference/opt/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl21.invalid.frag +++ b/reference/opt/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl21.invalid.frag @@ -6,5 +6,6 @@ using namespace metal; fragment void main0() { bool _9 = simd_is_helper_thread(); + bool helper = _9; } diff --git a/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert b/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert index 16473bf0..8164bd23 100644 --- a/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert +++ b/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert @@ -60,7 +60,7 @@ struct main0_in float4 Position [[attribute(0)]]; }; -vertex main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(1)]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]]) +vertex main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]]) { main0_out out = {}; uint gl_ViewIndex = spvViewMask[0] + gl_InstanceIndex % spvViewMask[1]; diff --git a/reference/opt/shaders/comp/cfg.comp b/reference/opt/shaders/comp/cfg.comp index 0b7e0c16..bc1a4178 100644 --- a/reference/opt/shaders/comp/cfg.comp +++ b/reference/opt/shaders/comp/cfg.comp @@ -6,7 +6,7 @@ layout(binding = 0, std430) buffer SSBO float data; } _11; -float _183; +float _187; void main() { @@ -31,14 +31,14 @@ void main() break; } } - float _180; - _180 = _183; - for (int _179 = 0; _179 < 20; ) + float _184; + _184 = _187; + for (int _183 = 0; _183 < 20; ) { - _180 += 10.0; - _179++; + _184 += 10.0; + _183++; continue; } - _11.data = _180; + _11.data = _184; } diff --git a/reference/opt/shaders/comp/dowhile.comp b/reference/opt/shaders/comp/dowhile.comp index 4370ea30..d9a9f77c 100644 --- a/reference/opt/shaders/comp/dowhile.comp +++ b/reference/opt/shaders/comp/dowhile.comp @@ -14,19 +14,19 @@ layout(binding = 1, std430) writeonly buffer SSBO2 void main() { - vec4 _57; - int _58; - _58 = 0; - _57 = _28.in_data[gl_GlobalInvocationID.x]; + vec4 _59; + int _60; + _60 = 0; + _59 = _28.in_data[gl_GlobalInvocationID.x]; vec4 _42; for (;;) { - _42 = _28.mvp * _57; - int _44 = _58 + 1; + _42 = _28.mvp * _59; + int _44 = _60 + 1; if (_44 < 16) { - _58 = _44; - _57 = _42; + _60 = _44; + _59 = _42; } else { diff --git a/reference/opt/shaders/comp/insert.comp b/reference/opt/shaders/comp/insert.comp index 5ff71944..ad394aab 100644 --- a/reference/opt/shaders/comp/insert.comp +++ b/reference/opt/shaders/comp/insert.comp @@ -6,19 +6,19 @@ layout(binding = 0, std430) writeonly buffer SSBO vec4 out_data[]; } _27; -vec4 _52; +vec4 _53; void main() { - vec4 _45 = _52; - _45.x = 10.0; - vec4 _47 = _45; - _47.y = 30.0; - vec4 _49 = _47; - _49.z = 70.0; - vec4 _51 = _49; - _51.w = 90.0; - _27.out_data[gl_GlobalInvocationID.x] = _51; + vec4 _46 = _53; + _46.x = 10.0; + vec4 _48 = _46; + _48.y = 30.0; + vec4 _50 = _48; + _50.z = 70.0; + vec4 _52 = _50; + _52.w = 90.0; + _27.out_data[gl_GlobalInvocationID.x] = _52; _27.out_data[gl_GlobalInvocationID.x].y = 20.0; } diff --git a/reference/opt/shaders/comp/torture-loop.comp b/reference/opt/shaders/comp/torture-loop.comp index 5943966c..9ca2b959 100644 --- a/reference/opt/shaders/comp/torture-loop.comp +++ b/reference/opt/shaders/comp/torture-loop.comp @@ -14,27 +14,27 @@ layout(binding = 1, std430) writeonly buffer SSBO2 void main() { - vec4 _99; - _99 = _24.in_data[gl_GlobalInvocationID.x]; - for (int _93 = 0; (_93 + 1) < 10; ) + vec4 _101; + _101 = _24.in_data[gl_GlobalInvocationID.x]; + for (int _95 = 0; (_95 + 1) < 10; ) { - _99 *= 2.0; - _93 += 2; + _101 *= 2.0; + _95 += 2; continue; } - vec4 _98; - _98 = _99; - vec4 _103; - for (uint _94 = 0u; _94 < 16u; _98 = _103, _94++) + vec4 _100; + _100 = _101; + vec4 _105; + for (uint _96 = 0u; _96 < 16u; _100 = _105, _96++) { - _103 = _98; - for (uint _100 = 0u; _100 < 30u; ) + _105 = _100; + for (uint _102 = 0u; _102 < 30u; ) { - _103 = _24.mvp * _103; - _100++; + _105 = _24.mvp * _105; + _102++; continue; } } - _89.out_data[gl_GlobalInvocationID.x] = _98; + _89.out_data[gl_GlobalInvocationID.x] = _100; } diff --git a/reference/opt/shaders/vulkan/comp/buffer-reference.nocompat.vk.comp.vk b/reference/opt/shaders/vulkan/comp/buffer-reference.nocompat.vk.comp.vk index dfcaac83..31283196 100644 --- a/reference/opt/shaders/vulkan/comp/buffer-reference.nocompat.vk.comp.vk +++ b/reference/opt/shaders/vulkan/comp/buffer-reference.nocompat.vk.comp.vk @@ -19,27 +19,17 @@ layout(set = 0, binding = 0, std430) restrict buffer LinkedList void main() { - Node _45; + Node _112; if (gl_WorkGroupID.x < 4u) { - _45 = _50.head1; + _112 = _50.head1; } else { - _45 = _50.head2; + _112 = _50.head2; } - restrict Node n = _45; - Node param = n.next; - Node param_1 = _50.head1; - Node param_2 = _50.head2; - param.value = param_1.value + param_2.value; - Node param_4 = _50.head1; - Node param_3 = param_4; - n = param_3; - int v = _50.head2.value; - n.value = 20; - n.value = v * 10; - uint64_t uptr = uint64_t(_50.head2.next); - Node unode = Node(uptr); + _112.next.value = _50.head1.value + _50.head2.value; + _50.head1.value = 20; + _50.head1.value = _50.head2.value * 10; } diff --git a/reference/opt/shaders/vulkan/frag/shader-arithmetic-8bit.nocompat.vk.frag.vk b/reference/opt/shaders/vulkan/frag/shader-arithmetic-8bit.nocompat.vk.frag.vk index d09930f3..69558dee 100644 --- a/reference/opt/shaders/vulkan/frag/shader-arithmetic-8bit.nocompat.vk.frag.vk +++ b/reference/opt/shaders/vulkan/frag/shader-arithmetic-8bit.nocompat.vk.frag.vk @@ -30,9 +30,10 @@ void main() { int16_t _196 = 10s; int _197 = 20; - i8vec2 _198 = unpack8(_196); - i8vec4 _199 = unpack8(_197); - _196 = pack16(_198); + i8vec2 _201 = unpack8(10s); + i8vec2 _198 = _201; + i8vec4 _199 = unpack8(20); + _196 = pack16(_201); _197 = pack32(_199); ssbo.i8[0] = _199.x; ssbo.i8[1] = _199.y; @@ -40,9 +41,10 @@ void main() ssbo.i8[3] = _199.w; uint16_t _220 = 10us; uint _221 = 20u; - u8vec2 _222 = unpack8(_220); - u8vec4 _223 = unpack8(_221); - _220 = pack16(_222); + u8vec2 _225 = unpack8(10us); + u8vec2 _222 = _225; + u8vec4 _223 = unpack8(20u); + _220 = pack16(_225); _221 = pack32(_223); ssbo.u8[0] = _223.x; ssbo.u8[1] = _223.y; @@ -50,20 +52,33 @@ void main() ssbo.u8[3] = _223.w; i8vec4 _246 = i8vec4(vColor); i8vec4 _244 = _246; - _244 += i8vec4(registers.i8); - _244 += i8vec4(-40); - _244 += i8vec4(-50); - _244 += i8vec4(int8_t(10), int8_t(20), int8_t(30), int8_t(40)); - _244 += i8vec4(ssbo.i8[4]); - _244 += i8vec4(ubo.i8); - FragColorInt = ivec4(_244); - u8vec4 _271 = u8vec4(_246); - _271 += u8vec4(registers.u8); - _271 += u8vec4(216); - _271 += u8vec4(206); - _271 += u8vec4(uint8_t(10), uint8_t(20), uint8_t(30), uint8_t(40)); - _271 += u8vec4(ssbo.u8[4]); - _271 += u8vec4(ubo.u8); - FragColorUint = uvec4(_271); + i8vec4 _251 = _246 + i8vec4(registers.i8); + _244 = _251; + i8vec4 _254 = _251 + i8vec4(-40); + _244 = _254; + i8vec4 _256 = _254 + i8vec4(-50); + _244 = _256; + i8vec4 _258 = _256 + i8vec4(int8_t(10), int8_t(20), int8_t(30), int8_t(40)); + _244 = _258; + i8vec4 _263 = _258 + i8vec4(ssbo.i8[4]); + _244 = _263; + i8vec4 _268 = _263 + i8vec4(ubo.i8); + _244 = _268; + FragColorInt = ivec4(_268); + u8vec4 _274 = u8vec4(_246); + u8vec4 _271 = _274; + u8vec4 _279 = _274 + u8vec4(registers.u8); + _271 = _279; + u8vec4 _282 = _279 + u8vec4(216); + _271 = _282; + u8vec4 _284 = _282 + u8vec4(206); + _271 = _284; + u8vec4 _286 = _284 + u8vec4(uint8_t(10), uint8_t(20), uint8_t(30), uint8_t(40)); + _271 = _286; + u8vec4 _291 = _286 + u8vec4(ssbo.u8[4]); + _271 = _291; + u8vec4 _296 = _291 + u8vec4(ubo.u8); + _271 = _296; + FragColorUint = uvec4(_296); } diff --git a/reference/shaders-hlsl-no-opt/comp/glsl.std450.fxconly.comp b/reference/shaders-hlsl-no-opt/comp/glsl.std450.fxconly.comp index d697ed4e..7558afaa 100644 --- a/reference/shaders-hlsl-no-opt/comp/glsl.std450.fxconly.comp +++ b/reference/shaders-hlsl-no-opt/comp/glsl.std450.fxconly.comp @@ -4,6 +4,8 @@ struct ResType int _m1; }; +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _19 : register(u0); uint SPIRV_Cross_packHalf2x16(float2 value) diff --git a/reference/shaders-hlsl/comp/access-chains.comp b/reference/shaders-hlsl/comp/access-chains.comp index 924e9191..c748200b 100644 --- a/reference/shaders-hlsl/comp/access-chains.comp +++ b/reference/shaders-hlsl/comp/access-chains.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer wo : register(u1); ByteAddressBuffer ro : register(t0); diff --git a/reference/shaders-hlsl/comp/address-buffers.comp b/reference/shaders-hlsl/comp/address-buffers.comp index a252fc8a..7f1c7975 100644 --- a/reference/shaders-hlsl/comp/address-buffers.comp +++ b/reference/shaders-hlsl/comp/address-buffers.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer WriteOnly : register(u2); ByteAddressBuffer ReadOnly : register(t0); RWByteAddressBuffer ReadWrite : register(u1); diff --git a/reference/shaders-hlsl/comp/atomic.comp b/reference/shaders-hlsl/comp/atomic.comp index 72e15bf7..e6ff891e 100644 --- a/reference/shaders-hlsl/comp/atomic.comp +++ b/reference/shaders-hlsl/comp/atomic.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer ssbo : register(u2); RWTexture2D uImage : register(u0); RWTexture2D iImage : register(u1); diff --git a/reference/shaders-hlsl/comp/globallycoherent.comp b/reference/shaders-hlsl/comp/globallycoherent.comp index 69886256..236f341e 100644 --- a/reference/shaders-hlsl/comp/globallycoherent.comp +++ b/reference/shaders-hlsl/comp/globallycoherent.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + globallycoherent RWByteAddressBuffer _29 : register(u3); ByteAddressBuffer _33 : register(t2); RWTexture2D uImageIn : register(u0); diff --git a/reference/shaders-hlsl/comp/image.comp b/reference/shaders-hlsl/comp/image.comp index c8504e63..89a99409 100644 --- a/reference/shaders-hlsl/comp/image.comp +++ b/reference/shaders-hlsl/comp/image.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWTexture2D uImageInF : register(u0); RWTexture2D uImageOutF : register(u1); RWTexture2D uImageInI : register(u2); diff --git a/reference/shaders-hlsl/comp/inverse.comp b/reference/shaders-hlsl/comp/inverse.comp index 3be954a6..f9ec89aa 100644 --- a/reference/shaders-hlsl/comp/inverse.comp +++ b/reference/shaders-hlsl/comp/inverse.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _15 : register(u0); ByteAddressBuffer _20 : register(t1); diff --git a/reference/shaders-hlsl/comp/num-workgroups-alone.comp b/reference/shaders-hlsl/comp/num-workgroups-alone.comp index dee39e3d..dc87dc84 100644 --- a/reference/shaders-hlsl/comp/num-workgroups-alone.comp +++ b/reference/shaders-hlsl/comp/num-workgroups-alone.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _10 : register(u0); cbuffer SPIRV_Cross_NumWorkgroups : register(b0) { diff --git a/reference/shaders-hlsl/comp/num-workgroups-with-builtins.comp b/reference/shaders-hlsl/comp/num-workgroups-with-builtins.comp index 1c98e5e5..2e2ad55f 100644 --- a/reference/shaders-hlsl/comp/num-workgroups-with-builtins.comp +++ b/reference/shaders-hlsl/comp/num-workgroups-with-builtins.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _10 : register(u0); cbuffer SPIRV_Cross_NumWorkgroups : register(b0) { diff --git a/reference/shaders-hlsl/comp/outer-product.comp b/reference/shaders-hlsl/comp/outer-product.comp index 71613d4f..e58c02fe 100644 --- a/reference/shaders-hlsl/comp/outer-product.comp +++ b/reference/shaders-hlsl/comp/outer-product.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _21 : register(u0); ByteAddressBuffer _26 : register(t1); diff --git a/reference/shaders-hlsl/comp/rmw-matrix.comp b/reference/shaders-hlsl/comp/rmw-matrix.comp index ed666693..30ac03f8 100644 --- a/reference/shaders-hlsl/comp/rmw-matrix.comp +++ b/reference/shaders-hlsl/comp/rmw-matrix.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _11 : register(u0); void comp_main() diff --git a/reference/shaders-hlsl/comp/rwbuffer-matrix.comp b/reference/shaders-hlsl/comp/rwbuffer-matrix.comp index e7982928..197c9a95 100644 --- a/reference/shaders-hlsl/comp/rwbuffer-matrix.comp +++ b/reference/shaders-hlsl/comp/rwbuffer-matrix.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _28 : register(u0); cbuffer UBO : register(b1) { diff --git a/reference/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp b/reference/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp index 47f2fe41..db2bbe96 100644 --- a/reference/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp +++ b/reference/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _9 : register(u0); void comp_main() diff --git a/reference/shaders-hlsl/comp/spec-constant-op-member-array.comp b/reference/shaders-hlsl/comp/spec-constant-op-member-array.comp index c4537db0..4e7c5e61 100644 --- a/reference/shaders-hlsl/comp/spec-constant-op-member-array.comp +++ b/reference/shaders-hlsl/comp/spec-constant-op-member-array.comp @@ -28,6 +28,7 @@ static const int d = (c + 50); #define SPIRV_CROSS_CONSTANT_ID_3 400 #endif static const int e = SPIRV_CROSS_CONSTANT_ID_3; +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); RWByteAddressBuffer _22 : register(u0); diff --git a/reference/shaders-hlsl/comp/ssbo-array-length.comp b/reference/shaders-hlsl/comp/ssbo-array-length.comp index 2e3df626..82657cac 100644 --- a/reference/shaders-hlsl/comp/ssbo-array-length.comp +++ b/reference/shaders-hlsl/comp/ssbo-array-length.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _11 : register(u1); void comp_main() diff --git a/reference/shaders-hlsl/comp/ssbo-array.comp b/reference/shaders-hlsl/comp/ssbo-array.comp index 90927421..dab20325 100644 --- a/reference/shaders-hlsl/comp/ssbo-array.comp +++ b/reference/shaders-hlsl/comp/ssbo-array.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer ssbo0 : register(u0); void comp_main() diff --git a/reference/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp b/reference/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp index b87574f1..0957a708 100644 --- a/reference/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp +++ b/reference/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _9 : register(u0, space0); static uint4 gl_SubgroupEqMask; diff --git a/reference/shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag b/reference/shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag index cb685838..9d71a91e 100644 --- a/reference/shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag +++ b/reference/shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag @@ -15,13 +15,11 @@ uint2 spvTexelBufferCoord(uint tc) template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) { return static_cast(x); } -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) { return static_cast(x); } @@ -134,51 +132,51 @@ inline vec spvGatherCompareSwizzle(const thread Tex& t, sampler s, uint return t.gather_compare(s, spvForward(params)...); } -fragment void main0(constant uint* spvSwizzleConstants [[buffer(0)]], texture1d tex1d [[texture(0)]], texture2d tex2d [[texture(1)]], texture3d tex3d [[texture(2)]], texturecube texCube [[texture(3)]], texture2d_array tex2dArray [[texture(4)]], texturecube_array texCubeArray [[texture(5)]], depth2d depth2d [[texture(6)]], depthcube depthCube [[texture(7)]], depth2d_array depth2dArray [[texture(8)]], depthcube_array depthCubeArray [[texture(9)]], texture2d texBuffer [[texture(10)]], sampler tex1dSamp [[sampler(0)]], sampler tex2dSamp [[sampler(1)]], sampler tex3dSamp [[sampler(2)]], sampler texCubeSamp [[sampler(3)]], sampler tex2dArraySamp [[sampler(4)]], sampler texCubeArraySamp [[sampler(5)]], sampler depth2dSamp [[sampler(6)]], sampler depthCubeSamp [[sampler(7)]], sampler depth2dArraySamp [[sampler(8)]], sampler depthCubeArraySamp [[sampler(9)]]) +fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d tex1d [[texture(0)]], texture2d tex2d [[texture(1)]], texture3d tex3d [[texture(2)]], texturecube texCube [[texture(3)]], texture2d_array tex2dArray [[texture(4)]], texturecube_array texCubeArray [[texture(5)]], depth2d depth2d [[texture(6)]], depthcube depthCube [[texture(7)]], depth2d_array depth2dArray [[texture(8)]], depthcube_array depthCubeArray [[texture(9)]], texture2d texBuffer [[texture(10)]], sampler tex1dSamp [[sampler(0)]], sampler tex2dSamp [[sampler(1)]], sampler tex3dSamp [[sampler(2)]], sampler texCubeSamp [[sampler(3)]], sampler tex2dArraySamp [[sampler(4)]], sampler texCubeArraySamp [[sampler(5)]], sampler depth2dSamp [[sampler(6)]], sampler depthCubeSamp [[sampler(7)]], sampler depth2dArraySamp [[sampler(8)]], sampler depthCubeArraySamp [[sampler(9)]]) { - constant uint& tex1dSwzl = spvSwizzleConstants[11]; - constant uint& tex2dSwzl = spvSwizzleConstants[12]; - constant uint& tex3dSwzl = spvSwizzleConstants[13]; - constant uint& texCubeSwzl = spvSwizzleConstants[14]; - constant uint& tex2dArraySwzl = spvSwizzleConstants[15]; - constant uint& texCubeArraySwzl = spvSwizzleConstants[16]; - constant uint& depth2dSwzl = spvSwizzleConstants[17]; - constant uint& depthCubeSwzl = spvSwizzleConstants[18]; - constant uint& depth2dArraySwzl = spvSwizzleConstants[19]; - constant uint& depthCubeArraySwzl = spvSwizzleConstants[20]; - float4 c = tex1d.sample(tex1dSamp, 0.0); - c = tex2d.sample(tex2dSamp, float2(0.0)); - c = tex3d.sample(tex3dSamp, float3(0.0)); - c = texCube.sample(texCubeSamp, float3(0.0)); - c = tex2dArray.sample(tex2dArraySamp, float3(0.0).xy, uint(round(float3(0.0).z))); - c = texCubeArray.sample(texCubeArraySamp, float4(0.0).xyz, uint(round(float4(0.0).w))); - c.x = depth2d.sample_compare(depth2dSamp, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z); - c.x = depthCube.sample_compare(depthCubeSamp, float4(0.0, 0.0, 0.0, 1.0).xyz, float4(0.0, 0.0, 0.0, 1.0).w); - c.x = depth2dArray.sample_compare(depth2dArraySamp, float4(0.0, 0.0, 0.0, 1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), float4(0.0, 0.0, 0.0, 1.0).w); - c.x = depthCubeArray.sample_compare(depthCubeArraySamp, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0); - c = tex1d.sample(tex1dSamp, float2(0.0, 1.0).x / float2(0.0, 1.0).y); - c = tex2d.sample(tex2dSamp, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z); - c = tex3d.sample(tex3dSamp, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w); + constant uint& tex1dSwzl = spvSwizzleConstants[0]; + constant uint& tex2dSwzl = spvSwizzleConstants[1]; + constant uint& tex3dSwzl = spvSwizzleConstants[2]; + constant uint& texCubeSwzl = spvSwizzleConstants[3]; + constant uint& tex2dArraySwzl = spvSwizzleConstants[4]; + constant uint& texCubeArraySwzl = spvSwizzleConstants[5]; + constant uint& depth2dSwzl = spvSwizzleConstants[6]; + constant uint& depthCubeSwzl = spvSwizzleConstants[7]; + constant uint& depth2dArraySwzl = spvSwizzleConstants[8]; + constant uint& depthCubeArraySwzl = spvSwizzleConstants[9]; + float4 c = spvTextureSwizzle(tex1d.sample(tex1dSamp, 0.0), tex1dSwzl); + c = spvTextureSwizzle(tex2d.sample(tex2dSamp, float2(0.0)), tex2dSwzl); + c = spvTextureSwizzle(tex3d.sample(tex3dSamp, float3(0.0)), tex3dSwzl); + c = spvTextureSwizzle(texCube.sample(texCubeSamp, float3(0.0)), texCubeSwzl); + c = spvTextureSwizzle(tex2dArray.sample(tex2dArraySamp, float3(0.0).xy, uint(round(float3(0.0).z))), tex2dArraySwzl); + c = spvTextureSwizzle(texCubeArray.sample(texCubeArraySamp, float4(0.0).xyz, uint(round(float4(0.0).w))), texCubeArraySwzl); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z), depth2dSwzl); + c.x = spvTextureSwizzle(depthCube.sample_compare(depthCubeSamp, float4(0.0, 0.0, 0.0, 1.0).xyz, float4(0.0, 0.0, 0.0, 1.0).w), depthCubeSwzl); + c.x = spvTextureSwizzle(depth2dArray.sample_compare(depth2dArraySamp, float4(0.0, 0.0, 0.0, 1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), float4(0.0, 0.0, 0.0, 1.0).w), depth2dArraySwzl); + c.x = spvTextureSwizzle(depthCubeArray.sample_compare(depthCubeArraySamp, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0), depthCubeArraySwzl); + c = spvTextureSwizzle(tex1d.sample(tex1dSamp, float2(0.0, 1.0).x / float2(0.0, 1.0).y), tex1dSwzl); + c = spvTextureSwizzle(tex2d.sample(tex2dSamp, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z), tex2dSwzl); + c = spvTextureSwizzle(tex3d.sample(tex3dSamp, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w), tex3dSwzl); float4 _152 = float4(0.0, 0.0, 1.0, 1.0); _152.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = depth2d.sample_compare(depth2dSamp, _152.xy / _152.z, float4(0.0, 0.0, 1.0, 1.0).z / _152.z); - c = tex1d.sample(tex1dSamp, 0.0); - c = tex2d.sample(tex2dSamp, float2(0.0), level(0.0)); - c = tex3d.sample(tex3dSamp, float3(0.0), level(0.0)); - c = texCube.sample(texCubeSamp, float3(0.0), level(0.0)); - c = tex2dArray.sample(tex2dArraySamp, float3(0.0).xy, uint(round(float3(0.0).z)), level(0.0)); - c = texCubeArray.sample(texCubeArraySamp, float4(0.0).xyz, uint(round(float4(0.0).w)), level(0.0)); - c.x = depth2d.sample_compare(depth2dSamp, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z, level(0.0)); - c = tex1d.sample(tex1dSamp, float2(0.0, 1.0).x / float2(0.0, 1.0).y); - c = tex2d.sample(tex2dSamp, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z, level(0.0)); - c = tex3d.sample(tex3dSamp, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w, level(0.0)); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, _152.xy / _152.z, float4(0.0, 0.0, 1.0, 1.0).z / _152.z), depth2dSwzl); + c = spvTextureSwizzle(tex1d.sample(tex1dSamp, 0.0), tex1dSwzl); + c = spvTextureSwizzle(tex2d.sample(tex2dSamp, float2(0.0), level(0.0)), tex2dSwzl); + c = spvTextureSwizzle(tex3d.sample(tex3dSamp, float3(0.0), level(0.0)), tex3dSwzl); + c = spvTextureSwizzle(texCube.sample(texCubeSamp, float3(0.0), level(0.0)), texCubeSwzl); + c = spvTextureSwizzle(tex2dArray.sample(tex2dArraySamp, float3(0.0).xy, uint(round(float3(0.0).z)), level(0.0)), tex2dArraySwzl); + c = spvTextureSwizzle(texCubeArray.sample(texCubeArraySamp, float4(0.0).xyz, uint(round(float4(0.0).w)), level(0.0)), texCubeArraySwzl); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z, level(0.0)), depth2dSwzl); + c = spvTextureSwizzle(tex1d.sample(tex1dSamp, float2(0.0, 1.0).x / float2(0.0, 1.0).y), tex1dSwzl); + c = spvTextureSwizzle(tex2d.sample(tex2dSamp, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z, level(0.0)), tex2dSwzl); + c = spvTextureSwizzle(tex3d.sample(tex3dSamp, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w, level(0.0)), tex3dSwzl); float4 _202 = float4(0.0, 0.0, 1.0, 1.0); _202.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = depth2d.sample_compare(depth2dSamp, _202.xy / _202.z, float4(0.0, 0.0, 1.0, 1.0).z / _202.z, level(0.0)); - c = tex1d.read(uint(0)); - c = tex2d.read(uint2(int2(0)), 0); - c = tex3d.read(uint3(int3(0)), 0); - c = tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, _202.xy / _202.z, float4(0.0, 0.0, 1.0, 1.0).z / _202.z, level(0.0)), depth2dSwzl); + c = spvTextureSwizzle(tex1d.read(uint(0)), tex1dSwzl); + c = spvTextureSwizzle(tex2d.read(uint2(int2(0)), 0), tex2dSwzl); + c = spvTextureSwizzle(tex3d.read(uint3(int3(0)), 0), tex3dSwzl); + c = spvTextureSwizzle(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0), tex2dArraySwzl); c = texBuffer.read(spvTexelBufferCoord(0)); c = spvGatherSwizzle(tex2d, tex2dSamp, tex2dSwzl, component::x, float2(0.0), int2(0)); c = spvGatherSwizzle(texCube, texCubeSamp, texCubeSwzl, component::y, float3(0.0)); diff --git a/reference/shaders-msl-no-opt/comp/glsl.std450.comp b/reference/shaders-msl-no-opt/comp/glsl.std450.comp index abf885c3..e357c64c 100644 --- a/reference/shaders-msl-no-opt/comp/glsl.std450.comp +++ b/reference/shaders-msl-no-opt/comp/glsl.std450.comp @@ -24,6 +24,8 @@ struct ResType int _m1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + // Implementation of the GLSL radians() function template inline T radians(T d) diff --git a/reference/shaders-msl-no-opt/comp/loop.comp b/reference/shaders-msl-no-opt/comp/loop.comp index 1dc1870a..ceb5e6fa 100644 --- a/reference/shaders-msl-no-opt/comp/loop.comp +++ b/reference/shaders-msl-no-opt/comp/loop.comp @@ -55,6 +55,8 @@ struct SSBO2 spvUnsafeArray out_data; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _24 [[buffer(0)]], device SSBO2& _177 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; diff --git a/reference/shaders-msl-no-opt/comp/return.comp b/reference/shaders-msl-no-opt/comp/return.comp index d03b4c12..9ae67e22 100644 --- a/reference/shaders-msl-no-opt/comp/return.comp +++ b/reference/shaders-msl-no-opt/comp/return.comp @@ -49,6 +49,8 @@ struct SSBO2 spvUnsafeArray out_data; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _27 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; diff --git a/reference/shaders-msl-no-opt/frag/texture-access-int.swizzle.frag b/reference/shaders-msl-no-opt/frag/texture-access-int.swizzle.frag index 23cc779c..ff4b8a91 100644 --- a/reference/shaders-msl-no-opt/frag/texture-access-int.swizzle.frag +++ b/reference/shaders-msl-no-opt/frag/texture-access-int.swizzle.frag @@ -15,13 +15,11 @@ uint2 spvTexelBufferCoord(uint tc) template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) { return static_cast(x); } -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) { return static_cast(x); } @@ -111,36 +109,36 @@ inline vec spvGatherSwizzle(const thread Tex& t, sampler s, uint sw, co } } -fragment void main0(constant uint* spvSwizzleConstants [[buffer(0)]], texture1d tex1d [[texture(0)]], texture2d tex2d [[texture(1)]], texture3d tex3d [[texture(2)]], texturecube texCube [[texture(3)]], texture2d_array tex2dArray [[texture(4)]], texturecube_array texCubeArray [[texture(5)]], texture2d texBuffer [[texture(6)]], sampler tex1dSmplr [[sampler(0)]], sampler tex2dSmplr [[sampler(1)]], sampler tex3dSmplr [[sampler(2)]], sampler texCubeSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]]) +fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d tex1d [[texture(0)]], texture2d tex2d [[texture(1)]], texture3d tex3d [[texture(2)]], texturecube texCube [[texture(3)]], texture2d_array tex2dArray [[texture(4)]], texturecube_array texCubeArray [[texture(5)]], texture2d texBuffer [[texture(6)]], sampler tex1dSmplr [[sampler(0)]], sampler tex2dSmplr [[sampler(1)]], sampler tex3dSmplr [[sampler(2)]], sampler texCubeSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]]) { - constant uint& tex1dSwzl = spvSwizzleConstants[7]; - constant uint& tex2dSwzl = spvSwizzleConstants[8]; - constant uint& tex3dSwzl = spvSwizzleConstants[9]; - constant uint& texCubeSwzl = spvSwizzleConstants[10]; - constant uint& tex2dArraySwzl = spvSwizzleConstants[11]; - constant uint& texCubeArraySwzl = spvSwizzleConstants[12]; - float4 c = float4(tex1d.sample(tex1dSmplr, 0.0)); - c = float4(tex2d.sample(tex2dSmplr, float2(0.0))); - c = float4(tex3d.sample(tex3dSmplr, float3(0.0))); - c = float4(texCube.sample(texCubeSmplr, float3(0.0))); - c = float4(tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z)))); - c = float4(texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)))); - c = float4(tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y)); - c = float4(tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z)); - c = float4(tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w)); - c = float4(tex1d.sample(tex1dSmplr, 0.0)); - c = float4(tex2d.sample(tex2dSmplr, float2(0.0), level(0.0))); - c = float4(tex3d.sample(tex3dSmplr, float3(0.0), level(0.0))); - c = float4(texCube.sample(texCubeSmplr, float3(0.0), level(0.0))); - c = float4(tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z)), level(0.0))); - c = float4(texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), level(0.0))); - c = float4(tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y)); - c = float4(tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z, level(0.0))); - c = float4(tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w, level(0.0))); - c = float4(tex1d.read(uint(0))); - c = float4(tex2d.read(uint2(int2(0)), 0)); - c = float4(tex3d.read(uint3(int3(0)), 0)); - c = float4(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0)); + constant uint& tex1dSwzl = spvSwizzleConstants[0]; + constant uint& tex2dSwzl = spvSwizzleConstants[1]; + constant uint& tex3dSwzl = spvSwizzleConstants[2]; + constant uint& texCubeSwzl = spvSwizzleConstants[3]; + constant uint& tex2dArraySwzl = spvSwizzleConstants[4]; + constant uint& texCubeArraySwzl = spvSwizzleConstants[5]; + float4 c = float4(spvTextureSwizzle(tex1d.sample(tex1dSmplr, 0.0), tex1dSwzl)); + c = float4(spvTextureSwizzle(tex2d.sample(tex2dSmplr, float2(0.0)), tex2dSwzl)); + c = float4(spvTextureSwizzle(tex3d.sample(tex3dSmplr, float3(0.0)), tex3dSwzl)); + c = float4(spvTextureSwizzle(texCube.sample(texCubeSmplr, float3(0.0)), texCubeSwzl)); + c = float4(spvTextureSwizzle(tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z))), tex2dArraySwzl)); + c = float4(spvTextureSwizzle(texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w))), texCubeArraySwzl)); + c = float4(spvTextureSwizzle(tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), tex1dSwzl)); + c = float4(spvTextureSwizzle(tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z), tex2dSwzl)); + c = float4(spvTextureSwizzle(tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w), tex3dSwzl)); + c = float4(spvTextureSwizzle(tex1d.sample(tex1dSmplr, 0.0), tex1dSwzl)); + c = float4(spvTextureSwizzle(tex2d.sample(tex2dSmplr, float2(0.0), level(0.0)), tex2dSwzl)); + c = float4(spvTextureSwizzle(tex3d.sample(tex3dSmplr, float3(0.0), level(0.0)), tex3dSwzl)); + c = float4(spvTextureSwizzle(texCube.sample(texCubeSmplr, float3(0.0), level(0.0)), texCubeSwzl)); + c = float4(spvTextureSwizzle(tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z)), level(0.0)), tex2dArraySwzl)); + c = float4(spvTextureSwizzle(texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), level(0.0)), texCubeArraySwzl)); + c = float4(spvTextureSwizzle(tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), tex1dSwzl)); + c = float4(spvTextureSwizzle(tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z, level(0.0)), tex2dSwzl)); + c = float4(spvTextureSwizzle(tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w, level(0.0)), tex3dSwzl)); + c = float4(spvTextureSwizzle(tex1d.read(uint(0)), tex1dSwzl)); + c = float4(spvTextureSwizzle(tex2d.read(uint2(int2(0)), 0), tex2dSwzl)); + c = float4(spvTextureSwizzle(tex3d.read(uint3(int3(0)), 0), tex3dSwzl)); + c = float4(spvTextureSwizzle(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0), tex2dArraySwzl)); c = float4(texBuffer.read(spvTexelBufferCoord(0))); c = float4(spvGatherSwizzle(tex2d, tex2dSmplr, tex2dSwzl, component::x, float2(0.0), int2(0))); c = float4(spvGatherSwizzle(texCube, texCubeSmplr, texCubeSwzl, component::y, float3(0.0))); diff --git a/reference/shaders-msl-no-opt/frag/texture-access-leaf.swizzle.frag b/reference/shaders-msl-no-opt/frag/texture-access-leaf.swizzle.frag index d5f38810..730728c1 100644 --- a/reference/shaders-msl-no-opt/frag/texture-access-leaf.swizzle.frag +++ b/reference/shaders-msl-no-opt/frag/texture-access-leaf.swizzle.frag @@ -15,13 +15,11 @@ uint2 spvTexelBufferCoord(uint tc) template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) { return static_cast(x); } -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) { return static_cast(x); } @@ -137,39 +135,39 @@ inline vec spvGatherCompareSwizzle(const thread Tex& t, sampler s, uint static inline __attribute__((always_inline)) float4 doSwizzle(thread texture1d tex1d, thread const sampler tex1dSmplr, constant uint& tex1dSwzl, thread texture2d tex2d, thread const sampler tex2dSmplr, constant uint& tex2dSwzl, thread texture3d tex3d, thread const sampler tex3dSmplr, constant uint& tex3dSwzl, thread texturecube texCube, thread const sampler texCubeSmplr, constant uint& texCubeSwzl, thread texture2d_array tex2dArray, thread const sampler tex2dArraySmplr, constant uint& tex2dArraySwzl, thread texturecube_array texCubeArray, thread const sampler texCubeArraySmplr, constant uint& texCubeArraySwzl, thread depth2d depth2d, thread const sampler depth2dSmplr, constant uint& depth2dSwzl, thread depthcube depthCube, thread const sampler depthCubeSmplr, constant uint& depthCubeSwzl, thread depth2d_array depth2dArray, thread const sampler depth2dArraySmplr, constant uint& depth2dArraySwzl, thread depthcube_array depthCubeArray, thread const sampler depthCubeArraySmplr, constant uint& depthCubeArraySwzl, thread texture2d texBuffer) { - float4 c = tex1d.sample(tex1dSmplr, 0.0); - c = tex2d.sample(tex2dSmplr, float2(0.0)); - c = tex3d.sample(tex3dSmplr, float3(0.0)); - c = texCube.sample(texCubeSmplr, float3(0.0)); - c = tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z))); - c = texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w))); - c.x = depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z); - c.x = depthCube.sample_compare(depthCubeSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz, float4(0.0, 0.0, 0.0, 1.0).w); - c.x = depth2dArray.sample_compare(depth2dArraySmplr, float4(0.0, 0.0, 0.0, 1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), float4(0.0, 0.0, 0.0, 1.0).w); - c.x = depthCubeArray.sample_compare(depthCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0); - c = tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y); - c = tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z); - c = tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w); + float4 c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, 0.0), tex1dSwzl); + c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float2(0.0)), tex2dSwzl); + c = spvTextureSwizzle(tex3d.sample(tex3dSmplr, float3(0.0)), tex3dSwzl); + c = spvTextureSwizzle(texCube.sample(texCubeSmplr, float3(0.0)), texCubeSwzl); + c = spvTextureSwizzle(tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z))), tex2dArraySwzl); + c = spvTextureSwizzle(texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w))), texCubeArraySwzl); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z), depth2dSwzl); + c.x = spvTextureSwizzle(depthCube.sample_compare(depthCubeSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz, float4(0.0, 0.0, 0.0, 1.0).w), depthCubeSwzl); + c.x = spvTextureSwizzle(depth2dArray.sample_compare(depth2dArraySmplr, float4(0.0, 0.0, 0.0, 1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), float4(0.0, 0.0, 0.0, 1.0).w), depth2dArraySwzl); + c.x = spvTextureSwizzle(depthCubeArray.sample_compare(depthCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0), depthCubeArraySwzl); + c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), tex1dSwzl); + c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z), tex2dSwzl); + c = spvTextureSwizzle(tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w), tex3dSwzl); float4 _103 = float4(0.0, 0.0, 1.0, 1.0); _103.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = depth2d.sample_compare(depth2dSmplr, _103.xy / _103.z, float4(0.0, 0.0, 1.0, 1.0).z / _103.z); - c = tex1d.sample(tex1dSmplr, 0.0); - c = tex2d.sample(tex2dSmplr, float2(0.0), level(0.0)); - c = tex3d.sample(tex3dSmplr, float3(0.0), level(0.0)); - c = texCube.sample(texCubeSmplr, float3(0.0), level(0.0)); - c = tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z)), level(0.0)); - c = texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), level(0.0)); - c.x = depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z, level(0.0)); - c = tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y); - c = tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z, level(0.0)); - c = tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w, level(0.0)); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, _103.xy / _103.z, float4(0.0, 0.0, 1.0, 1.0).z / _103.z), depth2dSwzl); + c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, 0.0), tex1dSwzl); + c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float2(0.0), level(0.0)), tex2dSwzl); + c = spvTextureSwizzle(tex3d.sample(tex3dSmplr, float3(0.0), level(0.0)), tex3dSwzl); + c = spvTextureSwizzle(texCube.sample(texCubeSmplr, float3(0.0), level(0.0)), texCubeSwzl); + c = spvTextureSwizzle(tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z)), level(0.0)), tex2dArraySwzl); + c = spvTextureSwizzle(texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), level(0.0)), texCubeArraySwzl); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z, level(0.0)), depth2dSwzl); + c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), tex1dSwzl); + c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z, level(0.0)), tex2dSwzl); + c = spvTextureSwizzle(tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w, level(0.0)), tex3dSwzl); float4 _131 = float4(0.0, 0.0, 1.0, 1.0); _131.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = depth2d.sample_compare(depth2dSmplr, _131.xy / _131.z, float4(0.0, 0.0, 1.0, 1.0).z / _131.z, level(0.0)); - c = tex1d.read(uint(0)); - c = tex2d.read(uint2(int2(0)), 0); - c = tex3d.read(uint3(int3(0)), 0); - c = tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, _131.xy / _131.z, float4(0.0, 0.0, 1.0, 1.0).z / _131.z, level(0.0)), depth2dSwzl); + c = spvTextureSwizzle(tex1d.read(uint(0)), tex1dSwzl); + c = spvTextureSwizzle(tex2d.read(uint2(int2(0)), 0), tex2dSwzl); + c = spvTextureSwizzle(tex3d.read(uint3(int3(0)), 0), tex3dSwzl); + c = spvTextureSwizzle(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0), tex2dArraySwzl); c = texBuffer.read(spvTexelBufferCoord(0)); c = spvGatherSwizzle(tex2d, tex2dSmplr, tex2dSwzl, component::x, float2(0.0), int2(0)); c = spvGatherSwizzle(texCube, texCubeSmplr, texCubeSwzl, component::y, float3(0.0)); @@ -182,18 +180,18 @@ float4 doSwizzle(thread texture1d tex1d, thread const sampler tex1dSmplr, return c; } -fragment void main0(constant uint* spvSwizzleConstants [[buffer(0)]], texture1d tex1d [[texture(0)]], texture2d tex2d [[texture(1)]], texture3d tex3d [[texture(2)]], texturecube texCube [[texture(3)]], texture2d_array tex2dArray [[texture(4)]], texturecube_array texCubeArray [[texture(5)]], depth2d depth2d [[texture(6)]], depthcube depthCube [[texture(7)]], depth2d_array depth2dArray [[texture(8)]], depthcube_array depthCubeArray [[texture(9)]], texture2d texBuffer [[texture(10)]], sampler tex1dSmplr [[sampler(0)]], sampler tex2dSmplr [[sampler(1)]], sampler tex3dSmplr [[sampler(2)]], sampler texCubeSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]], sampler depth2dSmplr [[sampler(6)]], sampler depthCubeSmplr [[sampler(7)]], sampler depth2dArraySmplr [[sampler(8)]], sampler depthCubeArraySmplr [[sampler(9)]]) +fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d tex1d [[texture(0)]], texture2d tex2d [[texture(1)]], texture3d tex3d [[texture(2)]], texturecube texCube [[texture(3)]], texture2d_array tex2dArray [[texture(4)]], texturecube_array texCubeArray [[texture(5)]], depth2d depth2d [[texture(6)]], depthcube depthCube [[texture(7)]], depth2d_array depth2dArray [[texture(8)]], depthcube_array depthCubeArray [[texture(9)]], texture2d texBuffer [[texture(10)]], sampler tex1dSmplr [[sampler(0)]], sampler tex2dSmplr [[sampler(1)]], sampler tex3dSmplr [[sampler(2)]], sampler texCubeSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]], sampler depth2dSmplr [[sampler(6)]], sampler depthCubeSmplr [[sampler(7)]], sampler depth2dArraySmplr [[sampler(8)]], sampler depthCubeArraySmplr [[sampler(9)]]) { - constant uint& tex1dSwzl = spvSwizzleConstants[11]; - constant uint& tex2dSwzl = spvSwizzleConstants[12]; - constant uint& tex3dSwzl = spvSwizzleConstants[13]; - constant uint& texCubeSwzl = spvSwizzleConstants[14]; - constant uint& tex2dArraySwzl = spvSwizzleConstants[15]; - constant uint& texCubeArraySwzl = spvSwizzleConstants[16]; - constant uint& depth2dSwzl = spvSwizzleConstants[17]; - constant uint& depthCubeSwzl = spvSwizzleConstants[18]; - constant uint& depth2dArraySwzl = spvSwizzleConstants[19]; - constant uint& depthCubeArraySwzl = spvSwizzleConstants[20]; + constant uint& tex1dSwzl = spvSwizzleConstants[0]; + constant uint& tex2dSwzl = spvSwizzleConstants[1]; + constant uint& tex3dSwzl = spvSwizzleConstants[2]; + constant uint& texCubeSwzl = spvSwizzleConstants[3]; + constant uint& tex2dArraySwzl = spvSwizzleConstants[4]; + constant uint& texCubeArraySwzl = spvSwizzleConstants[5]; + constant uint& depth2dSwzl = spvSwizzleConstants[6]; + constant uint& depthCubeSwzl = spvSwizzleConstants[7]; + constant uint& depth2dArraySwzl = spvSwizzleConstants[8]; + constant uint& depthCubeArraySwzl = spvSwizzleConstants[9]; float4 c = doSwizzle(tex1d, tex1dSmplr, tex1dSwzl, tex2d, tex2dSmplr, tex2dSwzl, tex3d, tex3dSmplr, tex3dSwzl, texCube, texCubeSmplr, texCubeSwzl, tex2dArray, tex2dArraySmplr, tex2dArraySwzl, texCubeArray, texCubeArraySmplr, texCubeArraySwzl, depth2d, depth2dSmplr, depth2dSwzl, depthCube, depthCubeSmplr, depthCubeSwzl, depth2dArray, depth2dArraySmplr, depth2dArraySwzl, depthCubeArray, depthCubeArraySmplr, depthCubeArraySwzl, texBuffer); } diff --git a/reference/shaders-msl-no-opt/frag/texture-access-uint.swizzle.frag b/reference/shaders-msl-no-opt/frag/texture-access-uint.swizzle.frag index 39b586bd..0ec278f9 100644 --- a/reference/shaders-msl-no-opt/frag/texture-access-uint.swizzle.frag +++ b/reference/shaders-msl-no-opt/frag/texture-access-uint.swizzle.frag @@ -15,13 +15,11 @@ uint2 spvTexelBufferCoord(uint tc) template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) { return static_cast(x); } -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) { return static_cast(x); } @@ -111,36 +109,36 @@ inline vec spvGatherSwizzle(const thread Tex& t, sampler s, uint sw, co } } -fragment void main0(constant uint* spvSwizzleConstants [[buffer(0)]], texture1d tex1d [[texture(0)]], texture2d tex2d [[texture(1)]], texture3d tex3d [[texture(2)]], texturecube texCube [[texture(3)]], texture2d_array tex2dArray [[texture(4)]], texturecube_array texCubeArray [[texture(5)]], texture2d texBuffer [[texture(6)]], sampler tex1dSmplr [[sampler(0)]], sampler tex2dSmplr [[sampler(1)]], sampler tex3dSmplr [[sampler(2)]], sampler texCubeSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]]) +fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d tex1d [[texture(0)]], texture2d tex2d [[texture(1)]], texture3d tex3d [[texture(2)]], texturecube texCube [[texture(3)]], texture2d_array tex2dArray [[texture(4)]], texturecube_array texCubeArray [[texture(5)]], texture2d texBuffer [[texture(6)]], sampler tex1dSmplr [[sampler(0)]], sampler tex2dSmplr [[sampler(1)]], sampler tex3dSmplr [[sampler(2)]], sampler texCubeSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]]) { - constant uint& tex1dSwzl = spvSwizzleConstants[7]; - constant uint& tex2dSwzl = spvSwizzleConstants[8]; - constant uint& tex3dSwzl = spvSwizzleConstants[9]; - constant uint& texCubeSwzl = spvSwizzleConstants[10]; - constant uint& tex2dArraySwzl = spvSwizzleConstants[11]; - constant uint& texCubeArraySwzl = spvSwizzleConstants[12]; - float4 c = float4(tex1d.sample(tex1dSmplr, 0.0)); - c = float4(tex2d.sample(tex2dSmplr, float2(0.0))); - c = float4(tex3d.sample(tex3dSmplr, float3(0.0))); - c = float4(texCube.sample(texCubeSmplr, float3(0.0))); - c = float4(tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z)))); - c = float4(texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)))); - c = float4(tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y)); - c = float4(tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z)); - c = float4(tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w)); - c = float4(tex1d.sample(tex1dSmplr, 0.0)); - c = float4(tex2d.sample(tex2dSmplr, float2(0.0), level(0.0))); - c = float4(tex3d.sample(tex3dSmplr, float3(0.0), level(0.0))); - c = float4(texCube.sample(texCubeSmplr, float3(0.0), level(0.0))); - c = float4(tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z)), level(0.0))); - c = float4(texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), level(0.0))); - c = float4(tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y)); - c = float4(tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z, level(0.0))); - c = float4(tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w, level(0.0))); - c = float4(tex1d.read(uint(0))); - c = float4(tex2d.read(uint2(int2(0)), 0)); - c = float4(tex3d.read(uint3(int3(0)), 0)); - c = float4(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0)); + constant uint& tex1dSwzl = spvSwizzleConstants[0]; + constant uint& tex2dSwzl = spvSwizzleConstants[1]; + constant uint& tex3dSwzl = spvSwizzleConstants[2]; + constant uint& texCubeSwzl = spvSwizzleConstants[3]; + constant uint& tex2dArraySwzl = spvSwizzleConstants[4]; + constant uint& texCubeArraySwzl = spvSwizzleConstants[5]; + float4 c = float4(spvTextureSwizzle(tex1d.sample(tex1dSmplr, 0.0), tex1dSwzl)); + c = float4(spvTextureSwizzle(tex2d.sample(tex2dSmplr, float2(0.0)), tex2dSwzl)); + c = float4(spvTextureSwizzle(tex3d.sample(tex3dSmplr, float3(0.0)), tex3dSwzl)); + c = float4(spvTextureSwizzle(texCube.sample(texCubeSmplr, float3(0.0)), texCubeSwzl)); + c = float4(spvTextureSwizzle(tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z))), tex2dArraySwzl)); + c = float4(spvTextureSwizzle(texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w))), texCubeArraySwzl)); + c = float4(spvTextureSwizzle(tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), tex1dSwzl)); + c = float4(spvTextureSwizzle(tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z), tex2dSwzl)); + c = float4(spvTextureSwizzle(tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w), tex3dSwzl)); + c = float4(spvTextureSwizzle(tex1d.sample(tex1dSmplr, 0.0), tex1dSwzl)); + c = float4(spvTextureSwizzle(tex2d.sample(tex2dSmplr, float2(0.0), level(0.0)), tex2dSwzl)); + c = float4(spvTextureSwizzle(tex3d.sample(tex3dSmplr, float3(0.0), level(0.0)), tex3dSwzl)); + c = float4(spvTextureSwizzle(texCube.sample(texCubeSmplr, float3(0.0), level(0.0)), texCubeSwzl)); + c = float4(spvTextureSwizzle(tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z)), level(0.0)), tex2dArraySwzl)); + c = float4(spvTextureSwizzle(texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), level(0.0)), texCubeArraySwzl)); + c = float4(spvTextureSwizzle(tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), tex1dSwzl)); + c = float4(spvTextureSwizzle(tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z, level(0.0)), tex2dSwzl)); + c = float4(spvTextureSwizzle(tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w, level(0.0)), tex3dSwzl)); + c = float4(spvTextureSwizzle(tex1d.read(uint(0)), tex1dSwzl)); + c = float4(spvTextureSwizzle(tex2d.read(uint2(int2(0)), 0), tex2dSwzl)); + c = float4(spvTextureSwizzle(tex3d.read(uint3(int3(0)), 0), tex3dSwzl)); + c = float4(spvTextureSwizzle(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0), tex2dArraySwzl)); c = float4(texBuffer.read(spvTexelBufferCoord(0))); c = float4(spvGatherSwizzle(tex2d, tex2dSmplr, tex2dSwzl, component::x, float2(0.0), int2(0))); c = float4(spvGatherSwizzle(texCube, texCubeSmplr, texCubeSwzl, component::y, float3(0.0))); diff --git a/reference/shaders-msl-no-opt/frag/texture-access.swizzle.frag b/reference/shaders-msl-no-opt/frag/texture-access.swizzle.frag index 3ebe1647..c31d5d7d 100644 --- a/reference/shaders-msl-no-opt/frag/texture-access.swizzle.frag +++ b/reference/shaders-msl-no-opt/frag/texture-access.swizzle.frag @@ -15,13 +15,11 @@ uint2 spvTexelBufferCoord(uint tc) template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) { return static_cast(x); } -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) { return static_cast(x); } @@ -134,51 +132,51 @@ inline vec spvGatherCompareSwizzle(const thread Tex& t, sampler s, uint return t.gather_compare(s, spvForward(params)...); } -fragment void main0(constant uint* spvSwizzleConstants [[buffer(0)]], texture1d tex1d [[texture(0)]], texture2d tex2d [[texture(1)]], texture3d tex3d [[texture(2)]], texturecube texCube [[texture(3)]], texture2d_array tex2dArray [[texture(4)]], texturecube_array texCubeArray [[texture(5)]], depth2d depth2d [[texture(6)]], depthcube depthCube [[texture(7)]], depth2d_array depth2dArray [[texture(8)]], depthcube_array depthCubeArray [[texture(9)]], texture2d texBuffer [[texture(10)]], sampler tex1dSmplr [[sampler(0)]], sampler tex2dSmplr [[sampler(1)]], sampler tex3dSmplr [[sampler(2)]], sampler texCubeSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]], sampler depth2dSmplr [[sampler(6)]], sampler depthCubeSmplr [[sampler(7)]], sampler depth2dArraySmplr [[sampler(8)]], sampler depthCubeArraySmplr [[sampler(9)]]) +fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d tex1d [[texture(0)]], texture2d tex2d [[texture(1)]], texture3d tex3d [[texture(2)]], texturecube texCube [[texture(3)]], texture2d_array tex2dArray [[texture(4)]], texturecube_array texCubeArray [[texture(5)]], depth2d depth2d [[texture(6)]], depthcube depthCube [[texture(7)]], depth2d_array depth2dArray [[texture(8)]], depthcube_array depthCubeArray [[texture(9)]], texture2d texBuffer [[texture(10)]], sampler tex1dSmplr [[sampler(0)]], sampler tex2dSmplr [[sampler(1)]], sampler tex3dSmplr [[sampler(2)]], sampler texCubeSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]], sampler depth2dSmplr [[sampler(6)]], sampler depthCubeSmplr [[sampler(7)]], sampler depth2dArraySmplr [[sampler(8)]], sampler depthCubeArraySmplr [[sampler(9)]]) { - constant uint& tex1dSwzl = spvSwizzleConstants[11]; - constant uint& tex2dSwzl = spvSwizzleConstants[12]; - constant uint& tex3dSwzl = spvSwizzleConstants[13]; - constant uint& texCubeSwzl = spvSwizzleConstants[14]; - constant uint& tex2dArraySwzl = spvSwizzleConstants[15]; - constant uint& texCubeArraySwzl = spvSwizzleConstants[16]; - constant uint& depth2dSwzl = spvSwizzleConstants[17]; - constant uint& depthCubeSwzl = spvSwizzleConstants[18]; - constant uint& depth2dArraySwzl = spvSwizzleConstants[19]; - constant uint& depthCubeArraySwzl = spvSwizzleConstants[20]; - float4 c = tex1d.sample(tex1dSmplr, 0.0); - c = tex2d.sample(tex2dSmplr, float2(0.0)); - c = tex3d.sample(tex3dSmplr, float3(0.0)); - c = texCube.sample(texCubeSmplr, float3(0.0)); - c = tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z))); - c = texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w))); - c.x = depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z); - c.x = depthCube.sample_compare(depthCubeSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz, float4(0.0, 0.0, 0.0, 1.0).w); - c.x = depth2dArray.sample_compare(depth2dArraySmplr, float4(0.0, 0.0, 0.0, 1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), float4(0.0, 0.0, 0.0, 1.0).w); - c.x = depthCubeArray.sample_compare(depthCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0); - c = tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y); - c = tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z); - c = tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w); + constant uint& tex1dSwzl = spvSwizzleConstants[0]; + constant uint& tex2dSwzl = spvSwizzleConstants[1]; + constant uint& tex3dSwzl = spvSwizzleConstants[2]; + constant uint& texCubeSwzl = spvSwizzleConstants[3]; + constant uint& tex2dArraySwzl = spvSwizzleConstants[4]; + constant uint& texCubeArraySwzl = spvSwizzleConstants[5]; + constant uint& depth2dSwzl = spvSwizzleConstants[6]; + constant uint& depthCubeSwzl = spvSwizzleConstants[7]; + constant uint& depth2dArraySwzl = spvSwizzleConstants[8]; + constant uint& depthCubeArraySwzl = spvSwizzleConstants[9]; + float4 c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, 0.0), tex1dSwzl); + c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float2(0.0)), tex2dSwzl); + c = spvTextureSwizzle(tex3d.sample(tex3dSmplr, float3(0.0)), tex3dSwzl); + c = spvTextureSwizzle(texCube.sample(texCubeSmplr, float3(0.0)), texCubeSwzl); + c = spvTextureSwizzle(tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z))), tex2dArraySwzl); + c = spvTextureSwizzle(texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w))), texCubeArraySwzl); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z), depth2dSwzl); + c.x = spvTextureSwizzle(depthCube.sample_compare(depthCubeSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz, float4(0.0, 0.0, 0.0, 1.0).w), depthCubeSwzl); + c.x = spvTextureSwizzle(depth2dArray.sample_compare(depth2dArraySmplr, float4(0.0, 0.0, 0.0, 1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), float4(0.0, 0.0, 0.0, 1.0).w), depth2dArraySwzl); + c.x = spvTextureSwizzle(depthCubeArray.sample_compare(depthCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0), depthCubeArraySwzl); + c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), tex1dSwzl); + c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z), tex2dSwzl); + c = spvTextureSwizzle(tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w), tex3dSwzl); float4 _100 = float4(0.0, 0.0, 1.0, 1.0); _100.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = depth2d.sample_compare(depth2dSmplr, _100.xy / _100.z, float4(0.0, 0.0, 1.0, 1.0).z / _100.z); - c = tex1d.sample(tex1dSmplr, 0.0); - c = tex2d.sample(tex2dSmplr, float2(0.0), level(0.0)); - c = tex3d.sample(tex3dSmplr, float3(0.0), level(0.0)); - c = texCube.sample(texCubeSmplr, float3(0.0), level(0.0)); - c = tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z)), level(0.0)); - c = texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), level(0.0)); - c.x = depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z, level(0.0)); - c = tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y); - c = tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z, level(0.0)); - c = tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w, level(0.0)); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, _100.xy / _100.z, float4(0.0, 0.0, 1.0, 1.0).z / _100.z), depth2dSwzl); + c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, 0.0), tex1dSwzl); + c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float2(0.0), level(0.0)), tex2dSwzl); + c = spvTextureSwizzle(tex3d.sample(tex3dSmplr, float3(0.0), level(0.0)), tex3dSwzl); + c = spvTextureSwizzle(texCube.sample(texCubeSmplr, float3(0.0), level(0.0)), texCubeSwzl); + c = spvTextureSwizzle(tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z)), level(0.0)), tex2dArraySwzl); + c = spvTextureSwizzle(texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), level(0.0)), texCubeArraySwzl); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z, level(0.0)), depth2dSwzl); + c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), tex1dSwzl); + c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z, level(0.0)), tex2dSwzl); + c = spvTextureSwizzle(tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w, level(0.0)), tex3dSwzl); float4 _128 = float4(0.0, 0.0, 1.0, 1.0); _128.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = depth2d.sample_compare(depth2dSmplr, _128.xy / _128.z, float4(0.0, 0.0, 1.0, 1.0).z / _128.z, level(0.0)); - c = tex1d.read(uint(0)); - c = tex2d.read(uint2(int2(0)), 0); - c = tex3d.read(uint3(int3(0)), 0); - c = tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, _128.xy / _128.z, float4(0.0, 0.0, 1.0, 1.0).z / _128.z, level(0.0)), depth2dSwzl); + c = spvTextureSwizzle(tex1d.read(uint(0)), tex1dSwzl); + c = spvTextureSwizzle(tex2d.read(uint2(int2(0)), 0), tex2dSwzl); + c = spvTextureSwizzle(tex3d.read(uint3(int3(0)), 0), tex3dSwzl); + c = spvTextureSwizzle(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0), tex2dArraySwzl); c = texBuffer.read(spvTexelBufferCoord(0)); c = spvGatherSwizzle(tex2d, tex2dSmplr, tex2dSwzl, component::x, float2(0.0), int2(0)); c = spvGatherSwizzle(texCube, texCubeSmplr, texCubeSwzl, component::y, float3(0.0)); diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x2-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-2x2-scalar.comp index ea74df91..d1bdd47f 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-2x2-scalar.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-2x2-scalar.comp @@ -17,6 +17,8 @@ struct SSBORow float2x2 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x2-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-2x2-std140.comp index b86448f4..1e23ce37 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-2x2-std140.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-2x2-std140.comp @@ -17,6 +17,8 @@ struct SSBORow float2x4 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x2-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-2x2-std430.comp index ea74df91..d1bdd47f 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-2x2-std430.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-2x2-std430.comp @@ -17,6 +17,8 @@ struct SSBORow float2x2 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x3-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-2x3-scalar.comp index e98cf02d..d72d931f 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-2x3-scalar.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-2x3-scalar.comp @@ -19,6 +19,8 @@ struct SSBORow float3x2 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x3-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-2x3-std140.comp index a38f4740..c94069fe 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-2x3-std140.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-2x3-std140.comp @@ -17,6 +17,8 @@ struct SSBORow float3x4 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x3-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-2x3-std430.comp index 6199511d..6263c058 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-2x3-std430.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-2x3-std430.comp @@ -17,6 +17,8 @@ struct SSBORow float3x2 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x4-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-2x4-scalar.comp index 8d04a2e8..cea5620a 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-2x4-scalar.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-2x4-scalar.comp @@ -17,6 +17,8 @@ struct SSBORow float4x2 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x4-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-2x4-std140.comp index eeafbc5a..22270483 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-2x4-std140.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-2x4-std140.comp @@ -17,6 +17,8 @@ struct SSBORow float4x4 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x4-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-2x4-std430.comp index 8d04a2e8..cea5620a 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-2x4-std430.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-2x4-std430.comp @@ -17,6 +17,8 @@ struct SSBORow float4x2 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x2-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-3x2-scalar.comp index a9b0060d..68537b67 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-3x2-scalar.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-3x2-scalar.comp @@ -19,6 +19,8 @@ struct SSBORow packed_rm_float3x2 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x2-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-3x2-std140.comp index 620d2668..5a74c072 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-3x2-std140.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-3x2-std140.comp @@ -17,6 +17,8 @@ struct SSBORow float2x3 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x2-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-3x2-std430.comp index 87b2c696..77a419c4 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-3x2-std430.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-3x2-std430.comp @@ -17,6 +17,8 @@ struct SSBORow float2x3 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x3-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-3x3-scalar.comp index 59353dd2..9efa3895 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-3x3-scalar.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-3x3-scalar.comp @@ -20,6 +20,8 @@ struct SSBORow packed_rm_float3x3 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x3-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-3x3-std140.comp index cdfeacd9..2551c023 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-3x3-std140.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-3x3-std140.comp @@ -17,6 +17,8 @@ struct SSBORow float3x3 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x3-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-3x3-std430.comp index cdfeacd9..2551c023 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-3x3-std430.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-3x3-std430.comp @@ -17,6 +17,8 @@ struct SSBORow float3x3 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x4-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-3x4-scalar.comp index a271b041..82075579 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-3x4-scalar.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-3x4-scalar.comp @@ -19,6 +19,8 @@ struct SSBORow packed_rm_float3x4 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x4-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-3x4-std140.comp index bf688b51..e58cab08 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-3x4-std140.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-3x4-std140.comp @@ -17,6 +17,8 @@ struct SSBORow float4x3 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x4-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-3x4-std430.comp index bf688b51..e58cab08 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-3x4-std430.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-3x4-std430.comp @@ -17,6 +17,8 @@ struct SSBORow float4x3 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x2-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-4x2-scalar.comp index 804d734d..f7ca9297 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-4x2-scalar.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-4x2-scalar.comp @@ -17,6 +17,8 @@ struct SSBORow float2x4 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x2-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-4x2-std140.comp index 4af86071..e5cca638 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-4x2-std140.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-4x2-std140.comp @@ -17,6 +17,8 @@ struct SSBORow float2x4 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x2-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-4x2-std430.comp index 804d734d..f7ca9297 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-4x2-std430.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-4x2-std430.comp @@ -17,6 +17,8 @@ struct SSBORow float2x4 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x3-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-4x3-scalar.comp index 78714fe3..d3bb8f23 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-4x3-scalar.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-4x3-scalar.comp @@ -19,6 +19,8 @@ struct SSBORow float3x4 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x3-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-4x3-std140.comp index 16b0aa0f..34420a46 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-4x3-std140.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-4x3-std140.comp @@ -17,6 +17,8 @@ struct SSBORow float3x4 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x3-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-4x3-std430.comp index 16b0aa0f..34420a46 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-4x3-std430.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-4x3-std430.comp @@ -17,6 +17,8 @@ struct SSBORow float3x4 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x4-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-4x4-scalar.comp index 918598ab..c36369a6 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-4x4-scalar.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-4x4-scalar.comp @@ -17,6 +17,8 @@ struct SSBORow float4x4 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x4-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-4x4-std140.comp index 918598ab..c36369a6 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-4x4-std140.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-4x4-std140.comp @@ -17,6 +17,8 @@ struct SSBORow float4x4 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x4-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-4x4-std430.comp index 918598ab..c36369a6 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-4x4-std430.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-4x4-std430.comp @@ -17,6 +17,8 @@ struct SSBORow float4x4 row_major1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void load_store_to_variable_col_major(device SSBOCol& v_29) { diff --git a/reference/shaders-msl-no-opt/packing/matrix-multiply-row-major.comp b/reference/shaders-msl-no-opt/packing/matrix-multiply-row-major.comp index 8c3d5aca..2384e364 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-multiply-row-major.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-multiply-row-major.comp @@ -11,6 +11,8 @@ struct SSBO float3 v1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _11 [[buffer(0)]]) { _11.v0 = _11.v1 * (_11.m1 * _11.m0); diff --git a/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major-2.comp b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major-2.comp index f7b4c598..b3af26bc 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major-2.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major-2.comp @@ -13,6 +13,8 @@ struct SSBO packed_float3 v1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _11 [[buffer(0)]]) { _11.v0 = (float3x3(float3(_11.m0[0]), float3(_11.m0[1]), float3(_11.m0[2])) * float3x3(float3(_11.m1[0]), float3(_11.m1[1]), float3(_11.m1[2]))) * float3(_11.v1); diff --git a/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major.comp b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major.comp index 991a76bf..40f00886 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major.comp @@ -11,6 +11,8 @@ struct SSBO float2 v1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _11 [[buffer(0)]]) { _11.v0 = (float2x2(_11.m0[0].xy, _11.m0[1].xy) * float2x2(_11.m1[0].xy, _11.m1[1].xy)) * _11.v1; diff --git a/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major-2.comp b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major-2.comp index 74d020c8..d357d8db 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major-2.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major-2.comp @@ -13,6 +13,8 @@ struct SSBO packed_float3 v1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _11 [[buffer(0)]]) { _11.v0 = float3(_11.v1) * (float3x3(float3(_11.m1[0]), float3(_11.m1[1]), float3(_11.m1[2])) * float3x3(float3(_11.m0[0]), float3(_11.m0[1]), float3(_11.m0[2]))); diff --git a/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major.comp b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major.comp index d683558b..f061dd66 100644 --- a/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major.comp +++ b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major.comp @@ -11,6 +11,8 @@ struct SSBO float2 v1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _11 [[buffer(0)]]) { _11.v0 = _11.v1 * (float2x2(_11.m1[0].xy, _11.m1[1].xy) * float2x2(_11.m0[0].xy, _11.m0[1].xy)); diff --git a/reference/shaders-msl-no-opt/packing/member-padding.comp b/reference/shaders-msl-no-opt/packing/member-padding.comp index a0939047..4f653ecd 100644 --- a/reference/shaders-msl-no-opt/packing/member-padding.comp +++ b/reference/shaders-msl-no-opt/packing/member-padding.comp @@ -11,6 +11,8 @@ struct SSBO float b; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _9 [[buffer(0)]]) { _9.a = 10.0; diff --git a/reference/shaders-msl-no-opt/packing/std140-array-of-vectors.comp b/reference/shaders-msl-no-opt/packing/std140-array-of-vectors.comp index 43f1c174..a70f7148 100644 --- a/reference/shaders-msl-no-opt/packing/std140-array-of-vectors.comp +++ b/reference/shaders-msl-no-opt/packing/std140-array-of-vectors.comp @@ -57,6 +57,8 @@ struct SSBO spvUnsafeArray v_unsized; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _29 [[buffer(0)]]) { float loaded1 = _29.v1[1].x; diff --git a/reference/shaders-msl-no-opt/packing/struct-alignment.comp b/reference/shaders-msl-no-opt/packing/struct-alignment.comp index 8baa8146..baa4ee66 100644 --- a/reference/shaders-msl-no-opt/packing/struct-alignment.comp +++ b/reference/shaders-msl-no-opt/packing/struct-alignment.comp @@ -17,6 +17,8 @@ struct SSBO Foo foo; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _12 [[buffer(0)]]) { _12.a.x = 10.0; diff --git a/reference/shaders-msl-no-opt/packing/struct-packing-array-of-scalar.comp b/reference/shaders-msl-no-opt/packing/struct-packing-array-of-scalar.comp index c7d5cf73..af7e1a58 100644 --- a/reference/shaders-msl-no-opt/packing/struct-packing-array-of-scalar.comp +++ b/reference/shaders-msl-no-opt/packing/struct-packing-array-of-scalar.comp @@ -54,6 +54,8 @@ struct SSBOScalar spvUnsafeArray v; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBOScalar& buffer_scalar [[buffer(0)]]) { buffer_scalar.v[1].a[1u] = 1.0; diff --git a/reference/shaders-msl-no-opt/packing/struct-packing-recursive.comp b/reference/shaders-msl-no-opt/packing/struct-packing-recursive.comp index a7832a5d..e0652b93 100644 --- a/reference/shaders-msl-no-opt/packing/struct-packing-recursive.comp +++ b/reference/shaders-msl-no-opt/packing/struct-packing-recursive.comp @@ -24,6 +24,8 @@ struct SSBOScalar Baz baz; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBOScalar& buffer_scalar [[buffer(0)]]) { buffer_scalar.baz.a.a.a[3u] = 10.0; diff --git a/reference/shaders-msl-no-opt/packing/struct-packing.comp b/reference/shaders-msl-no-opt/packing/struct-packing.comp index 115e1241..a86809fe 100644 --- a/reference/shaders-msl-no-opt/packing/struct-packing.comp +++ b/reference/shaders-msl-no-opt/packing/struct-packing.comp @@ -19,6 +19,8 @@ struct SSBOScalar Bar bar; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBOScalar& buffer_scalar [[buffer(0)]]) { buffer_scalar.foo.a[0u] = 10.0; diff --git a/reference/shaders-msl-no-opt/packing/struct-size-padding-array-of-array.comp b/reference/shaders-msl-no-opt/packing/struct-size-padding-array-of-array.comp index 5eda1e35..56cc30b4 100644 --- a/reference/shaders-msl-no-opt/packing/struct-size-padding-array-of-array.comp +++ b/reference/shaders-msl-no-opt/packing/struct-size-padding-array-of-array.comp @@ -83,6 +83,8 @@ struct SSBO spvUnsafeArray f; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _32 [[buffer(0)]]) { _32.f[0].a = float4(2.0); diff --git a/reference/shaders-msl-no-opt/packing/struct-size-padding.comp b/reference/shaders-msl-no-opt/packing/struct-size-padding.comp index 8b0a5bb2..e45ed1e8 100644 --- a/reference/shaders-msl-no-opt/packing/struct-size-padding.comp +++ b/reference/shaders-msl-no-opt/packing/struct-size-padding.comp @@ -83,6 +83,8 @@ struct SSBO spvUnsafeArray f; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _26 [[buffer(0)]]) { _26.f[0].a = float4(2.0); diff --git a/reference/shaders-msl-no-opt/vulkan/frag/texture-access-function.swizzle.vk.frag b/reference/shaders-msl-no-opt/vulkan/frag/texture-access-function.swizzle.vk.frag index 18feaaef..d4f70e0e 100644 --- a/reference/shaders-msl-no-opt/vulkan/frag/texture-access-function.swizzle.vk.frag +++ b/reference/shaders-msl-no-opt/vulkan/frag/texture-access-function.swizzle.vk.frag @@ -20,13 +20,11 @@ uint2 spvTexelBufferCoord(uint tc) template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) { return static_cast(x); } -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) { return static_cast(x); } @@ -142,39 +140,39 @@ inline vec spvGatherCompareSwizzle(const thread Tex& t, sampler s, uint static inline __attribute__((always_inline)) float4 do_samples(thread const texture1d t1, thread const sampler t1Smplr, constant uint& t1Swzl, thread const texture2d t2, constant uint& t2Swzl, thread const texture3d t3, thread const sampler t3Smplr, constant uint& t3Swzl, thread const texturecube tc, constant uint& tcSwzl, thread const texture2d_array t2a, thread const sampler t2aSmplr, constant uint& t2aSwzl, thread const texturecube_array tca, thread const sampler tcaSmplr, constant uint& tcaSwzl, thread const texture2d tb, thread const depth2d d2, thread const sampler d2Smplr, constant uint& d2Swzl, thread const depthcube dc, thread const sampler dcSmplr, constant uint& dcSwzl, thread const depth2d_array d2a, constant uint& d2aSwzl, thread const depthcube_array dca, thread const sampler dcaSmplr, constant uint& dcaSwzl, thread sampler defaultSampler, thread sampler shadowSampler) { - float4 c = t1.sample(t1Smplr, 0.0); - c = t2.sample(defaultSampler, float2(0.0)); - c = t3.sample(t3Smplr, float3(0.0)); - c = tc.sample(defaultSampler, float3(0.0)); - c = t2a.sample(t2aSmplr, float3(0.0).xy, uint(round(float3(0.0).z))); - c = tca.sample(tcaSmplr, float4(0.0).xyz, uint(round(float4(0.0).w))); - c.x = d2.sample_compare(d2Smplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z); - c.x = dc.sample_compare(dcSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz, float4(0.0, 0.0, 0.0, 1.0).w); - c.x = d2a.sample_compare(shadowSampler, float4(0.0, 0.0, 0.0, 1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), float4(0.0, 0.0, 0.0, 1.0).w); - c.x = dca.sample_compare(dcaSmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0); - c = t1.sample(t1Smplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y); - c = t2.sample(defaultSampler, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z); - c = t3.sample(t3Smplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w); + float4 c = spvTextureSwizzle(t1.sample(t1Smplr, 0.0), t1Swzl); + c = spvTextureSwizzle(t2.sample(defaultSampler, float2(0.0)), t2Swzl); + c = spvTextureSwizzle(t3.sample(t3Smplr, float3(0.0)), t3Swzl); + c = spvTextureSwizzle(tc.sample(defaultSampler, float3(0.0)), tcSwzl); + c = spvTextureSwizzle(t2a.sample(t2aSmplr, float3(0.0).xy, uint(round(float3(0.0).z))), t2aSwzl); + c = spvTextureSwizzle(tca.sample(tcaSmplr, float4(0.0).xyz, uint(round(float4(0.0).w))), tcaSwzl); + c.x = spvTextureSwizzle(d2.sample_compare(d2Smplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z), d2Swzl); + c.x = spvTextureSwizzle(dc.sample_compare(dcSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz, float4(0.0, 0.0, 0.0, 1.0).w), dcSwzl); + c.x = spvTextureSwizzle(d2a.sample_compare(shadowSampler, float4(0.0, 0.0, 0.0, 1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), float4(0.0, 0.0, 0.0, 1.0).w), d2aSwzl); + c.x = spvTextureSwizzle(dca.sample_compare(dcaSmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0), dcaSwzl); + c = spvTextureSwizzle(t1.sample(t1Smplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), t1Swzl); + c = spvTextureSwizzle(t2.sample(defaultSampler, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z), t2Swzl); + c = spvTextureSwizzle(t3.sample(t3Smplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w), t3Swzl); float4 _119 = float4(0.0, 0.0, 1.0, 1.0); _119.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = d2.sample_compare(d2Smplr, _119.xy / _119.z, float4(0.0, 0.0, 1.0, 1.0).z / _119.z); - c = t1.sample(t1Smplr, 0.0); - c = t2.sample(defaultSampler, float2(0.0), level(0.0)); - c = t3.sample(t3Smplr, float3(0.0), level(0.0)); - c = tc.sample(defaultSampler, float3(0.0), level(0.0)); - c = t2a.sample(t2aSmplr, float3(0.0).xy, uint(round(float3(0.0).z)), level(0.0)); - c = tca.sample(tcaSmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), level(0.0)); - c.x = d2.sample_compare(d2Smplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z, level(0.0)); - c = t1.sample(t1Smplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y); - c = t2.sample(defaultSampler, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z, level(0.0)); - c = t3.sample(t3Smplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w, level(0.0)); + c.x = spvTextureSwizzle(d2.sample_compare(d2Smplr, _119.xy / _119.z, float4(0.0, 0.0, 1.0, 1.0).z / _119.z), d2Swzl); + c = spvTextureSwizzle(t1.sample(t1Smplr, 0.0), t1Swzl); + c = spvTextureSwizzle(t2.sample(defaultSampler, float2(0.0), level(0.0)), t2Swzl); + c = spvTextureSwizzle(t3.sample(t3Smplr, float3(0.0), level(0.0)), t3Swzl); + c = spvTextureSwizzle(tc.sample(defaultSampler, float3(0.0), level(0.0)), tcSwzl); + c = spvTextureSwizzle(t2a.sample(t2aSmplr, float3(0.0).xy, uint(round(float3(0.0).z)), level(0.0)), t2aSwzl); + c = spvTextureSwizzle(tca.sample(tcaSmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), level(0.0)), tcaSwzl); + c.x = spvTextureSwizzle(d2.sample_compare(d2Smplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z, level(0.0)), d2Swzl); + c = spvTextureSwizzle(t1.sample(t1Smplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), t1Swzl); + c = spvTextureSwizzle(t2.sample(defaultSampler, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z, level(0.0)), t2Swzl); + c = spvTextureSwizzle(t3.sample(t3Smplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w, level(0.0)), t3Swzl); float4 _153 = float4(0.0, 0.0, 1.0, 1.0); _153.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = d2.sample_compare(d2Smplr, _153.xy / _153.z, float4(0.0, 0.0, 1.0, 1.0).z / _153.z, level(0.0)); - c = t1.read(uint(0)); - c = t2.read(uint2(int2(0)), 0); - c = t3.read(uint3(int3(0)), 0); - c = t2a.read(uint2(int3(0).xy), uint(int3(0).z), 0); + c.x = spvTextureSwizzle(d2.sample_compare(d2Smplr, _153.xy / _153.z, float4(0.0, 0.0, 1.0, 1.0).z / _153.z, level(0.0)), d2Swzl); + c = spvTextureSwizzle(t1.read(uint(0)), t1Swzl); + c = spvTextureSwizzle(t2.read(uint2(int2(0)), 0), t2Swzl); + c = spvTextureSwizzle(t3.read(uint3(int3(0)), 0), t3Swzl); + c = spvTextureSwizzle(t2a.read(uint2(int3(0).xy), uint(int3(0).z), 0), t2aSwzl); c = tb.read(spvTexelBufferCoord(0)); c = spvGatherSwizzle(t2, defaultSampler, t2Swzl, component::x, float2(0.0), int2(0)); c = spvGatherSwizzle(tc, defaultSampler, tcSwzl, component::y, float3(0.0)); @@ -187,19 +185,19 @@ float4 do_samples(thread const texture1d t1, thread const sampler t1Smplr return c; } -fragment main0_out main0(constant uint* spvSwizzleConstants [[buffer(0)]], texture1d tex1d [[texture(0)]], texture2d tex2d [[texture(1)]], texture3d tex3d [[texture(2)]], texturecube texCube [[texture(3)]], texture2d_array tex2dArray [[texture(4)]], texturecube_array texCubeArray [[texture(5)]], texture2d texBuffer [[texture(6)]], depth2d depth2d [[texture(7)]], depthcube depthCube [[texture(8)]], depth2d_array depth2dArray [[texture(9)]], depthcube_array depthCubeArray [[texture(10)]], sampler defaultSampler [[sampler(0)]], sampler shadowSampler [[sampler(1)]], sampler tex1dSmplr [[sampler(2)]], sampler tex3dSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]], sampler depth2dSmplr [[sampler(6)]], sampler depthCubeSmplr [[sampler(7)]], sampler depthCubeArraySmplr [[sampler(8)]]) +fragment main0_out main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d tex1d [[texture(0)]], texture2d tex2d [[texture(1)]], texture3d tex3d [[texture(2)]], texturecube texCube [[texture(3)]], texture2d_array tex2dArray [[texture(4)]], texturecube_array texCubeArray [[texture(5)]], texture2d texBuffer [[texture(6)]], depth2d depth2d [[texture(7)]], depthcube depthCube [[texture(8)]], depth2d_array depth2dArray [[texture(9)]], depthcube_array depthCubeArray [[texture(10)]], sampler defaultSampler [[sampler(0)]], sampler shadowSampler [[sampler(1)]], sampler tex1dSmplr [[sampler(2)]], sampler tex3dSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]], sampler depth2dSmplr [[sampler(6)]], sampler depthCubeSmplr [[sampler(7)]], sampler depthCubeArraySmplr [[sampler(8)]]) { main0_out out = {}; - constant uint& tex1dSwzl = spvSwizzleConstants[11]; - constant uint& tex2dSwzl = spvSwizzleConstants[12]; - constant uint& tex3dSwzl = spvSwizzleConstants[13]; - constant uint& texCubeSwzl = spvSwizzleConstants[14]; - constant uint& tex2dArraySwzl = spvSwizzleConstants[15]; - constant uint& texCubeArraySwzl = spvSwizzleConstants[16]; - constant uint& depth2dSwzl = spvSwizzleConstants[17]; - constant uint& depthCubeSwzl = spvSwizzleConstants[18]; - constant uint& depth2dArraySwzl = spvSwizzleConstants[19]; - constant uint& depthCubeArraySwzl = spvSwizzleConstants[20]; + constant uint& tex1dSwzl = spvSwizzleConstants[0]; + constant uint& tex2dSwzl = spvSwizzleConstants[1]; + constant uint& tex3dSwzl = spvSwizzleConstants[2]; + constant uint& texCubeSwzl = spvSwizzleConstants[3]; + constant uint& tex2dArraySwzl = spvSwizzleConstants[4]; + constant uint& texCubeArraySwzl = spvSwizzleConstants[5]; + constant uint& depth2dSwzl = spvSwizzleConstants[7]; + constant uint& depthCubeSwzl = spvSwizzleConstants[8]; + constant uint& depth2dArraySwzl = spvSwizzleConstants[9]; + constant uint& depthCubeArraySwzl = spvSwizzleConstants[10]; out.fragColor = do_samples(tex1d, tex1dSmplr, tex1dSwzl, tex2d, tex2dSwzl, tex3d, tex3dSmplr, tex3dSwzl, texCube, texCubeSwzl, tex2dArray, tex2dArraySmplr, tex2dArraySwzl, texCubeArray, texCubeArraySmplr, texCubeArraySwzl, texBuffer, depth2d, depth2dSmplr, depth2dSwzl, depthCube, depthCubeSmplr, depthCubeSwzl, depth2dArray, depth2dArraySwzl, depthCubeArray, depthCubeArraySmplr, depthCubeArraySwzl, defaultSampler, shadowSampler); return out; } diff --git a/reference/shaders-msl/asm/frag/texture-atomics.asm.frag b/reference/shaders-msl/asm/frag/texture-atomics.asm.frag index 036a330e..bedab01a 100644 --- a/reference/shaders-msl/asm/frag/texture-atomics.asm.frag +++ b/reference/shaders-msl/asm/frag/texture-atomics.asm.frag @@ -71,7 +71,7 @@ struct main0_in // Returns buffer coords clamped to storage buffer size #define spvStorageBufferCoords(idx, sizes, type, coord) metal::min((coord), (sizes[(idx)*2] / sizeof(type)) - 1) -fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d RWShadowTileNumCulledObjects [[texture(0)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]]) +fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d RWShadowTileNumCulledObjects [[texture(2)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]]) { main0_out out = {}; uint2 _77 = uint2(gl_FragCoord.xy); diff --git a/reference/shaders-msl/asm/vert/double-decrement-instance-vertex-id.asm.vert b/reference/shaders-msl/asm/vert/double-decrement-instance-vertex-id.asm.vert deleted file mode 100644 index aa8103a1..00000000 --- a/reference/shaders-msl/asm/vert/double-decrement-instance-vertex-id.asm.vert +++ /dev/null @@ -1,365 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" -#pragma clang diagnostic ignored "-Wunused-variable" - -#include -#include -template -struct unsafe_array -{ - T __Elements[Num ? Num : 1]; - - constexpr size_t size() const thread { return Num; } - constexpr size_t max_size() const thread { return Num; } - constexpr bool empty() const thread { return Num == 0; } - - constexpr size_t size() const device { return Num; } - constexpr size_t max_size() const device { return Num; } - constexpr bool empty() const device { return Num == 0; } - - constexpr size_t size() const constant { return Num; } - constexpr size_t max_size() const constant { return Num; } - constexpr bool empty() const constant { return Num == 0; } - - constexpr size_t size() const threadgroup { return Num; } - constexpr size_t max_size() const threadgroup { return Num; } - constexpr bool empty() const threadgroup { return Num == 0; } - - thread T &operator[](size_t pos) thread - { - return __Elements[pos]; - } - constexpr const thread T &operator[](size_t pos) const thread - { - return __Elements[pos]; - } - - device T &operator[](size_t pos) device - { - return __Elements[pos]; - } - constexpr const device T &operator[](size_t pos) const device - { - return __Elements[pos]; - } - - constexpr const constant T &operator[](size_t pos) const constant - { - return __Elements[pos]; - } - - threadgroup T &operator[](size_t pos) threadgroup - { - return __Elements[pos]; - } - constexpr const threadgroup T &operator[](size_t pos) const threadgroup - { - return __Elements[pos]; - } -}; - -using namespace metal; - -struct type_View -{ - float4x4 View_TranslatedWorldToClip; - float4x4 View_WorldToClip; - float4x4 View_ClipToWorld; - float4x4 View_TranslatedWorldToView; - float4x4 View_ViewToTranslatedWorld; - float4x4 View_TranslatedWorldToCameraView; - float4x4 View_CameraViewToTranslatedWorld; - float4x4 View_ViewToClip; - float4x4 View_ViewToClipNoAA; - float4x4 View_ClipToView; - float4x4 View_ClipToTranslatedWorld; - float4x4 View_SVPositionToTranslatedWorld; - float4x4 View_ScreenToWorld; - float4x4 View_ScreenToTranslatedWorld; - packed_float3 View_ViewForward; - float PrePadding_View_908; - packed_float3 View_ViewUp; - float PrePadding_View_924; - packed_float3 View_ViewRight; - float PrePadding_View_940; - packed_float3 View_HMDViewNoRollUp; - float PrePadding_View_956; - packed_float3 View_HMDViewNoRollRight; - float PrePadding_View_972; - float4 View_InvDeviceZToWorldZTransform; - float4 View_ScreenPositionScaleBias; - packed_float3 View_WorldCameraOrigin; - float PrePadding_View_1020; - packed_float3 View_TranslatedWorldCameraOrigin; - float PrePadding_View_1036; - packed_float3 View_WorldViewOrigin; - float PrePadding_View_1052; - packed_float3 View_PreViewTranslation; - float PrePadding_View_1068; - float4x4 View_PrevProjection; - float4x4 View_PrevViewProj; - float4x4 View_PrevViewRotationProj; - float4x4 View_PrevViewToClip; - float4x4 View_PrevClipToView; - float4x4 View_PrevTranslatedWorldToClip; - float4x4 View_PrevTranslatedWorldToView; - float4x4 View_PrevViewToTranslatedWorld; - float4x4 View_PrevTranslatedWorldToCameraView; - float4x4 View_PrevCameraViewToTranslatedWorld; - packed_float3 View_PrevWorldCameraOrigin; - float PrePadding_View_1724; - packed_float3 View_PrevWorldViewOrigin; - float PrePadding_View_1740; - packed_float3 View_PrevPreViewTranslation; - float PrePadding_View_1756; - float4x4 View_PrevInvViewProj; - float4x4 View_PrevScreenToTranslatedWorld; - float4x4 View_ClipToPrevClip; - float4 View_TemporalAAJitter; - float4 View_GlobalClippingPlane; - float2 View_FieldOfViewWideAngles; - float2 View_PrevFieldOfViewWideAngles; - float4 View_ViewRectMin; - float4 View_ViewSizeAndInvSize; - float4 View_BufferSizeAndInvSize; - float4 View_BufferBilinearUVMinMax; - int View_NumSceneColorMSAASamples; - float View_PreExposure; - float View_OneOverPreExposure; - float PrePadding_View_2076; - float4 View_DiffuseOverrideParameter; - float4 View_SpecularOverrideParameter; - float4 View_NormalOverrideParameter; - float2 View_RoughnessOverrideParameter; - float View_PrevFrameGameTime; - float View_PrevFrameRealTime; - float View_OutOfBoundsMask; - float PrePadding_View_2148; - float PrePadding_View_2152; - float PrePadding_View_2156; - packed_float3 View_WorldCameraMovementSinceLastFrame; - float View_CullingSign; - float View_NearPlane; - float View_AdaptiveTessellationFactor; - float View_GameTime; - float View_RealTime; - float View_DeltaTime; - float View_MaterialTextureMipBias; - float View_MaterialTextureDerivativeMultiply; - uint View_Random; - uint View_FrameNumber; - uint View_StateFrameIndexMod8; - uint View_StateFrameIndex; - float View_CameraCut; - float View_UnlitViewmodeMask; - float PrePadding_View_2228; - float PrePadding_View_2232; - float PrePadding_View_2236; - float4 View_DirectionalLightColor; - packed_float3 View_DirectionalLightDirection; - float PrePadding_View_2268; - unsafe_array View_TranslucencyLightingVolumeMin; - unsafe_array View_TranslucencyLightingVolumeInvSize; - float4 View_TemporalAAParams; - float4 View_CircleDOFParams; - float View_DepthOfFieldSensorWidth; - float View_DepthOfFieldFocalDistance; - float View_DepthOfFieldScale; - float View_DepthOfFieldFocalLength; - float View_DepthOfFieldFocalRegion; - float View_DepthOfFieldNearTransitionRegion; - float View_DepthOfFieldFarTransitionRegion; - float View_MotionBlurNormalizedToPixel; - float View_bSubsurfacePostprocessEnabled; - float View_GeneralPurposeTweak; - float View_DemosaicVposOffset; - float PrePadding_View_2412; - packed_float3 View_IndirectLightingColorScale; - float View_HDR32bppEncodingMode; - packed_float3 View_AtmosphericFogSunDirection; - float View_AtmosphericFogSunPower; - float View_AtmosphericFogPower; - float View_AtmosphericFogDensityScale; - float View_AtmosphericFogDensityOffset; - float View_AtmosphericFogGroundOffset; - float View_AtmosphericFogDistanceScale; - float View_AtmosphericFogAltitudeScale; - float View_AtmosphericFogHeightScaleRayleigh; - float View_AtmosphericFogStartDistance; - float View_AtmosphericFogDistanceOffset; - float View_AtmosphericFogSunDiscScale; - float View_AtmosphericFogSunDiscHalfApexAngleRadian; - float PrePadding_View_2492; - float4 View_AtmosphericFogSunDiscLuminance; - uint View_AtmosphericFogRenderMask; - uint View_AtmosphericFogInscatterAltitudeSampleNum; - uint PrePadding_View_2520; - uint PrePadding_View_2524; - float4 View_AtmosphericFogSunColor; - packed_float3 View_NormalCurvatureToRoughnessScaleBias; - float View_RenderingReflectionCaptureMask; - float4 View_AmbientCubemapTint; - float View_AmbientCubemapIntensity; - float View_SkyLightParameters; - float PrePadding_View_2584; - float PrePadding_View_2588; - float4 View_SkyLightColor; - unsafe_array View_SkyIrradianceEnvironmentMap; - float View_MobilePreviewMode; - float View_HMDEyePaddingOffset; - float View_ReflectionCubemapMaxMip; - float View_ShowDecalsMask; - uint View_DistanceFieldAOSpecularOcclusionMode; - float View_IndirectCapsuleSelfShadowingIntensity; - float PrePadding_View_2744; - float PrePadding_View_2748; - packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; - int View_StereoPassIndex; - unsafe_array View_GlobalVolumeCenterAndExtent; - unsafe_array View_GlobalVolumeWorldToUVAddAndMul; - float View_GlobalVolumeDimension; - float View_GlobalVolumeTexelSize; - float View_MaxGlobalDistance; - float PrePadding_View_2908; - int2 View_CursorPosition; - float View_bCheckerboardSubsurfaceProfileRendering; - float PrePadding_View_2924; - packed_float3 View_VolumetricFogInvGridSize; - float PrePadding_View_2940; - packed_float3 View_VolumetricFogGridZParams; - float PrePadding_View_2956; - float2 View_VolumetricFogSVPosToVolumeUV; - float View_VolumetricFogMaxDistance; - float PrePadding_View_2972; - packed_float3 View_VolumetricLightmapWorldToUVScale; - float PrePadding_View_2988; - packed_float3 View_VolumetricLightmapWorldToUVAdd; - float PrePadding_View_3004; - packed_float3 View_VolumetricLightmapIndirectionTextureSize; - float View_VolumetricLightmapBrickSize; - packed_float3 View_VolumetricLightmapBrickTexelSize; - float View_StereoIPD; - float View_IndirectLightingCacheShowFlag; - float View_EyeToPixelSpreadAngle; -}; - -struct type_Primitive -{ - float4x4 Primitive_LocalToWorld; - float4 Primitive_InvNonUniformScaleAndDeterminantSign; - float4 Primitive_ObjectWorldPositionAndRadius; - float4x4 Primitive_WorldToLocal; - float4x4 Primitive_PreviousLocalToWorld; - float4x4 Primitive_PreviousWorldToLocal; - packed_float3 Primitive_ActorWorldPosition; - float Primitive_UseSingleSampleShadowFromStationaryLights; - packed_float3 Primitive_ObjectBounds; - float Primitive_LpvBiasMultiplier; - float Primitive_DecalReceiverMask; - float Primitive_PerObjectGBufferData; - float Primitive_UseVolumetricLightmapShadowFromStationaryLights; - float Primitive_DrawsVelocity; - float4 Primitive_ObjectOrientation; - float4 Primitive_NonUniformScale; - packed_float3 Primitive_LocalObjectBoundsMin; - uint Primitive_LightingChannelMask; - packed_float3 Primitive_LocalObjectBoundsMax; - uint Primitive_LightmapDataIndex; - packed_float3 Primitive_PreSkinnedLocalBounds; - int Primitive_SingleCaptureIndex; - uint Primitive_OutputVelocity; - uint PrePadding_Primitive_420; - uint PrePadding_Primitive_424; - uint PrePadding_Primitive_428; - unsafe_array Primitive_CustomPrimitiveData; -}; - -constant unsafe_array _67 = unsafe_array({ float2(0.0), float2(0.0) }); - -constant float3x3 _68 = {}; -constant float4 _69 = {}; - -struct main0_out -{ - float4 out_var_TEXCOORD10_centroid [[user(locn0)]]; - float4 out_var_TEXCOORD11_centroid [[user(locn1)]]; - float4 out_var_COLOR0 [[user(locn2)]]; - float2 out_var_TEXCOORD0_0 [[user(locn3)]]; - float2 out_var_TEXCOORD0_1 [[user(locn4)]]; - float4 out_var_VS_To_DS_Position [[user(locn5)]]; -}; - -struct main0_in -{ - float4 in_var_ATTRIBUTE0 [[attribute(0)]]; - float3 in_var_ATTRIBUTE1 [[attribute(1)]]; - float4 in_var_ATTRIBUTE2 [[attribute(2)]]; - uint4 in_var_ATTRIBUTE3 [[attribute(3)]]; - float4 in_var_ATTRIBUTE4 [[attribute(4)]]; - float2 in_var_ATTRIBUTE5_0 [[attribute(5)]]; - float2 in_var_ATTRIBUTE5_1 [[attribute(6)]]; - float4 in_var_ATTRIBUTE13 [[attribute(13)]]; -}; - -// Returns 2D texture coords corresponding to 1D texel buffer coords -static inline __attribute__((always_inline)) -uint2 spvTexelBufferCoord(uint tc) -{ - return uint2(tc % 4096, tc / 4096); -} - -vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Primitive& Primitive [[buffer(1)]], texture2d BoneMatrices [[texture(0)]]) -{ - main0_out out = {}; - unsafe_array out_var_TEXCOORD0 = {}; - unsafe_array in_var_ATTRIBUTE5 = {}; - in_var_ATTRIBUTE5[0] = in.in_var_ATTRIBUTE5_0; - in_var_ATTRIBUTE5[1] = in.in_var_ATTRIBUTE5_1; - float4 _83 = float4(in.in_var_ATTRIBUTE4.x); - int _86 = int(in.in_var_ATTRIBUTE3.x) * 3; - float4 _100 = float4(in.in_var_ATTRIBUTE4.y); - int _103 = int(in.in_var_ATTRIBUTE3.y) * 3; - float4 _119 = float4(in.in_var_ATTRIBUTE4.z); - int _122 = int(in.in_var_ATTRIBUTE3.z) * 3; - float4 _138 = float4(in.in_var_ATTRIBUTE4.w); - int _141 = int(in.in_var_ATTRIBUTE3.w) * 3; - float3x4 _156 = float3x4((((_83 * BoneMatrices.read(spvTexelBufferCoord(uint(_86)))) + (_100 * BoneMatrices.read(spvTexelBufferCoord(uint(_103))))) + (_119 * BoneMatrices.read(spvTexelBufferCoord(uint(_122))))) + (_138 * BoneMatrices.read(spvTexelBufferCoord(uint(_141)))), (((_83 * BoneMatrices.read(spvTexelBufferCoord(uint(_86 + 1)))) + (_100 * BoneMatrices.read(spvTexelBufferCoord(uint(_103 + 1))))) + (_119 * BoneMatrices.read(spvTexelBufferCoord(uint(_122 + 1))))) + (_138 * BoneMatrices.read(spvTexelBufferCoord(uint(_141 + 1)))), (((_83 * BoneMatrices.read(spvTexelBufferCoord(uint(_86 + 2)))) + (_100 * BoneMatrices.read(spvTexelBufferCoord(uint(_103 + 2))))) + (_119 * BoneMatrices.read(spvTexelBufferCoord(uint(_122 + 2))))) + (_138 * BoneMatrices.read(spvTexelBufferCoord(uint(_141 + 2))))); - float3 _161 = float4(in.in_var_ATTRIBUTE1, 0.0) * _156; - float3x3 _162 = _68; - _162[0] = _161; - float3 _167 = float4(in.in_var_ATTRIBUTE2.xyz, 0.0) * _156; - float3x3 _168 = _162; - _168[2] = _167; - float3x3 _173 = _168; - _173[1] = cross(_167, _161) * float3(in.in_var_ATTRIBUTE2.w); - float3 _178 = float4(in.in_var_ATTRIBUTE0.xyz, 1.0) * _156; - float4 _205 = float4((((Primitive.Primitive_LocalToWorld[0u].xyz * _178.xxx) + (Primitive.Primitive_LocalToWorld[1u].xyz * _178.yyy)) + (Primitive.Primitive_LocalToWorld[2u].xyz * _178.zzz)) + (Primitive.Primitive_LocalToWorld[3u].xyz + float3(View.View_PreViewTranslation)), 1.0); - unsafe_array _71; - _71 = in_var_ATTRIBUTE5; - unsafe_array _72 = unsafe_array({ float2(0.0), float2(0.0) }); - for (int _207 = 0; _207 < 2; ) - { - _72[_207] = _71[_207]; - _207++; - continue; - } - float4 _217 = _69; - _217.w = 0.0; - float3x3 _231 = float3x3(Primitive.Primitive_LocalToWorld[0].xyz, Primitive.Primitive_LocalToWorld[1].xyz, Primitive.Primitive_LocalToWorld[2].xyz); - _231[0] = Primitive.Primitive_LocalToWorld[0].xyz * float3(Primitive.Primitive_InvNonUniformScaleAndDeterminantSign.x); - float3x3 _235 = _231; - _235[1] = Primitive.Primitive_LocalToWorld[1].xyz * float3(Primitive.Primitive_InvNonUniformScaleAndDeterminantSign.y); - float3x3 _239 = _235; - _239[2] = Primitive.Primitive_LocalToWorld[2].xyz * float3(Primitive.Primitive_InvNonUniformScaleAndDeterminantSign.z); - float3x3 _240 = _239 * _173; - float3 _241 = _240[0]; - out.out_var_TEXCOORD10_centroid = float4(_241.x, _241.y, _241.z, _217.w); - out.out_var_TEXCOORD11_centroid = float4(_240[2], in.in_var_ATTRIBUTE2.w * Primitive.Primitive_InvNonUniformScaleAndDeterminantSign.w); - out.out_var_COLOR0 = in.in_var_ATTRIBUTE13; - out_var_TEXCOORD0 = _72; - out.out_var_VS_To_DS_Position = float4(_205.x, _205.y, _205.z, _205.w); - out.out_var_TEXCOORD0_0 = out_var_TEXCOORD0[0]; - out.out_var_TEXCOORD0_1 = out_var_TEXCOORD0[1]; - return out; -} - diff --git a/reference/shaders-msl/comp/access-private-workgroup-in-function.comp b/reference/shaders-msl/comp/access-private-workgroup-in-function.comp index f7c471f3..85185e6d 100644 --- a/reference/shaders-msl/comp/access-private-workgroup-in-function.comp +++ b/reference/shaders-msl/comp/access-private-workgroup-in-function.comp @@ -5,6 +5,8 @@ using namespace metal; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) void set_f(thread int& f) { diff --git a/reference/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp b/reference/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp index f7757cd1..18cfd68c 100644 --- a/reference/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp +++ b/reference/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp @@ -23,6 +23,8 @@ struct SSBO2 float4 v; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + struct spvDescriptorSetBuffer0 { const device SSBO0* ssbo0 [[id(0)]]; diff --git a/reference/shaders-msl/comp/array-length.comp b/reference/shaders-msl/comp/array-length.comp index 8195a788..0617955e 100644 --- a/reference/shaders-msl/comp/array-length.comp +++ b/reference/shaders-msl/comp/array-length.comp @@ -55,13 +55,15 @@ struct SSBO1 spvUnsafeArray bz; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) uint get_size(device SSBO& v_14, constant uint& v_14BufferSize, thread spvUnsafeArray (&ssbos), constant uint* ssbosBufferSize) { return uint(int((v_14BufferSize - 16) / 16) + int((ssbosBufferSize[1] - 0) / 4)); } -kernel void main0(constant uint* spvBufferSizeConstants [[buffer(3)]], device SSBO& v_14 [[buffer(0)]], device SSBO1* ssbos_0 [[buffer(1)]], device SSBO1* ssbos_1 [[buffer(2)]]) +kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device SSBO& v_14 [[buffer(0)]], device SSBO1* ssbos_0 [[buffer(1)]], device SSBO1* ssbos_1 [[buffer(2)]]) { spvUnsafeArray ssbos = { @@ -69,8 +71,8 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(3)]], device SS ssbos_1, }; - constant uint& v_14BufferSize = spvBufferSizeConstants[4]; - constant uint* ssbosBufferSize = &spvBufferSizeConstants[5]; + constant uint& v_14BufferSize = spvBufferSizeConstants[0]; + constant uint* ssbosBufferSize = &spvBufferSizeConstants[1]; v_14.size = get_size(v_14, v_14BufferSize, ssbos, ssbosBufferSize); } diff --git a/reference/shaders-msl/comp/array-length.msl2.argument.discrete.comp b/reference/shaders-msl/comp/array-length.msl2.argument.discrete.comp index 8c074bb4..6b38e3f0 100644 --- a/reference/shaders-msl/comp/array-length.msl2.argument.discrete.comp +++ b/reference/shaders-msl/comp/array-length.msl2.argument.discrete.comp @@ -66,6 +66,8 @@ struct SSBO3 spvUnsafeArray bz; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + struct spvDescriptorSetBuffer0 { device SSBO* v_16 [[id(0)]]; @@ -88,7 +90,7 @@ uint get_size(device SSBO& v_16, constant uint& v_16BufferSize, constant spvUnsa return len; } -kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], constant uint* spvBufferSizeConstants [[buffer(5)]], device SSBO2& v_38 [[buffer(2)]], device SSBO3* ssbos2_0 [[buffer(3)]], device SSBO3* ssbos2_1 [[buffer(4)]]) +kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], constant uint* spvBufferSizeConstants [[buffer(25)]], device SSBO2& v_38 [[buffer(2)]], device SSBO3* ssbos2_0 [[buffer(3)]], device SSBO3* ssbos2_1 [[buffer(4)]]) { spvUnsafeArray ssbos2 = { @@ -98,8 +100,8 @@ kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0 constant uint& spvDescriptorSet0_v_16BufferSize = spvDescriptorSet0.spvBufferSizeConstants[0]; constant uint* spvDescriptorSet1_ssbosBufferSize = &spvDescriptorSet1.spvBufferSizeConstants[0]; - constant uint& v_38BufferSize = spvBufferSizeConstants[6]; - constant uint* ssbos2BufferSize = &spvBufferSizeConstants[7]; + constant uint& v_38BufferSize = spvBufferSizeConstants[2]; + constant uint* ssbos2BufferSize = &spvBufferSizeConstants[3]; (*spvDescriptorSet0.v_16).size = get_size((*spvDescriptorSet0.v_16), spvDescriptorSet0_v_16BufferSize, spvDescriptorSet1.ssbos, spvDescriptorSet1_ssbosBufferSize, v_38, v_38BufferSize, ssbos2, ssbos2BufferSize); } diff --git a/reference/shaders-msl/comp/atomic.comp b/reference/shaders-msl/comp/atomic.comp index 04721502..fca72bfc 100644 --- a/reference/shaders-msl/comp/atomic.comp +++ b/reference/shaders-msl/comp/atomic.comp @@ -12,6 +12,8 @@ struct SSBO int i32; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& ssbo [[buffer(0)]]) { threadgroup uint shared_u32; diff --git a/reference/shaders-msl/comp/basic.comp b/reference/shaders-msl/comp/basic.comp index 64f67f26..11d7df50 100644 --- a/reference/shaders-msl/comp/basic.comp +++ b/reference/shaders-msl/comp/basic.comp @@ -61,6 +61,8 @@ struct SSBO3 uint counter; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buffer(1)]], device SSBO3& _48 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; diff --git a/reference/shaders-msl/comp/basic.dispatchbase.msl11.comp b/reference/shaders-msl/comp/basic.dispatchbase.msl11.comp index 9888ca42..fa12af6f 100644 --- a/reference/shaders-msl/comp/basic.dispatchbase.msl11.comp +++ b/reference/shaders-msl/comp/basic.dispatchbase.msl11.comp @@ -61,9 +61,11 @@ struct SSBO3 uint counter; }; -kernel void main0(constant uint3& spvDispatchBase [[buffer(3)]], const device SSBO& _27 [[buffer(0)]], device SSBO2& _49 [[buffer(1)]], device SSBO3& _52 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(constant uint3& spvDispatchBase [[buffer(29)]], const device SSBO& _27 [[buffer(0)]], device SSBO2& _49 [[buffer(1)]], device SSBO3& _52 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) { - gl_GlobalInvocationID += spvDispatchBase * uint3(1, 1, 1); + gl_GlobalInvocationID += spvDispatchBase * gl_WorkGroupSize; gl_WorkGroupID += spvDispatchBase; uint ident = gl_GlobalInvocationID.x; uint workgroup = gl_WorkGroupID.x; diff --git a/reference/shaders-msl/comp/basic.dynamic-buffer.msl2.invalid.comp b/reference/shaders-msl/comp/basic.dynamic-buffer.msl2.invalid.comp index d2bdbcdf..cde442d4 100644 --- a/reference/shaders-msl/comp/basic.dynamic-buffer.msl2.invalid.comp +++ b/reference/shaders-msl/comp/basic.dynamic-buffer.msl2.invalid.comp @@ -75,7 +75,7 @@ struct spvDescriptorSetBuffer1 spvUnsafeArray, 2>*, 3>, 3>, 2> baz [[id(0)]]; }; -kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], constant uint* spvDynamicOffsets [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], constant uint* spvDynamicOffsets [[buffer(23)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { constant auto& _34 = *(constant Foo* )((constant char* )spvDescriptorSet0.m_34 + spvDynamicOffsets[0]); device spvUnsafeArray, 3>, 2>* baz = diff --git a/reference/shaders-msl/comp/bitcast-16bit-1.invalid.comp b/reference/shaders-msl/comp/bitcast-16bit-1.invalid.comp index fde3cdbf..752a47d0 100644 --- a/reference/shaders-msl/comp/bitcast-16bit-1.invalid.comp +++ b/reference/shaders-msl/comp/bitcast-16bit-1.invalid.comp @@ -54,6 +54,8 @@ struct SSBO1 spvUnsafeArray outputs; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO0& _25 [[buffer(0)]], device SSBO1& _39 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; diff --git a/reference/shaders-msl/comp/bitcast-16bit-2.invalid.comp b/reference/shaders-msl/comp/bitcast-16bit-2.invalid.comp index f68744a3..6ae7a7a7 100644 --- a/reference/shaders-msl/comp/bitcast-16bit-2.invalid.comp +++ b/reference/shaders-msl/comp/bitcast-16bit-2.invalid.comp @@ -59,6 +59,8 @@ struct UBO half4 const0; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO1& _21 [[buffer(0)]], device SSBO0& _29 [[buffer(1)]], constant UBO& _40 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; diff --git a/reference/shaders-msl/comp/coherent-block.comp b/reference/shaders-msl/comp/coherent-block.comp index 580b9e3e..58bbacb7 100644 --- a/reference/shaders-msl/comp/coherent-block.comp +++ b/reference/shaders-msl/comp/coherent-block.comp @@ -8,6 +8,8 @@ struct SSBO float4 value; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(volatile device SSBO& _10 [[buffer(0)]]) { _10.value = float4(20.0); diff --git a/reference/shaders-msl/comp/coherent-image.comp b/reference/shaders-msl/comp/coherent-image.comp index c6af46b5..50904844 100644 --- a/reference/shaders-msl/comp/coherent-image.comp +++ b/reference/shaders-msl/comp/coherent-image.comp @@ -8,6 +8,8 @@ struct SSBO int4 value; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(volatile device SSBO& _10 [[buffer(0)]], texture2d uImage [[texture(0)]]) { _10.value = uImage.read(uint2(int2(10))); diff --git a/reference/shaders-msl/comp/composite-construct.comp b/reference/shaders-msl/comp/composite-construct.comp index a048af82..b2b855fd 100644 --- a/reference/shaders-msl/comp/composite-construct.comp +++ b/reference/shaders-msl/comp/composite-construct.comp @@ -60,6 +60,8 @@ struct Composite float4 b; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + constant spvUnsafeArray _43 = spvUnsafeArray({ float4(20.0), float4(40.0) }); kernel void main0(device SSBO0& _16 [[buffer(0)]], device SSBO1& _32 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]) diff --git a/reference/shaders-msl/comp/copy-array-of-arrays.comp b/reference/shaders-msl/comp/copy-array-of-arrays.comp index a5b873b7..9d675cf2 100644 --- a/reference/shaders-msl/comp/copy-array-of-arrays.comp +++ b/reference/shaders-msl/comp/copy-array-of-arrays.comp @@ -51,6 +51,8 @@ struct BUF float c; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + constant spvUnsafeArray _16 = spvUnsafeArray({ 1.0, 2.0 }); constant spvUnsafeArray _19 = spvUnsafeArray({ 3.0, 4.0 }); constant spvUnsafeArray, 2> _20 = spvUnsafeArray, 2>({ spvUnsafeArray({ 1.0, 2.0 }), spvUnsafeArray({ 3.0, 4.0 }) }); diff --git a/reference/shaders-msl/comp/defer-parens.comp b/reference/shaders-msl/comp/defer-parens.comp index 76dce777..9a567fa6 100644 --- a/reference/shaders-msl/comp/defer-parens.comp +++ b/reference/shaders-msl/comp/defer-parens.comp @@ -9,6 +9,8 @@ struct SSBO int index; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _13 [[buffer(0)]]) { float4 d = _13.data; diff --git a/reference/shaders-msl/comp/dowhile.comp b/reference/shaders-msl/comp/dowhile.comp index 61ba9853..38c268e8 100644 --- a/reference/shaders-msl/comp/dowhile.comp +++ b/reference/shaders-msl/comp/dowhile.comp @@ -55,6 +55,8 @@ struct SSBO2 spvUnsafeArray out_data; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _28 [[buffer(0)]], device SSBO2& _52 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; diff --git a/reference/shaders-msl/comp/force-recompile-hooks.swizzle.comp b/reference/shaders-msl/comp/force-recompile-hooks.swizzle.comp index 250cc5f0..856efeab 100644 --- a/reference/shaders-msl/comp/force-recompile-hooks.swizzle.comp +++ b/reference/shaders-msl/comp/force-recompile-hooks.swizzle.comp @@ -8,13 +8,11 @@ using namespace metal; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) { return static_cast(x); } -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) { return static_cast(x); } @@ -67,10 +65,10 @@ inline T spvTextureSwizzle(T x, uint s) return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; } -kernel void main0(constant uint* spvSwizzleConstants [[buffer(0)]], texture2d foo [[texture(0)]], texture2d bar [[texture(1)]], sampler fooSmplr [[sampler(0)]]) +kernel void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture2d foo [[texture(0)]], texture2d bar [[texture(1)]], sampler fooSmplr [[sampler(0)]]) { - constant uint& fooSwzl = spvSwizzleConstants[2]; - float4 a = foo.sample(fooSmplr, float2(1.0), level(0.0)); + constant uint& fooSwzl = spvSwizzleConstants[0]; + float4 a = spvTextureSwizzle(foo.sample(fooSmplr, float2(1.0), level(0.0)), fooSwzl); bar.write(a, uint2(int2(0))); } diff --git a/reference/shaders-msl/comp/image-cube-array-load-store.comp b/reference/shaders-msl/comp/image-cube-array-load-store.comp index ef67a326..c0b83c46 100644 --- a/reference/shaders-msl/comp/image-cube-array-load-store.comp +++ b/reference/shaders-msl/comp/image-cube-array-load-store.comp @@ -3,6 +3,8 @@ using namespace metal; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(texturecube_array uImageIn [[texture(0)]], texturecube_array uImageOut [[texture(1)]]) { int3 coord = int3(9, 7, 11); diff --git a/reference/shaders-msl/comp/image.comp b/reference/shaders-msl/comp/image.comp index f3bc1455..e7c9c763 100644 --- a/reference/shaders-msl/comp/image.comp +++ b/reference/shaders-msl/comp/image.comp @@ -3,6 +3,8 @@ using namespace metal; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(texture2d uImageIn [[texture(0)]], texture2d uImageOut [[texture(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { float4 v = uImageIn.read(uint2((int2(gl_GlobalInvocationID.xy) + int2(uImageIn.get_width(), uImageIn.get_height())))); diff --git a/reference/shaders-msl/comp/insert.comp b/reference/shaders-msl/comp/insert.comp index 61c93629..6d3e85af 100644 --- a/reference/shaders-msl/comp/insert.comp +++ b/reference/shaders-msl/comp/insert.comp @@ -49,6 +49,8 @@ struct SSBO spvUnsafeArray out_data; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { float4 v; diff --git a/reference/shaders-msl/comp/int64.invalid.msl22.comp b/reference/shaders-msl/comp/int64.invalid.msl22.comp index 851511fa..d5bbbb47 100644 --- a/reference/shaders-msl/comp/int64.invalid.msl22.comp +++ b/reference/shaders-msl/comp/int64.invalid.msl22.comp @@ -82,6 +82,8 @@ struct SSBO uint u32; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _96 [[buffer(0)]]) { SSBO0_Type ssbo_0; diff --git a/reference/shaders-msl/comp/inverse.comp b/reference/shaders-msl/comp/inverse.comp index 04cfacee..33aed468 100644 --- a/reference/shaders-msl/comp/inverse.comp +++ b/reference/shaders-msl/comp/inverse.comp @@ -19,6 +19,8 @@ struct MatrixIn float4x4 m4in; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + // Returns the determinant of a 2x2 matrix. static inline __attribute__((always_inline)) float spvDet2x2(float a1, float a2, float b1, float b2) diff --git a/reference/shaders-msl/comp/mat3.comp b/reference/shaders-msl/comp/mat3.comp index 614837c9..621a53b3 100644 --- a/reference/shaders-msl/comp/mat3.comp +++ b/reference/shaders-msl/comp/mat3.comp @@ -49,6 +49,8 @@ struct SSBO2 spvUnsafeArray out_data; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _22 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; diff --git a/reference/shaders-msl/comp/mod.comp b/reference/shaders-msl/comp/mod.comp index 1fc9dbc7..e81abf2f 100644 --- a/reference/shaders-msl/comp/mod.comp +++ b/reference/shaders-msl/comp/mod.comp @@ -54,6 +54,8 @@ struct SSBO2 spvUnsafeArray out_data; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template inline Tx mod(Tx x, Ty y) diff --git a/reference/shaders-msl/comp/modf.comp b/reference/shaders-msl/comp/modf.comp index be2cb448..1129fd84 100644 --- a/reference/shaders-msl/comp/modf.comp +++ b/reference/shaders-msl/comp/modf.comp @@ -54,6 +54,8 @@ struct SSBO2 spvUnsafeArray out_data; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _35 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; diff --git a/reference/shaders-msl/comp/outer-product.comp b/reference/shaders-msl/comp/outer-product.comp index 8e32db39..e589642d 100644 --- a/reference/shaders-msl/comp/outer-product.comp +++ b/reference/shaders-msl/comp/outer-product.comp @@ -23,6 +23,8 @@ struct ReadSSBO float4 v4; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _21 [[buffer(0)]], const device ReadSSBO& _26 [[buffer(1)]]) { _21.m22 = float2x2(_26.v2 * _26.v2.x, _26.v2 * _26.v2.y); diff --git a/reference/shaders-msl/comp/read-write-only.comp b/reference/shaders-msl/comp/read-write-only.comp index 7547b417..0cf8d8e3 100644 --- a/reference/shaders-msl/comp/read-write-only.comp +++ b/reference/shaders-msl/comp/read-write-only.comp @@ -21,6 +21,8 @@ struct SSBO1 float4 data3; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _10 [[buffer(0)]], const device SSBO0& _15 [[buffer(1)]], device SSBO1& _21 [[buffer(2)]]) { _10.data4 = _15.data0 + _21.data2; diff --git a/reference/shaders-msl/comp/rmw-matrix.comp b/reference/shaders-msl/comp/rmw-matrix.comp index 150db7ed..b53a3a75 100644 --- a/reference/shaders-msl/comp/rmw-matrix.comp +++ b/reference/shaders-msl/comp/rmw-matrix.comp @@ -13,6 +13,8 @@ struct SSBO float4x4 c1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _11 [[buffer(0)]]) { _11.a *= _11.a1; diff --git a/reference/shaders-msl/comp/rmw-opt.comp b/reference/shaders-msl/comp/rmw-opt.comp index 060f9f9c..229154fc 100644 --- a/reference/shaders-msl/comp/rmw-opt.comp +++ b/reference/shaders-msl/comp/rmw-opt.comp @@ -8,6 +8,8 @@ struct SSBO int a; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _9 [[buffer(0)]]) { _9.a += 10; diff --git a/reference/shaders-msl/comp/scalar-std450-distance-length-normalize.comp b/reference/shaders-msl/comp/scalar-std450-distance-length-normalize.comp index 312a6f94..0ae6e556 100644 --- a/reference/shaders-msl/comp/scalar-std450-distance-length-normalize.comp +++ b/reference/shaders-msl/comp/scalar-std450-distance-length-normalize.comp @@ -12,6 +12,8 @@ struct SSBO float e; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _9 [[buffer(0)]]) { _9.c = abs(_9.a - _9.b); diff --git a/reference/shaders-msl/comp/spec-constant-op-member-array.comp b/reference/shaders-msl/comp/spec-constant-op-member-array.comp index a708edcd..74984fbc 100644 --- a/reference/shaders-msl/comp/spec-constant-op-member-array.comp +++ b/reference/shaders-msl/comp/spec-constant-op-member-array.comp @@ -81,6 +81,7 @@ struct SSBO constant int e_tmp [[function_constant(3)]]; constant int e = is_function_constant_defined(e_tmp) ? e_tmp : 400; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); kernel void main0(device SSBO& _22 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { diff --git a/reference/shaders-msl/comp/storage-buffer-std140-vector-array.comp b/reference/shaders-msl/comp/storage-buffer-std140-vector-array.comp index 1273386c..901b8099 100644 --- a/reference/shaders-msl/comp/storage-buffer-std140-vector-array.comp +++ b/reference/shaders-msl/comp/storage-buffer-std140-vector-array.comp @@ -65,6 +65,8 @@ struct SSBO spvUnsafeArray sub; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { Sub foo; diff --git a/reference/shaders-msl/comp/struct-layout.comp b/reference/shaders-msl/comp/struct-layout.comp index 009c9aaa..3ac6de60 100644 --- a/reference/shaders-msl/comp/struct-layout.comp +++ b/reference/shaders-msl/comp/struct-layout.comp @@ -59,6 +59,8 @@ struct SSBO spvUnsafeArray in_data; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _23 [[buffer(0)]], const device SSBO& _30 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; diff --git a/reference/shaders-msl/comp/struct-nested.comp b/reference/shaders-msl/comp/struct-nested.comp index 9c026dfc..98ff0223 100644 --- a/reference/shaders-msl/comp/struct-nested.comp +++ b/reference/shaders-msl/comp/struct-nested.comp @@ -69,6 +69,8 @@ struct dstbuffer spvUnsafeArray test; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device dstbuffer& _19 [[buffer(0)]]) { s2 testVal; diff --git a/reference/shaders-msl/comp/struct-packing.comp b/reference/shaders-msl/comp/struct-packing.comp index 286a1c81..bb991dd5 100644 --- a/reference/shaders-msl/comp/struct-packing.comp +++ b/reference/shaders-msl/comp/struct-packing.comp @@ -159,6 +159,8 @@ struct SSBO0 spvUnsafeArray array; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [[buffer(1)]]) { Content_1 _60 = ssbo_140.content; diff --git a/reference/shaders-msl/comp/torture-loop.comp b/reference/shaders-msl/comp/torture-loop.comp index fbb35862..e6c29779 100644 --- a/reference/shaders-msl/comp/torture-loop.comp +++ b/reference/shaders-msl/comp/torture-loop.comp @@ -55,6 +55,8 @@ struct SSBO2 spvUnsafeArray out_data; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _24 [[buffer(0)]], device SSBO2& _89 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; diff --git a/reference/shaders-msl/comp/type-alias.comp b/reference/shaders-msl/comp/type-alias.comp index 5c33ef7e..6145ac98 100644 --- a/reference/shaders-msl/comp/type-alias.comp +++ b/reference/shaders-msl/comp/type-alias.comp @@ -79,6 +79,8 @@ struct SSBO2 spvUnsafeArray outputs; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + static inline __attribute__((always_inline)) float4 overload(thread const S0& s0) { diff --git a/reference/shaders-msl/comp/udiv.comp b/reference/shaders-msl/comp/udiv.comp index d7f4034f..0b5cc6a6 100644 --- a/reference/shaders-msl/comp/udiv.comp +++ b/reference/shaders-msl/comp/udiv.comp @@ -54,6 +54,8 @@ struct SSBO spvUnsafeArray inputs; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _10 [[buffer(0)]], device SSBO& _23 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { _10.outputs[gl_GlobalInvocationID.x] = _23.inputs[gl_GlobalInvocationID.x] / 29u; diff --git a/reference/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp b/reference/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp index a37fe519..cea12980 100644 --- a/reference/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp +++ b/reference/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp @@ -91,6 +91,8 @@ struct ResType_7 int4 _m1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBOUint& u [[buffer(0)]], device SSBOInt& i [[buffer(1)]]) { ResType _25; diff --git a/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag b/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag index 6a126294..833ddec0 100644 --- a/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag +++ b/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag @@ -25,13 +25,11 @@ struct main0_in template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) { return static_cast(x); } -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) { return static_cast(x); } @@ -87,26 +85,26 @@ inline T spvTextureSwizzle(T x, uint s) static inline __attribute__((always_inline)) float4 sample_in_func_1(thread const array, 4> uSampler0, thread const array uSampler0Smplr, constant uint* uSampler0Swzl, thread float2& vUV) { - return uSampler0[2].sample(uSampler0Smplr[2], vUV); + return spvTextureSwizzle(uSampler0[2].sample(uSampler0Smplr[2], vUV), uSampler0Swzl[2]); } static inline __attribute__((always_inline)) float4 sample_in_func_2(thread float2& vUV, thread texture2d uSampler1, thread const sampler uSampler1Smplr, constant uint& uSampler1Swzl) { - return uSampler1.sample(uSampler1Smplr, vUV); + return spvTextureSwizzle(uSampler1.sample(uSampler1Smplr, vUV), uSampler1Swzl); } static inline __attribute__((always_inline)) float4 sample_single_in_func(thread const texture2d s, thread const sampler sSmplr, constant uint& sSwzl, thread float2& vUV) { - return s.sample(sSmplr, vUV); + return spvTextureSwizzle(s.sample(sSmplr, vUV), sSwzl); } -fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant uint* spvSwizzleConstants [[buffer(1)]], texture2d uSampler1 [[texture(0)]], sampler uSampler1Smplr [[sampler(0)]]) +fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant uint* spvSwizzleConstants [[buffer(30)]], texture2d uSampler1 [[texture(0)]], sampler uSampler1Smplr [[sampler(0)]]) { main0_out out = {}; constant uint* spvDescriptorSet0_uSampler0Swzl = &spvDescriptorSet0.spvSwizzleConstants[0]; - constant uint& uSampler1Swzl = spvSwizzleConstants[1]; + constant uint& uSampler1Swzl = spvSwizzleConstants[0]; out.FragColor = sample_in_func_1(spvDescriptorSet0.uSampler0, spvDescriptorSet0.uSampler0Smplr, spvDescriptorSet0_uSampler0Swzl, in.vUV); out.FragColor += sample_in_func_2(in.vUV, uSampler1, uSampler1Smplr, uSampler1Swzl); out.FragColor += sample_single_in_func(spvDescriptorSet0.uSampler0[1], spvDescriptorSet0.uSampler0Smplr[1], spvDescriptorSet0_uSampler0Swzl[1], in.vUV); diff --git a/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag b/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag index 08faf0e6..64b361ec 100644 --- a/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag +++ b/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag @@ -18,13 +18,11 @@ struct main0_in template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) { return static_cast(x); } -template -inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) { return static_cast(x); } @@ -80,19 +78,19 @@ inline T spvTextureSwizzle(T x, uint s) static inline __attribute__((always_inline)) float4 sample_in_func(thread const array, 4> uSampler, thread const array uSamplerSmplr, constant uint* uSamplerSwzl, thread float2& vUV) { - return uSampler[2].sample(uSamplerSmplr[2], vUV); + return spvTextureSwizzle(uSampler[2].sample(uSamplerSmplr[2], vUV), uSamplerSwzl[2]); } static inline __attribute__((always_inline)) float4 sample_single_in_func(thread const texture2d s, thread const sampler sSmplr, constant uint& sSwzl, thread float2& vUV) { - return s.sample(sSmplr, vUV); + return spvTextureSwizzle(s.sample(sSmplr, vUV), sSwzl); } -fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvSwizzleConstants [[buffer(0)]], array, 4> uSampler [[texture(0)]], array uSamplerSmplr [[sampler(0)]]) +fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvSwizzleConstants [[buffer(30)]], array, 4> uSampler [[texture(0)]], array uSamplerSmplr [[sampler(0)]]) { main0_out out = {}; - constant uint* uSamplerSwzl = &spvSwizzleConstants[4]; + constant uint* uSamplerSwzl = &spvSwizzleConstants[0]; out.FragColor = sample_in_func(uSampler, uSamplerSmplr, uSamplerSwzl, in.vUV); out.FragColor += sample_single_in_func(uSampler[1], uSamplerSmplr[1], uSamplerSwzl[1], in.vUV); return out; diff --git a/reference/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp b/reference/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp index 10ced4f3..76b575aa 100644 --- a/reference/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp +++ b/reference/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp @@ -162,6 +162,8 @@ struct SSBO2 packed_rm_float3x2 m2; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO1& ssbo_scalar [[buffer(0)]], device SSBO0& ssbo_140 [[buffer(1)]], device SSBO2& ssbo_scalar2 [[buffer(2)]]) { spvUnsafeArray(ssbo_scalar.content.m0s[0].a[0]) = ssbo_140.content.m0s[0].a[0].xy; diff --git a/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp b/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp index 4ebab8c7..2fd77131 100644 --- a/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp +++ b/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp @@ -10,6 +10,8 @@ struct SSBO float FragColor; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + inline uint4 spvSubgroupBallot(bool value) { simd_vote vote = simd_ballot(value); diff --git a/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp b/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp index 84fcb9c3..5c67f6e4 100644 --- a/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp +++ b/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp @@ -8,6 +8,8 @@ struct SSBO float FragColor; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[quadgroups_per_threadgroup]], uint gl_SubgroupID [[quadgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_quadgroup]]) { _9.FragColor = float(gl_NumSubgroups); diff --git a/reference/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag b/reference/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag index d5f48ccd..67895e3e 100644 --- a/reference/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag +++ b/reference/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag @@ -58,7 +58,7 @@ struct main0_in float2 vTex_3 [[user(locn4)]]; }; -fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(0)]], texture2d uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]], uint gl_ViewIndex [[render_target_array_index]]) +fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], texture2d uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]], uint gl_ViewIndex [[render_target_array_index]]) { main0_out out = {}; spvUnsafeArray vTex = {}; diff --git a/reference/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert b/reference/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert index 16473bf0..8164bd23 100644 --- a/reference/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert +++ b/reference/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert @@ -60,7 +60,7 @@ struct main0_in float4 Position [[attribute(0)]]; }; -vertex main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(1)]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]]) +vertex main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]]) { main0_out out = {}; uint gl_ViewIndex = spvViewMask[0] + gl_InstanceIndex % spvViewMask[1]; diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 359b4196..63cc8e5d 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -3806,15 +3806,13 @@ void CompilerMSL::emit_custom_functions() statement("template struct spvRemoveReference { typedef T type; };"); statement("template struct spvRemoveReference { typedef T type; };"); statement("template struct spvRemoveReference { typedef T type; };"); - - statement("template"); - statement("inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x)"); + statement("template inline constexpr thread T&& spvForward(thread typename " + "spvRemoveReference::type& x)"); begin_scope(); statement("return static_cast(x);"); end_scope(); - - statement("template"); - statement("inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x)"); + statement("template inline constexpr thread T&& spvForward(thread typename " + "spvRemoveReference::type&& x)"); begin_scope(); statement("return static_cast(x);"); end_scope(); @@ -6123,203 +6121,22 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) previous_instruction_opcode = opcode; } -// If the underlying resource has been used for comparison then duplicate loads of that resource must be too -static inline bool image_opcode_is_sample_no_dref(Op op) -{ - switch (op) - { - case OpImageSampleExplicitLod: - case OpImageSampleImplicitLod: - case OpImageSampleProjExplicitLod: - case OpImageSampleProjImplicitLod: - case OpImageFetch: - case OpImageRead: - case OpImageSparseSampleExplicitLod: - case OpImageSparseSampleImplicitLod: - case OpImageSparseSampleProjExplicitLod: - case OpImageSparseSampleProjImplicitLod: - case OpImageSparseFetch: - case OpImageSparseRead: - return true; - - default: - return false; - } -} - void CompilerMSL::emit_texture_op(const Instruction &i) { auto *ops = stream(i); auto op = static_cast(i.op); - uint32_t length = i.length; - vector inherited_expressions; + SmallVector inherited_expressions; - uint32_t result_type = ops[0]; + uint32_t result_type_id = ops[0]; uint32_t id = ops[1]; uint32_t img = ops[2]; - uint32_t coord = ops[3]; - uint32_t dref = 0; - uint32_t comp = 0; - bool gather = false; - bool proj = false; - bool fetch = false; - const uint32_t *opt = nullptr; - inherited_expressions.push_back(coord); - - switch (op) - { - case OpImageSampleDrefImplicitLod: - case OpImageSampleDrefExplicitLod: - dref = ops[4]; - opt = &ops[5]; - length -= 5; - break; - - case OpImageSampleProjDrefImplicitLod: - case OpImageSampleProjDrefExplicitLod: - dref = ops[4]; - opt = &ops[5]; - length -= 5; - proj = true; - break; - - case OpImageDrefGather: - dref = ops[4]; - opt = &ops[5]; - length -= 5; - gather = true; - break; - - case OpImageGather: - comp = ops[4]; - opt = &ops[5]; - length -= 5; - gather = true; - break; - - case OpImageFetch: - case OpImageRead: // Reads == fetches in Metal (other langs will not get here) - opt = &ops[4]; - length -= 4; - fetch = true; - break; - - case OpImageSampleProjImplicitLod: - case OpImageSampleProjExplicitLod: - opt = &ops[4]; - length -= 4; - proj = true; - break; - - default: - opt = &ops[4]; - length -= 4; - break; - } - - // Bypass pointers because we need the real image struct auto &type = expression_type(img); auto &imgtype = get(type.self); - uint32_t coord_components = 0; - switch (imgtype.image.dim) - { - case spv::Dim1D: - coord_components = 1; - break; - case spv::Dim2D: - coord_components = 2; - break; - case spv::Dim3D: - coord_components = 3; - break; - case spv::DimCube: - coord_components = 3; - break; - case spv::DimBuffer: - coord_components = 1; - break; - default: - coord_components = 2; - break; - } - - if (dref) - inherited_expressions.push_back(dref); - - if (proj) - coord_components++; - if (imgtype.image.arrayed) - coord_components++; - - uint32_t bias = 0; - uint32_t lod = 0; - uint32_t grad_x = 0; - uint32_t grad_y = 0; - uint32_t coffset = 0; - uint32_t offset = 0; - uint32_t coffsets = 0; - uint32_t sample = 0; - uint32_t minlod = 0; - uint32_t flags = 0; - - if (length) - { - flags = *opt++; - length--; - } - - auto test = [&](uint32_t &v, uint32_t flag) { - if (length && (flags & flag)) - { - v = *opt++; - inherited_expressions.push_back(v); - length--; - } - }; - - test(bias, ImageOperandsBiasMask); - test(lod, ImageOperandsLodMask); - test(grad_x, ImageOperandsGradMask); - test(grad_y, ImageOperandsGradMask); - test(coffset, ImageOperandsConstOffsetMask); - test(offset, ImageOperandsOffsetMask); - test(coffsets, ImageOperandsConstOffsetsMask); - test(sample, ImageOperandsSampleMask); - test(minlod, ImageOperandsMinLodMask); - - string expr; bool forward = false; - expr += to_function_name(img, imgtype, !!fetch, !!gather, !!proj, !!coffsets, (!!coffset || !!offset), - (!!grad_x || !!grad_y), !!dref, lod, minlod); - expr += "("; - expr += to_function_args(img, imgtype, fetch, gather, proj, coord, coord_components, dref, grad_x, grad_y, lod, - coffset, offset, bias, comp, sample, minlod, &forward); - expr += ")"; - - // texture(samplerXShadow) returns float. shadowX() returns vec4. Swizzle here. - if (is_legacy() && image_is_comparison(imgtype, img)) - expr += ".r"; - - // Sampling from a texture which was deduced to be a depth image, might actually return 1 component here. - // Remap back to 4 components as sampling opcodes expect. - bool image_is_depth; - const auto *combined = maybe_get(img); - if (combined) - image_is_depth = image_is_comparison(imgtype, combined->image); - else - image_is_depth = image_is_comparison(imgtype, img); - - if (image_is_depth && backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op)) - { - expr = remap_swizzle(get(result_type), 1, expr); - } - - // Deals with reads from MSL. We might need to downconvert to fewer components. - if (op == OpImageRead) - expr = remap_swizzle(get(result_type), 4, expr); + string expr = to_texture_op(i, &forward, inherited_expressions); // Use Metal's native frame-buffer fetch API for subpass inputs. if (imgtype.image.dim == DimSubpassData && msl_options.is_ios() && msl_options.ios_use_framebuffer_fetch_subpasses) @@ -6327,7 +6144,7 @@ void CompilerMSL::emit_texture_op(const Instruction &i) expr = to_expression(img); } - emit_op(result_type, id, expr, forward); + emit_op(result_type_id, id, expr, forward); for (auto &inherit : inherited_expressions) inherit_expression_dependencies(id, inherit); @@ -9634,8 +9451,9 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base } } - // Always determine resource index and don't opt out if the variable already has the "resource_decoration" flag. - // This doesn't work with atomics that need to be split into two resources. + // If we have already allocated an index, keep using it. + if (has_extended_decoration(var.self, resource_decoration)) + return get_extended_decoration(var.self, resource_decoration); // Allow user to enable decoration binding if (msl_options.enable_decoration_binding)