Removed bounds checks in favor of SPIRV-Tools pass '--graphics-robust-access'

This commit is contained in:
Lukas Hermanns 2019-10-21 16:39:53 -04:00
parent 2482ff708c
commit e1b161b54b
11 changed files with 534 additions and 110 deletions

View File

@ -387,7 +387,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa
float _1276 = _1258.y;
float4 _1279 = Texture1.sample(Texture1Sampler, float2(_1275, _1276));
float4 _1283 = Texture1.sample(Texture1Sampler, float2(_1275 + 0.0625, _1276));
float3 _1289 = fast::max(float3(6.1035199905745685100555419921875e-05), (float3(_Globals.LUTWeights[0]) * _1256) + (float3(_Globals.LUTWeights[1]) * mix(_1279, _1283, float4(_1270 - _1271)).xyz));
float3 _1289 = fast::max(float3(6.1035199905745685100555419921875e-05), (float3(_Globals.LUTWeights[0].x) * _1256) + (float3(_Globals.LUTWeights[1].x) * mix(_1279, _1283, float4(_1270 - _1271)).xyz));
float3 _1295 = select(_1289 * float3(0.077399380505084991455078125), pow((_1289 * float3(0.94786727428436279296875)) + float3(0.0521326996386051177978515625), float3(2.400000095367431640625)), _1289 > float3(0.040449999272823333740234375));
float3 _1324 = pow(fast::max(float3(0.0), mix((((float3(_Globals.MappingPolynomial.x) * (_1295 * _1295)) + (float3(_Globals.MappingPolynomial.y) * _1295)) + float3(_Globals.MappingPolynomial.z)) * _Globals.ColorScale, _Globals.OverlayColor.xyz, float3(_Globals.OverlayColor.w))), float3(_Globals.InverseGamma.y));
float3 _3103;

View File

@ -68,9 +68,6 @@ struct main0_in
uint in_var_TEXCOORD0 [[user(locn0)]];
};
// Returns buffer coords clamped to storage buffer size
#define spvStorageBufferCoords(idx, sizes, type, coord) metal::min((coord), (sizes[(idx)*2] / sizeof(type)) - 1)
fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d<uint> RWShadowTileNumCulledObjects [[texture(2)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]])
{
main0_out out = {};

View File

@ -0,0 +1,122 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct type_StructuredBuffer_v4float
{
spvUnsafeArray<float4, 1> _m0;
};
struct type_Globals
{
uint2 ShadowTileListGroupSize;
};
constant float3 _70 = {};
struct main0_out
{
float4 out_var_SV_Target0 [[color(0)]];
};
struct main0_in
{
uint in_var_TEXCOORD0 [[user(locn0)]];
};
fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvBufferSizeConstants [[buffer(25)]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d<uint> RWShadowTileNumCulledObjects [[texture(2)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]])
{
main0_out out = {};
constant uint& CulledObjectBoxBoundsBufferSize = spvBufferSizeConstants[0];
uint2 _77 = uint2(gl_FragCoord.xy);
uint _78 = _77.y;
uint _83 = _77.x;
float2 _91 = float2(float(_83), float((_Globals.ShadowTileListGroupSize.y - 1u) - _78));
float2 _93 = float2(_Globals.ShadowTileListGroupSize);
float2 _96 = ((_91 / _93) * float2(2.0)) - float2(1.0);
float2 _100 = (((_91 + float2(1.0)) / _93) * float2(2.0)) - float2(1.0);
float3 _102 = float3(_100.x, _100.y, _70.z);
_102.z = 1.0;
uint _103 = in.in_var_TEXCOORD0 * 5u;
uint _186 = clamp(_103 + 1u, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u);
if (all(CulledObjectBoxBounds._m0[_186].xy > _96.xy) && all(CulledObjectBoxBounds._m0[clamp(_103, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u)].xyz < _102))
{
float _122 = _96.x;
float _123 = _96.y;
spvUnsafeArray<float3, 8> _73;
_73[0] = float3(_122, _123, -1000.0);
float _126 = _100.x;
_73[1] = float3(_126, _123, -1000.0);
float _129 = _100.y;
_73[2] = float3(_122, _129, -1000.0);
_73[3] = float3(_126, _129, -1000.0);
_73[4] = float3(_122, _123, 1.0);
_73[5] = float3(_126, _123, 1.0);
_73[6] = float3(_122, _129, 1.0);
_73[7] = float3(_126, _129, 1.0);
float3 _155;
float3 _158;
_155 = float3(-500000.0);
_158 = float3(500000.0);
for (int _160 = 0; _160 < 8; )
{
float3 _166 = _73[int(clamp(uint(_160), uint(0), uint(7)))] - (float3(0.5) * (CulledObjectBoxBounds._m0[clamp(_103, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u)].xyz + CulledObjectBoxBounds._m0[_186].xyz));
float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[clamp(_103 + 2u, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u)].xyz), dot(_166, CulledObjectBoxBounds._m0[clamp(_103 + 3u, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u)].xyz), dot(_166, CulledObjectBoxBounds._m0[clamp(_103 + 4u, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u)].xyz));
_155 = fast::max(_155, _170);
_158 = fast::min(_158, _170);
_160++;
continue;
}
if (all(_158 < float3(1.0)) && all(_155 > float3(-1.0)))
{
uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&RWShadowTileNumCulledObjects_atomic[(_78 * _Globals.ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed);
}
}
out.out_var_SV_Target0 = float4(0.0);
return out;
}

View File

@ -1,12 +1,53 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct SSBO
{
float4 a[16];
float4 b[16];
spvUnsafeArray<float4, 16> a;
spvUnsafeArray<float4, 16> b;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);

View File

@ -388,7 +388,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa
float _1276 = _1258.y;
float4 _1279 = Texture1.sample(Texture1Sampler, float2(_1275, _1276));
float4 _1283 = Texture1.sample(Texture1Sampler, float2(_1275 + 0.0625, _1276));
float3 _1289 = fast::max(float3(6.1035199905745685100555419921875e-05), (float3(_Globals.LUTWeights[0]) * _1256) + (float3(_Globals.LUTWeights[1]) * mix(_1279, _1283, float4(_1270 - _1271)).xyz));
float3 _1289 = fast::max(float3(6.1035199905745685100555419921875e-05), (float3(_Globals.LUTWeights[0].x) * _1256) + (float3(_Globals.LUTWeights[1].x) * mix(_1279, _1283, float4(_1270 - _1271)).xyz));
float3 _1295 = select(_1289 * float3(0.077399380505084991455078125), pow((_1289 * float3(0.94786727428436279296875)) + float3(0.0521326996386051177978515625), float3(2.400000095367431640625)), _1289 > float3(0.040449999272823333740234375));
float3 _1324 = pow(fast::max(float3(0.0), mix((((float3(_Globals.MappingPolynomial.x) * (_1295 * _1295)) + (float3(_Globals.MappingPolynomial.y) * _1295)) + float3(_Globals.MappingPolynomial.z)) * _Globals.ColorScale, _Globals.OverlayColor.xyz, float3(_Globals.OverlayColor.w))), float3(_Globals.InverseGamma.y));
float3 _3103;

View File

@ -68,9 +68,6 @@ struct main0_in
uint in_var_TEXCOORD0 [[user(locn0)]];
};
// Returns buffer coords clamped to storage buffer size
#define spvStorageBufferCoords(idx, sizes, type, coord) metal::min((coord), (sizes[(idx)*2] / sizeof(type)) - 1)
fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d<uint> RWShadowTileNumCulledObjects [[texture(2)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]])
{
main0_out out = {};

View File

@ -0,0 +1,121 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct type_StructuredBuffer_v4float
{
spvUnsafeArray<float4, 1> _m0;
};
struct type_Globals
{
uint2 ShadowTileListGroupSize;
};
constant float3 _70 = {};
struct main0_out
{
float4 out_var_SV_Target0 [[color(0)]];
};
struct main0_in
{
uint in_var_TEXCOORD0 [[user(locn0)]];
};
fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d<uint> RWShadowTileNumCulledObjects [[texture(2)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]])
{
main0_out out = {};
uint2 _77 = uint2(gl_FragCoord.xy);
uint _78 = _77.y;
uint _83 = _77.x;
float2 _91 = float2(float(_83), float((_Globals.ShadowTileListGroupSize.y - 1u) - _78));
float2 _93 = float2(_Globals.ShadowTileListGroupSize);
float2 _96 = ((_91 / _93) * float2(2.0)) - float2(1.0);
float2 _100 = (((_91 + float2(1.0)) / _93) * float2(2.0)) - float2(1.0);
float3 _102 = float3(_100.x, _100.y, _70.z);
_102.z = 1.0;
uint _103 = in.in_var_TEXCOORD0 * 5u;
uint _107 = _103 + 1u;
if (all(CulledObjectBoxBounds._m0[_107].xy > _96.xy) && all(CulledObjectBoxBounds._m0[_103].xyz < _102))
{
float _122 = _96.x;
float _123 = _96.y;
spvUnsafeArray<float3, 8> _73;
_73[0] = float3(_122, _123, -1000.0);
float _126 = _100.x;
_73[1] = float3(_126, _123, -1000.0);
float _129 = _100.y;
_73[2] = float3(_122, _129, -1000.0);
_73[3] = float3(_126, _129, -1000.0);
_73[4] = float3(_122, _123, 1.0);
_73[5] = float3(_126, _123, 1.0);
_73[6] = float3(_122, _129, 1.0);
_73[7] = float3(_126, _129, 1.0);
float3 _155;
float3 _158;
_155 = float3(-500000.0);
_158 = float3(500000.0);
for (int _160 = 0; _160 < 8; )
{
float3 _166 = _73[_160] - (float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz));
float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 4u].xyz));
_155 = fast::max(_155, _170);
_158 = fast::min(_158, _170);
_160++;
continue;
}
if (all(_158 < float3(1.0)) && all(_155 > float3(-1.0)))
{
uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&RWShadowTileNumCulledObjects_atomic[(_78 * _Globals.ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed);
}
}
out.out_var_SV_Target0 = float4(0.0);
return out;
}

View File

@ -0,0 +1,242 @@
; SPIR-V
; Version: 1.0
; Generator: Google spiregg; 0
; Bound: 180
; Schema: 0
OpCapability Shader
OpCapability SampledBuffer
OpCapability ImageBuffer
OpExtension "SPV_GOOGLE_hlsl_functionality1"
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %ShadowObjectCullPS "main" %in_var_TEXCOORD0 %gl_FragCoord %out_var_SV_Target0
OpExecutionMode %ShadowObjectCullPS OriginUpperLeft
OpSource HLSL 600
OpName %type_StructuredBuffer_v4float "type.StructuredBuffer.v4float"
OpName %CulledObjectBoxBounds "CulledObjectBoxBounds"
OpName %type__Globals "type.$Globals"
OpMemberName %type__Globals 0 "ShadowTileListGroupSize"
OpName %_Globals "$Globals"
OpName %type_buffer_image "type.buffer.image"
OpName %RWShadowTileNumCulledObjects "RWShadowTileNumCulledObjects"
OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0"
OpName %out_var_SV_Target0 "out.var.SV_Target0"
OpName %ShadowObjectCullPS "ShadowObjectCullPS"
OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0"
OpDecorate %in_var_TEXCOORD0 Flat
OpDecorate %gl_FragCoord BuiltIn FragCoord
OpDecorateString %gl_FragCoord UserSemantic "SV_POSITION"
OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0"
OpDecorate %in_var_TEXCOORD0 Location 0
OpDecorate %out_var_SV_Target0 Location 0
OpDecorate %CulledObjectBoxBounds DescriptorSet 0
OpDecorate %CulledObjectBoxBounds Binding 1
OpDecorate %_Globals DescriptorSet 0
OpDecorate %_Globals Binding 2
OpDecorate %RWShadowTileNumCulledObjects DescriptorSet 0
OpDecorate %RWShadowTileNumCulledObjects Binding 0
OpDecorate %_runtimearr_v4float ArrayStride 16
OpMemberDecorate %type_StructuredBuffer_v4float 0 Offset 0
OpMemberDecorate %type_StructuredBuffer_v4float 0 NonWritable
OpDecorate %type_StructuredBuffer_v4float BufferBlock
OpMemberDecorate %type__Globals 0 Offset 0
OpDecorate %type__Globals Block
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%v3float = OpTypeVector %float 3
%v2float = OpTypeVector %float 2
%int = OpTypeInt 32 1
%uint = OpTypeInt 32 0
%uint_2 = OpConstant %uint 2
%uint_4 = OpConstant %uint 4
%float_0 = OpConstant %float 0
%22 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
%int_1 = OpConstant %int 1
%int_0 = OpConstant %int 0
%uint_1 = OpConstant %uint 1
%float_2 = OpConstant %float 2
%27 = OpConstantComposite %v2float %float_2 %float_2
%float_1 = OpConstant %float 1
%29 = OpConstantComposite %v2float %float_1 %float_1
%float_n1000 = OpConstant %float -1000
%int_2 = OpConstant %int 2
%float_0_5 = OpConstant %float 0.5
%33 = OpConstantComposite %v3float %float_0_5 %float_0_5 %float_0_5
%float_500000 = OpConstant %float 500000
%35 = OpConstantComposite %v3float %float_500000 %float_500000 %float_500000
%float_n500000 = OpConstant %float -500000
%37 = OpConstantComposite %v3float %float_n500000 %float_n500000 %float_n500000
%int_3 = OpConstant %int 3
%int_4 = OpConstant %int 4
%int_5 = OpConstant %int 5
%int_6 = OpConstant %int 6
%int_7 = OpConstant %int 7
%int_8 = OpConstant %int 8
%44 = OpConstantComposite %v3float %float_1 %float_1 %float_1
%float_n1 = OpConstant %float -1
%46 = OpConstantComposite %v3float %float_n1 %float_n1 %float_n1
%uint_5 = OpConstant %uint 5
%uint_0 = OpConstant %uint 0
%uint_3 = OpConstant %uint 3
%_runtimearr_v4float = OpTypeRuntimeArray %v4float
%type_StructuredBuffer_v4float = OpTypeStruct %_runtimearr_v4float
%_ptr_Uniform_type_StructuredBuffer_v4float = OpTypePointer Uniform %type_StructuredBuffer_v4float
%v2uint = OpTypeVector %uint 2
%type__Globals = OpTypeStruct %v2uint
%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals
%type_buffer_image = OpTypeImage %uint Buffer 2 0 0 2 R32ui
%_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image
%_ptr_Input_uint = OpTypePointer Input %uint
%_ptr_Input_v4float = OpTypePointer Input %v4float
%_ptr_Output_v4float = OpTypePointer Output %v4float
%void = OpTypeVoid
%58 = OpTypeFunction %void
%_ptr_Function_v3float = OpTypePointer Function %v3float
%uint_8 = OpConstant %uint 8
%_arr_v3float_uint_8 = OpTypeArray %v3float %uint_8
%_ptr_Function__arr_v3float_uint_8 = OpTypePointer Function %_arr_v3float_uint_8
%_ptr_Uniform_v2uint = OpTypePointer Uniform %v2uint
%_ptr_Uniform_uint = OpTypePointer Uniform %uint
%bool = OpTypeBool
%v2bool = OpTypeVector %bool 2
%v3bool = OpTypeVector %bool 3
%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
%_ptr_Image_uint = OpTypePointer Image %uint
%CulledObjectBoxBounds = OpVariable %_ptr_Uniform_type_StructuredBuffer_v4float Uniform
%_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform
%RWShadowTileNumCulledObjects = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant
%in_var_TEXCOORD0 = OpVariable %_ptr_Input_uint Input
%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output
%70 = OpUndef %v3float
%71 = OpConstantNull %v3float
%ShadowObjectCullPS = OpFunction %void None %58
%72 = OpLabel
%73 = OpVariable %_ptr_Function__arr_v3float_uint_8 Function
%74 = OpLoad %uint %in_var_TEXCOORD0
%75 = OpLoad %v4float %gl_FragCoord
%76 = OpVectorShuffle %v2float %75 %75 0 1
%77 = OpConvertFToU %v2uint %76
%78 = OpCompositeExtract %uint %77 1
%79 = OpAccessChain %_ptr_Uniform_v2uint %_Globals %int_0
%80 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_0 %int_0
%81 = OpLoad %uint %80
%82 = OpIMul %uint %78 %81
%83 = OpCompositeExtract %uint %77 0
%84 = OpIAdd %uint %82 %83
%85 = OpConvertUToF %float %83
%86 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_0 %int_1
%87 = OpLoad %uint %86
%88 = OpISub %uint %87 %uint_1
%89 = OpISub %uint %88 %78
%90 = OpConvertUToF %float %89
%91 = OpCompositeConstruct %v2float %85 %90
%92 = OpLoad %v2uint %79
%93 = OpConvertUToF %v2float %92
%94 = OpFDiv %v2float %91 %93
%95 = OpFMul %v2float %94 %27
%96 = OpFSub %v2float %95 %29
%97 = OpFAdd %v2float %91 %29
%98 = OpFDiv %v2float %97 %93
%99 = OpFMul %v2float %98 %27
%100 = OpFSub %v2float %99 %29
%101 = OpVectorShuffle %v3float %70 %100 3 4 2
%102 = OpCompositeInsert %v3float %float_1 %101 2
%103 = OpIMul %uint %74 %uint_5
%104 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %103
%105 = OpLoad %v4float %104
%106 = OpVectorShuffle %v3float %105 %105 0 1 2
%107 = OpIAdd %uint %103 %uint_1
%108 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %107
%109 = OpLoad %v4float %108
%110 = OpVectorShuffle %v3float %109 %109 0 1 2
%111 = OpVectorShuffle %v2float %109 %71 0 1
%112 = OpVectorShuffle %v2float %96 %71 0 1
%113 = OpFOrdGreaterThan %v2bool %111 %112
%114 = OpAll %bool %113
%115 = OpFOrdLessThan %v3bool %106 %102
%116 = OpAll %bool %115
%117 = OpLogicalAnd %bool %114 %116
OpSelectionMerge %118 DontFlatten
OpBranchConditional %117 %119 %118
%119 = OpLabel
%120 = OpFAdd %v3float %106 %110
%121 = OpFMul %v3float %33 %120
%122 = OpCompositeExtract %float %96 0
%123 = OpCompositeExtract %float %96 1
%124 = OpCompositeConstruct %v3float %122 %123 %float_n1000
%125 = OpAccessChain %_ptr_Function_v3float %73 %int_0
OpStore %125 %124
%126 = OpCompositeExtract %float %100 0
%127 = OpCompositeConstruct %v3float %126 %123 %float_n1000
%128 = OpAccessChain %_ptr_Function_v3float %73 %int_1
OpStore %128 %127
%129 = OpCompositeExtract %float %100 1
%130 = OpCompositeConstruct %v3float %122 %129 %float_n1000
%131 = OpAccessChain %_ptr_Function_v3float %73 %int_2
OpStore %131 %130
%132 = OpCompositeConstruct %v3float %126 %129 %float_n1000
%133 = OpAccessChain %_ptr_Function_v3float %73 %int_3
OpStore %133 %132
%134 = OpCompositeConstruct %v3float %122 %123 %float_1
%135 = OpAccessChain %_ptr_Function_v3float %73 %int_4
OpStore %135 %134
%136 = OpCompositeConstruct %v3float %126 %123 %float_1
%137 = OpAccessChain %_ptr_Function_v3float %73 %int_5
OpStore %137 %136
%138 = OpCompositeConstruct %v3float %122 %129 %float_1
%139 = OpAccessChain %_ptr_Function_v3float %73 %int_6
OpStore %139 %138
%140 = OpCompositeConstruct %v3float %126 %129 %float_1
%141 = OpAccessChain %_ptr_Function_v3float %73 %int_7
OpStore %141 %140
%142 = OpIAdd %uint %103 %uint_2
%143 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %142
%144 = OpLoad %v4float %143
%145 = OpVectorShuffle %v3float %144 %144 0 1 2
%146 = OpIAdd %uint %103 %uint_3
%147 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %146
%148 = OpLoad %v4float %147
%149 = OpVectorShuffle %v3float %148 %148 0 1 2
%150 = OpIAdd %uint %103 %uint_4
%151 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %150
%152 = OpLoad %v4float %151
%153 = OpVectorShuffle %v3float %152 %152 0 1 2
OpBranch %154
%154 = OpLabel
%155 = OpPhi %v3float %37 %119 %156 %157
%158 = OpPhi %v3float %35 %119 %159 %157
%160 = OpPhi %int %int_0 %119 %161 %157
%162 = OpSLessThan %bool %160 %int_8
OpLoopMerge %163 %157 Unroll
OpBranchConditional %162 %157 %163
%157 = OpLabel
%164 = OpAccessChain %_ptr_Function_v3float %73 %160
%165 = OpLoad %v3float %164
%166 = OpFSub %v3float %165 %121
%167 = OpDot %float %166 %145
%168 = OpDot %float %166 %149
%169 = OpDot %float %166 %153
%170 = OpCompositeConstruct %v3float %167 %168 %169
%159 = OpExtInst %v3float %1 FMin %158 %170
%156 = OpExtInst %v3float %1 FMax %155 %170
%161 = OpIAdd %int %160 %int_1
OpBranch %154
%163 = OpLabel
%171 = OpFOrdLessThan %v3bool %158 %44
%172 = OpAll %bool %171
%173 = OpFOrdGreaterThan %v3bool %155 %46
%174 = OpAll %bool %173
%175 = OpLogicalAnd %bool %172 %174
OpSelectionMerge %176 DontFlatten
OpBranchConditional %175 %177 %176
%177 = OpLabel
%178 = OpImageTexelPointer %_ptr_Image_uint %RWShadowTileNumCulledObjects %84 %uint_0
%179 = OpAtomicIAdd %uint %178 %uint_1 %uint_0 %uint_1
OpBranch %176
%176 = OpLabel
OpBranch %118
%118 = OpLabel
OpStore %out_var_SV_Target0 %22
OpReturn
OpFunctionEnd

View File

@ -3571,16 +3571,6 @@ void CompilerMSL::emit_custom_functions()
break;
}
// Storage buffer robustness
case SPVFuncImplStorageBufferCoords:
{
statement("// Returns buffer coords clamped to storage buffer size");
statement("#define spvStorageBufferCoords(idx, sizes, type, coord) metal::min((coord), (sizes[(idx)*2] / "
"sizeof(type)) - 1)");
statement("");
break;
}
// "fadd" intrinsic support
case SPVFuncImplFAdd:
statement("template<typename T>");
@ -5596,27 +5586,6 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
coord = join("spvImage2DAtomicCoord(", coord, ", ", to_expression(ops[2]), ")");
}
// Storage buffer robustness
if (msl_options.enforce_storge_buffer_bounds)
{
const auto *var_type = var ? maybe_get<SPIRType>(var->basetype) : nullptr;
uint32_t var_index = get_metal_resource_index(*var, var_type->basetype);
const auto &innertype =
var_type->basetype == SPIRType::Image ? get<SPIRType>(var_type->image.type) : *var_type;
uint32_t desc_set = get_decoration(ops[2], DecorationDescriptorSet);
if (descriptor_set_is_argument_buffer(desc_set))
{
coord = join("spvStorageBufferCoords(", convert_to_string(var_index), ", ",
to_name(argument_buffer_ids[desc_set]), ".spvBufferSizeConstants, ",
type_to_glsl(innertype), ", ", coord, ")");
}
else
{
coord = join("spvStorageBufferCoords(", convert_to_string(var_index), ", spvBufferSizeConstants, ",
type_to_glsl(innertype), ", ", coord, ")");
}
}
auto &e = set<SPIRExpression>(id, join(to_expression(ops[2]), "_atomic[", coord, "]"), result_type, true);
e.loaded_from = var ? var->self : ID(0);
}
@ -11790,11 +11759,6 @@ std::string CompilerMSL::access_chain_internal(uint32_t base, const uint32_t *in
const auto *type = &get_pointee_type(type_id);
auto *var = maybe_get<SPIRVariable>(base);
const auto *var_type = var ? maybe_get<SPIRType>(var->basetype) : nullptr;
bool ssbo = msl_options.enforce_storge_buffer_bounds && var && var_type &&
(var->storage == StorageClassStorageBuffer ||
(var_type->basetype == SPIRType::Struct && var->storage == StorageClassUniform &&
has_decoration(var_type->self, DecorationBufferBlock)));
bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos;
bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
@ -11840,27 +11804,6 @@ std::string CompilerMSL::access_chain_internal(uint32_t base, const uint32_t *in
expr += "[";
if (ssbo)
{
expr += "spvStorageBufferCoords(";
uint32_t var_index = get_metal_resource_index(*var, var_type->basetype);
expr += convert_to_string(var_index);
expr += ", ";
uint32_t desc_set = get_decoration(base, DecorationDescriptorSet);
if (descriptor_set_is_argument_buffer(desc_set))
{
expr += to_name(argument_buffer_ids[desc_set]);
expr += ".";
}
expr += "spvBufferSizeConstants";
expr += ", ";
const SPIRType &innertype = this->get<SPIRType>(type->parent_type);
expr += type_to_glsl(innertype);
expr += ", ";
}
// If we are indexing into an array of SSBOs or UBOs, we need to index it with a non-uniform qualifier.
bool nonuniform_index =
has_decoration(index, DecorationNonUniformEXT) &&
@ -11879,12 +11822,6 @@ std::string CompilerMSL::access_chain_internal(uint32_t base, const uint32_t *in
if (nonuniform_index)
expr += ")";
if (ssbo)
{
expr += ")";
ssbo = false;
}
expr += "]";
if (tess_eval_input_array)
{
@ -12179,29 +12116,6 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
{
switch (opcode)
{
// Storage buffer robustness
case OpInBoundsAccessChain:
case OpAccessChain:
case OpPtrAccessChain:
{
if (compiler.msl_options.enforce_storge_buffer_bounds)
{
auto *var = compiler.maybe_get<SPIRVariable>(args[2]);
if (var)
{
auto &type = compiler.get<SPIRType>(var->basetype);
bool ssbo = compiler.has_decoration(type.self, DecorationBufferBlock);
if ((var->storage == StorageClassStorageBuffer ||
(type.basetype == SPIRType::Struct && var->storage == StorageClassUniform && ssbo)))
{
compiler.buffers_requiring_array_length.insert(var->self);
return SPVFuncImplStorageBufferCoords;
}
}
}
break;
}
case OpFMod:
return SPVFuncImplMod;
@ -12259,17 +12173,8 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
if (it != image_pointers.end())
{
uint32_t tid = it->second->basetype;
// Storage buffer robustness
if (compiler.msl_options.enforce_storge_buffer_bounds)
{
compiler.buffers_requiring_array_length.insert(args[opcode == OpAtomicStore ? 0 : 2]);
}
if (tid && compiler.get<SPIRType>(tid).image.dim == Dim2D)
return SPVFuncImplImage2DAtomicCoords;
return SPVFuncImplStorageBufferCoords;
}
break;
}

View File

@ -293,9 +293,6 @@ public:
// Use Metal's native frame-buffer fetch API for subpass inputs.
bool ios_use_framebuffer_fetch_subpasses = false;
// Storage buffer robustness - clamps access to SSBOs to the size of the buffer
bool enforce_storge_buffer_bounds = false;
// Enables use of "fma" intrinsic for invariant float math
bool invariant_float_math = false;
@ -506,7 +503,6 @@ protected:
SPVFuncImplArrayOfArrayCopy6Dim = SPVFuncImplArrayCopyMultidimBase + 6,
SPVFuncImplTexelBufferCoords,
SPVFuncImplImage2DAtomicCoords, // Emulate texture2D atomic operations
SPVFuncImplStorageBufferCoords, // Storage buffer robustness
SPVFuncImplFMul,
SPVFuncImplFAdd,
SPVFuncImplCubemapTo2DArrayFace,

View File

@ -174,7 +174,10 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths):
subprocess.check_call([paths.glslang, '--amb' ,'--target-env', 'vulkan1.1', '-V', '-o', spirv_path, shader])
if opt:
subprocess.check_call([paths.spirv_opt, '--skip-validation', '-O', '-o', spirv_path, spirv_path])
if '.graphics-robust-access.' in shader:
subprocess.check_call([paths.spirv_opt, '--skip-validation', '-O', '--graphics-robust-access', '-o', spirv_path, spirv_path])
else:
subprocess.check_call([paths.spirv_opt, '--skip-validation', '-O', '-o', spirv_path, spirv_path])
spirv_cross_path = paths.spirv_cross