Removed bounds checks in favor of SPIRV-Tools pass '--graphics-robust-access'
This commit is contained in:
parent
2482ff708c
commit
e1b161b54b
@ -387,7 +387,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa
|
||||
float _1276 = _1258.y;
|
||||
float4 _1279 = Texture1.sample(Texture1Sampler, float2(_1275, _1276));
|
||||
float4 _1283 = Texture1.sample(Texture1Sampler, float2(_1275 + 0.0625, _1276));
|
||||
float3 _1289 = fast::max(float3(6.1035199905745685100555419921875e-05), (float3(_Globals.LUTWeights[0]) * _1256) + (float3(_Globals.LUTWeights[1]) * mix(_1279, _1283, float4(_1270 - _1271)).xyz));
|
||||
float3 _1289 = fast::max(float3(6.1035199905745685100555419921875e-05), (float3(_Globals.LUTWeights[0].x) * _1256) + (float3(_Globals.LUTWeights[1].x) * mix(_1279, _1283, float4(_1270 - _1271)).xyz));
|
||||
float3 _1295 = select(_1289 * float3(0.077399380505084991455078125), pow((_1289 * float3(0.94786727428436279296875)) + float3(0.0521326996386051177978515625), float3(2.400000095367431640625)), _1289 > float3(0.040449999272823333740234375));
|
||||
float3 _1324 = pow(fast::max(float3(0.0), mix((((float3(_Globals.MappingPolynomial.x) * (_1295 * _1295)) + (float3(_Globals.MappingPolynomial.y) * _1295)) + float3(_Globals.MappingPolynomial.z)) * _Globals.ColorScale, _Globals.OverlayColor.xyz, float3(_Globals.OverlayColor.w))), float3(_Globals.InverseGamma.y));
|
||||
float3 _3103;
|
||||
|
@ -68,9 +68,6 @@ struct main0_in
|
||||
uint in_var_TEXCOORD0 [[user(locn0)]];
|
||||
};
|
||||
|
||||
// Returns buffer coords clamped to storage buffer size
|
||||
#define spvStorageBufferCoords(idx, sizes, type, coord) metal::min((coord), (sizes[(idx)*2] / sizeof(type)) - 1)
|
||||
|
||||
fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d<uint> RWShadowTileNumCulledObjects [[texture(2)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]])
|
||||
{
|
||||
main0_out out = {};
|
||||
|
@ -0,0 +1,122 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
#include <metal_atomic>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct type_StructuredBuffer_v4float
|
||||
{
|
||||
spvUnsafeArray<float4, 1> _m0;
|
||||
};
|
||||
|
||||
struct type_Globals
|
||||
{
|
||||
uint2 ShadowTileListGroupSize;
|
||||
};
|
||||
|
||||
constant float3 _70 = {};
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 out_var_SV_Target0 [[color(0)]];
|
||||
};
|
||||
|
||||
struct main0_in
|
||||
{
|
||||
uint in_var_TEXCOORD0 [[user(locn0)]];
|
||||
};
|
||||
|
||||
fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvBufferSizeConstants [[buffer(25)]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d<uint> RWShadowTileNumCulledObjects [[texture(2)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]])
|
||||
{
|
||||
main0_out out = {};
|
||||
constant uint& CulledObjectBoxBoundsBufferSize = spvBufferSizeConstants[0];
|
||||
uint2 _77 = uint2(gl_FragCoord.xy);
|
||||
uint _78 = _77.y;
|
||||
uint _83 = _77.x;
|
||||
float2 _91 = float2(float(_83), float((_Globals.ShadowTileListGroupSize.y - 1u) - _78));
|
||||
float2 _93 = float2(_Globals.ShadowTileListGroupSize);
|
||||
float2 _96 = ((_91 / _93) * float2(2.0)) - float2(1.0);
|
||||
float2 _100 = (((_91 + float2(1.0)) / _93) * float2(2.0)) - float2(1.0);
|
||||
float3 _102 = float3(_100.x, _100.y, _70.z);
|
||||
_102.z = 1.0;
|
||||
uint _103 = in.in_var_TEXCOORD0 * 5u;
|
||||
uint _186 = clamp(_103 + 1u, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u);
|
||||
if (all(CulledObjectBoxBounds._m0[_186].xy > _96.xy) && all(CulledObjectBoxBounds._m0[clamp(_103, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u)].xyz < _102))
|
||||
{
|
||||
float _122 = _96.x;
|
||||
float _123 = _96.y;
|
||||
spvUnsafeArray<float3, 8> _73;
|
||||
_73[0] = float3(_122, _123, -1000.0);
|
||||
float _126 = _100.x;
|
||||
_73[1] = float3(_126, _123, -1000.0);
|
||||
float _129 = _100.y;
|
||||
_73[2] = float3(_122, _129, -1000.0);
|
||||
_73[3] = float3(_126, _129, -1000.0);
|
||||
_73[4] = float3(_122, _123, 1.0);
|
||||
_73[5] = float3(_126, _123, 1.0);
|
||||
_73[6] = float3(_122, _129, 1.0);
|
||||
_73[7] = float3(_126, _129, 1.0);
|
||||
float3 _155;
|
||||
float3 _158;
|
||||
_155 = float3(-500000.0);
|
||||
_158 = float3(500000.0);
|
||||
for (int _160 = 0; _160 < 8; )
|
||||
{
|
||||
float3 _166 = _73[int(clamp(uint(_160), uint(0), uint(7)))] - (float3(0.5) * (CulledObjectBoxBounds._m0[clamp(_103, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u)].xyz + CulledObjectBoxBounds._m0[_186].xyz));
|
||||
float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[clamp(_103 + 2u, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u)].xyz), dot(_166, CulledObjectBoxBounds._m0[clamp(_103 + 3u, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u)].xyz), dot(_166, CulledObjectBoxBounds._m0[clamp(_103 + 4u, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u)].xyz));
|
||||
_155 = fast::max(_155, _170);
|
||||
_158 = fast::min(_158, _170);
|
||||
_160++;
|
||||
continue;
|
||||
}
|
||||
if (all(_158 < float3(1.0)) && all(_155 > float3(-1.0)))
|
||||
{
|
||||
uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&RWShadowTileNumCulledObjects_atomic[(_78 * _Globals.ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed);
|
||||
}
|
||||
}
|
||||
out.out_var_SV_Target0 = float4(0.0);
|
||||
return out;
|
||||
}
|
||||
|
@ -1,12 +1,53 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct SSBO
|
||||
{
|
||||
float4 a[16];
|
||||
float4 b[16];
|
||||
spvUnsafeArray<float4, 16> a;
|
||||
spvUnsafeArray<float4, 16> b;
|
||||
};
|
||||
|
||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
|
||||
|
@ -388,7 +388,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa
|
||||
float _1276 = _1258.y;
|
||||
float4 _1279 = Texture1.sample(Texture1Sampler, float2(_1275, _1276));
|
||||
float4 _1283 = Texture1.sample(Texture1Sampler, float2(_1275 + 0.0625, _1276));
|
||||
float3 _1289 = fast::max(float3(6.1035199905745685100555419921875e-05), (float3(_Globals.LUTWeights[0]) * _1256) + (float3(_Globals.LUTWeights[1]) * mix(_1279, _1283, float4(_1270 - _1271)).xyz));
|
||||
float3 _1289 = fast::max(float3(6.1035199905745685100555419921875e-05), (float3(_Globals.LUTWeights[0].x) * _1256) + (float3(_Globals.LUTWeights[1].x) * mix(_1279, _1283, float4(_1270 - _1271)).xyz));
|
||||
float3 _1295 = select(_1289 * float3(0.077399380505084991455078125), pow((_1289 * float3(0.94786727428436279296875)) + float3(0.0521326996386051177978515625), float3(2.400000095367431640625)), _1289 > float3(0.040449999272823333740234375));
|
||||
float3 _1324 = pow(fast::max(float3(0.0), mix((((float3(_Globals.MappingPolynomial.x) * (_1295 * _1295)) + (float3(_Globals.MappingPolynomial.y) * _1295)) + float3(_Globals.MappingPolynomial.z)) * _Globals.ColorScale, _Globals.OverlayColor.xyz, float3(_Globals.OverlayColor.w))), float3(_Globals.InverseGamma.y));
|
||||
float3 _3103;
|
||||
|
@ -68,9 +68,6 @@ struct main0_in
|
||||
uint in_var_TEXCOORD0 [[user(locn0)]];
|
||||
};
|
||||
|
||||
// Returns buffer coords clamped to storage buffer size
|
||||
#define spvStorageBufferCoords(idx, sizes, type, coord) metal::min((coord), (sizes[(idx)*2] / sizeof(type)) - 1)
|
||||
|
||||
fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d<uint> RWShadowTileNumCulledObjects [[texture(2)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]])
|
||||
{
|
||||
main0_out out = {};
|
||||
|
@ -0,0 +1,121 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
#include <metal_atomic>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct type_StructuredBuffer_v4float
|
||||
{
|
||||
spvUnsafeArray<float4, 1> _m0;
|
||||
};
|
||||
|
||||
struct type_Globals
|
||||
{
|
||||
uint2 ShadowTileListGroupSize;
|
||||
};
|
||||
|
||||
constant float3 _70 = {};
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 out_var_SV_Target0 [[color(0)]];
|
||||
};
|
||||
|
||||
struct main0_in
|
||||
{
|
||||
uint in_var_TEXCOORD0 [[user(locn0)]];
|
||||
};
|
||||
|
||||
fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d<uint> RWShadowTileNumCulledObjects [[texture(2)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]])
|
||||
{
|
||||
main0_out out = {};
|
||||
uint2 _77 = uint2(gl_FragCoord.xy);
|
||||
uint _78 = _77.y;
|
||||
uint _83 = _77.x;
|
||||
float2 _91 = float2(float(_83), float((_Globals.ShadowTileListGroupSize.y - 1u) - _78));
|
||||
float2 _93 = float2(_Globals.ShadowTileListGroupSize);
|
||||
float2 _96 = ((_91 / _93) * float2(2.0)) - float2(1.0);
|
||||
float2 _100 = (((_91 + float2(1.0)) / _93) * float2(2.0)) - float2(1.0);
|
||||
float3 _102 = float3(_100.x, _100.y, _70.z);
|
||||
_102.z = 1.0;
|
||||
uint _103 = in.in_var_TEXCOORD0 * 5u;
|
||||
uint _107 = _103 + 1u;
|
||||
if (all(CulledObjectBoxBounds._m0[_107].xy > _96.xy) && all(CulledObjectBoxBounds._m0[_103].xyz < _102))
|
||||
{
|
||||
float _122 = _96.x;
|
||||
float _123 = _96.y;
|
||||
spvUnsafeArray<float3, 8> _73;
|
||||
_73[0] = float3(_122, _123, -1000.0);
|
||||
float _126 = _100.x;
|
||||
_73[1] = float3(_126, _123, -1000.0);
|
||||
float _129 = _100.y;
|
||||
_73[2] = float3(_122, _129, -1000.0);
|
||||
_73[3] = float3(_126, _129, -1000.0);
|
||||
_73[4] = float3(_122, _123, 1.0);
|
||||
_73[5] = float3(_126, _123, 1.0);
|
||||
_73[6] = float3(_122, _129, 1.0);
|
||||
_73[7] = float3(_126, _129, 1.0);
|
||||
float3 _155;
|
||||
float3 _158;
|
||||
_155 = float3(-500000.0);
|
||||
_158 = float3(500000.0);
|
||||
for (int _160 = 0; _160 < 8; )
|
||||
{
|
||||
float3 _166 = _73[_160] - (float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz));
|
||||
float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 4u].xyz));
|
||||
_155 = fast::max(_155, _170);
|
||||
_158 = fast::min(_158, _170);
|
||||
_160++;
|
||||
continue;
|
||||
}
|
||||
if (all(_158 < float3(1.0)) && all(_155 > float3(-1.0)))
|
||||
{
|
||||
uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&RWShadowTileNumCulledObjects_atomic[(_78 * _Globals.ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed);
|
||||
}
|
||||
}
|
||||
out.out_var_SV_Target0 = float4(0.0);
|
||||
return out;
|
||||
}
|
||||
|
@ -0,0 +1,242 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Google spiregg; 0
|
||||
; Bound: 180
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpCapability SampledBuffer
|
||||
OpCapability ImageBuffer
|
||||
OpExtension "SPV_GOOGLE_hlsl_functionality1"
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint Fragment %ShadowObjectCullPS "main" %in_var_TEXCOORD0 %gl_FragCoord %out_var_SV_Target0
|
||||
OpExecutionMode %ShadowObjectCullPS OriginUpperLeft
|
||||
OpSource HLSL 600
|
||||
OpName %type_StructuredBuffer_v4float "type.StructuredBuffer.v4float"
|
||||
OpName %CulledObjectBoxBounds "CulledObjectBoxBounds"
|
||||
OpName %type__Globals "type.$Globals"
|
||||
OpMemberName %type__Globals 0 "ShadowTileListGroupSize"
|
||||
OpName %_Globals "$Globals"
|
||||
OpName %type_buffer_image "type.buffer.image"
|
||||
OpName %RWShadowTileNumCulledObjects "RWShadowTileNumCulledObjects"
|
||||
OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0"
|
||||
OpName %out_var_SV_Target0 "out.var.SV_Target0"
|
||||
OpName %ShadowObjectCullPS "ShadowObjectCullPS"
|
||||
OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0"
|
||||
OpDecorate %in_var_TEXCOORD0 Flat
|
||||
OpDecorate %gl_FragCoord BuiltIn FragCoord
|
||||
OpDecorateString %gl_FragCoord UserSemantic "SV_POSITION"
|
||||
OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0"
|
||||
OpDecorate %in_var_TEXCOORD0 Location 0
|
||||
OpDecorate %out_var_SV_Target0 Location 0
|
||||
OpDecorate %CulledObjectBoxBounds DescriptorSet 0
|
||||
OpDecorate %CulledObjectBoxBounds Binding 1
|
||||
OpDecorate %_Globals DescriptorSet 0
|
||||
OpDecorate %_Globals Binding 2
|
||||
OpDecorate %RWShadowTileNumCulledObjects DescriptorSet 0
|
||||
OpDecorate %RWShadowTileNumCulledObjects Binding 0
|
||||
OpDecorate %_runtimearr_v4float ArrayStride 16
|
||||
OpMemberDecorate %type_StructuredBuffer_v4float 0 Offset 0
|
||||
OpMemberDecorate %type_StructuredBuffer_v4float 0 NonWritable
|
||||
OpDecorate %type_StructuredBuffer_v4float BufferBlock
|
||||
OpMemberDecorate %type__Globals 0 Offset 0
|
||||
OpDecorate %type__Globals Block
|
||||
%float = OpTypeFloat 32
|
||||
%v4float = OpTypeVector %float 4
|
||||
%v3float = OpTypeVector %float 3
|
||||
%v2float = OpTypeVector %float 2
|
||||
%int = OpTypeInt 32 1
|
||||
%uint = OpTypeInt 32 0
|
||||
%uint_2 = OpConstant %uint 2
|
||||
%uint_4 = OpConstant %uint 4
|
||||
%float_0 = OpConstant %float 0
|
||||
%22 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
|
||||
%int_1 = OpConstant %int 1
|
||||
%int_0 = OpConstant %int 0
|
||||
%uint_1 = OpConstant %uint 1
|
||||
%float_2 = OpConstant %float 2
|
||||
%27 = OpConstantComposite %v2float %float_2 %float_2
|
||||
%float_1 = OpConstant %float 1
|
||||
%29 = OpConstantComposite %v2float %float_1 %float_1
|
||||
%float_n1000 = OpConstant %float -1000
|
||||
%int_2 = OpConstant %int 2
|
||||
%float_0_5 = OpConstant %float 0.5
|
||||
%33 = OpConstantComposite %v3float %float_0_5 %float_0_5 %float_0_5
|
||||
%float_500000 = OpConstant %float 500000
|
||||
%35 = OpConstantComposite %v3float %float_500000 %float_500000 %float_500000
|
||||
%float_n500000 = OpConstant %float -500000
|
||||
%37 = OpConstantComposite %v3float %float_n500000 %float_n500000 %float_n500000
|
||||
%int_3 = OpConstant %int 3
|
||||
%int_4 = OpConstant %int 4
|
||||
%int_5 = OpConstant %int 5
|
||||
%int_6 = OpConstant %int 6
|
||||
%int_7 = OpConstant %int 7
|
||||
%int_8 = OpConstant %int 8
|
||||
%44 = OpConstantComposite %v3float %float_1 %float_1 %float_1
|
||||
%float_n1 = OpConstant %float -1
|
||||
%46 = OpConstantComposite %v3float %float_n1 %float_n1 %float_n1
|
||||
%uint_5 = OpConstant %uint 5
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%uint_3 = OpConstant %uint 3
|
||||
%_runtimearr_v4float = OpTypeRuntimeArray %v4float
|
||||
%type_StructuredBuffer_v4float = OpTypeStruct %_runtimearr_v4float
|
||||
%_ptr_Uniform_type_StructuredBuffer_v4float = OpTypePointer Uniform %type_StructuredBuffer_v4float
|
||||
%v2uint = OpTypeVector %uint 2
|
||||
%type__Globals = OpTypeStruct %v2uint
|
||||
%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals
|
||||
%type_buffer_image = OpTypeImage %uint Buffer 2 0 0 2 R32ui
|
||||
%_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image
|
||||
%_ptr_Input_uint = OpTypePointer Input %uint
|
||||
%_ptr_Input_v4float = OpTypePointer Input %v4float
|
||||
%_ptr_Output_v4float = OpTypePointer Output %v4float
|
||||
%void = OpTypeVoid
|
||||
%58 = OpTypeFunction %void
|
||||
%_ptr_Function_v3float = OpTypePointer Function %v3float
|
||||
%uint_8 = OpConstant %uint 8
|
||||
%_arr_v3float_uint_8 = OpTypeArray %v3float %uint_8
|
||||
%_ptr_Function__arr_v3float_uint_8 = OpTypePointer Function %_arr_v3float_uint_8
|
||||
%_ptr_Uniform_v2uint = OpTypePointer Uniform %v2uint
|
||||
%_ptr_Uniform_uint = OpTypePointer Uniform %uint
|
||||
%bool = OpTypeBool
|
||||
%v2bool = OpTypeVector %bool 2
|
||||
%v3bool = OpTypeVector %bool 3
|
||||
%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
|
||||
%_ptr_Image_uint = OpTypePointer Image %uint
|
||||
%CulledObjectBoxBounds = OpVariable %_ptr_Uniform_type_StructuredBuffer_v4float Uniform
|
||||
%_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform
|
||||
%RWShadowTileNumCulledObjects = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant
|
||||
%in_var_TEXCOORD0 = OpVariable %_ptr_Input_uint Input
|
||||
%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
|
||||
%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output
|
||||
%70 = OpUndef %v3float
|
||||
%71 = OpConstantNull %v3float
|
||||
%ShadowObjectCullPS = OpFunction %void None %58
|
||||
%72 = OpLabel
|
||||
%73 = OpVariable %_ptr_Function__arr_v3float_uint_8 Function
|
||||
%74 = OpLoad %uint %in_var_TEXCOORD0
|
||||
%75 = OpLoad %v4float %gl_FragCoord
|
||||
%76 = OpVectorShuffle %v2float %75 %75 0 1
|
||||
%77 = OpConvertFToU %v2uint %76
|
||||
%78 = OpCompositeExtract %uint %77 1
|
||||
%79 = OpAccessChain %_ptr_Uniform_v2uint %_Globals %int_0
|
||||
%80 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_0 %int_0
|
||||
%81 = OpLoad %uint %80
|
||||
%82 = OpIMul %uint %78 %81
|
||||
%83 = OpCompositeExtract %uint %77 0
|
||||
%84 = OpIAdd %uint %82 %83
|
||||
%85 = OpConvertUToF %float %83
|
||||
%86 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_0 %int_1
|
||||
%87 = OpLoad %uint %86
|
||||
%88 = OpISub %uint %87 %uint_1
|
||||
%89 = OpISub %uint %88 %78
|
||||
%90 = OpConvertUToF %float %89
|
||||
%91 = OpCompositeConstruct %v2float %85 %90
|
||||
%92 = OpLoad %v2uint %79
|
||||
%93 = OpConvertUToF %v2float %92
|
||||
%94 = OpFDiv %v2float %91 %93
|
||||
%95 = OpFMul %v2float %94 %27
|
||||
%96 = OpFSub %v2float %95 %29
|
||||
%97 = OpFAdd %v2float %91 %29
|
||||
%98 = OpFDiv %v2float %97 %93
|
||||
%99 = OpFMul %v2float %98 %27
|
||||
%100 = OpFSub %v2float %99 %29
|
||||
%101 = OpVectorShuffle %v3float %70 %100 3 4 2
|
||||
%102 = OpCompositeInsert %v3float %float_1 %101 2
|
||||
%103 = OpIMul %uint %74 %uint_5
|
||||
%104 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %103
|
||||
%105 = OpLoad %v4float %104
|
||||
%106 = OpVectorShuffle %v3float %105 %105 0 1 2
|
||||
%107 = OpIAdd %uint %103 %uint_1
|
||||
%108 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %107
|
||||
%109 = OpLoad %v4float %108
|
||||
%110 = OpVectorShuffle %v3float %109 %109 0 1 2
|
||||
%111 = OpVectorShuffle %v2float %109 %71 0 1
|
||||
%112 = OpVectorShuffle %v2float %96 %71 0 1
|
||||
%113 = OpFOrdGreaterThan %v2bool %111 %112
|
||||
%114 = OpAll %bool %113
|
||||
%115 = OpFOrdLessThan %v3bool %106 %102
|
||||
%116 = OpAll %bool %115
|
||||
%117 = OpLogicalAnd %bool %114 %116
|
||||
OpSelectionMerge %118 DontFlatten
|
||||
OpBranchConditional %117 %119 %118
|
||||
%119 = OpLabel
|
||||
%120 = OpFAdd %v3float %106 %110
|
||||
%121 = OpFMul %v3float %33 %120
|
||||
%122 = OpCompositeExtract %float %96 0
|
||||
%123 = OpCompositeExtract %float %96 1
|
||||
%124 = OpCompositeConstruct %v3float %122 %123 %float_n1000
|
||||
%125 = OpAccessChain %_ptr_Function_v3float %73 %int_0
|
||||
OpStore %125 %124
|
||||
%126 = OpCompositeExtract %float %100 0
|
||||
%127 = OpCompositeConstruct %v3float %126 %123 %float_n1000
|
||||
%128 = OpAccessChain %_ptr_Function_v3float %73 %int_1
|
||||
OpStore %128 %127
|
||||
%129 = OpCompositeExtract %float %100 1
|
||||
%130 = OpCompositeConstruct %v3float %122 %129 %float_n1000
|
||||
%131 = OpAccessChain %_ptr_Function_v3float %73 %int_2
|
||||
OpStore %131 %130
|
||||
%132 = OpCompositeConstruct %v3float %126 %129 %float_n1000
|
||||
%133 = OpAccessChain %_ptr_Function_v3float %73 %int_3
|
||||
OpStore %133 %132
|
||||
%134 = OpCompositeConstruct %v3float %122 %123 %float_1
|
||||
%135 = OpAccessChain %_ptr_Function_v3float %73 %int_4
|
||||
OpStore %135 %134
|
||||
%136 = OpCompositeConstruct %v3float %126 %123 %float_1
|
||||
%137 = OpAccessChain %_ptr_Function_v3float %73 %int_5
|
||||
OpStore %137 %136
|
||||
%138 = OpCompositeConstruct %v3float %122 %129 %float_1
|
||||
%139 = OpAccessChain %_ptr_Function_v3float %73 %int_6
|
||||
OpStore %139 %138
|
||||
%140 = OpCompositeConstruct %v3float %126 %129 %float_1
|
||||
%141 = OpAccessChain %_ptr_Function_v3float %73 %int_7
|
||||
OpStore %141 %140
|
||||
%142 = OpIAdd %uint %103 %uint_2
|
||||
%143 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %142
|
||||
%144 = OpLoad %v4float %143
|
||||
%145 = OpVectorShuffle %v3float %144 %144 0 1 2
|
||||
%146 = OpIAdd %uint %103 %uint_3
|
||||
%147 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %146
|
||||
%148 = OpLoad %v4float %147
|
||||
%149 = OpVectorShuffle %v3float %148 %148 0 1 2
|
||||
%150 = OpIAdd %uint %103 %uint_4
|
||||
%151 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %150
|
||||
%152 = OpLoad %v4float %151
|
||||
%153 = OpVectorShuffle %v3float %152 %152 0 1 2
|
||||
OpBranch %154
|
||||
%154 = OpLabel
|
||||
%155 = OpPhi %v3float %37 %119 %156 %157
|
||||
%158 = OpPhi %v3float %35 %119 %159 %157
|
||||
%160 = OpPhi %int %int_0 %119 %161 %157
|
||||
%162 = OpSLessThan %bool %160 %int_8
|
||||
OpLoopMerge %163 %157 Unroll
|
||||
OpBranchConditional %162 %157 %163
|
||||
%157 = OpLabel
|
||||
%164 = OpAccessChain %_ptr_Function_v3float %73 %160
|
||||
%165 = OpLoad %v3float %164
|
||||
%166 = OpFSub %v3float %165 %121
|
||||
%167 = OpDot %float %166 %145
|
||||
%168 = OpDot %float %166 %149
|
||||
%169 = OpDot %float %166 %153
|
||||
%170 = OpCompositeConstruct %v3float %167 %168 %169
|
||||
%159 = OpExtInst %v3float %1 FMin %158 %170
|
||||
%156 = OpExtInst %v3float %1 FMax %155 %170
|
||||
%161 = OpIAdd %int %160 %int_1
|
||||
OpBranch %154
|
||||
%163 = OpLabel
|
||||
%171 = OpFOrdLessThan %v3bool %158 %44
|
||||
%172 = OpAll %bool %171
|
||||
%173 = OpFOrdGreaterThan %v3bool %155 %46
|
||||
%174 = OpAll %bool %173
|
||||
%175 = OpLogicalAnd %bool %172 %174
|
||||
OpSelectionMerge %176 DontFlatten
|
||||
OpBranchConditional %175 %177 %176
|
||||
%177 = OpLabel
|
||||
%178 = OpImageTexelPointer %_ptr_Image_uint %RWShadowTileNumCulledObjects %84 %uint_0
|
||||
%179 = OpAtomicIAdd %uint %178 %uint_1 %uint_0 %uint_1
|
||||
OpBranch %176
|
||||
%176 = OpLabel
|
||||
OpBranch %118
|
||||
%118 = OpLabel
|
||||
OpStore %out_var_SV_Target0 %22
|
||||
OpReturn
|
||||
OpFunctionEnd
|
@ -3571,16 +3571,6 @@ void CompilerMSL::emit_custom_functions()
|
||||
break;
|
||||
}
|
||||
|
||||
// Storage buffer robustness
|
||||
case SPVFuncImplStorageBufferCoords:
|
||||
{
|
||||
statement("// Returns buffer coords clamped to storage buffer size");
|
||||
statement("#define spvStorageBufferCoords(idx, sizes, type, coord) metal::min((coord), (sizes[(idx)*2] / "
|
||||
"sizeof(type)) - 1)");
|
||||
statement("");
|
||||
break;
|
||||
}
|
||||
|
||||
// "fadd" intrinsic support
|
||||
case SPVFuncImplFAdd:
|
||||
statement("template<typename T>");
|
||||
@ -5596,27 +5586,6 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
|
||||
coord = join("spvImage2DAtomicCoord(", coord, ", ", to_expression(ops[2]), ")");
|
||||
}
|
||||
|
||||
// Storage buffer robustness
|
||||
if (msl_options.enforce_storge_buffer_bounds)
|
||||
{
|
||||
const auto *var_type = var ? maybe_get<SPIRType>(var->basetype) : nullptr;
|
||||
uint32_t var_index = get_metal_resource_index(*var, var_type->basetype);
|
||||
const auto &innertype =
|
||||
var_type->basetype == SPIRType::Image ? get<SPIRType>(var_type->image.type) : *var_type;
|
||||
uint32_t desc_set = get_decoration(ops[2], DecorationDescriptorSet);
|
||||
if (descriptor_set_is_argument_buffer(desc_set))
|
||||
{
|
||||
coord = join("spvStorageBufferCoords(", convert_to_string(var_index), ", ",
|
||||
to_name(argument_buffer_ids[desc_set]), ".spvBufferSizeConstants, ",
|
||||
type_to_glsl(innertype), ", ", coord, ")");
|
||||
}
|
||||
else
|
||||
{
|
||||
coord = join("spvStorageBufferCoords(", convert_to_string(var_index), ", spvBufferSizeConstants, ",
|
||||
type_to_glsl(innertype), ", ", coord, ")");
|
||||
}
|
||||
}
|
||||
|
||||
auto &e = set<SPIRExpression>(id, join(to_expression(ops[2]), "_atomic[", coord, "]"), result_type, true);
|
||||
e.loaded_from = var ? var->self : ID(0);
|
||||
}
|
||||
@ -11790,11 +11759,6 @@ std::string CompilerMSL::access_chain_internal(uint32_t base, const uint32_t *in
|
||||
const auto *type = &get_pointee_type(type_id);
|
||||
|
||||
auto *var = maybe_get<SPIRVariable>(base);
|
||||
const auto *var_type = var ? maybe_get<SPIRType>(var->basetype) : nullptr;
|
||||
bool ssbo = msl_options.enforce_storge_buffer_bounds && var && var_type &&
|
||||
(var->storage == StorageClassStorageBuffer ||
|
||||
(var_type->basetype == SPIRType::Struct && var->storage == StorageClassUniform &&
|
||||
has_decoration(var_type->self, DecorationBufferBlock)));
|
||||
|
||||
bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos;
|
||||
bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
|
||||
@ -11840,27 +11804,6 @@ std::string CompilerMSL::access_chain_internal(uint32_t base, const uint32_t *in
|
||||
|
||||
expr += "[";
|
||||
|
||||
if (ssbo)
|
||||
{
|
||||
expr += "spvStorageBufferCoords(";
|
||||
uint32_t var_index = get_metal_resource_index(*var, var_type->basetype);
|
||||
expr += convert_to_string(var_index);
|
||||
expr += ", ";
|
||||
|
||||
uint32_t desc_set = get_decoration(base, DecorationDescriptorSet);
|
||||
if (descriptor_set_is_argument_buffer(desc_set))
|
||||
{
|
||||
expr += to_name(argument_buffer_ids[desc_set]);
|
||||
expr += ".";
|
||||
}
|
||||
|
||||
expr += "spvBufferSizeConstants";
|
||||
expr += ", ";
|
||||
const SPIRType &innertype = this->get<SPIRType>(type->parent_type);
|
||||
expr += type_to_glsl(innertype);
|
||||
expr += ", ";
|
||||
}
|
||||
|
||||
// If we are indexing into an array of SSBOs or UBOs, we need to index it with a non-uniform qualifier.
|
||||
bool nonuniform_index =
|
||||
has_decoration(index, DecorationNonUniformEXT) &&
|
||||
@ -11879,12 +11822,6 @@ std::string CompilerMSL::access_chain_internal(uint32_t base, const uint32_t *in
|
||||
if (nonuniform_index)
|
||||
expr += ")";
|
||||
|
||||
if (ssbo)
|
||||
{
|
||||
expr += ")";
|
||||
ssbo = false;
|
||||
}
|
||||
|
||||
expr += "]";
|
||||
if (tess_eval_input_array)
|
||||
{
|
||||
@ -12179,29 +12116,6 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
|
||||
{
|
||||
switch (opcode)
|
||||
{
|
||||
// Storage buffer robustness
|
||||
case OpInBoundsAccessChain:
|
||||
case OpAccessChain:
|
||||
case OpPtrAccessChain:
|
||||
{
|
||||
if (compiler.msl_options.enforce_storge_buffer_bounds)
|
||||
{
|
||||
auto *var = compiler.maybe_get<SPIRVariable>(args[2]);
|
||||
if (var)
|
||||
{
|
||||
auto &type = compiler.get<SPIRType>(var->basetype);
|
||||
bool ssbo = compiler.has_decoration(type.self, DecorationBufferBlock);
|
||||
if ((var->storage == StorageClassStorageBuffer ||
|
||||
(type.basetype == SPIRType::Struct && var->storage == StorageClassUniform && ssbo)))
|
||||
{
|
||||
compiler.buffers_requiring_array_length.insert(var->self);
|
||||
return SPVFuncImplStorageBufferCoords;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case OpFMod:
|
||||
return SPVFuncImplMod;
|
||||
|
||||
@ -12259,17 +12173,8 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
|
||||
if (it != image_pointers.end())
|
||||
{
|
||||
uint32_t tid = it->second->basetype;
|
||||
|
||||
// Storage buffer robustness
|
||||
if (compiler.msl_options.enforce_storge_buffer_bounds)
|
||||
{
|
||||
compiler.buffers_requiring_array_length.insert(args[opcode == OpAtomicStore ? 0 : 2]);
|
||||
}
|
||||
|
||||
if (tid && compiler.get<SPIRType>(tid).image.dim == Dim2D)
|
||||
return SPVFuncImplImage2DAtomicCoords;
|
||||
|
||||
return SPVFuncImplStorageBufferCoords;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -293,9 +293,6 @@ public:
|
||||
// Use Metal's native frame-buffer fetch API for subpass inputs.
|
||||
bool ios_use_framebuffer_fetch_subpasses = false;
|
||||
|
||||
// Storage buffer robustness - clamps access to SSBOs to the size of the buffer
|
||||
bool enforce_storge_buffer_bounds = false;
|
||||
|
||||
// Enables use of "fma" intrinsic for invariant float math
|
||||
bool invariant_float_math = false;
|
||||
|
||||
@ -506,7 +503,6 @@ protected:
|
||||
SPVFuncImplArrayOfArrayCopy6Dim = SPVFuncImplArrayCopyMultidimBase + 6,
|
||||
SPVFuncImplTexelBufferCoords,
|
||||
SPVFuncImplImage2DAtomicCoords, // Emulate texture2D atomic operations
|
||||
SPVFuncImplStorageBufferCoords, // Storage buffer robustness
|
||||
SPVFuncImplFMul,
|
||||
SPVFuncImplFAdd,
|
||||
SPVFuncImplCubemapTo2DArrayFace,
|
||||
|
@ -174,7 +174,10 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths):
|
||||
subprocess.check_call([paths.glslang, '--amb' ,'--target-env', 'vulkan1.1', '-V', '-o', spirv_path, shader])
|
||||
|
||||
if opt:
|
||||
subprocess.check_call([paths.spirv_opt, '--skip-validation', '-O', '-o', spirv_path, spirv_path])
|
||||
if '.graphics-robust-access.' in shader:
|
||||
subprocess.check_call([paths.spirv_opt, '--skip-validation', '-O', '--graphics-robust-access', '-o', spirv_path, spirv_path])
|
||||
else:
|
||||
subprocess.check_call([paths.spirv_opt, '--skip-validation', '-O', '-o', spirv_path, spirv_path])
|
||||
|
||||
spirv_cross_path = paths.spirv_cross
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user