MSL: Support atomic access to images from argument buffers.
Argument-buffer support for emulated image atomics was not added when Epic contributed atomic image support. Fixes #1484.
This commit is contained in:
parent
401af49326
commit
9cafea6cf8
@ -0,0 +1,34 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
#include <metal_atomic>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
// Buffer block holding the single float4 result that main0 writes through the m_31 pointer.
struct SSBO
|
||||
{
|
||||
float4 outdata;
|
||||
};
|
||||
|
||||
// Workgroup size constant; uint3(1u) sets all three components to 1. Marked maybe_unused because main0 never reads it.
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
|
||||
|
||||
// Argument buffer layout for descriptor set 0; main0 receives it at [[buffer(0)]].
// Each member's [[id(n)]] is its slot inside the argument buffer.
struct spvDescriptorSetBuffer0
|
||||
{
|
||||
// Image whose width is read by spvImage2DAtomicCoord; main0 does not read or write it directly.
texture2d<uint> uImage [[id(0)]];
|
||||
// Buffer that main0 performs the atomic add on in place of uImage (emulated 2D texture atomics).
device atomic_uint* uImage_atomic [[id(1)]];
|
||||
// Destination of the kernel's float4 result.
device SSBO* m_31 [[id(2)]];
|
||||
// Texture and sampler pair sampled by main0.
texture2d<float> uTexture [[id(3)]];
|
||||
sampler uTextureSmplr [[id(4)]];
|
||||
};
|
||||
|
||||
// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics
|
||||
// Row-major linear index: the row (tc.y) is scaled by the texture width and the column (tc.x)
// is the offset inside that row. Scaling x by the width instead transposes the address and can
// index past width * height on non-square images.
// NOTE(review): assumes rows of the backing buffer are tightly packed (no row padding) - confirm
// against the host-side linear texture setup.
#define spvImage2DAtomicCoord(tc, tex) (((tex).get_width() * (tc).y) + (tc).x)
|
||||
|
||||
// Compute entry point. Atomically adds 10 to one element of uImage_atomic and stores a
// texture sample plus the fetched counter value into the SSBO.
//   spvDescriptorSet0      argument buffer holding every resource used here.
//   gl_GlobalInvocationID  thread position in the grid; only .xy is used.
kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
{
|
||||
// The element index comes from spvImage2DAtomicCoord applied to the thread's xy and uImage; _26 is the value returned by the relaxed fetch-add.
uint _26 = atomic_fetch_add_explicit((device atomic_uint*)&spvDescriptorSet0.uImage_atomic[spvImage2DAtomicCoord(int2(gl_GlobalInvocationID.xy), spvDescriptorSet0.uImage)], 10u, memory_order_relaxed);
|
||||
// Sample uTexture at the thread's xy (mip level 0) and add _26 to all four components.
(*spvDescriptorSet0.m_31).outdata = spvDescriptorSet0.uTexture.sample(spvDescriptorSet0.uTextureSmplr, float2(gl_GlobalInvocationID.xy), level(0.0)) + float4(float(_26));
|
||||
}
|
||||
|
@ -0,0 +1,84 @@
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
#include <metal_atomic>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
// Wrapper around an array of float4. It is declared with one element, but main0 indexes
// _m0 with run-time offsets, so the declared length is only a placeholder.
struct type_StructuredBuffer_v4float
|
||||
{
|
||||
float4 _m0[1];
|
||||
};
|
||||
|
||||
// Constant block with a single uint2. main0 uses .x as the row stride of the atomic
// counter buffer and .y to flip the fragment's y coordinate.
struct type_Globals
|
||||
{
|
||||
uint2 ShadowTileListGroupSize;
|
||||
};
|
||||
|
||||
// Argument buffer layout for descriptor set 0; main0 receives it at [[buffer(0)]].
// Each member's [[id(n)]] is its slot inside the argument buffer.
struct spvDescriptorSetBuffer0
|
||||
{
|
||||
// Read-only float4 array indexed by main0.
const device type_StructuredBuffer_v4float* CulledObjectBoxBounds [[id(0)]];
|
||||
constant type_Globals* _Globals [[id(1)]];
|
||||
// Image resource; main0 never accesses it directly.
texture2d<uint> RWShadowTileNumCulledObjects [[id(2)]];
|
||||
// Buffer on which main0 performs the atomic add in place of the image above.
device atomic_uint* RWShadowTileNumCulledObjects_atomic [[id(3)]];
|
||||
};
|
||||
|
||||
// Zero-initialised placeholder vector. main0 reads only its .z while building _102 and overwrites that component on the next statement.
constant float3 _70 = {};
|
||||
|
||||
// Fragment stage output: one float4 bound to colour attachment 0.
struct main0_out
|
||||
{
|
||||
float4 out_var_SV_Target0 [[color(0)]];
|
||||
};
|
||||
|
||||
// Fragment stage input: one uint at user location 0. main0 multiplies it by 5 to get a
// base index into CulledObjectBoxBounds.
struct main0_in
|
||||
{
|
||||
uint in_var_TEXCOORD0 [[user(locn0)]];
|
||||
};
|
||||
|
||||
// Fragment entry point. Compares entries of CulledObjectBoxBounds selected by
// in_var_TEXCOORD0 against a rectangle derived from gl_FragCoord and
// ShadowTileListGroupSize. When both tests pass it atomically adds 1 to one element of
// RWShadowTileNumCulledObjects_atomic. The colour output is always float4(0.0).
fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], float4 gl_FragCoord [[position]])
|
||||
{
|
||||
main0_out out = {};
|
||||
// Integer xy of this fragment; _78 is y and _83 is x.
uint2 _77 = uint2(gl_FragCoord.xy);
|
||||
uint _78 = _77.y;
|
||||
uint _83 = _77.x;
|
||||
// x is kept as-is; y is flipped against ShadowTileListGroupSize.y.
float2 _91 = float2(float(_83), float(((*spvDescriptorSet0._Globals).ShadowTileListGroupSize.y - 1u) - _78));
|
||||
float2 _93 = float2((*spvDescriptorSet0._Globals).ShadowTileListGroupSize);
|
||||
// _96 and _100 remap _91 and _91 + 1 through (v / size) * 2 - 1, giving the rectangle's two opposite corners.
float2 _96 = ((_91 / _93) * float2(2.0)) - float2(1.0);
|
||||
float2 _100 = (((_91 + float2(1.0)) / _93) * float2(2.0)) - float2(1.0);
|
||||
// The .z taken from _70 is a placeholder; it is replaced by 1.0 immediately below.
float3 _102 = float3(_100.x, _100.y, _70.z);
|
||||
_102.z = 1.0;
|
||||
// Each input index addresses five consecutive float4 entries (_103 .. _103 + 4).
uint _103 = in.in_var_TEXCOORD0 * 5u;
|
||||
uint _107 = _103 + 1u;
|
||||
// First test: entry +1 must exceed _96 in xy and entry +0 must be below _102 in xyz.
if (all((*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_107].xy > _96.xy) && all((*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103].xyz < _102))
|
||||
{
|
||||
// Midpoint of entries +0 and +1.
float3 _121 = float3(0.5) * ((*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103].xyz + (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_107].xyz);
|
||||
float _122 = _96.x;
|
||||
|
||||
float _123 = _96.y;
|
||||
float _126 = _100.x;
|
||||
|
||||
float _129 = _100.y;
|
||||
// Eight points (each x/y corner combined with z = -1000.0 and z = 1.0) are taken relative to _121
// and projected onto entries +2, +3 and +4 with dot products.
float3 _166 = float3(_122, _123, -1000.0) - _121;
|
||||
float3 _170 = float3(dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz));
|
||||
float3 _189 = float3(_126, _123, -1000.0) - _121;
|
||||
float3 _193 = float3(dot(_189, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_189, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_189, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz));
|
||||
float3 _205 = float3(_122, _129, -1000.0) - _121;
|
||||
float3 _209 = float3(dot(_205, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_205, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_205, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz));
|
||||
float3 _221 = float3(_126, _129, -1000.0) - _121;
|
||||
float3 _225 = float3(dot(_221, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_221, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_221, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz));
|
||||
float3 _237 = float3(_122, _123, 1.0) - _121;
|
||||
float3 _241 = float3(dot(_237, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_237, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_237, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz));
|
||||
float3 _253 = float3(_126, _123, 1.0) - _121;
|
||||
float3 _257 = float3(dot(_253, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_253, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_253, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz));
|
||||
float3 _269 = float3(_122, _129, 1.0) - _121;
|
||||
float3 _273 = float3(dot(_269, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_269, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_269, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz));
|
||||
float3 _285 = float3(_126, _129, 1.0) - _121;
|
||||
float3 _289 = float3(dot(_285, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_285, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_285, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz));
|
||||
// Second test: per component, the smallest projection must be below 1 and the largest above -1.
if (all(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(float3(500000.0), _170), _193), _209), _225), _241), _257), _273), _289) < float3(1.0)) && all(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(float3(-500000.0), _170), _193), _209), _225), _241), _257), _273), _289) > float3(-1.0)))
|
||||
{
|
||||
// Linear index is row (_78) * ShadowTileListGroupSize.x + column (_83); the fetched value _179 is not used afterwards.
uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&spvDescriptorSet0.RWShadowTileNumCulledObjects_atomic[(_78 * (*spvDescriptorSet0._Globals).ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed);
|
||||
}
|
||||
}
|
||||
out.out_var_SV_Target0 = float4(0.0);
|
||||
return out;
|
||||
}
|
||||
|
@ -0,0 +1,35 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
#include <metal_atomic>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
// Buffer block holding the single float4 result that main0 writes through the m_31 pointer.
struct SSBO
|
||||
{
|
||||
float4 outdata;
|
||||
};
|
||||
|
||||
// Workgroup size constant; uint3(1u) sets all three components to 1. Marked maybe_unused because main0 never reads it.
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
|
||||
|
||||
// Argument buffer layout for descriptor set 0; main0 receives it at [[buffer(0)]].
// Each member's [[id(n)]] is its slot inside the argument buffer.
struct spvDescriptorSetBuffer0
|
||||
{
|
||||
// Image whose width is read by spvImage2DAtomicCoord; main0 does not read or write it directly.
texture2d<uint> uImage [[id(0)]];
|
||||
// Buffer that main0 performs the atomic add on in place of uImage (emulated 2D texture atomics).
device atomic_uint* uImage_atomic [[id(1)]];
|
||||
// Destination of the kernel's float4 result.
device SSBO* m_31 [[id(2)]];
|
||||
// Texture and sampler pair sampled by main0.
texture2d<float> uTexture [[id(3)]];
|
||||
sampler uTextureSmplr [[id(4)]];
|
||||
};
|
||||
|
||||
// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics
|
||||
// Row-major linear index: the row (tc.y) is scaled by the texture width and the column (tc.x)
// is the offset inside that row. Scaling x by the width instead transposes the address and can
// index past width * height on non-square images.
// NOTE(review): assumes rows of the backing buffer are tightly packed (no row padding) - confirm
// against the host-side linear texture setup.
#define spvImage2DAtomicCoord(tc, tex) (((tex).get_width() * (tc).y) + (tc).x)
|
||||
|
||||
// Compute entry point. Atomically adds 10 to one element of uImage_atomic and stores a
// texture sample plus the fetched counter value into the SSBO.
//   spvDescriptorSet0      argument buffer holding every resource used here.
//   gl_GlobalInvocationID  thread position in the grid; only .xy is used.
kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
{
|
||||
// The element index comes from spvImage2DAtomicCoord applied to the thread's xy and uImage; _26 is the value returned by the relaxed fetch-add.
uint _26 = atomic_fetch_add_explicit((device atomic_uint*)&spvDescriptorSet0.uImage_atomic[spvImage2DAtomicCoord(int2(gl_GlobalInvocationID.xy), spvDescriptorSet0.uImage)], 10u, memory_order_relaxed);
|
||||
// Named copy of the fetched value.
uint ret = _26;
|
||||
// Sample uTexture at the thread's xy (mip level 0) and add ret to all four components.
(*spvDescriptorSet0.m_31).outdata = spvDescriptorSet0.uTexture.sample(spvDescriptorSet0.uTextureSmplr, float2(gl_GlobalInvocationID.xy), level(0.0)) + float4(float(ret));
|
||||
}
|
||||
|
@ -0,0 +1,130 @@
|
||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
#include <metal_atomic>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
// Fixed-size array wrapper with unchecked element access.
// operator[] is overloaded once per address space of the object it is called on
// (thread, device, constant, threadgroup); each overload returns a reference in that
// same address space. The constant overload exists only in const form.
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
// Storage always has at least one element, even when Num is 0.
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
// Wrapper around an array of float4. It is declared with one element, but main0 indexes
// _m0 with run-time offsets, so the declared length is only a placeholder.
struct type_StructuredBuffer_v4float
|
||||
{
|
||||
float4 _m0[1];
|
||||
};
|
||||
|
||||
// Constant block with a single uint2. main0 uses .x as the row stride of the atomic
// counter buffer and .y to flip the fragment's y coordinate.
struct type_Globals
|
||||
{
|
||||
uint2 ShadowTileListGroupSize;
|
||||
};
|
||||
|
||||
// Argument buffer layout for descriptor set 0; main0 receives it at [[buffer(0)]].
// Each member's [[id(n)]] is its slot inside the argument buffer.
struct spvDescriptorSetBuffer0
|
||||
{
|
||||
// Read-only float4 array indexed by main0.
const device type_StructuredBuffer_v4float* CulledObjectBoxBounds [[id(0)]];
|
||||
constant type_Globals* _Globals [[id(1)]];
|
||||
// Image resource; main0 never accesses it directly.
texture2d<uint> RWShadowTileNumCulledObjects [[id(2)]];
|
||||
// Buffer on which main0 performs the atomic add in place of the image above.
device atomic_uint* RWShadowTileNumCulledObjects_atomic [[id(3)]];
|
||||
};
|
||||
|
||||
// Zero-initialised placeholder vector. main0 reads only its .z while building _102 and overwrites that component on the next statement.
constant float3 _70 = {};
|
||||
|
||||
// Fragment stage output: one float4 bound to colour attachment 0.
struct main0_out
|
||||
{
|
||||
float4 out_var_SV_Target0 [[color(0)]];
|
||||
};
|
||||
|
||||
// Fragment stage input: one uint at user location 0. main0 multiplies it by 5 to get a
// base index into CulledObjectBoxBounds.
struct main0_in
|
||||
{
|
||||
uint in_var_TEXCOORD0 [[user(locn0)]];
|
||||
};
|
||||
|
||||
// Fragment entry point. Compares entries of CulledObjectBoxBounds selected by
// in_var_TEXCOORD0 against a rectangle derived from gl_FragCoord and
// ShadowTileListGroupSize. When both tests pass it atomically adds 1 to one element of
// RWShadowTileNumCulledObjects_atomic. The colour output is always float4(0.0).
fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], float4 gl_FragCoord [[position]])
|
||||
{
|
||||
main0_out out = {};
|
||||
// Integer xy of this fragment; _78 is y and _83 is x.
uint2 _77 = uint2(gl_FragCoord.xy);
|
||||
uint _78 = _77.y;
|
||||
uint _83 = _77.x;
|
||||
// x is kept as-is; y is flipped against ShadowTileListGroupSize.y.
float2 _91 = float2(float(_83), float(((*spvDescriptorSet0._Globals).ShadowTileListGroupSize.y - 1u) - _78));
|
||||
float2 _93 = float2((*spvDescriptorSet0._Globals).ShadowTileListGroupSize);
|
||||
// _96 and _100 remap _91 and _91 + 1 through (v / size) * 2 - 1, giving the rectangle's two opposite corners.
float2 _96 = ((_91 / _93) * float2(2.0)) - float2(1.0);
|
||||
float2 _100 = (((_91 + float2(1.0)) / _93) * float2(2.0)) - float2(1.0);
|
||||
// The .z taken from _70 is a placeholder; it is replaced by 1.0 immediately below.
float3 _102 = float3(_100.x, _100.y, _70.z);
|
||||
_102.z = 1.0;
|
||||
// Each input index addresses five consecutive float4 entries (_103 .. _103 + 4).
uint _103 = in.in_var_TEXCOORD0 * 5u;
|
||||
uint _107 = _103 + 1u;
|
||||
// First test: entry +1 must exceed _96 in xy and entry +0 must be below _102 in xyz.
if (all((*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_107].xy > _96.xy) && all((*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103].xyz < _102))
|
||||
{
|
||||
// Midpoint of entries +0 and +1.
float3 _121 = float3(0.5) * ((*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103].xyz + (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_107].xyz);
|
||||
float _122 = _96.x;
|
||||
float _123 = _96.y;
|
||||
// _73 holds eight points: each x/y corner combined with z = -1000.0 (slots 0-3) and z = 1.0 (slots 4-7).
spvUnsafeArray<float3, 8> _73;
|
||||
_73[0] = float3(_122, _123, -1000.0);
|
||||
float _126 = _100.x;
|
||||
_73[1] = float3(_126, _123, -1000.0);
|
||||
float _129 = _100.y;
|
||||
_73[2] = float3(_122, _129, -1000.0);
|
||||
_73[3] = float3(_126, _129, -1000.0);
|
||||
_73[4] = float3(_122, _123, 1.0);
|
||||
_73[5] = float3(_126, _123, 1.0);
|
||||
_73[6] = float3(_122, _129, 1.0);
|
||||
_73[7] = float3(_126, _129, 1.0);
|
||||
// _155 accumulates the per-component maximum and _158 the per-component minimum of the projections.
float3 _155;
|
||||
float3 _158;
|
||||
_155 = float3(-500000.0);
|
||||
_158 = float3(500000.0);
|
||||
// The loop counter is advanced inside the body, so the for header has no increment clause.
for (int _160 = 0; _160 < 8; )
|
||||
{
|
||||
// Point relative to _121, projected onto entries +2, +3 and +4 with dot products.
float3 _166 = _73[_160] - _121;
|
||||
float3 _170 = float3(dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz));
|
||||
_155 = fast::max(_155, _170);
|
||||
_158 = fast::min(_158, _170);
|
||||
_160++;
|
||||
continue;
|
||||
}
|
||||
// Second test: per component, the smallest projection must be below 1 and the largest above -1.
if (all(_158 < float3(1.0)) && all(_155 > float3(-1.0)))
|
||||
{
|
||||
// Linear index is row (_78) * ShadowTileListGroupSize.x + column (_83); the fetched value _179 is not used afterwards.
uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&spvDescriptorSet0.RWShadowTileNumCulledObjects_atomic[(_78 * (*spvDescriptorSet0._Globals).ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed);
|
||||
}
|
||||
}
|
||||
out.out_var_SV_Target0 = float4(0.0);
|
||||
return out;
|
||||
}
|
||||
|
@ -0,0 +1,16 @@
|
||||
#version 450
|
||||
// Compute shader with local_size_x = 1.
layout(local_size_x = 1) in;
|
||||
|
||||
// r32ui storage image that main() updates with imageAtomicAdd.
layout(set = 0, binding = 0, r32ui) uniform uimage2D uImage;
|
||||
// Combined texture/sampler read by main() with textureLod.
layout(set = 0, binding = 1) uniform sampler2D uTexture;
|
||||
|
||||
// Buffer block at set 0, binding 2; outdata receives the result written by main().
layout(set = 0, binding = 2) buffer SSBO
|
||||
{
|
||||
vec4 outdata;
|
||||
};
|
||||
|
||||
// Adds 10 atomically to the uImage texel at this invocation's xy, then writes the
// level-0 sample of uTexture at the same xy plus the value returned by the atomic
// into outdata.
void main()
|
||||
{
|
||||
// ret is the value returned by imageAtomicAdd.
uint ret = imageAtomicAdd(uImage, ivec2(gl_GlobalInvocationID.xy), 10u);
|
||||
outdata = textureLod(uTexture, vec2(gl_GlobalInvocationID.xy), 0.0) + float(ret);
|
||||
}
|
242
shaders-ue4/asm/frag/texture-atomics.asm.argument.msl2.frag
Normal file
242
shaders-ue4/asm/frag/texture-atomics.asm.argument.msl2.frag
Normal file
@ -0,0 +1,242 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Google spiregg; 0
|
||||
; Bound: 180
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpCapability SampledBuffer
|
||||
OpCapability ImageBuffer
|
||||
OpExtension "SPV_GOOGLE_hlsl_functionality1"
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint Fragment %ShadowObjectCullPS "main" %in_var_TEXCOORD0 %gl_FragCoord %out_var_SV_Target0
|
||||
OpExecutionMode %ShadowObjectCullPS OriginUpperLeft
|
||||
OpSource HLSL 600
|
||||
OpName %type_StructuredBuffer_v4float "type.StructuredBuffer.v4float"
|
||||
OpName %CulledObjectBoxBounds "CulledObjectBoxBounds"
|
||||
OpName %type__Globals "type.$Globals"
|
||||
OpMemberName %type__Globals 0 "ShadowTileListGroupSize"
|
||||
OpName %_Globals "$Globals"
|
||||
OpName %type_buffer_image "type.buffer.image"
|
||||
OpName %RWShadowTileNumCulledObjects "RWShadowTileNumCulledObjects"
|
||||
OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0"
|
||||
OpName %out_var_SV_Target0 "out.var.SV_Target0"
|
||||
OpName %ShadowObjectCullPS "ShadowObjectCullPS"
|
||||
OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0"
|
||||
OpDecorate %in_var_TEXCOORD0 Flat
|
||||
OpDecorate %gl_FragCoord BuiltIn FragCoord
|
||||
OpDecorateString %gl_FragCoord UserSemantic "SV_POSITION"
|
||||
OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0"
|
||||
OpDecorate %in_var_TEXCOORD0 Location 0
|
||||
OpDecorate %out_var_SV_Target0 Location 0
|
||||
OpDecorate %CulledObjectBoxBounds DescriptorSet 0
|
||||
OpDecorate %CulledObjectBoxBounds Binding 1
|
||||
OpDecorate %_Globals DescriptorSet 0
|
||||
OpDecorate %_Globals Binding 2
|
||||
OpDecorate %RWShadowTileNumCulledObjects DescriptorSet 0
|
||||
OpDecorate %RWShadowTileNumCulledObjects Binding 0
|
||||
OpDecorate %_runtimearr_v4float ArrayStride 16
|
||||
OpMemberDecorate %type_StructuredBuffer_v4float 0 Offset 0
|
||||
OpMemberDecorate %type_StructuredBuffer_v4float 0 NonWritable
|
||||
OpDecorate %type_StructuredBuffer_v4float BufferBlock
|
||||
OpMemberDecorate %type__Globals 0 Offset 0
|
||||
OpDecorate %type__Globals Block
|
||||
%float = OpTypeFloat 32
|
||||
%v4float = OpTypeVector %float 4
|
||||
%v3float = OpTypeVector %float 3
|
||||
%v2float = OpTypeVector %float 2
|
||||
%int = OpTypeInt 32 1
|
||||
%uint = OpTypeInt 32 0
|
||||
%uint_2 = OpConstant %uint 2
|
||||
%uint_4 = OpConstant %uint 4
|
||||
%float_0 = OpConstant %float 0
|
||||
%22 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
|
||||
%int_1 = OpConstant %int 1
|
||||
%int_0 = OpConstant %int 0
|
||||
%uint_1 = OpConstant %uint 1
|
||||
%float_2 = OpConstant %float 2
|
||||
%27 = OpConstantComposite %v2float %float_2 %float_2
|
||||
%float_1 = OpConstant %float 1
|
||||
%29 = OpConstantComposite %v2float %float_1 %float_1
|
||||
%float_n1000 = OpConstant %float -1000
|
||||
%int_2 = OpConstant %int 2
|
||||
%float_0_5 = OpConstant %float 0.5
|
||||
%33 = OpConstantComposite %v3float %float_0_5 %float_0_5 %float_0_5
|
||||
%float_500000 = OpConstant %float 500000
|
||||
%35 = OpConstantComposite %v3float %float_500000 %float_500000 %float_500000
|
||||
%float_n500000 = OpConstant %float -500000
|
||||
%37 = OpConstantComposite %v3float %float_n500000 %float_n500000 %float_n500000
|
||||
%int_3 = OpConstant %int 3
|
||||
%int_4 = OpConstant %int 4
|
||||
%int_5 = OpConstant %int 5
|
||||
%int_6 = OpConstant %int 6
|
||||
%int_7 = OpConstant %int 7
|
||||
%int_8 = OpConstant %int 8
|
||||
%44 = OpConstantComposite %v3float %float_1 %float_1 %float_1
|
||||
%float_n1 = OpConstant %float -1
|
||||
%46 = OpConstantComposite %v3float %float_n1 %float_n1 %float_n1
|
||||
%uint_5 = OpConstant %uint 5
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%uint_3 = OpConstant %uint 3
|
||||
%_runtimearr_v4float = OpTypeRuntimeArray %v4float
|
||||
%type_StructuredBuffer_v4float = OpTypeStruct %_runtimearr_v4float
|
||||
%_ptr_Uniform_type_StructuredBuffer_v4float = OpTypePointer Uniform %type_StructuredBuffer_v4float
|
||||
%v2uint = OpTypeVector %uint 2
|
||||
%type__Globals = OpTypeStruct %v2uint
|
||||
%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals
|
||||
%type_buffer_image = OpTypeImage %uint Buffer 2 0 0 2 R32ui
|
||||
%_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image
|
||||
%_ptr_Input_uint = OpTypePointer Input %uint
|
||||
%_ptr_Input_v4float = OpTypePointer Input %v4float
|
||||
%_ptr_Output_v4float = OpTypePointer Output %v4float
|
||||
%void = OpTypeVoid
|
||||
%58 = OpTypeFunction %void
|
||||
%_ptr_Function_v3float = OpTypePointer Function %v3float
|
||||
%uint_8 = OpConstant %uint 8
|
||||
%_arr_v3float_uint_8 = OpTypeArray %v3float %uint_8
|
||||
%_ptr_Function__arr_v3float_uint_8 = OpTypePointer Function %_arr_v3float_uint_8
|
||||
%_ptr_Uniform_v2uint = OpTypePointer Uniform %v2uint
|
||||
%_ptr_Uniform_uint = OpTypePointer Uniform %uint
|
||||
%bool = OpTypeBool
|
||||
%v2bool = OpTypeVector %bool 2
|
||||
%v3bool = OpTypeVector %bool 3
|
||||
%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
|
||||
%_ptr_Image_uint = OpTypePointer Image %uint
|
||||
%CulledObjectBoxBounds = OpVariable %_ptr_Uniform_type_StructuredBuffer_v4float Uniform
|
||||
%_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform
|
||||
%RWShadowTileNumCulledObjects = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant
|
||||
%in_var_TEXCOORD0 = OpVariable %_ptr_Input_uint Input
|
||||
%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
|
||||
%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output
|
||||
%70 = OpUndef %v3float
|
||||
%71 = OpConstantNull %v3float
|
||||
%ShadowObjectCullPS = OpFunction %void None %58
|
||||
%72 = OpLabel
|
||||
%73 = OpVariable %_ptr_Function__arr_v3float_uint_8 Function
|
||||
%74 = OpLoad %uint %in_var_TEXCOORD0
|
||||
%75 = OpLoad %v4float %gl_FragCoord
|
||||
%76 = OpVectorShuffle %v2float %75 %75 0 1
|
||||
%77 = OpConvertFToU %v2uint %76
|
||||
%78 = OpCompositeExtract %uint %77 1
|
||||
%79 = OpAccessChain %_ptr_Uniform_v2uint %_Globals %int_0
|
||||
%80 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_0 %int_0
|
||||
%81 = OpLoad %uint %80
|
||||
%82 = OpIMul %uint %78 %81
|
||||
%83 = OpCompositeExtract %uint %77 0
|
||||
%84 = OpIAdd %uint %82 %83
|
||||
%85 = OpConvertUToF %float %83
|
||||
%86 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_0 %int_1
|
||||
%87 = OpLoad %uint %86
|
||||
%88 = OpISub %uint %87 %uint_1
|
||||
%89 = OpISub %uint %88 %78
|
||||
%90 = OpConvertUToF %float %89
|
||||
%91 = OpCompositeConstruct %v2float %85 %90
|
||||
%92 = OpLoad %v2uint %79
|
||||
%93 = OpConvertUToF %v2float %92
|
||||
%94 = OpFDiv %v2float %91 %93
|
||||
%95 = OpFMul %v2float %94 %27
|
||||
%96 = OpFSub %v2float %95 %29
|
||||
%97 = OpFAdd %v2float %91 %29
|
||||
%98 = OpFDiv %v2float %97 %93
|
||||
%99 = OpFMul %v2float %98 %27
|
||||
%100 = OpFSub %v2float %99 %29
|
||||
%101 = OpVectorShuffle %v3float %70 %100 3 4 2
|
||||
%102 = OpCompositeInsert %v3float %float_1 %101 2
|
||||
%103 = OpIMul %uint %74 %uint_5
|
||||
%104 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %103
|
||||
%105 = OpLoad %v4float %104
|
||||
%106 = OpVectorShuffle %v3float %105 %105 0 1 2
|
||||
%107 = OpIAdd %uint %103 %uint_1
|
||||
%108 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %107
|
||||
%109 = OpLoad %v4float %108
|
||||
%110 = OpVectorShuffle %v3float %109 %109 0 1 2
|
||||
%111 = OpVectorShuffle %v2float %109 %71 0 1
|
||||
%112 = OpVectorShuffle %v2float %96 %71 0 1
|
||||
%113 = OpFOrdGreaterThan %v2bool %111 %112
|
||||
%114 = OpAll %bool %113
|
||||
%115 = OpFOrdLessThan %v3bool %106 %102
|
||||
%116 = OpAll %bool %115
|
||||
%117 = OpLogicalAnd %bool %114 %116
|
||||
OpSelectionMerge %118 DontFlatten
|
||||
OpBranchConditional %117 %119 %118
|
||||
%119 = OpLabel
|
||||
%120 = OpFAdd %v3float %106 %110
|
||||
%121 = OpFMul %v3float %33 %120
|
||||
%122 = OpCompositeExtract %float %96 0
|
||||
%123 = OpCompositeExtract %float %96 1
|
||||
%124 = OpCompositeConstruct %v3float %122 %123 %float_n1000
|
||||
%125 = OpAccessChain %_ptr_Function_v3float %73 %int_0
|
||||
OpStore %125 %124
|
||||
%126 = OpCompositeExtract %float %100 0
|
||||
%127 = OpCompositeConstruct %v3float %126 %123 %float_n1000
|
||||
%128 = OpAccessChain %_ptr_Function_v3float %73 %int_1
|
||||
OpStore %128 %127
|
||||
%129 = OpCompositeExtract %float %100 1
|
||||
%130 = OpCompositeConstruct %v3float %122 %129 %float_n1000
|
||||
%131 = OpAccessChain %_ptr_Function_v3float %73 %int_2
|
||||
OpStore %131 %130
|
||||
%132 = OpCompositeConstruct %v3float %126 %129 %float_n1000
|
||||
%133 = OpAccessChain %_ptr_Function_v3float %73 %int_3
|
||||
OpStore %133 %132
|
||||
%134 = OpCompositeConstruct %v3float %122 %123 %float_1
|
||||
%135 = OpAccessChain %_ptr_Function_v3float %73 %int_4
|
||||
OpStore %135 %134
|
||||
%136 = OpCompositeConstruct %v3float %126 %123 %float_1
|
||||
%137 = OpAccessChain %_ptr_Function_v3float %73 %int_5
|
||||
OpStore %137 %136
|
||||
%138 = OpCompositeConstruct %v3float %122 %129 %float_1
|
||||
%139 = OpAccessChain %_ptr_Function_v3float %73 %int_6
|
||||
OpStore %139 %138
|
||||
%140 = OpCompositeConstruct %v3float %126 %129 %float_1
|
||||
%141 = OpAccessChain %_ptr_Function_v3float %73 %int_7
|
||||
OpStore %141 %140
|
||||
%142 = OpIAdd %uint %103 %uint_2
|
||||
%143 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %142
|
||||
%144 = OpLoad %v4float %143
|
||||
%145 = OpVectorShuffle %v3float %144 %144 0 1 2
|
||||
%146 = OpIAdd %uint %103 %uint_3
|
||||
%147 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %146
|
||||
%148 = OpLoad %v4float %147
|
||||
%149 = OpVectorShuffle %v3float %148 %148 0 1 2
|
||||
%150 = OpIAdd %uint %103 %uint_4
|
||||
%151 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %150
|
||||
%152 = OpLoad %v4float %151
|
||||
%153 = OpVectorShuffle %v3float %152 %152 0 1 2
|
||||
OpBranch %154
|
||||
%154 = OpLabel
|
||||
%155 = OpPhi %v3float %37 %119 %156 %157
|
||||
%158 = OpPhi %v3float %35 %119 %159 %157
|
||||
%160 = OpPhi %int %int_0 %119 %161 %157
|
||||
%162 = OpSLessThan %bool %160 %int_8
|
||||
OpLoopMerge %163 %157 Unroll
|
||||
OpBranchConditional %162 %157 %163
|
||||
%157 = OpLabel
|
||||
%164 = OpAccessChain %_ptr_Function_v3float %73 %160
|
||||
%165 = OpLoad %v3float %164
|
||||
%166 = OpFSub %v3float %165 %121
|
||||
%167 = OpDot %float %166 %145
|
||||
%168 = OpDot %float %166 %149
|
||||
%169 = OpDot %float %166 %153
|
||||
%170 = OpCompositeConstruct %v3float %167 %168 %169
|
||||
%159 = OpExtInst %v3float %1 FMin %158 %170
|
||||
%156 = OpExtInst %v3float %1 FMax %155 %170
|
||||
%161 = OpIAdd %int %160 %int_1
|
||||
OpBranch %154
|
||||
%163 = OpLabel
|
||||
%171 = OpFOrdLessThan %v3bool %158 %44
|
||||
%172 = OpAll %bool %171
|
||||
%173 = OpFOrdGreaterThan %v3bool %155 %46
|
||||
%174 = OpAll %bool %173
|
||||
%175 = OpLogicalAnd %bool %172 %174
|
||||
OpSelectionMerge %176 DontFlatten
|
||||
OpBranchConditional %175 %177 %176
|
||||
%177 = OpLabel
|
||||
%178 = OpImageTexelPointer %_ptr_Image_uint %RWShadowTileNumCulledObjects %84 %uint_0
|
||||
%179 = OpAtomicIAdd %uint %178 %uint_1 %uint_0 %uint_1
|
||||
OpBranch %176
|
||||
%176 = OpLabel
|
||||
OpBranch %118
|
||||
%118 = OpLabel
|
||||
OpStore %out_var_SV_Target0 %22
|
||||
OpReturn
|
||||
OpFunctionEnd
|
@ -13500,6 +13500,14 @@ void CompilerMSL::analyze_argument_buffers()
|
||||
add_resource_name(var_id);
|
||||
resources_in_set[desc_set].push_back(
|
||||
{ &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype), 0 });
|
||||
|
||||
// Emulate texture2D atomic operations
|
||||
if (atomic_image_vars.count(var.self))
|
||||
{
|
||||
uint32_t buffer_resource_index = get_metal_resource_index(var, SPIRType::AtomicCounter, 0);
|
||||
resources_in_set[desc_set].push_back(
|
||||
{ &var, to_name(var_id) + "_atomic", SPIRType::Struct, buffer_resource_index, 0 });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -13675,6 +13683,30 @@ void CompilerMSL::analyze_argument_buffers()
|
||||
buffer_type.member_types.push_back(get_variable_data_type_id(var));
|
||||
set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name));
|
||||
}
|
||||
else if (atomic_image_vars.count(var.self))
|
||||
{
|
||||
// Emulate texture2D atomic operations.
|
||||
// Don't set the qualified name: it's already set for this variable,
|
||||
// and the code that references the buffer manually appends "_atomic"
|
||||
// to the name.
|
||||
uint32_t offset = ir.increase_bound_by(2);
|
||||
uint32_t atomic_type_id = offset;
|
||||
uint32_t type_ptr_id = offset + 1;
|
||||
|
||||
SPIRType atomic_type;
|
||||
atomic_type.basetype = SPIRType::AtomicCounter;
|
||||
atomic_type.width = 32;
|
||||
atomic_type.vecsize = 1;
|
||||
set<SPIRType>(atomic_type_id, atomic_type);
|
||||
|
||||
atomic_type.pointer = true;
|
||||
atomic_type.parent_type = atomic_type_id;
|
||||
atomic_type.storage = StorageClassStorageBuffer;
|
||||
auto &atomic_ptr_type = set<SPIRType>(type_ptr_id, atomic_type);
|
||||
atomic_ptr_type.self = atomic_type_id;
|
||||
|
||||
buffer_type.member_types.push_back(type_ptr_id);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Resources will be declared as pointers not references, so automatically dereference as appropriate.
|
||||
|
Loading…
Reference in New Issue
Block a user