MSL: Support atomic access to images from argument buffers.

This was not added when Epic contributed atomic image support.

Fixes #1484.
This commit is contained in:
Chip Davis 2020-10-13 02:33:06 -05:00
parent 401af49326
commit 9cafea6cf8
7 changed files with 573 additions and 0 deletions

View File

@ -0,0 +1,34 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
// Storage buffer block. main0 writes its single float4 result into outdata.
struct SSBO
{
float4 outdata;
};
// Workgroup size constant, splatted from 1u; tagged maybe_unused.
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
// Argument buffer holding the resources of descriptor set 0; main0 receives it at [[buffer(0)]].
struct spvDescriptorSetBuffer0
{
// Image whose width is used to compute the atomic buffer index.
texture2d<uint> uImage [[id(0)]];
// Buffer of atomic_uint through which main0 performs the atomic add for uImage.
device atomic_uint* uImage_atomic [[id(1)]];
// Output storage block written by main0.
device SSBO* m_31 [[id(2)]];
// Texture sampled by main0, together with its sampler.
texture2d<float> uTexture [[id(3)]];
sampler uTextureSmplr [[id(4)]];
};
// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics.
// Texels are addressed row-major: the row (tc).y is scaled by the image width and the
// column (tc).x is the offset inside that row. Swapping the two (width * x + y) aliases
// distinct texels and can index past the end of the buffer for non-square images.
// NOTE(review): assumes the buffer rows are tightly packed (row pitch == width texels);
// confirm against how the host creates the linear texture / backing buffer.
#define spvImage2DAtomicCoord(tc, tex) (((tex).get_width() * (tc).y) + (tc).x)
// Compute entry point. Each invocation atomically adds 10 to the uImage_atomic slot
// selected by its global invocation XY, then stores the LOD-0 sample of uTexture at
// that same XY plus the value returned by the atomic into the SSBO's outdata.
kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
    // Linear slot of this invocation's texel inside the atomic buffer.
    const uint texelSlot = spvImage2DAtomicCoord(int2(gl_GlobalInvocationID.xy), spvDescriptorSet0.uImage);
    device atomic_uint* texelCounter = spvDescriptorSet0.uImage_atomic + texelSlot;
    // Relaxed fetch-add; the returned value is the one held before the addition.
    const uint _26 = atomic_fetch_add_explicit(texelCounter, 10u, memory_order_relaxed);
    const float2 sampleCoord = float2(gl_GlobalInvocationID.xy);
    const float4 sampled = spvDescriptorSet0.uTexture.sample(spvDescriptorSet0.uTextureSmplr, sampleCoord, level(0.0));
    spvDescriptorSet0.m_31->outdata = sampled + float4(float(_26));
}

View File

@ -0,0 +1,84 @@
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
// Layout of the float4 structured buffer. _m0 is declared with one element but main0
// indexes it with runtime offsets (up to base + 4), so it acts as an unsized array.
struct type_StructuredBuffer_v4float
{
float4 _m0[1];
};
// Global constants block. main0 uses ShadowTileListGroupSize.x as the row stride of the
// atomic counter index and .y to flip the fragment's y coordinate.
struct type_Globals
{
uint2 ShadowTileListGroupSize;
};
// Argument buffer holding the resources of descriptor set 0; main0 receives it at [[buffer(0)]].
struct spvDescriptorSetBuffer0
{
// Read-only float4 buffer; main0 reads five consecutive entries per object.
const device type_StructuredBuffer_v4float* CulledObjectBoxBounds [[id(0)]];
// Constant block with ShadowTileListGroupSize.
constant type_Globals* _Globals [[id(1)]];
// Image declaration; main0 does not access it directly, only its _atomic companion.
texture2d<uint> RWShadowTileNumCulledObjects [[id(2)]];
// Buffer of atomic_uint counters that main0 increments.
device atomic_uint* RWShadowTileNumCulledObjects_atomic [[id(3)]];
};
// Value-initialised placeholder vector. main0 reads only _70.z when building _102 and
// overwrites that component with 1.0 on the next statement.
constant float3 _70 = {};
// Fragment outputs: a single float4 written to colour attachment 0.
struct main0_out
{
float4 out_var_SV_Target0 [[color(0)]];
};
// Fragment stage inputs. in_var_TEXCOORD0 is multiplied by 5 in main0 to form the base
// index into CulledObjectBoxBounds.
struct main0_in
{
uint in_var_TEXCOORD0 [[user(locn0)]];
};
// Fragment entry point. For the current pixel it derives a 2D range (_96.._100) from the
// pixel coordinate and ShadowTileListGroupSize, tests it against five float4 entries of
// CulledObjectBoxBounds selected by in_var_TEXCOORD0, and on success atomically adds 1 to
// the counter at index (y * ShadowTileListGroupSize.x + x). The colour output is always zero.
// NOTE(review): the five entries presumably hold a box min, max and three axes — confirm
// against the HLSL source, which is not part of this file.
fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], float4 gl_FragCoord [[position]])
{
main0_out out = {};
// Integer pixel coordinate of this fragment.
uint2 _77 = uint2(gl_FragCoord.xy);
uint _78 = _77.y;
uint _83 = _77.x;
// (x, height - 1 - y): the y coordinate is flipped against ShadowTileListGroupSize.y.
float2 _91 = float2(float(_83), float(((*spvDescriptorSet0._Globals).ShadowTileListGroupSize.y - 1u) - _78));
float2 _93 = float2((*spvDescriptorSet0._Globals).ShadowTileListGroupSize);
// Lower (_96) and upper (_100) corners of this pixel's cell, computed as coord / size * 2 - 1.
float2 _96 = ((_91 / _93) * float2(2.0)) - float2(1.0);
float2 _100 = (((_91 + float2(1.0)) / _93) * float2(2.0)) - float2(1.0);
// Upper corner extended to 3D; z is forced to 1.0 (the _70.z read is immediately overwritten).
float3 _102 = float3(_100.x, _100.y, _70.z);
_102.z = 1.0;
// Base index of this object's entries (stride of 5 float4 per object) and the second entry.
uint _103 = in.in_var_TEXCOORD0 * 5u;
uint _107 = _103 + 1u;
// First test: entry[base + 1].xy must exceed the lower corner and entry[base].xyz must be below the upper corner.
if (all((*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_107].xy > _96.xy) && all((*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103].xyz < _102))
{
// Midpoint of entry[base] and entry[base + 1].
float3 _121 = float3(0.5) * ((*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103].xyz + (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_107].xyz);
float _122 = _96.x;
float _123 = _96.y;
float _126 = _100.x;
float _129 = _100.y;
// Eight corner points (x in {_122, _126}, y in {_123, _129}, z in {-1000, 1}), each taken
// relative to the midpoint and projected onto entries base + 2, base + 3 and base + 4.
float3 _166 = float3(_122, _123, -1000.0) - _121;
float3 _170 = float3(dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz));
float3 _189 = float3(_126, _123, -1000.0) - _121;
float3 _193 = float3(dot(_189, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_189, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_189, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz));
float3 _205 = float3(_122, _129, -1000.0) - _121;
float3 _209 = float3(dot(_205, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_205, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_205, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz));
float3 _221 = float3(_126, _129, -1000.0) - _121;
float3 _225 = float3(dot(_221, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_221, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_221, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz));
float3 _237 = float3(_122, _123, 1.0) - _121;
float3 _241 = float3(dot(_237, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_237, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_237, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz));
float3 _253 = float3(_126, _123, 1.0) - _121;
float3 _257 = float3(dot(_253, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_253, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_253, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz));
float3 _269 = float3(_122, _129, 1.0) - _121;
float3 _273 = float3(dot(_269, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_269, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_269, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz));
float3 _285 = float3(_126, _129, 1.0) - _121;
float3 _289 = float3(dot(_285, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_285, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_285, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz));
// Second test: the component-wise minimum over the eight projections (seeded with 500000)
// must be below 1 and the component-wise maximum (seeded with -500000) must be above -1.
if (all(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(float3(500000.0), _170), _193), _209), _225), _241), _257), _273), _289) < float3(1.0)) && all(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(float3(-500000.0), _170), _193), _209), _225), _241), _257), _273), _289) > float3(-1.0)))
{
// Relaxed atomic increment of the counter at (y * ShadowTileListGroupSize.x + x); the result _179 is unused.
uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&spvDescriptorSet0.RWShadowTileNumCulledObjects_atomic[(_78 * (*spvDescriptorSet0._Globals).ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed);
}
}
// The colour output is zero regardless of the test outcome.
out.out_var_SV_Target0 = float4(0.0);
return out;
}

View File

@ -0,0 +1,35 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
// Storage buffer block. main0 writes its single float4 result into outdata.
struct SSBO
{
float4 outdata;
};
// Workgroup size constant, splatted from 1u; tagged maybe_unused.
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
// Argument buffer holding the resources of descriptor set 0; main0 receives it at [[buffer(0)]].
struct spvDescriptorSetBuffer0
{
// Image whose width is used to compute the atomic buffer index.
texture2d<uint> uImage [[id(0)]];
// Buffer of atomic_uint through which main0 performs the atomic add for uImage.
device atomic_uint* uImage_atomic [[id(1)]];
// Output storage block written by main0.
device SSBO* m_31 [[id(2)]];
// Texture sampled by main0, together with its sampler.
texture2d<float> uTexture [[id(3)]];
sampler uTextureSmplr [[id(4)]];
};
// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics.
// Texels are addressed row-major: the row (tc).y is scaled by the image width and the
// column (tc).x is the offset inside that row. Swapping the two (width * x + y) aliases
// distinct texels and can index past the end of the buffer for non-square images.
// NOTE(review): assumes the buffer rows are tightly packed (row pitch == width texels);
// confirm against how the host creates the linear texture / backing buffer.
#define spvImage2DAtomicCoord(tc, tex) (((tex).get_width() * (tc).y) + (tc).x)
// Compute entry point. Each invocation atomically adds 10 to the uImage_atomic slot
// selected by its global invocation XY, keeps the returned value in `ret`, then stores
// the LOD-0 sample of uTexture at that same XY plus `ret` into the SSBO's outdata.
kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
    // Linear slot of this invocation's texel inside the atomic buffer.
    const uint texelSlot = spvImage2DAtomicCoord(int2(gl_GlobalInvocationID.xy), spvDescriptorSet0.uImage);
    device atomic_uint* texelCounter = spvDescriptorSet0.uImage_atomic + texelSlot;
    // Relaxed fetch-add; the returned value is the one held before the addition.
    const uint _26 = atomic_fetch_add_explicit(texelCounter, 10u, memory_order_relaxed);
    uint ret = _26;
    const float2 sampleCoord = float2(gl_GlobalInvocationID.xy);
    const float4 sampled = spvDescriptorSet0.uTexture.sample(spvDescriptorSet0.uTextureSmplr, sampleCoord, level(0.0));
    spvDescriptorSet0.m_31->outdata = sampled + float4(float(ret));
}

View File

@ -0,0 +1,130 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
// Fixed-size array wrapper exposing operator[] for objects living in the thread, device,
// constant and threadgroup address spaces. Indexing forwards straight to the underlying
// C array with no bounds check.
template<typename T, size_t Num>
struct spvUnsafeArray
{
// A Num of 0 still reserves one element so the member array is never zero-length.
T elements[Num ? Num : 1];
// Mutable access, thread address space.
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
// Read-only access, thread address space.
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
// Mutable access, device address space.
device T& operator [] (size_t pos) device
{
return elements[pos];
}
// Read-only access, device address space.
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
// Read-only access, constant address space (no mutable overload is provided).
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
// Mutable access, threadgroup address space.
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
// Read-only access, threadgroup address space.
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
// Layout of the float4 structured buffer. _m0 is declared with one element but main0
// indexes it with runtime offsets (up to base + 4), so it acts as an unsized array.
struct type_StructuredBuffer_v4float
{
float4 _m0[1];
};
// Global constants block. main0 uses ShadowTileListGroupSize.x as the row stride of the
// atomic counter index and .y to flip the fragment's y coordinate.
struct type_Globals
{
uint2 ShadowTileListGroupSize;
};
// Argument buffer holding the resources of descriptor set 0; main0 receives it at [[buffer(0)]].
struct spvDescriptorSetBuffer0
{
// Read-only float4 buffer; main0 reads five consecutive entries per object.
const device type_StructuredBuffer_v4float* CulledObjectBoxBounds [[id(0)]];
// Constant block with ShadowTileListGroupSize.
constant type_Globals* _Globals [[id(1)]];
// Image declaration; main0 does not access it directly, only its _atomic companion.
texture2d<uint> RWShadowTileNumCulledObjects [[id(2)]];
// Buffer of atomic_uint counters that main0 increments.
device atomic_uint* RWShadowTileNumCulledObjects_atomic [[id(3)]];
};
// Value-initialised placeholder vector. main0 reads only _70.z when building _102 and
// overwrites that component with 1.0 on the next statement.
constant float3 _70 = {};
// Fragment outputs: a single float4 written to colour attachment 0.
struct main0_out
{
float4 out_var_SV_Target0 [[color(0)]];
};
// Fragment stage inputs. in_var_TEXCOORD0 is multiplied by 5 in main0 to form the base
// index into CulledObjectBoxBounds.
struct main0_in
{
uint in_var_TEXCOORD0 [[user(locn0)]];
};
// Fragment entry point. For the current pixel it derives a 2D range (_96.._100) from the
// pixel coordinate and ShadowTileListGroupSize, tests it against five float4 entries of
// CulledObjectBoxBounds selected by in_var_TEXCOORD0, and on success atomically adds 1 to
// the counter at index (y * ShadowTileListGroupSize.x + x). The colour output is always zero.
// NOTE(review): the five entries presumably hold a box min, max and three axes — confirm
// against the HLSL source, which is not part of this file.
fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], float4 gl_FragCoord [[position]])
{
main0_out out = {};
// Integer pixel coordinate of this fragment.
uint2 _77 = uint2(gl_FragCoord.xy);
uint _78 = _77.y;
uint _83 = _77.x;
// (x, height - 1 - y): the y coordinate is flipped against ShadowTileListGroupSize.y.
float2 _91 = float2(float(_83), float(((*spvDescriptorSet0._Globals).ShadowTileListGroupSize.y - 1u) - _78));
float2 _93 = float2((*spvDescriptorSet0._Globals).ShadowTileListGroupSize);
// Lower (_96) and upper (_100) corners of this pixel's cell, computed as coord / size * 2 - 1.
float2 _96 = ((_91 / _93) * float2(2.0)) - float2(1.0);
float2 _100 = (((_91 + float2(1.0)) / _93) * float2(2.0)) - float2(1.0);
// Upper corner extended to 3D; z is forced to 1.0 (the _70.z read is immediately overwritten).
float3 _102 = float3(_100.x, _100.y, _70.z);
_102.z = 1.0;
// Base index of this object's entries (stride of 5 float4 per object) and the second entry.
uint _103 = in.in_var_TEXCOORD0 * 5u;
uint _107 = _103 + 1u;
// First test: entry[base + 1].xy must exceed the lower corner and entry[base].xyz must be below the upper corner.
if (all((*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_107].xy > _96.xy) && all((*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103].xyz < _102))
{
// Midpoint of entry[base] and entry[base + 1].
float3 _121 = float3(0.5) * ((*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103].xyz + (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_107].xyz);
float _122 = _96.x;
float _123 = _96.y;
// Eight corner points: x in {_122, _126}, y in {_123, _129}, z in {-1000, 1}.
spvUnsafeArray<float3, 8> _73;
_73[0] = float3(_122, _123, -1000.0);
float _126 = _100.x;
_73[1] = float3(_126, _123, -1000.0);
float _129 = _100.y;
_73[2] = float3(_122, _129, -1000.0);
_73[3] = float3(_126, _129, -1000.0);
_73[4] = float3(_122, _123, 1.0);
_73[5] = float3(_126, _123, 1.0);
_73[6] = float3(_122, _129, 1.0);
_73[7] = float3(_126, _129, 1.0);
// Running component-wise maximum (_155) and minimum (_158) of the projected corners.
float3 _155;
float3 _158;
_155 = float3(-500000.0);
_158 = float3(500000.0);
// The loop counter is advanced inside the body, not in the for-header.
for (int _160 = 0; _160 < 8; )
{
// Corner relative to the midpoint, projected onto entries base + 2, base + 3 and base + 4.
float3 _166 = _73[_160] - _121;
float3 _170 = float3(dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz));
_155 = fast::max(_155, _170);
_158 = fast::min(_158, _170);
_160++;
continue;
}
// Second test: minimum below 1 and maximum above -1 on every component.
if (all(_158 < float3(1.0)) && all(_155 > float3(-1.0)))
{
// Relaxed atomic increment of the counter at (y * ShadowTileListGroupSize.x + x); the result _179 is unused.
uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&spvDescriptorSet0.RWShadowTileNumCulledObjects_atomic[(_78 * (*spvDescriptorSet0._Globals).ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed);
}
}
// The colour output is zero regardless of the test outcome.
out.out_var_SV_Target0 = float4(0.0);
return out;
}

View File

@ -0,0 +1,16 @@
#version 450
// Workgroup size of 1 along X.
layout(local_size_x = 1) in;
// r32ui storage image updated with imageAtomicAdd in main().
layout(set = 0, binding = 0, r32ui) uniform uimage2D uImage;
// Combined image sampler read with textureLod in main().
layout(set = 0, binding = 1) uniform sampler2D uTexture;
// Storage block that receives the result of main().
layout(set = 0, binding = 2) buffer SSBO
{
vec4 outdata;
};
// Atomically adds 10 to the uImage texel at this invocation's global XY, then writes the
// LOD-0 sample of uTexture at the same XY plus the value returned by the atomic to outdata.
void main()
{
// `ret` is the value returned by imageAtomicAdd for that texel.
uint ret = imageAtomicAdd(uImage, ivec2(gl_GlobalInvocationID.xy), 10u);
outdata = textureLod(uTexture, vec2(gl_GlobalInvocationID.xy), 0.0) + float(ret);
}

View File

@ -0,0 +1,242 @@
; SPIR-V
; Version: 1.0
; Generator: Google spiregg; 0
; Bound: 180
; Schema: 0
OpCapability Shader
OpCapability SampledBuffer
OpCapability ImageBuffer
OpExtension "SPV_GOOGLE_hlsl_functionality1"
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %ShadowObjectCullPS "main" %in_var_TEXCOORD0 %gl_FragCoord %out_var_SV_Target0
OpExecutionMode %ShadowObjectCullPS OriginUpperLeft
OpSource HLSL 600
OpName %type_StructuredBuffer_v4float "type.StructuredBuffer.v4float"
OpName %CulledObjectBoxBounds "CulledObjectBoxBounds"
OpName %type__Globals "type.$Globals"
OpMemberName %type__Globals 0 "ShadowTileListGroupSize"
OpName %_Globals "$Globals"
OpName %type_buffer_image "type.buffer.image"
OpName %RWShadowTileNumCulledObjects "RWShadowTileNumCulledObjects"
OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0"
OpName %out_var_SV_Target0 "out.var.SV_Target0"
OpName %ShadowObjectCullPS "ShadowObjectCullPS"
OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0"
OpDecorate %in_var_TEXCOORD0 Flat
OpDecorate %gl_FragCoord BuiltIn FragCoord
OpDecorateString %gl_FragCoord UserSemantic "SV_POSITION"
OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0"
OpDecorate %in_var_TEXCOORD0 Location 0
OpDecorate %out_var_SV_Target0 Location 0
OpDecorate %CulledObjectBoxBounds DescriptorSet 0
OpDecorate %CulledObjectBoxBounds Binding 1
OpDecorate %_Globals DescriptorSet 0
OpDecorate %_Globals Binding 2
OpDecorate %RWShadowTileNumCulledObjects DescriptorSet 0
OpDecorate %RWShadowTileNumCulledObjects Binding 0
OpDecorate %_runtimearr_v4float ArrayStride 16
OpMemberDecorate %type_StructuredBuffer_v4float 0 Offset 0
OpMemberDecorate %type_StructuredBuffer_v4float 0 NonWritable
OpDecorate %type_StructuredBuffer_v4float BufferBlock
OpMemberDecorate %type__Globals 0 Offset 0
OpDecorate %type__Globals Block
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%v3float = OpTypeVector %float 3
%v2float = OpTypeVector %float 2
%int = OpTypeInt 32 1
%uint = OpTypeInt 32 0
%uint_2 = OpConstant %uint 2
%uint_4 = OpConstant %uint 4
%float_0 = OpConstant %float 0
%22 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
%int_1 = OpConstant %int 1
%int_0 = OpConstant %int 0
%uint_1 = OpConstant %uint 1
%float_2 = OpConstant %float 2
%27 = OpConstantComposite %v2float %float_2 %float_2
%float_1 = OpConstant %float 1
%29 = OpConstantComposite %v2float %float_1 %float_1
%float_n1000 = OpConstant %float -1000
%int_2 = OpConstant %int 2
%float_0_5 = OpConstant %float 0.5
%33 = OpConstantComposite %v3float %float_0_5 %float_0_5 %float_0_5
%float_500000 = OpConstant %float 500000
%35 = OpConstantComposite %v3float %float_500000 %float_500000 %float_500000
%float_n500000 = OpConstant %float -500000
%37 = OpConstantComposite %v3float %float_n500000 %float_n500000 %float_n500000
%int_3 = OpConstant %int 3
%int_4 = OpConstant %int 4
%int_5 = OpConstant %int 5
%int_6 = OpConstant %int 6
%int_7 = OpConstant %int 7
%int_8 = OpConstant %int 8
%44 = OpConstantComposite %v3float %float_1 %float_1 %float_1
%float_n1 = OpConstant %float -1
%46 = OpConstantComposite %v3float %float_n1 %float_n1 %float_n1
%uint_5 = OpConstant %uint 5
%uint_0 = OpConstant %uint 0
%uint_3 = OpConstant %uint 3
%_runtimearr_v4float = OpTypeRuntimeArray %v4float
%type_StructuredBuffer_v4float = OpTypeStruct %_runtimearr_v4float
%_ptr_Uniform_type_StructuredBuffer_v4float = OpTypePointer Uniform %type_StructuredBuffer_v4float
%v2uint = OpTypeVector %uint 2
%type__Globals = OpTypeStruct %v2uint
%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals
%type_buffer_image = OpTypeImage %uint Buffer 2 0 0 2 R32ui
%_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image
%_ptr_Input_uint = OpTypePointer Input %uint
%_ptr_Input_v4float = OpTypePointer Input %v4float
%_ptr_Output_v4float = OpTypePointer Output %v4float
%void = OpTypeVoid
%58 = OpTypeFunction %void
%_ptr_Function_v3float = OpTypePointer Function %v3float
%uint_8 = OpConstant %uint 8
%_arr_v3float_uint_8 = OpTypeArray %v3float %uint_8
%_ptr_Function__arr_v3float_uint_8 = OpTypePointer Function %_arr_v3float_uint_8
%_ptr_Uniform_v2uint = OpTypePointer Uniform %v2uint
%_ptr_Uniform_uint = OpTypePointer Uniform %uint
%bool = OpTypeBool
%v2bool = OpTypeVector %bool 2
%v3bool = OpTypeVector %bool 3
%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
%_ptr_Image_uint = OpTypePointer Image %uint
%CulledObjectBoxBounds = OpVariable %_ptr_Uniform_type_StructuredBuffer_v4float Uniform
%_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform
%RWShadowTileNumCulledObjects = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant
%in_var_TEXCOORD0 = OpVariable %_ptr_Input_uint Input
%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output
%70 = OpUndef %v3float
%71 = OpConstantNull %v3float
; Fragment entry point. Derives a 2D range from gl_FragCoord and ShadowTileListGroupSize,
; tests it against five float4 entries of CulledObjectBoxBounds selected by TEXCOORD0, and
; on success performs an atomic add of 1 on a texel of RWShadowTileNumCulledObjects.
; The output SV_Target0 is always written with the zero vector %22.
%ShadowObjectCullPS = OpFunction %void None %58
%72 = OpLabel
; Function-local array of eight float3 corner points.
%73 = OpVariable %_ptr_Function__arr_v3float_uint_8 Function
%74 = OpLoad %uint %in_var_TEXCOORD0
; Integer pixel coordinate (%77) from gl_FragCoord.xy.
%75 = OpLoad %v4float %gl_FragCoord
%76 = OpVectorShuffle %v2float %75 %75 0 1
%77 = OpConvertFToU %v2uint %76
%78 = OpCompositeExtract %uint %77 1
%79 = OpAccessChain %_ptr_Uniform_v2uint %_Globals %int_0
%80 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_0 %int_0
%81 = OpLoad %uint %80
; %84 = y * ShadowTileListGroupSize.x + x, the texel coordinate used by the atomic below.
%82 = OpIMul %uint %78 %81
%83 = OpCompositeExtract %uint %77 0
%84 = OpIAdd %uint %82 %83
%85 = OpConvertUToF %float %83
%86 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_0 %int_1
%87 = OpLoad %uint %86
; %89 = ShadowTileListGroupSize.y - 1 - y (flipped y).
%88 = OpISub %uint %87 %uint_1
%89 = OpISub %uint %88 %78
%90 = OpConvertUToF %float %89
%91 = OpCompositeConstruct %v2float %85 %90
%92 = OpLoad %v2uint %79
%93 = OpConvertUToF %v2float %92
; Lower corner %96 = coord / size * 2 - 1; upper corner %100 = (coord + 1) / size * 2 - 1.
%94 = OpFDiv %v2float %91 %93
%95 = OpFMul %v2float %94 %27
%96 = OpFSub %v2float %95 %29
%97 = OpFAdd %v2float %91 %29
%98 = OpFDiv %v2float %97 %93
%99 = OpFMul %v2float %98 %27
%100 = OpFSub %v2float %99 %29
; Upper corner widened to float3; the undefined z taken from %70 is replaced by 1.0.
%101 = OpVectorShuffle %v3float %70 %100 3 4 2
%102 = OpCompositeInsert %v3float %float_1 %101 2
; Base index %103 = TEXCOORD0 * 5; entries base and base + 1 are loaded here.
%103 = OpIMul %uint %74 %uint_5
%104 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %103
%105 = OpLoad %v4float %104
%106 = OpVectorShuffle %v3float %105 %105 0 1 2
%107 = OpIAdd %uint %103 %uint_1
%108 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %107
%109 = OpLoad %v4float %108
%110 = OpVectorShuffle %v3float %109 %109 0 1 2
%111 = OpVectorShuffle %v2float %109 %71 0 1
%112 = OpVectorShuffle %v2float %96 %71 0 1
; First test: entry[base + 1].xy > lower corner AND entry[base].xyz < upper corner.
%113 = OpFOrdGreaterThan %v2bool %111 %112
%114 = OpAll %bool %113
%115 = OpFOrdLessThan %v3bool %106 %102
%116 = OpAll %bool %115
%117 = OpLogicalAnd %bool %114 %116
OpSelectionMerge %118 DontFlatten
OpBranchConditional %117 %119 %118
%119 = OpLabel
; %121 = 0.5 * (entry[base] + entry[base + 1]).
%120 = OpFAdd %v3float %106 %110
%121 = OpFMul %v3float %33 %120
; Fill %73 with the eight combinations of x in {%122, %126}, y in {%123, %129}, z in {-1000, 1}.
%122 = OpCompositeExtract %float %96 0
%123 = OpCompositeExtract %float %96 1
%124 = OpCompositeConstruct %v3float %122 %123 %float_n1000
%125 = OpAccessChain %_ptr_Function_v3float %73 %int_0
OpStore %125 %124
%126 = OpCompositeExtract %float %100 0
%127 = OpCompositeConstruct %v3float %126 %123 %float_n1000
%128 = OpAccessChain %_ptr_Function_v3float %73 %int_1
OpStore %128 %127
%129 = OpCompositeExtract %float %100 1
%130 = OpCompositeConstruct %v3float %122 %129 %float_n1000
%131 = OpAccessChain %_ptr_Function_v3float %73 %int_2
OpStore %131 %130
%132 = OpCompositeConstruct %v3float %126 %129 %float_n1000
%133 = OpAccessChain %_ptr_Function_v3float %73 %int_3
OpStore %133 %132
%134 = OpCompositeConstruct %v3float %122 %123 %float_1
%135 = OpAccessChain %_ptr_Function_v3float %73 %int_4
OpStore %135 %134
%136 = OpCompositeConstruct %v3float %126 %123 %float_1
%137 = OpAccessChain %_ptr_Function_v3float %73 %int_5
OpStore %137 %136
%138 = OpCompositeConstruct %v3float %122 %129 %float_1
%139 = OpAccessChain %_ptr_Function_v3float %73 %int_6
OpStore %139 %138
%140 = OpCompositeConstruct %v3float %126 %129 %float_1
%141 = OpAccessChain %_ptr_Function_v3float %73 %int_7
OpStore %141 %140
; Load the xyz of entries base + 2, base + 3 and base + 4 once, ahead of the loop.
%142 = OpIAdd %uint %103 %uint_2
%143 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %142
%144 = OpLoad %v4float %143
%145 = OpVectorShuffle %v3float %144 %144 0 1 2
%146 = OpIAdd %uint %103 %uint_3
%147 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %146
%148 = OpLoad %v4float %147
%149 = OpVectorShuffle %v3float %148 %148 0 1 2
%150 = OpIAdd %uint %103 %uint_4
%151 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %150
%152 = OpLoad %v4float %151
%153 = OpVectorShuffle %v3float %152 %152 0 1 2
OpBranch %154
; Loop header: %155 is the running max (seeded with -500000), %158 the running min
; (seeded with 500000) and %160 the counter running from 0 while below 8. Hinted Unroll.
%154 = OpLabel
%155 = OpPhi %v3float %37 %119 %156 %157
%158 = OpPhi %v3float %35 %119 %159 %157
%160 = OpPhi %int %int_0 %119 %161 %157
%162 = OpSLessThan %bool %160 %int_8
OpLoopMerge %163 %157 Unroll
OpBranchConditional %162 %157 %163
; Loop body: corner minus %121, dotted with the three loaded entries, folded into min and max.
%157 = OpLabel
%164 = OpAccessChain %_ptr_Function_v3float %73 %160
%165 = OpLoad %v3float %164
%166 = OpFSub %v3float %165 %121
%167 = OpDot %float %166 %145
%168 = OpDot %float %166 %149
%169 = OpDot %float %166 %153
%170 = OpCompositeConstruct %v3float %167 %168 %169
%159 = OpExtInst %v3float %1 FMin %158 %170
%156 = OpExtInst %v3float %1 FMax %155 %170
%161 = OpIAdd %int %160 %int_1
OpBranch %154
; Second test: min < 1 on every component AND max > -1 on every component.
%163 = OpLabel
%171 = OpFOrdLessThan %v3bool %158 %44
%172 = OpAll %bool %171
%173 = OpFOrdGreaterThan %v3bool %155 %46
%174 = OpAll %bool %173
%175 = OpLogicalAnd %bool %172 %174
OpSelectionMerge %176 DontFlatten
OpBranchConditional %175 %177 %176
%177 = OpLabel
; Atomic add of %uint_1 on texel %84 (sample %uint_0) of the buffer image, with scope
; operand %uint_1 and memory-semantics operand %uint_0; the result %179 is unused.
%178 = OpImageTexelPointer %_ptr_Image_uint %RWShadowTileNumCulledObjects %84 %uint_0
%179 = OpAtomicIAdd %uint %178 %uint_1 %uint_0 %uint_1
OpBranch %176
%176 = OpLabel
OpBranch %118
; Common exit: store the zero vector to SV_Target0.
%118 = OpLabel
OpStore %out_var_SV_Target0 %22
OpReturn
OpFunctionEnd

View File

@ -13500,6 +13500,14 @@ void CompilerMSL::analyze_argument_buffers()
add_resource_name(var_id);
resources_in_set[desc_set].push_back(
{ &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype), 0 });
// Emulate texture2D atomic operations
if (atomic_image_vars.count(var.self))
{
uint32_t buffer_resource_index = get_metal_resource_index(var, SPIRType::AtomicCounter, 0);
resources_in_set[desc_set].push_back(
{ &var, to_name(var_id) + "_atomic", SPIRType::Struct, buffer_resource_index, 0 });
}
}
}
@ -13675,6 +13683,30 @@ void CompilerMSL::analyze_argument_buffers()
buffer_type.member_types.push_back(get_variable_data_type_id(var));
set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name));
}
else if (atomic_image_vars.count(var.self))
{
// Emulate texture2D atomic operations.
// Don't set the qualified name: it's already set for this variable,
// and the code that references the buffer manually appends "_atomic"
// to the name.
uint32_t offset = ir.increase_bound_by(2);
uint32_t atomic_type_id = offset;
uint32_t type_ptr_id = offset + 1;
SPIRType atomic_type;
atomic_type.basetype = SPIRType::AtomicCounter;
atomic_type.width = 32;
atomic_type.vecsize = 1;
set<SPIRType>(atomic_type_id, atomic_type);
atomic_type.pointer = true;
atomic_type.parent_type = atomic_type_id;
atomic_type.storage = StorageClassStorageBuffer;
auto &atomic_ptr_type = set<SPIRType>(type_ptr_id, atomic_type);
atomic_ptr_type.self = atomic_type_id;
buffer_type.member_types.push_back(type_ptr_id);
}
else
{
// Resources will be declared as pointers not references, so automatically dereference as appropriate.