diff --git a/reference/shaders-no-opt/asm/comp/temorary-access-terminator.vk.nocompat.asm.comp.vk b/reference/shaders-no-opt/asm/comp/temorary-access-terminator.vk.nocompat.asm.comp.vk new file mode 100644 index 00000000..22834fa8 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/temorary-access-terminator.vk.nocompat.asm.comp.vk @@ -0,0 +1,36 @@ +#version 450 +#extension GL_KHR_shader_subgroup_ballot : require +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 0) uniform usamplerBuffer _4; +layout(set = 0, binding = 0, r32ui) uniform writeonly uimageBuffer _5; + +uvec4 WaveMatch(uint _45) +{ + uvec4 _52; + for (;;) + { + bool _51 = _45 == subgroupBroadcastFirst(_45); + _52 = subgroupBallot(_51); + if (_51) + { + break; + } + else + { + continue; + } + } + return _52; +} + +void main() +{ + uvec4 _32 = WaveMatch(texelFetch(_4, int(gl_GlobalInvocationID.x)).x); + uint _37 = gl_GlobalInvocationID.x * 4u; + imageStore(_5, int(_37), uvec4(_32.x)); + imageStore(_5, int(_37 + 1u), uvec4(_32.y)); + imageStore(_5, int(_37 + 2u), uvec4(_32.z)); + imageStore(_5, int(_37 + 3u), uvec4(_32.w)); +} + diff --git a/shaders-no-opt/asm/comp/temorary-access-terminator.vk.nocompat.asm.comp b/shaders-no-opt/asm/comp/temorary-access-terminator.vk.nocompat.asm.comp new file mode 100644 index 00000000..deaae421 --- /dev/null +++ b/shaders-no-opt/asm/comp/temorary-access-terminator.vk.nocompat.asm.comp @@ -0,0 +1,86 @@ +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 55 +; Schema: 0 + OpCapability Shader + OpCapability SampledBuffer + OpCapability ImageBuffer + OpCapability GroupNonUniform + OpCapability GroupNonUniformBallot + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 64 1 1 + OpName %main "main" + OpName %WaveMatch "WaveMatch" + OpDecorate %8 DescriptorSet 0 + OpDecorate %8 Binding 0 + OpDecorate %11 DescriptorSet 0 + OpDecorate %11 
Binding 0 + OpDecorate %11 NonReadable + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + %void = OpTypeVoid + %2 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %6 = OpTypeImage %uint Buffer 0 0 0 1 Unknown +%_ptr_UniformConstant_6 = OpTypePointer UniformConstant %6 + %8 = OpVariable %_ptr_UniformConstant_6 UniformConstant + %9 = OpTypeImage %uint Buffer 0 0 0 2 R32ui +%_ptr_UniformConstant_9 = OpTypePointer UniformConstant %9 + %11 = OpVariable %_ptr_UniformConstant_9 UniformConstant + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input +%_ptr_Input_uint = OpTypePointer Input %uint + %uint_0 = OpConstant %uint 0 + %v4uint = OpTypeVector %uint 4 + %24 = OpTypeFunction %v4uint %uint + %uint_3 = OpConstant %uint 3 + %bool = OpTypeBool + %uint_4 = OpConstant %uint 4 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %main = OpFunction %void None %2 + %4 = OpLabel + OpBranch %53 + %53 = OpLabel + %12 = OpLoad %9 %11 + %13 = OpLoad %6 %8 + %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %20 = OpLoad %uint %18 + %22 = OpImageFetch %v4uint %13 %20 + %23 = OpCompositeExtract %uint %22 0 + %37 = OpFunctionCall %v4uint %WaveMatch %23 + %38 = OpCompositeExtract %uint %37 0 + %39 = OpCompositeExtract %uint %37 1 + %40 = OpCompositeExtract %uint %37 2 + %41 = OpCompositeExtract %uint %37 3 + %42 = OpIMul %uint %20 %uint_4 + %44 = OpCompositeConstruct %v4uint %38 %38 %38 %38 + OpImageWrite %12 %42 %44 + %45 = OpCompositeConstruct %v4uint %39 %39 %39 %39 + %46 = OpIAdd %uint %42 %uint_1 + OpImageWrite %12 %46 %45 + %48 = OpCompositeConstruct %v4uint %40 %40 %40 %40 + %49 = OpIAdd %uint %42 %uint_2 + OpImageWrite %12 %49 %48 + %51 = OpCompositeConstruct %v4uint %41 %41 %41 %41 + %52 = OpIAdd %uint %42 %uint_3 + OpImageWrite %12 %52 %51 + OpReturn + OpFunctionEnd + %WaveMatch = OpFunction %v4uint None %24 + %25 = OpFunctionParameter %uint + 
%27 = OpLabel + OpBranch %28 + %28 = OpLabel + OpLoopMerge %30 %29 None + OpBranch %29 + %29 = OpLabel + %31 = OpGroupNonUniformBroadcastFirst %uint %uint_3 %25 + %34 = OpIEqual %bool %25 %31 + %35 = OpGroupNonUniformBallot %v4uint %uint_3 %34 + OpBranchConditional %34 %30 %28 + %30 = OpLabel + OpReturnValue %35 + OpFunctionEnd diff --git a/spirv_cross.cpp b/spirv_cross.cpp index 4fcd969a..d053aac8 100644 --- a/spirv_cross.cpp +++ b/spirv_cross.cpp @@ -1659,6 +1659,9 @@ bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHand } } + if (!handler.handle_terminator(block)) /* Let the handler inspect this block's terminator; a false result aborts the traversal. */ + return false; + return true; } @@ -3055,6 +3058,27 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_potential_temporary(uint return compiler.ir.ids[id].empty() || (compiler.ir.ids[id].get_type() == TypeExpression); } +bool Compiler::AnalyzeVariableScopeAccessHandler::handle_terminator(const SPIRBlock &block) /* Reports the IDs read by a block's terminator through notify_variable_access, attributing each access to the block itself (block.self). Always returns true, so this hook never stops the traversal. */ +{ + switch (block.terminator) + { + case SPIRBlock::Return: /* return_value is reported only when non-zero (presumably zero means nothing is returned - confirm). */ + if (block.return_value) + notify_variable_access(block.return_value, block.self); + break; + + case SPIRBlock::Select: + case SPIRBlock::MultiSelect: /* Both terminator kinds report block.condition as accessed in this block. */ + notify_variable_access(block.condition, block.self); + break; + + default: /* Every other terminator kind reports nothing here. */ + break; + } + + return true; +} + bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length) { // Keep track of the types of temporaries, so we can hoist them out as necessary. diff --git a/spirv_cross.hpp b/spirv_cross.hpp index d1688a67..27308601 100644 --- a/spirv_cross.hpp +++ b/spirv_cross.hpp @@ -765,6 +765,10 @@ protected: // Return true if traversal should continue. // If false, traversal will end immediately. 
virtual bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) = 0; + virtual bool handle_terminator(const SPIRBlock &) /* Default terminator hook: ignores the block and returns true. Handlers that care about terminators override it. */ + { + return true; + } virtual bool follow_function_call(const SPIRFunction &) { @@ -979,6 +983,7 @@ protected: bool id_is_phi_variable(uint32_t id) const; bool id_is_potential_temporary(uint32_t id) const; bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + bool handle_terminator(const SPIRBlock &block) override; /* Overrides the default terminator hook; the definition lives in spirv_cross.cpp. */ Compiler &compiler; SPIRFunction &entry;