diff --git a/reference/opt/shaders/asm/comp/op-phi-swap.asm.comp b/reference/opt/shaders/asm/comp/op-phi-swap.asm.comp new file mode 100644 index 00000000..f2847f43 --- /dev/null +++ b/reference/opt/shaders/asm/comp/op-phi-swap.asm.comp @@ -0,0 +1,38 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer _3_4 +{ + float _m0[]; +} _4; + +layout(binding = 1, std430) buffer _3_5 +{ + float _m0[]; +} _5; + +void main() +{ + bool _34; + float _35; + float _36; + _34 = true; + _35 = _4._m0[gl_GlobalInvocationID.x]; + _36 = 8.5; + for (;;) + { + if (_34) + { + _34 = false; + float _35_copy = _35; + _35 = _36; + _36 = _35_copy; + } + else + { + break; + } + } + _5._m0[gl_GlobalInvocationID.x] = _35 - _36; +} + diff --git a/reference/shaders/asm/comp/op-phi-swap.asm.comp b/reference/shaders/asm/comp/op-phi-swap.asm.comp new file mode 100644 index 00000000..76f4ccb1 --- /dev/null +++ b/reference/shaders/asm/comp/op-phi-swap.asm.comp @@ -0,0 +1,39 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer _3_4 +{ + float _m0[]; +} _4; + +layout(binding = 1, std430) buffer _3_5 +{ + float _m0[]; +} _5; + +void main() +{ + float _26 = 8.5; + bool _34; + float _35; + float _36; + _34 = true; + _35 = _4._m0[gl_GlobalInvocationID.x]; + _36 = _26; + for (;;) + { + if (_34) + { + _34 = false; + float _35_copy = _35; + _35 = _36; + _36 = _35_copy; + } + else + { + break; + } + } + _5._m0[gl_GlobalInvocationID.x] = _35 - _36; +} + diff --git a/shaders/asm/comp/op-phi-swap.asm.comp b/shaders/asm/comp/op-phi-swap.asm.comp new file mode 100644 index 00000000..dc18d697 --- /dev/null +++ b/shaders/asm/comp/op-phi-swap.asm.comp @@ -0,0 +1,63 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 39 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + 
OpExecutionMode %main LocalSize 1 1 1 + OpName %main "main" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %_struct_3 BufferBlock + OpDecorate %4 DescriptorSet 0 + OpDecorate %4 Binding 0 + OpDecorate %5 DescriptorSet 0 + OpDecorate %5 Binding 1 + OpDecorate %_runtimearr_float ArrayStride 4 + OpMemberDecorate %_struct_3 0 Offset 0 + %bool = OpTypeBool + %void = OpTypeVoid + %9 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %int = OpTypeInt 32 1 + %float = OpTypeFloat 32 + %v3uint = OpTypeVector %uint 3 + %v3float = OpTypeVector %float 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%_ptr_Uniform_int = OpTypePointer Uniform %int +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_runtimearr_int = OpTypeRuntimeArray %int +%_runtimearr_float = OpTypeRuntimeArray %float + %_struct_3 = OpTypeStruct %_runtimearr_float +%_ptr_Uniform__struct_3 = OpTypePointer Uniform %_struct_3 + %4 = OpVariable %_ptr_Uniform__struct_3 Uniform + %5 = OpVariable %_ptr_Uniform__struct_3 Uniform +%_ptr_Function_float = OpTypePointer Function %float +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %true = OpConstantTrue %bool + %false = OpConstantFalse %bool + %int_0 = OpConstant %int 0 + %float_8_5 = OpConstant %float 8.5 + %main = OpFunction %void None %9 + %25 = OpLabel + %26 = OpVariable %_ptr_Function_float Function %float_8_5 + %27 = OpLoad %v3uint %gl_GlobalInvocationID + %28 = OpCompositeExtract %uint %27 0 + %29 = OpAccessChain %_ptr_Uniform_float %4 %int_0 %28 + %30 = OpAccessChain %_ptr_Uniform_float %5 %int_0 %28 + %31 = OpLoad %float %29 + %32 = OpLoad %float %26 + OpBranch %33 + %33 = OpLabel + %34 = OpPhi %bool %true %25 %false %33 + %35 = OpPhi %float %31 %25 %36 %33 + %36 = OpPhi %float %32 %25 %35 %33 + OpLoopMerge %37 %33 None + OpBranchConditional %34 %33 %37 + %37 = OpLabel + %38 = OpFSub %float %35 %36 + OpStore %30 %38 + OpReturn + OpFunctionEnd diff --git 
a/spirv_glsl.cpp b/spirv_glsl.cpp index 14328fe0..a8c58d8f 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -1283,7 +1283,7 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var) auto &dec = ir.meta[var.self].decoration; auto &type = get<SPIRType>(var.basetype); - auto flags = dec.decoration_flags; + auto &flags = dec.decoration_flags; auto typeflags = ir.meta[type.self].decoration.decoration_flags; if (options.vulkan_semantics && var.storage == StorageClassPushConstant) @@ -3459,7 +3459,7 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id) { auto &type = get<SPIRType>(result_type); - auto flags = ir.meta[result_id].decoration.decoration_flags; + auto &flags = ir.meta[result_id].decoration.decoration_flags; // If we're declaring temporaries inside continue blocks, // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables. @@ -4568,7 +4568,7 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, { forced_temporaries.insert(id); auto &type = get<SPIRType>(result_type); - auto flags = ir.meta[id].decoration.decoration_flags; + auto &flags = ir.meta[id].decoration.decoration_flags; statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(id)), ";"); set<SPIRExpression>(id, to_name(id), result_type, true); @@ -4691,7 +4691,7 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, { forced_temporaries.insert(id); auto &type = get<SPIRType>(result_type); - auto flags = ir.meta[id].decoration.decoration_flags; + auto &flags = ir.meta[id].decoration.decoration_flags; statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(id)), ";"); set<SPIRExpression>(id, to_name(id), result_type, true); @@ -6840,7 +6840,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // We cannot construct array of arrays because we cannot treat the 
inputs // as value types. Need to declare the array-of-arrays, and copy in elements one by one. forced_temporaries.insert(id); - auto flags = ir.meta[id].decoration.decoration_flags; + auto &flags = ir.meta[id].decoration.decoration_flags; statement(flags_to_precision_qualifiers_glsl(out_type, flags), variable_decl(out_type, to_name(id)), ";"); set<SPIRExpression>(id, to_name(id), result_type, true); for (uint32_t i = 0; i < length; i++) @@ -7225,7 +7225,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) uint32_t op1 = ops[3]; forced_temporaries.insert(result_id); auto &type = get<SPIRType>(result_type); - auto flags = ir.meta[result_id].decoration.decoration_flags; + auto &flags = ir.meta[result_id].decoration.decoration_flags; statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), ";"); set<SPIRExpression>(result_id, to_name(result_id), result_type, true); @@ -7250,7 +7250,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) uint32_t op1 = ops[3]; forced_temporaries.insert(result_id); auto &type = get<SPIRType>(result_type); - auto flags = ir.meta[result_id].decoration.decoration_flags; + auto &flags = ir.meta[result_id].decoration.decoration_flags; statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), ";"); set<SPIRExpression>(result_id, to_name(result_id), result_type, true); @@ -8791,7 +8791,7 @@ const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id) string CompilerGLSL::to_qualifiers_glsl(uint32_t id) { - auto flags = ir.meta[id].decoration.decoration_flags; + auto &flags = ir.meta[id].decoration.decoration_flags; string res; auto *var = maybe_get<SPIRVariable>(id); @@ -8870,7 +8870,7 @@ string CompilerGLSL::variable_decl(const SPIRVariable &variable) const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable) { - auto flags = ir.meta[variable.self].decoration.decoration_flags; + auto &flags = ir.meta[variable.self].decoration.decoration_flags; if 
(flags.get(DecorationRelaxedPrecision)) return "mediump "; else @@ -9301,7 +9301,7 @@ void CompilerGLSL::flatten_buffer_block(uint32_t id) auto &var = get<SPIRVariable>(id); auto &type = get<SPIRType>(var.basetype); auto name = to_name(type.self, false); - auto flags = ir.meta.at(type.self).decoration.decoration_flags; + auto &flags = ir.meta.at(type.self).decoration.decoration_flags; if (!type.array.empty()) SPIRV_CROSS_THROW(name + " is an array of UBOs."); @@ -9598,8 +9598,12 @@ void CompilerGLSL::flush_phi(uint32_t from, uint32_t to) { auto &child = get<SPIRBlock>(to); - for (auto &phi : child.phi_variables) + unordered_set<uint32_t> temporary_phi_variables; + + for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr) { + auto &phi = *itr; + if (phi.parent == from) { auto &var = get<SPIRVariable>(phi.function_variable); @@ -9611,10 +9615,35 @@ void CompilerGLSL::flush_phi(uint32_t from, uint32_t to) { flush_variable_declaration(phi.function_variable); + // Check if we are going to write to a Phi variable that another statement will read from + // as part of another Phi node in our target block. + // For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads. + // This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm. 
+ bool need_saved_temporary = + find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool { + return future_phi.local_variable == phi.function_variable && future_phi.parent == from; + }) != end(child.phi_variables); + + if (need_saved_temporary) + { + temporary_phi_variables.insert(phi.function_variable); + auto &type = expression_type(phi.function_variable); + auto &flags = ir.meta[phi.function_variable].decoration.decoration_flags; + statement(flags_to_precision_qualifiers_glsl(type, flags), + variable_decl(type, join("_", phi.function_variable, "_copy")), " = ", + to_name(phi.function_variable), ";"); + } + // This might be called in continue block, so make sure we // use this to emit ESSL 1.0 compliant increments/decrements. auto lhs = to_expression(phi.function_variable); - auto rhs = to_expression(phi.local_variable); + + string rhs; + if (temporary_phi_variables.count(phi.local_variable)) + rhs = join("_", phi.local_variable, "_copy"); + else + rhs = to_expression(phi.local_variable); + if (!optimize_read_modify_write(get<SPIRType>(var.basetype), lhs, rhs)) statement(lhs, " = ", rhs, ";"); } @@ -10129,7 +10158,7 @@ void CompilerGLSL::emit_hoisted_temporaries(vector<pair<uint32_t, uint32_t>> &te for (auto &tmp : temporaries) { add_local_variable_name(tmp.second); - auto flags = ir.meta[tmp.second].decoration.decoration_flags; + auto &flags = ir.meta[tmp.second].decoration.decoration_flags; auto &type = get<SPIRType>(tmp.first); statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), ";");