Merge pull request #495 from KhronosGroup/fix-493
Fix cases where loops and branches merge into continue blocks.
This commit is contained in:
commit
01fbb3a342
@ -28,11 +28,15 @@ void main()
|
||||
float _58;
|
||||
_55 = 0.0;
|
||||
_58 = 0.0;
|
||||
float _64;
|
||||
vec4 _72;
|
||||
float _78;
|
||||
for (int _60 = -3; _60 <= 3; _64 = float(_60), _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (vec2(0.0, _8.CB1.TextureSize.w) * _64)), _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < clamp((_50 * 80.0) * 0.0007999999797903001308441162109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375)), _55 += (_72.x * _78), _58 += _78, _60++)
|
||||
for (int _60 = -3; _60 <= 3; )
|
||||
{
|
||||
float _64 = float(_60);
|
||||
vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (vec2(0.0, _8.CB1.TextureSize.w) * _64));
|
||||
float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < clamp((_50 * 80.0) * 0.0007999999797903001308441162109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375));
|
||||
_55 += (_72.x * _78);
|
||||
_58 += _78;
|
||||
_60++;
|
||||
continue;
|
||||
}
|
||||
_entryPointOutput = vec4(_55 / _58, _50, 0.0, 1.0);
|
||||
}
|
||||
|
@ -0,0 +1,22 @@
|
||||
#version 450
|
||||
|
||||
layout(location = 0) out vec4 FragColor;
|
||||
layout(location = 0) in vec4 v0;
|
||||
|
||||
int _51;
|
||||
|
||||
void main()
|
||||
{
|
||||
FragColor = vec4(1.0);
|
||||
int _53;
|
||||
int _52;
|
||||
for (int _50 = 0; _50 < 4; _50++, _52 = _53)
|
||||
{
|
||||
_53 = 0;
|
||||
for (; _53 < 4; _53++)
|
||||
{
|
||||
FragColor += vec4(v0[(_50 + _53) & 3]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,23 @@
|
||||
#version 450
|
||||
|
||||
layout(location = 0) out vec4 FragColor;
|
||||
layout(location = 0) in vec4 v0;
|
||||
|
||||
void main()
|
||||
{
|
||||
FragColor = vec4(1.0);
|
||||
for (int _54 = 0; _54 < 4; _54++)
|
||||
{
|
||||
if (v0.x == 20.0)
|
||||
{
|
||||
FragColor += vec4(v0[_54 & 3]);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
FragColor += vec4(v0[_54 & 1]);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,30 @@
|
||||
#version 450
|
||||
|
||||
layout(location = 0) out vec4 FragColor;
|
||||
|
||||
void main()
|
||||
{
|
||||
FragColor = vec4(1.0);
|
||||
for (int _52 = 0; _52 < 4; _52++)
|
||||
{
|
||||
switch (_52)
|
||||
{
|
||||
case 0:
|
||||
{
|
||||
FragColor.x += 1.0;
|
||||
break;
|
||||
}
|
||||
case 1:
|
||||
{
|
||||
FragColor.y += 3.0;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
FragColor.z += 3.0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -28,11 +28,15 @@ void main()
|
||||
float _58;
|
||||
_55 = 0.0;
|
||||
_58 = 0.0;
|
||||
float _64;
|
||||
vec4 _72;
|
||||
float _78;
|
||||
for (int _60 = -3; _60 <= 3; _64 = float(_60), _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (vec2(0.0, _8.CB1.TextureSize.w) * _64)), _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < clamp((_50 * 80.0) * 0.0007999999797903001308441162109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375)), _55 += (_72.x * _78), _58 += _78, _60++)
|
||||
for (int _60 = -3; _60 <= 3; )
|
||||
{
|
||||
float _64 = float(_60);
|
||||
vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (vec2(0.0, _8.CB1.TextureSize.w) * _64));
|
||||
float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < clamp((_50 * 80.0) * 0.0007999999797903001308441162109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375));
|
||||
_55 += (_72.x * _78);
|
||||
_58 += _78;
|
||||
_60++;
|
||||
continue;
|
||||
}
|
||||
_entryPointOutput = vec4(_55 / _58, _50, 0.0, 1.0);
|
||||
}
|
||||
|
17
reference/shaders/asm/frag/loop-merge-to-continue.asm.frag
Normal file
17
reference/shaders/asm/frag/loop-merge-to-continue.asm.frag
Normal file
@ -0,0 +1,17 @@
|
||||
#version 450
|
||||
|
||||
layout(location = 0) out vec4 FragColor;
|
||||
layout(location = 0) in vec4 v0;
|
||||
|
||||
void main()
|
||||
{
|
||||
FragColor = vec4(1.0);
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
for (int j = 0; j < 4; j++)
|
||||
{
|
||||
FragColor += vec4(v0[(i + j) & 3]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,23 @@
|
||||
#version 450
|
||||
|
||||
layout(location = 0) out vec4 FragColor;
|
||||
layout(location = 0) in vec4 v0;
|
||||
|
||||
void main()
|
||||
{
|
||||
FragColor = vec4(1.0);
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
if (v0.x == 20.0)
|
||||
{
|
||||
FragColor += vec4(v0[i & 3]);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
FragColor += vec4(v0[i & 1]);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
30
reference/shaders/asm/frag/switch-merge-to-continue.asm.frag
Normal file
30
reference/shaders/asm/frag/switch-merge-to-continue.asm.frag
Normal file
@ -0,0 +1,30 @@
|
||||
#version 450
|
||||
|
||||
layout(location = 0) out vec4 FragColor;
|
||||
|
||||
void main()
|
||||
{
|
||||
FragColor = vec4(1.0);
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
{
|
||||
FragColor.x += 1.0;
|
||||
break;
|
||||
}
|
||||
case 1:
|
||||
{
|
||||
FragColor.y += 3.0;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
FragColor.z += 3.0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
85
shaders/asm/frag/loop-merge-to-continue.asm.frag
Normal file
85
shaders/asm/frag/loop-merge-to-continue.asm.frag
Normal file
@ -0,0 +1,85 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 3
|
||||
; Bound: 51
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint Fragment %main "main" %FragColor %v0
|
||||
OpExecutionMode %main OriginUpperLeft
|
||||
OpSource GLSL 450
|
||||
OpName %main "main"
|
||||
OpName %FragColor "FragColor"
|
||||
OpName %i "i"
|
||||
OpName %j "j"
|
||||
OpName %v0 "v0"
|
||||
OpDecorate %FragColor Location 0
|
||||
OpDecorate %v0 Location 0
|
||||
%void = OpTypeVoid
|
||||
%3 = OpTypeFunction %void
|
||||
%float = OpTypeFloat 32
|
||||
%v4float = OpTypeVector %float 4
|
||||
%_ptr_Output_v4float = OpTypePointer Output %v4float
|
||||
%FragColor = OpVariable %_ptr_Output_v4float Output
|
||||
%float_1 = OpConstant %float 1
|
||||
%11 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
|
||||
%int = OpTypeInt 32 1
|
||||
%_ptr_Function_int = OpTypePointer Function %int
|
||||
%int_0 = OpConstant %int 0
|
||||
%int_4 = OpConstant %int 4
|
||||
%bool = OpTypeBool
|
||||
%_ptr_Input_v4float = OpTypePointer Input %v4float
|
||||
%v0 = OpVariable %_ptr_Input_v4float Input
|
||||
%int_3 = OpConstant %int 3
|
||||
%_ptr_Input_float = OpTypePointer Input %float
|
||||
%int_1 = OpConstant %int 1
|
||||
%main = OpFunction %void None %3
|
||||
%5 = OpLabel
|
||||
%i = OpVariable %_ptr_Function_int Function
|
||||
%j = OpVariable %_ptr_Function_int Function
|
||||
OpStore %FragColor %11
|
||||
OpStore %i %int_0
|
||||
OpBranch %16
|
||||
%16 = OpLabel
|
||||
OpLoopMerge %18 %19 None
|
||||
OpBranch %20
|
||||
%20 = OpLabel
|
||||
%21 = OpLoad %int %i
|
||||
%24 = OpSLessThan %bool %21 %int_4
|
||||
OpBranchConditional %24 %17 %18
|
||||
%17 = OpLabel
|
||||
OpStore %j %int_0
|
||||
OpBranch %26
|
||||
%26 = OpLabel
|
||||
OpLoopMerge %19 %29 None
|
||||
OpBranch %30
|
||||
%30 = OpLabel
|
||||
%31 = OpLoad %int %j
|
||||
%32 = OpSLessThan %bool %31 %int_4
|
||||
OpBranchConditional %32 %27 %19
|
||||
%27 = OpLabel
|
||||
%35 = OpLoad %int %i
|
||||
%36 = OpLoad %int %j
|
||||
%37 = OpIAdd %int %35 %36
|
||||
%39 = OpBitwiseAnd %int %37 %int_3
|
||||
%41 = OpAccessChain %_ptr_Input_float %v0 %39
|
||||
%42 = OpLoad %float %41
|
||||
%43 = OpLoad %v4float %FragColor
|
||||
%44 = OpCompositeConstruct %v4float %42 %42 %42 %42
|
||||
%45 = OpFAdd %v4float %43 %44
|
||||
OpStore %FragColor %45
|
||||
OpBranch %29
|
||||
%29 = OpLabel
|
||||
%46 = OpLoad %int %j
|
||||
%48 = OpIAdd %int %46 %int_1
|
||||
OpStore %j %48
|
||||
OpBranch %26
|
||||
%19 = OpLabel
|
||||
%49 = OpLoad %int %i
|
||||
%50 = OpIAdd %int %49 %int_1
|
||||
OpStore %i %50
|
||||
OpBranch %16
|
||||
%18 = OpLabel
|
||||
OpReturn
|
||||
OpFunctionEnd
|
85
shaders/asm/frag/selection-merge-to-continue.asm.frag
Normal file
85
shaders/asm/frag/selection-merge-to-continue.asm.frag
Normal file
@ -0,0 +1,85 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 3
|
||||
; Bound: 55
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint Fragment %main "main" %FragColor %v0
|
||||
OpExecutionMode %main OriginUpperLeft
|
||||
OpSource GLSL 450
|
||||
OpName %main "main"
|
||||
OpName %FragColor "FragColor"
|
||||
OpName %i "i"
|
||||
OpName %v0 "v0"
|
||||
OpDecorate %FragColor Location 0
|
||||
OpDecorate %v0 Location 0
|
||||
%void = OpTypeVoid
|
||||
%3 = OpTypeFunction %void
|
||||
%float = OpTypeFloat 32
|
||||
%v4float = OpTypeVector %float 4
|
||||
%_ptr_Output_v4float = OpTypePointer Output %v4float
|
||||
%FragColor = OpVariable %_ptr_Output_v4float Output
|
||||
%float_1 = OpConstant %float 1
|
||||
%11 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
|
||||
%int = OpTypeInt 32 1
|
||||
%_ptr_Function_int = OpTypePointer Function %int
|
||||
%int_0 = OpConstant %int 0
|
||||
%int_4 = OpConstant %int 4
|
||||
%bool = OpTypeBool
|
||||
%_ptr_Input_v4float = OpTypePointer Input %v4float
|
||||
%v0 = OpVariable %_ptr_Input_v4float Input
|
||||
%uint = OpTypeInt 32 0
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%_ptr_Input_float = OpTypePointer Input %float
|
||||
%float_20 = OpConstant %float 20
|
||||
%int_3 = OpConstant %int 3
|
||||
%int_1 = OpConstant %int 1
|
||||
%main = OpFunction %void None %3
|
||||
%5 = OpLabel
|
||||
%i = OpVariable %_ptr_Function_int Function
|
||||
OpStore %FragColor %11
|
||||
OpStore %i %int_0
|
||||
OpBranch %16
|
||||
%16 = OpLabel
|
||||
OpLoopMerge %18 %19 None
|
||||
OpBranch %20
|
||||
%20 = OpLabel
|
||||
%21 = OpLoad %int %i
|
||||
%24 = OpSLessThan %bool %21 %int_4
|
||||
OpBranchConditional %24 %17 %18
|
||||
%17 = OpLabel
|
||||
%30 = OpAccessChain %_ptr_Input_float %v0 %uint_0
|
||||
%31 = OpLoad %float %30
|
||||
%33 = OpFOrdEqual %bool %31 %float_20
|
||||
OpSelectionMerge %19 None
|
||||
OpBranchConditional %33 %34 %44
|
||||
%34 = OpLabel
|
||||
%36 = OpLoad %int %i
|
||||
%38 = OpBitwiseAnd %int %36 %int_3
|
||||
%39 = OpAccessChain %_ptr_Input_float %v0 %38
|
||||
%40 = OpLoad %float %39
|
||||
%41 = OpLoad %v4float %FragColor
|
||||
%42 = OpCompositeConstruct %v4float %40 %40 %40 %40
|
||||
%43 = OpFAdd %v4float %41 %42
|
||||
OpStore %FragColor %43
|
||||
OpBranch %19
|
||||
%44 = OpLabel
|
||||
%45 = OpLoad %int %i
|
||||
%47 = OpBitwiseAnd %int %45 %int_1
|
||||
%48 = OpAccessChain %_ptr_Input_float %v0 %47
|
||||
%49 = OpLoad %float %48
|
||||
%50 = OpLoad %v4float %FragColor
|
||||
%51 = OpCompositeConstruct %v4float %49 %49 %49 %49
|
||||
%52 = OpFAdd %v4float %50 %51
|
||||
OpStore %FragColor %52
|
||||
OpBranch %19
|
||||
%19 = OpLabel
|
||||
%53 = OpLoad %int %i
|
||||
%54 = OpIAdd %int %53 %int_1
|
||||
OpStore %i %54
|
||||
OpBranch %16
|
||||
%18 = OpLabel
|
||||
OpReturn
|
||||
OpFunctionEnd
|
85
shaders/asm/frag/switch-merge-to-continue.asm.frag
Normal file
85
shaders/asm/frag/switch-merge-to-continue.asm.frag
Normal file
@ -0,0 +1,85 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 3
|
||||
; Bound: 57
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint Fragment %main "main" %FragColor %v0
|
||||
OpExecutionMode %main OriginUpperLeft
|
||||
OpSource GLSL 450
|
||||
OpName %main "main"
|
||||
OpName %FragColor "FragColor"
|
||||
OpName %i "i"
|
||||
OpName %v0 "v0"
|
||||
OpDecorate %FragColor Location 0
|
||||
OpDecorate %v0 Location 0
|
||||
%void = OpTypeVoid
|
||||
%3 = OpTypeFunction %void
|
||||
%float = OpTypeFloat 32
|
||||
%v4float = OpTypeVector %float 4
|
||||
%_ptr_Output_v4float = OpTypePointer Output %v4float
|
||||
%FragColor = OpVariable %_ptr_Output_v4float Output
|
||||
%float_1 = OpConstant %float 1
|
||||
%11 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
|
||||
%int = OpTypeInt 32 1
|
||||
%_ptr_Function_int = OpTypePointer Function %int
|
||||
%int_0 = OpConstant %int 0
|
||||
%int_4 = OpConstant %int 4
|
||||
%bool = OpTypeBool
|
||||
%uint = OpTypeInt 32 0
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%_ptr_Output_float = OpTypePointer Output %float
|
||||
%float_3 = OpConstant %float 3
|
||||
%uint_1 = OpConstant %uint 1
|
||||
%uint_2 = OpConstant %uint 2
|
||||
%int_1 = OpConstant %int 1
|
||||
%_ptr_Input_v4float = OpTypePointer Input %v4float
|
||||
%v0 = OpVariable %_ptr_Input_v4float Input
|
||||
%main = OpFunction %void None %3
|
||||
%5 = OpLabel
|
||||
%i = OpVariable %_ptr_Function_int Function
|
||||
OpStore %FragColor %11
|
||||
OpStore %i %int_0
|
||||
OpBranch %16
|
||||
%16 = OpLabel
|
||||
OpLoopMerge %18 %19 None
|
||||
OpBranch %20
|
||||
%20 = OpLabel
|
||||
%21 = OpLoad %int %i
|
||||
%24 = OpSLessThan %bool %21 %int_4
|
||||
OpBranchConditional %24 %17 %18
|
||||
%17 = OpLabel
|
||||
%25 = OpLoad %int %i
|
||||
OpSelectionMerge %19 None
|
||||
OpSwitch %25 %28 0 %26 1 %27
|
||||
%28 = OpLabel
|
||||
%46 = OpAccessChain %_ptr_Output_float %FragColor %uint_2
|
||||
%47 = OpLoad %float %46
|
||||
%48 = OpFAdd %float %47 %float_3
|
||||
%49 = OpAccessChain %_ptr_Output_float %FragColor %uint_2
|
||||
OpStore %49 %48
|
||||
OpBranch %19
|
||||
%26 = OpLabel
|
||||
%33 = OpAccessChain %_ptr_Output_float %FragColor %uint_0
|
||||
%34 = OpLoad %float %33
|
||||
%35 = OpFAdd %float %34 %float_1
|
||||
%36 = OpAccessChain %_ptr_Output_float %FragColor %uint_0
|
||||
OpStore %36 %35
|
||||
OpBranch %19
|
||||
%27 = OpLabel
|
||||
%40 = OpAccessChain %_ptr_Output_float %FragColor %uint_1
|
||||
%41 = OpLoad %float %40
|
||||
%42 = OpFAdd %float %41 %float_3
|
||||
%43 = OpAccessChain %_ptr_Output_float %FragColor %uint_1
|
||||
OpStore %43 %42
|
||||
OpBranch %19
|
||||
%19 = OpLabel
|
||||
%52 = OpLoad %int %i
|
||||
%54 = OpIAdd %int %52 %int_1
|
||||
OpStore %i %54
|
||||
OpBranch %16
|
||||
%18 = OpLabel
|
||||
OpReturn
|
||||
OpFunctionEnd
|
@ -460,7 +460,8 @@ struct SPIRBlock : IVariant
|
||||
enum Method
|
||||
{
|
||||
MergeToSelectForLoop,
|
||||
MergeToDirectForLoop
|
||||
MergeToDirectForLoop,
|
||||
MergeToSelectContinueForLoop
|
||||
};
|
||||
|
||||
enum ContinueBlockType
|
||||
|
@ -2108,7 +2108,7 @@ bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method
|
||||
if (block.disable_block_optimization || block.complex_continue)
|
||||
return false;
|
||||
|
||||
if (method == SPIRBlock::MergeToSelectForLoop)
|
||||
if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
|
||||
{
|
||||
// Try to detect common for loop pattern
|
||||
// which the code backend can use to create cleaner code.
|
||||
@ -2118,6 +2118,9 @@ bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method
|
||||
block.true_block != block.merge_block && block.true_block != block.self &&
|
||||
block.false_block == block.merge_block;
|
||||
|
||||
if (ret && method == SPIRBlock::MergeToSelectContinueForLoop)
|
||||
ret = block.true_block == block.continue_block;
|
||||
|
||||
// If we have OpPhi which depends on branches which came from our own block,
|
||||
// we need to flush phi variables in else block instead of a trivial break,
|
||||
// so we cannot assume this is a for loop candidate.
|
||||
|
@ -585,6 +585,11 @@ protected:
|
||||
multiselect_merge_targets.find(next) != end(multiselect_merge_targets);
|
||||
}
|
||||
|
||||
inline bool is_loop_break(uint32_t next) const
|
||||
{
|
||||
return loop_merge_targets.find(next) != end(loop_merge_targets);
|
||||
}
|
||||
|
||||
inline bool is_conditional(uint32_t next) const
|
||||
{
|
||||
return selection_merge_targets.find(next) != end(selection_merge_targets) &&
|
||||
|
196
spirv_glsl.cpp
196
spirv_glsl.cpp
@ -8316,6 +8316,63 @@ void CompilerGLSL::flush_phi(uint32_t from, uint32_t to)
|
||||
}
|
||||
}
|
||||
|
||||
void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to)
|
||||
{
|
||||
assert(is_continue(to));
|
||||
|
||||
auto &to_block = get<SPIRBlock>(to);
|
||||
if (to_block.complex_continue)
|
||||
{
|
||||
// Just emit the whole block chain as is.
|
||||
auto usage_counts = expression_usage_counts;
|
||||
auto invalid = invalid_expressions;
|
||||
|
||||
emit_block_chain(to_block);
|
||||
|
||||
// Expression usage counts and invalid expressions
|
||||
// are moot after returning from the continue block.
|
||||
// Since we emit the same block multiple times,
|
||||
// we don't want to invalidate ourselves.
|
||||
expression_usage_counts = usage_counts;
|
||||
invalid_expressions = invalid;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto &from_block = get<SPIRBlock>(from);
|
||||
bool outside_control_flow = false;
|
||||
uint32_t loop_dominator = 0;
|
||||
|
||||
// FIXME: Refactor this to not use the old loop_dominator tracking.
|
||||
if (from_block.merge_block)
|
||||
{
|
||||
// If we are a loop header, we don't set the loop dominator,
|
||||
// so just use "self" here.
|
||||
loop_dominator = from;
|
||||
}
|
||||
else if (from_block.loop_dominator != SPIRBlock::NoDominator)
|
||||
{
|
||||
loop_dominator = from_block.loop_dominator;
|
||||
}
|
||||
|
||||
if (loop_dominator != 0)
|
||||
{
|
||||
auto &dominator = get<SPIRBlock>(loop_dominator);
|
||||
|
||||
// For non-complex continue blocks, we implicitly branch to the continue block
|
||||
// by having the continue block be part of the loop header in for (; ; continue-block).
|
||||
outside_control_flow = block_is_outside_flow_control_from_block(dominator, from_block);
|
||||
}
|
||||
|
||||
// Some simplification for for-loops. We always end up with a useless continue;
|
||||
// statement since we branch to a loop block.
|
||||
// Walk the CFG, if we uncoditionally execute the block calling continue assuming we're in the loop block,
|
||||
// we can avoid writing out an explicit continue statement.
|
||||
// Similar optimization to return statements if we know we're outside flow control.
|
||||
if (!outside_control_flow)
|
||||
statement("continue;");
|
||||
}
|
||||
}
|
||||
|
||||
void CompilerGLSL::branch(uint32_t from, uint32_t to)
|
||||
{
|
||||
flush_phi(from, to);
|
||||
@ -8329,64 +8386,17 @@ void CompilerGLSL::branch(uint32_t from, uint32_t to)
|
||||
// and end the chain here.
|
||||
statement("continue;");
|
||||
}
|
||||
else if (is_continue(to))
|
||||
{
|
||||
auto &to_block = get<SPIRBlock>(to);
|
||||
if (to_block.complex_continue)
|
||||
{
|
||||
// Just emit the whole block chain as is.
|
||||
auto usage_counts = expression_usage_counts;
|
||||
auto invalid = invalid_expressions;
|
||||
|
||||
emit_block_chain(to_block);
|
||||
|
||||
// Expression usage counts and invalid expressions
|
||||
// are moot after returning from the continue block.
|
||||
// Since we emit the same block multiple times,
|
||||
// we don't want to invalidate ourselves.
|
||||
expression_usage_counts = usage_counts;
|
||||
invalid_expressions = invalid;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto &from_block = get<SPIRBlock>(from);
|
||||
bool outside_control_flow = false;
|
||||
uint32_t loop_dominator = 0;
|
||||
|
||||
// FIXME: Refactor this to not use the old loop_dominator tracking.
|
||||
if (from_block.merge_block)
|
||||
{
|
||||
// If we are a loop header, we don't set the loop dominator,
|
||||
// so just use "self" here.
|
||||
loop_dominator = from;
|
||||
}
|
||||
else if (from_block.loop_dominator != SPIRBlock::NoDominator)
|
||||
{
|
||||
loop_dominator = from_block.loop_dominator;
|
||||
}
|
||||
|
||||
if (loop_dominator != 0)
|
||||
{
|
||||
auto &dominator = get<SPIRBlock>(loop_dominator);
|
||||
|
||||
// For non-complex continue blocks, we implicitly branch to the continue block
|
||||
// by having the continue block be part of the loop header in for (; ; continue-block).
|
||||
outside_control_flow = block_is_outside_flow_control_from_block(dominator, from_block);
|
||||
}
|
||||
|
||||
// Some simplification for for-loops. We always end up with a useless continue;
|
||||
// statement since we branch to a loop block.
|
||||
// Walk the CFG, if we unconditionally execute the block calling continue assuming we're in the loop block,
|
||||
// we can avoid writing out an explicit continue statement.
|
||||
// Similar optimization to return statements if we know we're outside flow control.
|
||||
if (!outside_control_flow)
|
||||
statement("continue;");
|
||||
}
|
||||
}
|
||||
else if (is_break(to))
|
||||
statement("break;");
|
||||
else if (is_continue(to))
|
||||
branch_to_continue(from, to);
|
||||
else if (!is_conditional(to))
|
||||
emit_block_chain(get<SPIRBlock>(to));
|
||||
|
||||
// It is important that we check for break before continue.
|
||||
// A block might serve two purposes, a break block for the inner scope, and
|
||||
// a continue block in the outer scope.
|
||||
// Inner scope always takes precedence.
|
||||
}
|
||||
|
||||
void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block)
|
||||
@ -8395,6 +8405,9 @@ void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uin
|
||||
bool true_sub = !is_conditional(true_block);
|
||||
bool false_sub = !is_conditional(false_block);
|
||||
|
||||
// It is possible that a selection merge target also serves as a break/continue block.
|
||||
// We will not emit break or continue here, but defer that to the outer scope.
|
||||
|
||||
if (true_sub)
|
||||
{
|
||||
statement("if (", to_expression(cond), ")");
|
||||
@ -8420,7 +8433,7 @@ void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uin
|
||||
else if (false_sub && !true_sub)
|
||||
{
|
||||
// Only need false path, use negative conditional.
|
||||
statement("if (!", to_expression(cond), ")");
|
||||
statement("if (!", to_enclosed_expression(cond), ")");
|
||||
begin_scope();
|
||||
branch(from, false_block);
|
||||
end_scope();
|
||||
@ -8622,7 +8635,7 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method
|
||||
{
|
||||
SPIRBlock::ContinueBlockType continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
|
||||
|
||||
if (method == SPIRBlock::MergeToSelectForLoop)
|
||||
if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
|
||||
{
|
||||
uint32_t current_count = statement_count;
|
||||
// If we're trying to create a true for loop,
|
||||
@ -8646,8 +8659,13 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method
|
||||
// emitting the continue block can invalidate the condition expression.
|
||||
auto initializer = emit_for_loop_initializers(block);
|
||||
auto condition = to_expression(block.condition);
|
||||
auto continue_block = emit_continue_block(block.continue_block);
|
||||
statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
|
||||
if (method != SPIRBlock::MergeToSelectContinueForLoop)
|
||||
{
|
||||
auto continue_block = emit_continue_block(block.continue_block);
|
||||
statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
|
||||
}
|
||||
else
|
||||
statement("for (", initializer, "; ", condition, "; )");
|
||||
break;
|
||||
}
|
||||
|
||||
@ -8750,6 +8768,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
|
||||
bool select_branch_to_true_block = false;
|
||||
bool skip_direct_branch = false;
|
||||
bool emitted_for_loop_header = false;
|
||||
bool force_complex_continue_block = false;
|
||||
|
||||
// If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
|
||||
// Need to sort these to ensure that reference output is stable.
|
||||
@ -8774,8 +8793,22 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
|
||||
for (auto var : block.loop_variables)
|
||||
get<SPIRVariable>(var).loop_variable_enable = true;
|
||||
|
||||
// This is the method often used by spirv-opt to implement loops.
|
||||
// The loop header goes straight into the continue block.
|
||||
// However, don't attempt this on ESSL 1.0, because there, if a loop variable is updated in a continue block,
|
||||
// the update *MUST* stay in the for-loop's continue expression. This loop method emits the continue block in the loop body, so it will not work.
|
||||
if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop))
|
||||
{
|
||||
flush_undeclared_variables(block);
|
||||
if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop))
|
||||
{
|
||||
select_branch_to_true_block = true;
|
||||
emitted_for_loop_header = true;
|
||||
force_complex_continue_block = true;
|
||||
}
|
||||
}
|
||||
// This is the older loop behavior in glslang which branches to loop body directly from the loop header.
|
||||
if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
|
||||
else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
|
||||
{
|
||||
flush_undeclared_variables(block);
|
||||
if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop))
|
||||
@ -8856,9 +8889,23 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
|
||||
break;
|
||||
|
||||
case SPIRBlock::Select:
|
||||
// True if MergeToSelectForLoop succeeded.
|
||||
// True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded.
|
||||
if (select_branch_to_true_block)
|
||||
branch(block.self, block.true_block);
|
||||
{
|
||||
if (force_complex_continue_block)
|
||||
{
|
||||
assert(block.true_block == block.continue_block);
|
||||
|
||||
// We're going to emit a continue block directly here, so make sure it's marked as complex.
|
||||
auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
|
||||
bool old_complex = complex_continue;
|
||||
complex_continue = true;
|
||||
branch(block.self, block.true_block);
|
||||
complex_continue = old_complex;
|
||||
}
|
||||
else
|
||||
branch(block.self, block.true_block);
|
||||
}
|
||||
else
|
||||
branch(block.self, block.condition, block.true_block, block.false_block);
|
||||
break;
|
||||
@ -8959,7 +9006,23 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
|
||||
// that block after this. If we had selection merge, we already flushed phi variables.
|
||||
if (block.merge != SPIRBlock::MergeSelection)
|
||||
flush_phi(block.self, block.next_block);
|
||||
emit_block_chain(get<SPIRBlock>(block.next_block));
|
||||
|
||||
// For merge selects we might have ignored the fact that a merge target
|
||||
// could have been a break; or continue;
|
||||
// We will need to deal with it here.
|
||||
if (is_loop_break(block.next_block))
|
||||
{
|
||||
// Cannot check for just break, because switch statements will also use break.
|
||||
assert(block.merge == SPIRBlock::MergeSelection);
|
||||
statement("break;");
|
||||
}
|
||||
else if (is_continue(block.next_block))
|
||||
{
|
||||
assert(block.merge == SPIRBlock::MergeSelection);
|
||||
branch_to_continue(block.self, block.next_block);
|
||||
}
|
||||
else
|
||||
emit_block_chain(get<SPIRBlock>(block.next_block));
|
||||
}
|
||||
|
||||
if (block.merge == SPIRBlock::MergeLoop)
|
||||
@ -8982,8 +9045,13 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
|
||||
else
|
||||
end_scope();
|
||||
|
||||
flush_phi(block.self, block.merge_block);
|
||||
emit_block_chain(get<SPIRBlock>(block.merge_block));
|
||||
// We cannot break out of two loops at once, so don't check for break; here.
|
||||
// Using block.self as the "from" block isn't quite right, but it has the same scope
|
||||
// and dominance structure, so it's fine.
|
||||
if (is_continue(block.merge_block))
|
||||
branch_to_continue(block.self, block.merge_block);
|
||||
else
|
||||
emit_block_chain(get<SPIRBlock>(block.merge_block));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -356,6 +356,7 @@ protected:
|
||||
void propagate_loop_dominators(const SPIRBlock &block);
|
||||
|
||||
void branch(uint32_t from, uint32_t to);
|
||||
void branch_to_continue(uint32_t from, uint32_t to);
|
||||
void branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block);
|
||||
void flush_phi(uint32_t from, uint32_t to);
|
||||
bool flush_phi_required(uint32_t from, uint32_t to);
|
||||
|
Loading…
Reference in New Issue
Block a user