Fix ADCE pass bug for mulitple entries (#3470)

When there are multiple entries and the shader has a variable with
WorkGroup storage class, those multiple entry functions store values to
the variable. Since ADCE pass uses def-use chains to propagate the work
list, some of instructions in the work list are not actually a part of
the currently processed function. As a result, it adds instructions in
other functions and put them in |live_insts_|. However, it does not
have the control flow information for those instructions in other
functions i.e., |block2headerBranch_| and |header2nextHeaderBranch_|.
When it processes those instructions (they are added when it processes a
different function), it skips handling them because they are already in
|live_insts_| and does not check |block2headerBranch_| and
|header2nextHeaderBranch_|, which results in skipping some branches.
Even though those branches are live branches, it considers they are dead
branches.
This commit is contained in:
Jaebaek Seo 2020-06-29 13:08:48 -04:00 committed by GitHub
parent 91c50e3fc9
commit fc0dc3a9c7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 139 additions and 13 deletions

View File

@ -105,13 +105,17 @@ bool AggressiveDCEPass::IsLocalVar(uint32_t varId) {
IsVarOfStorage(varId, SpvStorageClassWorkgroup);
}
void AggressiveDCEPass::AddStores(uint32_t ptrId) {
get_def_use_mgr()->ForEachUser(ptrId, [this, ptrId](Instruction* user) {
void AggressiveDCEPass::AddStores(Function* func, uint32_t ptrId) {
get_def_use_mgr()->ForEachUser(ptrId, [this, ptrId, func](Instruction* user) {
// If the user is not a part of |func|, skip it.
BasicBlock* blk = context()->get_instr_block(user);
if (blk && blk->GetParent() != func) return;
switch (user->opcode()) {
case SpvOpAccessChain:
case SpvOpInBoundsAccessChain:
case SpvOpCopyObject:
this->AddStores(user->result_id());
this->AddStores(func, user->result_id());
break;
case SpvOpLoad:
break;
@ -169,13 +173,13 @@ bool AggressiveDCEPass::IsTargetDead(Instruction* inst) {
return IsDead(tInst);
}
void AggressiveDCEPass::ProcessLoad(uint32_t varId) {
void AggressiveDCEPass::ProcessLoad(Function* func, uint32_t varId) {
// Only process locals
if (!IsLocalVar(varId)) return;
// Return if already processed
if (live_local_vars_.find(varId) != live_local_vars_.end()) return;
// Mark all stores to varId as live
AddStores(varId);
AddStores(func, varId);
// Cache varId as processed
live_local_vars_.insert(varId);
}
@ -332,6 +336,7 @@ bool AggressiveDCEPass::AggressiveDCE(Function* func) {
call_in_func_ = false;
func_is_entry_point_ = false;
private_stores_.clear();
live_local_vars_.clear();
// Stacks to keep track of when we are inside an if- or loop-construct.
// When immediately inside an if- or loop-construct, we do not initially
// mark branches live. All other branches must be marked live.
@ -454,7 +459,7 @@ bool AggressiveDCEPass::AggressiveDCE(Function* func) {
uint32_t varId;
(void)GetPtr(liveInst, &varId);
if (varId != 0) {
ProcessLoad(varId);
ProcessLoad(func, varId);
}
// Process memory copies like loads
} else if (liveInst->opcode() == SpvOpCopyMemory ||
@ -463,7 +468,7 @@ bool AggressiveDCEPass::AggressiveDCE(Function* func) {
(void)GetPtr(liveInst->GetSingleWordInOperand(kCopyMemorySourceAddrInIdx),
&varId);
if (varId != 0) {
ProcessLoad(varId);
ProcessLoad(func, varId);
}
// If merge, add other branches that are part of its control structure
} else if (liveInst->opcode() == SpvOpLoopMerge ||
@ -471,23 +476,23 @@ bool AggressiveDCEPass::AggressiveDCE(Function* func) {
AddBreaksAndContinuesToWorklist(liveInst);
// If function call, treat as if it loads from all pointer arguments
} else if (liveInst->opcode() == SpvOpFunctionCall) {
liveInst->ForEachInId([this](const uint32_t* iid) {
liveInst->ForEachInId([this, func](const uint32_t* iid) {
// Skip non-ptr args
if (!IsPtr(*iid)) return;
uint32_t varId;
(void)GetPtr(*iid, &varId);
ProcessLoad(varId);
ProcessLoad(func, varId);
});
// If function parameter, treat as if it's result id is loaded from
} else if (liveInst->opcode() == SpvOpFunctionParameter) {
ProcessLoad(liveInst->result_id());
ProcessLoad(func, liveInst->result_id());
// We treat an OpImageTexelPointer as a load of the pointer, and
// that value is manipulated to get the result.
} else if (liveInst->opcode() == SpvOpImageTexelPointer) {
uint32_t varId;
(void)GetPtr(liveInst, &varId);
if (varId != 0) {
ProcessLoad(varId);
ProcessLoad(func, varId);
}
}

View File

@ -85,7 +85,7 @@ class AggressiveDCEPass : public MemPass {
// Add all store instruction which use |ptrId|, directly or indirectly,
// to the live instruction worklist.
void AddStores(uint32_t ptrId);
void AddStores(Function* func, uint32_t ptrId);
// Initialize extensions allowlist
void InitExtensions();
@ -99,7 +99,7 @@ class AggressiveDCEPass : public MemPass {
bool IsTargetDead(Instruction* inst);
// If |varId| is local, mark all stores of varId as live.
void ProcessLoad(uint32_t varId);
void ProcessLoad(Function* func, uint32_t varId);
// If |bp| is structured header block, returns true and sets |mergeInst| to
// the merge instruction, |branchInst| to the branch and |mergeBlockId| to the

View File

@ -6761,6 +6761,127 @@ OpFunctionEnd
predefs1 + names_after + predefs2_after + func_after, true, true);
}
TEST_F(AggressiveDCETest, MultipleFunctionProcessIndependently) {
const std::string spirv = R"(
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %entryHistogram "entryHistogram" %gl_GlobalInvocationID %gl_LocalInvocationIndex
OpEntryPoint GLCompute %entryAverage "entryAverage" %gl_GlobalInvocationID %gl_LocalInvocationIndex
OpExecutionMode %entryHistogram LocalSize 16 16 1
OpExecutionMode %entryAverage LocalSize 256 1 1
OpSource HLSL 640
OpName %type_RWStructuredBuffer_uint "type.RWStructuredBuffer.uint"
OpName %uHistogram "uHistogram"
OpName %type_ACSBuffer_counter "type.ACSBuffer.counter"
OpMemberName %type_ACSBuffer_counter 0 "counter"
OpName %counter_var_uHistogram "counter.var.uHistogram"
OpName %sharedHistogram "sharedHistogram"
OpName %entryHistogram "entryHistogram"
OpName %param_var_id "param.var.id"
OpName %param_var_idx "param.var.idx"
OpName %entryAverage "entryAverage"
OpName %param_var_id_0 "param.var.id"
OpName %param_var_idx_0 "param.var.idx"
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex
OpDecorate %uHistogram DescriptorSet 0
OpDecorate %uHistogram Binding 0
OpDecorate %counter_var_uHistogram DescriptorSet 0
OpDecorate %counter_var_uHistogram Binding 1
OpDecorate %_runtimearr_uint ArrayStride 4
OpMemberDecorate %type_RWStructuredBuffer_uint 0 Offset 0
OpDecorate %type_RWStructuredBuffer_uint BufferBlock
OpMemberDecorate %type_ACSBuffer_counter 0 Offset 0
OpDecorate %type_ACSBuffer_counter BufferBlock
%uint = OpTypeInt 32 0
%uint_0 = OpConstant %uint 0
%uint_1 = OpConstant %uint 1
%uint_2 = OpConstant %uint 2
%uint_4 = OpConstant %uint 4
%uint_8 = OpConstant %uint 8
%uint_16 = OpConstant %uint 16
%uint_32 = OpConstant %uint 32
%uint_64 = OpConstant %uint 64
%uint_128 = OpConstant %uint 128
%uint_256 = OpConstant %uint 256
%uint_512 = OpConstant %uint 512
%uint_254 = OpConstant %uint 254
%uint_255 = OpConstant %uint 255
%int = OpTypeInt 32 1
%int_0 = OpConstant %int 0
%_runtimearr_uint = OpTypeRuntimeArray %uint
%type_RWStructuredBuffer_uint = OpTypeStruct %_runtimearr_uint
%_ptr_Uniform_type_RWStructuredBuffer_uint = OpTypePointer Uniform %type_RWStructuredBuffer_uint
%type_ACSBuffer_counter = OpTypeStruct %int
%_ptr_Uniform_type_ACSBuffer_counter = OpTypePointer Uniform %type_ACSBuffer_counter
%_arr_uint_uint_256 = OpTypeArray %uint %uint_256
%_ptr_Workgroup__arr_uint_uint_256 = OpTypePointer Workgroup %_arr_uint_uint_256
%v3uint = OpTypeVector %uint 3
%_ptr_Input_v3uint = OpTypePointer Input %v3uint
%_ptr_Input_uint = OpTypePointer Input %uint
%void = OpTypeVoid
%49 = OpTypeFunction %void
%_ptr_Function_v3uint = OpTypePointer Function %v3uint
%_ptr_Function_uint = OpTypePointer Function %uint
%52 = OpTypeFunction %void %_ptr_Function_v3uint %_ptr_Function_uint
%_ptr_Workgroup_uint = OpTypePointer Workgroup %uint
%uint_264 = OpConstant %uint 264
%bool = OpTypeBool
%_ptr_Uniform_uint = OpTypePointer Uniform %uint
%uHistogram = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_uint Uniform
%counter_var_uHistogram = OpVariable %_ptr_Uniform_type_ACSBuffer_counter Uniform
%sharedHistogram = OpVariable %_ptr_Workgroup__arr_uint_uint_256 Workgroup
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input
%entryHistogram = OpFunction %void None %49
%57 = OpLabel
%param_var_id = OpVariable %_ptr_Function_v3uint Function
%param_var_idx = OpVariable %_ptr_Function_uint Function
%58 = OpLoad %v3uint %gl_GlobalInvocationID
%59 = OpLoad %uint %gl_LocalInvocationIndex
%79 = OpAccessChain %_ptr_Workgroup_uint %sharedHistogram %int_0
%80 = OpAtomicIAdd %uint %79 %uint_1 %uint_0 %uint_1
OpReturn
OpFunctionEnd
%entryAverage = OpFunction %void None %49
%63 = OpLabel
%param_var_id_0 = OpVariable %_ptr_Function_v3uint Function
%param_var_idx_0 = OpVariable %_ptr_Function_uint Function
%64 = OpLoad %v3uint %gl_GlobalInvocationID
%65 = OpLoad %uint %gl_LocalInvocationIndex
OpStore %param_var_idx_0 %65
%83 = OpAccessChain %_ptr_Workgroup_uint %sharedHistogram %65
OpStore %83 %uint_0
; CHECK: [[ieq:%\w+]] = OpIEqual
; CHECK-NEXT: OpSelectionMerge [[merge:%\w+]]
; CHECK-NEXT: OpBranchConditional [[ieq]] [[not_elim:%\w+]] [[merge]]
; CHECK-NEXT: [[not_elim]] = OpLabel
; CHECK: [[merge]] = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%85 = OpIEqual %bool %65 %uint_0
OpSelectionMerge %89 None
OpBranchConditional %85 %86 %89
%86 = OpLabel
%88 = OpAccessChain %_ptr_Workgroup_uint %sharedHistogram %65
OpStore %88 %uint_1
OpBranch %89
%89 = OpLabel
OpControlBarrier %uint_2 %uint_2 %uint_264
%91 = OpAccessChain %_ptr_Workgroup_uint %sharedHistogram %65
%92 = OpLoad %uint %91
%94 = OpAccessChain %_ptr_Uniform_uint %uHistogram %int_0 %65
OpStore %94 %92
OpReturn
OpFunctionEnd
)";
SetTargetEnv(SPV_ENV_UNIVERSAL_1_3);
SinglePassRunAndMatch<AggressiveDCEPass>(spirv, true);
}
// TODO(greg-lunarg): Add tests to verify handling of these cases:
//
// Check that logical addressing required