Rewrite local-single-store-elim to not create large data structures.

The local-single-store-elim algorithm is not fundamentally bad.
However, when there are a large number of variables, some of the
maps that are used can become very large.  These large data structures
then take a very long time to be destroyed.  I've seen cases around 40%
of the time.

I've rewritten that algorithm to not use as much memory.  This gives a
significant improvement when running a large number of shaders through
DXC.

I've also made a small change to local-single-block-elim to delete the
loads that it has replaced.  That way local-single-store-elim will not
have to look at those.  local-single-store-elim now does the same thing.

The time for one set goes from 309s down to 126s.  For another set, the
time goes from 102s down to 88s.
This commit is contained in:
Steven Perron 2018-04-11 11:58:47 -04:00
parent 0fa42996b5
commit c20a718e00
6 changed files with 238 additions and 324 deletions

View File

@ -56,16 +56,12 @@ bool DominatorAnalysisBase::Dominates(ir::Instruction* a,
return tree_.Dominates(bb_a, bb_b);
}
for (ir::Instruction& inst : *bb_a) {
if (&inst == a) {
ir::Instruction* current_inst = a;
while ((current_inst = current_inst->NextNode())) {
if (current_inst == b) {
return true;
} else if (&inst == b) {
return false;
}
}
assert(false &&
"We did not find the load or store in the block they are "
"supposed to be in.");
return false;
}

View File

@ -51,10 +51,10 @@ bool LocalSingleBlockLoadStoreElimPass::LocalSingleBlockLoadStoreElim(
ir::Function* func) {
// Perform local store/load and load/load elimination on each block
bool modified = false;
std::vector<ir::Instruction*> instructions_to_kill;
for (auto bi = func->begin(); bi != func->end(); ++bi) {
var2store_.clear();
var2load_.clear();
pinned_vars_.clear();
auto next = bi->begin();
for (auto ii = next; ii != bi->end(); ii = next) {
++next;
@ -67,18 +67,11 @@ bool LocalSingleBlockLoadStoreElimPass::LocalSingleBlockLoadStoreElim(
if (!HasOnlySupportedRefs(varId)) continue;
// Register the store
if (ptrInst->opcode() == SpvOpVariable) {
// if not pinned, look for WAW
if (pinned_vars_.find(varId) == pinned_vars_.end()) {
auto si = var2store_.find(varId);
if (si != var2store_.end()) {
}
}
var2store_[varId] = &*ii;
} else {
assert(IsNonPtrAccessChain(ptrInst->opcode()));
var2store_.erase(varId);
}
pinned_vars_.erase(varId);
var2load_.erase(varId);
} break;
case SpvOpLoad: {
@ -104,11 +97,11 @@ bool LocalSingleBlockLoadStoreElimPass::LocalSingleBlockLoadStoreElim(
// replace load's result id and delete load
context()->KillNamesAndDecorates(&*ii);
context()->ReplaceAllUsesWith(ii->result_id(), replId);
instructions_to_kill.push_back(&*ii);
modified = true;
} else {
if (ptrInst->opcode() == SpvOpVariable)
var2load_[varId] = &*ii; // register load
pinned_vars_.insert(varId);
}
} break;
case SpvOpFunctionCall: {
@ -116,13 +109,17 @@ bool LocalSingleBlockLoadStoreElimPass::LocalSingleBlockLoadStoreElim(
// TODO(): Handle more optimally
var2store_.clear();
var2load_.clear();
pinned_vars_.clear();
} break;
default:
break;
}
}
}
for (ir::Instruction* inst : instructions_to_kill) {
context()->KillInst(inst);
}
return modified;
}
@ -167,6 +164,7 @@ Pass::Status LocalSingleBlockLoadStoreElimPass::ProcessImpl() {
ProcessFunction pfn = [this](ir::Function* fp) {
return LocalSingleBlockLoadStoreElim(fp);
};
bool modified = ProcessEntryPointCallTree(pfn, get_module());
return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange;
}

View File

@ -30,218 +30,24 @@ const uint32_t kVariableInitIdInIdx = 1;
} // anonymous namespace
// Returns true if every use of the pointer |ptrId| — including uses reached
// through non-pointer access chains and OpCopyObject — is one this pass can
// handle: a load, a store, a name, or a non-type decoration.  Positive
// results are cached in supported_ref_ptrs_; negative results are not cached
// and are recomputed on each call.
bool LocalSingleStoreElimPass::HasOnlySupportedRefs(uint32_t ptrId) {
  // Cache hit: this pointer was already proven to have only supported uses.
  if (supported_ref_ptrs_.find(ptrId) != supported_ref_ptrs_.end()) return true;
  if (get_def_use_mgr()->WhileEachUser(ptrId, [this](ir::Instruction* user) {
        SpvOp op = user->opcode();
        if (IsNonPtrAccessChain(op) || op == SpvOpCopyObject) {
          // A derived pointer must itself have only supported uses.
          if (!HasOnlySupportedRefs(user->result_id())) {
            return false;
          }
        } else if (op != SpvOpStore && op != SpvOpLoad && op != SpvOpName &&
                   !IsNonTypeDecorate(op)) {
          // Any other kind of use disqualifies the pointer.
          return false;
        }
        return true;
      })) {
    supported_ref_ptrs_.insert(ptrId);
    return true;
  }
  return false;
}
// Scans |func| and fills ssa_var2store_ with every function-scope variable
// that is stored to exactly once, mapping it to that single store (an
// OpVariable with an initializer counts as its own store).  Variables that
// are stored through access chains, stored more than once, not of a target
// type, or with unsupported uses are recorded in non_ssa_vars_ instead.  The
// block and ordinal position of each qualifying store are remembered in
// store2blk_/store2idx_ for later dominance checks.
void LocalSingleStoreElimPass::SingleStoreAnalyze(ir::Function* func) {
  ssa_var2store_.clear();
  non_ssa_vars_.clear();
  store2idx_.clear();
  store2blk_.clear();
  for (auto bi = func->begin(); bi != func->end(); ++bi) {
    uint32_t instIdx = 0;
    for (auto ii = bi->begin(); ii != bi->end(); ++ii, ++instIdx) {
      uint32_t varId = 0;
      ir::Instruction* ptrInst = nullptr;
      switch (ii->opcode()) {
        case SpvOpStore: {
          ptrInst = GetPtr(&*ii, &varId);
        } break;
        case SpvOpVariable: {
          // If initializer, treat like store
          if (ii->NumInOperands() > 1) {
            varId = ii->result_id();
            ptrInst = &*ii;
          }
        } break;
        default:
          break;
      }  // switch
      if (varId == 0) continue;
      // Already known to be non-SSA; skip further checks.
      if (non_ssa_vars_.find(varId) != non_ssa_vars_.end()) continue;
      // A store through an access chain is a partial store; the variable
      // cannot be treated as SSA.
      if (ptrInst->opcode() != SpvOpVariable) {
        non_ssa_vars_.insert(varId);
        ssa_var2store_.erase(varId);
        continue;
      }
      // Verify target type and function storage class
      if (!IsTargetVar(varId)) {
        non_ssa_vars_.insert(varId);
        continue;
      }
      if (!HasOnlySupportedRefs(varId)) {
        non_ssa_vars_.insert(varId);
        continue;
      }
      // Ignore variables with multiple stores
      if (ssa_var2store_.find(varId) != ssa_var2store_.end()) {
        non_ssa_vars_.insert(varId);
        ssa_var2store_.erase(varId);
        continue;
      }
      // Remember pointer to variable's store and its
      // ordinal position in block
      ssa_var2store_[varId] = &*ii;
      store2idx_[&*ii] = instIdx;
      store2blk_[&*ii] = &*bi;
    }
  }
}
LocalSingleStoreElimPass::GetBlocksFunction
LocalSingleStoreElimPass::AugmentedCFGSuccessorsFunction() const {
  // Returns a lookup function that prefers the augmented-CFG successor list
  // for a block and falls back to the plain CFG successor map otherwise.
  return [this](const ir::BasicBlock* block) {
    auto aug_iter = augmented_successors_map_.find(block);
    if (aug_iter != augmented_successors_map_.end()) return &aug_iter->second;
    return &successors_map_.find(block)->second;
  };
}
LocalSingleStoreElimPass::GetBlocksFunction
LocalSingleStoreElimPass::AugmentedCFGPredecessorsFunction() const {
  // Returns a lookup function that prefers the augmented-CFG predecessor
  // list for a block and falls back to the plain CFG predecessor map.
  return [this](const ir::BasicBlock* block) {
    auto aug_iter = augmented_predecessors_map_.find(block);
    if (aug_iter != augmented_predecessors_map_.end()) return &aug_iter->second;
    return &predecessors_map_.find(block)->second;
  };
}
// Computes the immediate-dominator map idom_ for |func|'s CFG.  First builds
// the plain successor/predecessor maps, then augments the CFG with the
// pseudo entry/exit blocks, and finally runs a postorder traversal plus the
// CFA dominator calculation.  Entries for augmented (pseudo) blocks are not
// added to idom_ beyond what CalculateDominators produces.
void LocalSingleStoreElimPass::CalculateImmediateDominators(
    ir::Function* func) {
  // Compute CFG
  vector<ir::BasicBlock*> ordered_blocks;
  predecessors_map_.clear();
  successors_map_.clear();
  for (auto& blk : *func) {
    ordered_blocks.push_back(&blk);
    // ForEachSuccessorLabel is only available on a const block reference.
    const auto& const_blk = blk;
    const_blk.ForEachSuccessorLabel([&blk, this](const uint32_t sbid) {
      successors_map_[&blk].push_back(label2block_[sbid]);
      predecessors_map_[label2block_[sbid]].push_back(&blk);
    });
  }
  // Compute Augmented CFG
  augmented_successors_map_.clear();
  augmented_predecessors_map_.clear();
  // Seed empty entries so the pseudo blocks resolve in the plain maps too.
  successors_map_[cfg()->pseudo_exit_block()] = {};
  predecessors_map_[cfg()->pseudo_entry_block()] = {};
  auto succ_func = [this](const ir::BasicBlock* b) {
    return &successors_map_[b];
  };
  auto pred_func = [this](const ir::BasicBlock* b) {
    return &predecessors_map_[b];
  };
  CFA<ir::BasicBlock>::ComputeAugmentedCFG(
      ordered_blocks, cfg()->pseudo_entry_block(), cfg()->pseudo_exit_block(),
      &augmented_successors_map_, &augmented_predecessors_map_, succ_func,
      pred_func);
  // Compute Dominators
  vector<const ir::BasicBlock*> postorder;
  auto ignore_block = [](cbb_ptr) {};
  auto ignore_edge = [](cbb_ptr, cbb_ptr) {};
  spvtools::CFA<ir::BasicBlock>::DepthFirstTraversal(
      ordered_blocks[0], AugmentedCFGSuccessorsFunction(), ignore_block,
      [&](cbb_ptr b) { postorder.push_back(b); }, ignore_edge);
  auto edges = spvtools::CFA<ir::BasicBlock>::CalculateDominators(
      postorder, AugmentedCFGPredecessorsFunction());
  idom_.clear();
  for (auto edge : edges) idom_[edge.first] = edge.second;
}
// Returns true if the instruction at ordinal position |idx0| in |blk0|
// dominates the instruction at position |idx1| in |blk1|, by walking the
// immediate-dominator chain of |blk1| (a root block is its own idom).
bool LocalSingleStoreElimPass::Dominates(ir::BasicBlock* blk0, uint32_t idx0,
                                         ir::BasicBlock* blk1, uint32_t idx1) {
  // Within a single block, dominance is just program order.
  if (blk0 == blk1) return idx0 <= idx1;
  ir::BasicBlock* walker = blk1;
  for (;;) {
    ir::BasicBlock* parent = idom_[walker];
    if (parent == walker) return false;  // Reached the root.
    if (parent == blk0) return true;
    walker = parent;
  }
}
// For each whole-variable load of an SSA variable in |func|, replaces all
// uses of the load with the stored value when the single store dominates the
// load.  Assumes SingleStoreAnalyze() has just been run.  Returns true if
// any instruction was modified.
bool LocalSingleStoreElimPass::SingleStoreProcess(ir::Function* func) {
  CalculateImmediateDominators(func);
  bool modified = false;
  for (auto bi = func->begin(); bi != func->end(); ++bi) {
    uint32_t instIdx = 0;
    for (auto ii = bi->begin(); ii != bi->end(); ++ii, ++instIdx) {
      if (ii->opcode() != SpvOpLoad) continue;
      uint32_t varId;
      ir::Instruction* ptrInst = GetPtr(&*ii, &varId);
      // Skip access chain loads
      if (ptrInst->opcode() != SpvOpVariable) continue;
      const auto vsi = ssa_var2store_.find(varId);
      if (vsi == ssa_var2store_.end()) continue;
      if (non_ssa_vars_.find(varId) != non_ssa_vars_.end()) continue;
      // store must dominate load
      if (!Dominates(store2blk_[vsi->second], store2idx_[vsi->second], &*bi,
                     instIdx))
        continue;
      // Determine replacement id depending on OpStore or OpVariable
      uint32_t replId;
      if (vsi->second->opcode() == SpvOpStore)
        replId = vsi->second->GetSingleWordInOperand(kStoreValIdInIdx);
      else
        replId = vsi->second->GetSingleWordInOperand(kVariableInitIdInIdx);
      // Replace all instances of the load's id with the SSA value's id
      // and add load to removal list
      context()->KillNamesAndDecorates(&*ii);
      context()->ReplaceAllUsesWith(ii->result_id(), replId);
      modified = true;
    }
  }
  return modified;
}
// Performs single-store elimination on |func|.  Returns true if the function
// was modified.
// NOTE(review): this span appears to interleave the pre-rewrite body
// (SingleStoreAnalyze/SingleStoreProcess) and the post-rewrite body
// (ProcessVariable loop) of a rendered diff — confirm against the real
// source; the actual revision should contain only one of the two.
bool LocalSingleStoreElimPass::LocalSingleStoreElim(ir::Function* func) {
  bool modified = false;
  SingleStoreAnalyze(func);
  if (ssa_var2store_.empty()) return false;
  modified |= SingleStoreProcess(func);
  // Check all function scope variables in |func|.  They all sit at the top
  // of the entry block, so stop at the first non-OpVariable instruction.
  ir::BasicBlock* entry_block = &*func->begin();
  for (ir::Instruction& inst : *entry_block) {
    if (inst.opcode() != SpvOpVariable) {
      break;
    }
    modified |= ProcessVariable(&inst);
  }
  return modified;
}
// Per-module setup: initializes base-pass processing, rebuilds the
// label-to-block map, clears the per-module caches, and populates the
// extension whitelist.
// NOTE(review): both InitExtensions() and InitExtensionWhiteList() appear
// below — this span interleaves the removed and added lines of a rendered
// diff (the commit renames the former to the latter); the actual source
// should call exactly one of them.
void LocalSingleStoreElimPass::Initialize(ir::IRContext* irContext) {
  InitializeProcessing(irContext);
  // Initialize function and block maps
  label2block_.clear();
  for (auto& fn : *get_module()) {
    for (auto& blk : fn) {
      uint32_t bid = blk.id();
      label2block_[bid] = &blk;
    }
  }
  // Initialize Target Type Caches
  seen_target_vars_.clear();
  seen_non_target_vars_.clear();
  // Initialize Supported Ref Pointer Cache
  supported_ref_ptrs_.clear();
  // Initialize extension whitelist
  InitExtensions();
  InitExtensionWhiteList();
}
bool LocalSingleStoreElimPass::AllExtensionsSupported() const {
@ -259,11 +65,7 @@ Pass::Status LocalSingleStoreElimPass::ProcessImpl() {
// Assumes relaxed logical addressing only (see instruction.h)
if (context()->get_feature_mgr()->HasCapability(SpvCapabilityAddresses))
return Status::SuccessWithoutChange;
// Do not process if module contains OpGroupDecorate. Additional
// support required in KillNamesAndDecorates().
// TODO(greg-lunarg): Add support for OpGroupDecorate
for (auto& ai : get_module()->annotations())
if (ai.opcode() == SpvOpGroupDecorate) return Status::SuccessWithoutChange;
// Do not process if any disallowed extensions are enabled
if (!AllExtensionsSupported()) return Status::SuccessWithoutChange;
// Process all entry point functions
@ -281,8 +83,7 @@ Pass::Status LocalSingleStoreElimPass::Process(ir::IRContext* irContext) {
return ProcessImpl();
}
void LocalSingleStoreElimPass::InitExtensions() {
extensions_whitelist_.clear();
void LocalSingleStoreElimPass::InitExtensionWhiteList() {
extensions_whitelist_.insert({
"SPV_AMD_shader_explicit_vertex_parameter",
"SPV_AMD_shader_trinary_minmax",
@ -319,6 +120,128 @@ void LocalSingleStoreElimPass::InitExtensions() {
"SPV_EXT_descriptor_indexing",
});
}
// If |var_inst| has a single whole-variable store and nothing else can
// modify it, rewrites the loads dominated by that store.  Returns true if
// the module was changed.
bool LocalSingleStoreElimPass::ProcessVariable(ir::Instruction* var_inst) {
  // Gather every use of the variable, including uses through OpCopyObject.
  std::vector<ir::Instruction*> var_users;
  FindUses(var_inst, &var_users);
  // Proceed only when exactly one whole-variable store exists.
  if (ir::Instruction* single_store =
          FindSingleStoreAndCheckUses(var_inst, var_users)) {
    return RewriteLoads(single_store, var_users);
  }
  return false;
}
// Returns the single store to |var_inst| if
// - it is a store to the entire variable, and
// - no other instruction in |users| may modify |var_inst|.
// Returns nullptr otherwise.  An OpVariable initializer counts as a store.
ir::Instruction* LocalSingleStoreElimPass::FindSingleStoreAndCheckUses(
    ir::Instruction* var_inst, const vector<ir::Instruction*>& users) const {
  // Make sure there is exactly 1 store.
  ir::Instruction* store_inst = nullptr;
  // If |var_inst| has an initializer, then that will count as a store.
  if (var_inst->NumInOperands() > 1) {
    store_inst = var_inst;
  }
  for (ir::Instruction* user : users) {
    switch (user->opcode()) {
      case SpvOpStore:
        // Since we are in the relaxed addressing mode, the use has to be the
        // base address of the store, and not the value being stored.
        // Otherwise, we would have a pointer to a pointer to function scope
        // memory, which is not allowed.
        if (store_inst == nullptr) {
          store_inst = user;
        } else {
          // More than 1 store.
          return nullptr;
        }
        break;
      case SpvOpAccessChain:
      case SpvOpInBoundsAccessChain:
        if (FeedsAStore(user)) {
          // Has a partial store. Cannot propagate that.
          return nullptr;
        }
        break;
      case SpvOpLoad:
      case SpvOpImageTexelPointer:
      case SpvOpName:
      case SpvOpCopyObject:
        // Reads and metadata uses cannot modify the variable.
        break;
      default:
        if (!user->IsDecoration()) {
          // Don't know if this instruction modifies the variable.
          // Conservatively assume it is a store.
          return nullptr;
        }
        break;
    }
  }
  return store_inst;
}
// Collects all uses of |var_inst| into |users|, looking through
// OpCopyObject so that uses of a copied pointer are attributed to the
// original variable as well.
void LocalSingleStoreElimPass::FindUses(
    const ir::Instruction* var_inst,
    std::vector<ir::Instruction*>* users) const {
  context()->get_def_use_mgr()->ForEachUser(
      var_inst, [this, users](ir::Instruction* use) {
        users->push_back(use);
        // A copy of the pointer aliases the variable; recurse into its uses.
        if (use->opcode() == SpvOpCopyObject) FindUses(use, users);
      });
}
// Returns true if the pointer produced by |inst| may be used — directly or
// through further access chains / copies — as the base address of a store.
// The WhileEachUser walk returns false (stop) as soon as a store is
// possible, so the function's result is the negation of the walk result.
bool LocalSingleStoreElimPass::FeedsAStore(ir::Instruction* inst) const {
  analysis::DefUseManager* def_use_mgr = context()->get_def_use_mgr();
  return !def_use_mgr->WhileEachUser(inst, [this](ir::Instruction* user) {
    switch (user->opcode()) {
      case SpvOpStore:
        // A direct store through this pointer: stop the walk.
        return false;
      case SpvOpAccessChain:
      case SpvOpInBoundsAccessChain:
      case SpvOpCopyObject:
        // A derived pointer: continue only if it cannot feed a store.
        return !FeedsAStore(user);
      case SpvOpLoad:
      case SpvOpImageTexelPointer:
      case SpvOpName:
        // Read-only and metadata uses never store.
        return true;
      default:
        // Don't know if this instruction modifies the variable.
        // Conservatively assume it is a store.
        return user->IsDecoration();
    }
  });
}
// Replaces every OpLoad in |uses| that is dominated by |store_inst| with the
// value that |store_inst| writes, then kills the load.  |store_inst| is
// either an OpStore or an OpVariable with an initializer.  Returns true if
// the module was modified.
// Fix: removed the local |uses_in_store_block|, which was declared but never
// used.
bool LocalSingleStoreElimPass::RewriteLoads(
    ir::Instruction* store_inst, const std::vector<ir::Instruction*>& uses) {
  ir::BasicBlock* store_block = context()->get_instr_block(store_inst);
  opt::DominatorAnalysis* dominator_analysis =
      context()->GetDominatorAnalysis(store_block->GetParent(), *cfg());
  // The replacement id is the stored value for an OpStore, or the
  // initializer operand for an OpVariable.
  uint32_t stored_id;
  if (store_inst->opcode() == SpvOpStore)
    stored_id = store_inst->GetSingleWordInOperand(kStoreValIdInIdx);
  else
    stored_id = store_inst->GetSingleWordInOperand(kVariableInitIdInIdx);
  bool modified = false;
  for (ir::Instruction* use : uses) {
    // Only whole-variable loads are rewritten; other uses are left alone.
    if (use->opcode() != SpvOpLoad) continue;
    if (dominator_analysis->Dominates(store_inst, use)) {
      modified = true;
      context()->KillNamesAndDecorates(use->result_id());
      context()->ReplaceAllUsesWith(use->result_id(), stored_id);
      context()->KillInst(use);
    }
  }
  return modified;
}
} // namespace opt
} // namespace spvtools

View File

@ -33,7 +33,7 @@ namespace spvtools {
namespace opt {
// See optimizer.hpp for documentation.
class LocalSingleStoreElimPass : public MemPass {
class LocalSingleStoreElimPass : public Pass {
using cbb_ptr = const ir::BasicBlock*;
public:
@ -46,45 +46,6 @@ class LocalSingleStoreElimPass : public MemPass {
}
private:
// Return true if all refs through |ptrId| are only loads or stores and
// cache ptrId in supported_ref_ptrs_. TODO(dnovillo): This function is
// replicated in other passes and it's slightly different in every pass. Is it
// possible to make one common implementation?
bool HasOnlySupportedRefs(uint32_t ptrId);
// Find all function scope variables in |func| that are stored to
// only once (SSA) and map to their stored value id. Only analyze
// variables of scalar, vector, matrix types and struct and array
// types comprising only these types. Currently this analysis is
// is not done in the presence of function calls. TODO(): Allow
// analysis in the presence of function calls.
void SingleStoreAnalyze(ir::Function* func);
using GetBlocksFunction =
std::function<const std::vector<ir::BasicBlock*>*(const ir::BasicBlock*)>;
/// Returns the block successors function for the augmented CFG.
GetBlocksFunction AugmentedCFGSuccessorsFunction() const;
/// Returns the block predecessors function for the augmented CFG.
GetBlocksFunction AugmentedCFGPredecessorsFunction() const;
// Calculate immediate dominators for |func|'s CFG. Leaves result
// in idom_. Entries for augmented CFG (pseudo blocks) are not created.
// TODO(dnovillo): Move to new CFG class.
void CalculateImmediateDominators(ir::Function* func);
// Return true if instruction in |blk0| at ordinal position |idx0|
// dominates instruction in |blk1| at position |idx1|.
bool Dominates(ir::BasicBlock* blk0, uint32_t idx0, ir::BasicBlock* blk1,
uint32_t idx1);
// For each load of an SSA variable in |func|, replace all uses of
// the load with the value stored if the store dominates the load.
// Assumes that SingleStoreAnalyze() has just been run. Return true
// if any instructions are modified.
bool SingleStoreProcess(ir::Function* func);
// Do "single-store" optimization of function variables defined only
// with a single non-access-chain store in |func|. Replace all their
// non-access-chain loads with the value that is stored and eliminate
@ -92,7 +53,7 @@ class LocalSingleStoreElimPass : public MemPass {
bool LocalSingleStoreElim(ir::Function* func);
// Initialize extensions whitelist
void InitExtensions();
void InitExtensionWhiteList();
// Return true if all extensions in this module are allowed by this pass.
bool AllExtensionsSupported() const;
@ -100,44 +61,34 @@ class LocalSingleStoreElimPass : public MemPass {
void Initialize(ir::IRContext* irContext);
Pass::Status ProcessImpl();
// Map from block's label id to block
std::unordered_map<uint32_t, ir::BasicBlock*> label2block_;
// If there is a single store to |var_inst|, and it covers the entire
// variable, then replace all of the loads of the entire variable that are
// dominated by the store by the value that was stored. Returns true if the
// module was changed.
bool ProcessVariable(ir::Instruction* var_inst);
// Map from SSA Variable to its single store
std::unordered_map<uint32_t, ir::Instruction*> ssa_var2store_;
// Collects all of the uses of |var_inst| into |uses|. This looks through
// OpObjectCopy's that copy the address of the variable, and collects those
// uses as well.
void FindUses(const ir::Instruction* var_inst,
std::vector<ir::Instruction*>* uses) const;
// Map from store to its ordinal position in its block.
std::unordered_map<ir::Instruction*, uint32_t> store2idx_;
// Returns a store to |var_inst| if
// - it is a store to the entire variable,
// - and there are no other instructions that may modify |var_inst|.
ir::Instruction* FindSingleStoreAndCheckUses(
ir::Instruction* var_inst,
const std::vector<ir::Instruction*>& users) const;
// Map from store to its block.
std::unordered_map<ir::Instruction*, ir::BasicBlock*> store2blk_;
// Returns true if the address that results from |inst| may be used as a base
// address in a store instruction or may be used to compute the base address
// of a store instruction.
bool FeedsAStore(ir::Instruction* inst) const;
// Set of non-SSA Variables
std::unordered_set<uint32_t> non_ssa_vars_;
// Variables with only supported references, ie. loads and stores using
// variable directly or through non-ptr access chains.
std::unordered_set<uint32_t> supported_ref_ptrs_;
// CFG Predecessors
std::unordered_map<const ir::BasicBlock*, std::vector<ir::BasicBlock*>>
predecessors_map_;
// CFG Successors
std::unordered_map<const ir::BasicBlock*, std::vector<ir::BasicBlock*>>
successors_map_;
// CFG Augmented Predecessors
std::unordered_map<const ir::BasicBlock*, std::vector<ir::BasicBlock*>>
augmented_predecessors_map_;
// CFG Augmented Successors
std::unordered_map<const ir::BasicBlock*, std::vector<ir::BasicBlock*>>
augmented_successors_map_;
// Immediate Dominator Map
// If block has no idom it points to itself.
std::unordered_map<ir::BasicBlock*, ir::BasicBlock*> idom_;
// Replaces all of the loads in |uses| by the value stored in |store_inst|.
// The load instructions are then killed.
bool RewriteLoads(ir::Instruction* store_inst,
const std::vector<ir::Instruction*>& uses);
// Extensions supported by this pass.
std::unordered_set<std::string> extensions_whitelist_;

View File

@ -73,7 +73,6 @@ OpFunctionEnd
%v = OpVariable %_ptr_Function_v4float Function
%14 = OpLoad %v4float %BaseColor
OpStore %v %14
%15 = OpLoad %v4float %v
OpStore %gl_FragColor %14
OpReturn
OpFunctionEnd
@ -174,7 +173,6 @@ OpBranch %29
%31 = OpLoad %v4float %v
%32 = OpAccessChain %_ptr_Output_v4float %gl_FragData %int_0
OpStore %32 %31
%33 = OpLoad %v4float %v
%34 = OpAccessChain %_ptr_Output_v4float %gl_FragData %int_1
OpStore %34 %31
OpReturn
@ -263,8 +261,6 @@ OpStore %v %19
%21 = OpAccessChain %_ptr_Function_float %v %20
%22 = OpLoad %float %21
OpStore %f %22
%23 = OpLoad %v4float %v
%24 = OpLoad %float %f
%25 = OpCompositeConstruct %v4float %22 %22 %22 %22
%26 = OpFDiv %v4float %19 %25
OpStore %gl_FragColor %26
@ -464,14 +460,12 @@ OpFunctionEnd
%v2 = OpVariable %_ptr_Function_v4float Function
%23 = OpLoad %v4float %BaseColor
OpStore %v1 %23
%24 = OpLoad %v4float %v1
%25 = OpAccessChain %_ptr_Output_v4float %gl_FragData %int_0
OpStore %25 %23
%26 = OpLoad %v4float %BaseColor
%27 = OpVectorTimesScalar %v4float %26 %float_0_5
%28 = OpCopyObject %_ptr_Function_v4float %v2
OpStore %28 %27
%29 = OpLoad %v4float %28
%30 = OpAccessChain %_ptr_Output_v4float %gl_FragData %int_1
OpStore %30 %27
OpReturn
@ -562,14 +556,10 @@ OpFunctionEnd
%31 = OpCompositeInsert %S_t %29 %30 0
OpStore %s0 %31
%32 = OpLoad %18 %sampler15
%33 = OpLoad %S_t %s0
%34 = OpCompositeInsert %S_t %32 %31 2
OpStore %s0 %34
%35 = OpLoad %S_t %s0
OpStore %param %34
%36 = OpLoad %S_t %param
%37 = OpCompositeExtract %18 %34 2
%38 = OpLoad %S_t %param
%39 = OpCompositeExtract %v2float %34 0
%40 = OpImageSampleImplicitLod %v4float %37 %39
OpStore %outColor %40
@ -681,7 +671,6 @@ OpFunctionEnd
%t_0 = OpVariable %_ptr_Function_v4float Function
%27 = OpLoad %v4float %v1_0
OpStore %t_0 %27
%28 = OpLoad %v4float %t_0
OpReturnValue %27
OpFunctionEnd
)";
@ -778,7 +767,6 @@ OpDecorate %7 Binding 0
%23 = OpLabel
%24 = OpVariable %_ptr_Function__ptr_Uniform__struct_5 Function
OpStore %24 %7
%26 = OpLoad %_ptr_Uniform__struct_5 %24
%27 = OpAccessChain %_ptr_Uniform_v4float %7 %int_0 %uint_0 %int_0
%28 = OpLoad %v4float %27
%29 = OpCopyObject %v4float %28

View File

@ -112,7 +112,6 @@ OpBranchConditional %23 %25 %24
OpStore %f %float_0
OpBranch %24
%24 = OpLabel
%26 = OpLoad %v4float %v
%27 = OpLoad %float %f
%28 = OpCompositeConstruct %v4float %27 %27 %27 %27
%29 = OpFAdd %v4float %20 %28
@ -125,6 +124,72 @@ OpFunctionEnd
predefs + before, predefs + after, true, true);
}
// Verifies that a variable with three stores is left untouched: the pass
// only rewrites loads of variables with a single whole-variable store.
TEST_F(LocalSingleStoreElimTest, ThreeStores) {
  // Three stores to multiple loads of v is not optimized.
  const std::string predefs =
      R"(OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %main "main" %BaseColor %fi %gl_FragColor
OpExecutionMode %main OriginUpperLeft
OpSource GLSL 140
OpName %main "main"
OpName %v "v"
OpName %BaseColor "BaseColor"
OpName %fi "fi"
OpName %r "r"
OpName %gl_FragColor "gl_FragColor"
%void = OpTypeVoid
%9 = OpTypeFunction %void
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%_ptr_Function_v4float = OpTypePointer Function %v4float
%_ptr_Input_v4float = OpTypePointer Input %v4float
%BaseColor = OpVariable %_ptr_Input_v4float Input
%_ptr_Input_float = OpTypePointer Input %float
%fi = OpVariable %_ptr_Input_float Input
%float_0 = OpConstant %float 0
%bool = OpTypeBool
%float_1 = OpConstant %float 1
%_ptr_Output_v4float = OpTypePointer Output %v4float
%gl_FragColor = OpVariable %_ptr_Output_v4float Output
)";
  const std::string before =
      R"(%main = OpFunction %void None %9
%19 = OpLabel
%v = OpVariable %_ptr_Function_v4float Function
%r = OpVariable %_ptr_Function_v4float Function
%20 = OpLoad %v4float %BaseColor
OpStore %v %20
%21 = OpLoad %float %fi
%22 = OpFOrdLessThan %bool %21 %float_0
OpSelectionMerge %23 None
OpBranchConditional %22 %24 %25
%24 = OpLabel
%26 = OpLoad %v4float %v
OpStore %v %26
OpStore %r %26
OpBranch %23
%25 = OpLabel
%27 = OpLoad %v4float %v
%28 = OpCompositeConstruct %v4float %float_1 %float_1 %float_1 %float_1
OpStore %v %28
%29 = OpFSub %v4float %28 %27
OpStore %r %29
OpBranch %23
%23 = OpLabel
%30 = OpLoad %v4float %r
OpStore %gl_FragColor %30
OpReturn
OpFunctionEnd
)";
  // Expected output equals the input: no rewriting should occur.
  SinglePassRunAndCheck<opt::LocalSingleStoreElimPass>(
      predefs + before, predefs + before, true, true);
}
TEST_F(LocalSingleStoreElimTest, MultipleLoads) {
// Single store to multiple loads of v is optimized.
//
@ -211,11 +276,9 @@ OpStore %v %20
OpSelectionMerge %23 None
OpBranchConditional %22 %24 %25
%24 = OpLabel
%26 = OpLoad %v4float %v
OpStore %r %20
OpBranch %23
%25 = OpLabel
%27 = OpLoad %v4float %v
%28 = OpCompositeConstruct %v4float %float_1 %float_1 %float_1 %float_1
%29 = OpFSub %v4float %28 %20
OpStore %r %29
@ -299,8 +362,6 @@ OpStore %v %18
%19 = OpAccessChain %_ptr_Function_float %v %uint_3
%20 = OpLoad %float %19
OpStore %f %20
%21 = OpLoad %v4float %v
%22 = OpLoad %float %f
%23 = OpVectorTimesScalar %v4float %18 %20
OpStore %gl_FragColor %23
OpReturn
@ -457,7 +518,6 @@ OpStore %f %float_0
OpBranch %24
%24 = OpLabel
%26 = OpCopyObject %_ptr_Function_v4float %v
%27 = OpLoad %v4float %26
%28 = OpLoad %float %f
%29 = OpCompositeConstruct %v4float %28 %28 %28 %28
%30 = OpFAdd %v4float %20 %29
@ -599,7 +659,6 @@ OpFunctionEnd
R"(%main = OpFunction %void None %6
%12 = OpLabel
%f = OpVariable %_ptr_Function_float Function %float_0
%13 = OpLoad %float %f
%14 = OpCompositeConstruct %v4float %float_0 %float_0 %float_0 %float_0
OpStore %gl_FragColor %14
OpReturn
@ -698,7 +757,6 @@ OpDecorate %7 Binding 0
%23 = OpLabel
%24 = OpVariable %_ptr_Function__ptr_Uniform__struct_5 Function
OpStore %24 %7
%26 = OpLoad %_ptr_Uniform__struct_5 %24
%27 = OpAccessChain %_ptr_Uniform_v4float %7 %int_0 %uint_0 %int_0
%28 = OpLoad %v4float %27
%29 = OpCopyObject %v4float %28