mirror of
https://github.com/KhronosGroup/SPIRV-Tools
synced 2024-11-22 11:40:05 +00:00
Rewrite local-single-store-elim to not create large data structures.
The local-single-store-elim algorithm is not fundamentally bad. However, when there are a large number of variables, some of the maps that are used can become very large. These large data structures then take a very long time to be destroyed. I've seen cases where this accounts for around 40% of the time. I've rewritten that algorithm to not use as much memory. This gives a significant improvement when running a large number of shaders through DXC. I've also made a small change to local-single-block-elim to delete the loads that it has replaced. That way local-single-store-elim will not have to look at those. local-single-store-elim now does the same thing. The time for one set goes from 309s down to 126s. For another set, the time goes from 102s down to 88s.
This commit is contained in:
parent
0fa42996b5
commit
c20a718e00
@ -56,16 +56,12 @@ bool DominatorAnalysisBase::Dominates(ir::Instruction* a,
|
||||
return tree_.Dominates(bb_a, bb_b);
|
||||
}
|
||||
|
||||
for (ir::Instruction& inst : *bb_a) {
|
||||
if (&inst == a) {
|
||||
ir::Instruction* current_inst = a;
|
||||
while ((current_inst = current_inst->NextNode())) {
|
||||
if (current_inst == b) {
|
||||
return true;
|
||||
} else if (&inst == b) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
assert(false &&
|
||||
"We did not find the load or store in the block they are "
|
||||
"supposed to be in.");
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -51,10 +51,10 @@ bool LocalSingleBlockLoadStoreElimPass::LocalSingleBlockLoadStoreElim(
|
||||
ir::Function* func) {
|
||||
// Perform local store/load and load/load elimination on each block
|
||||
bool modified = false;
|
||||
std::vector<ir::Instruction*> instructions_to_kill;
|
||||
for (auto bi = func->begin(); bi != func->end(); ++bi) {
|
||||
var2store_.clear();
|
||||
var2load_.clear();
|
||||
pinned_vars_.clear();
|
||||
auto next = bi->begin();
|
||||
for (auto ii = next; ii != bi->end(); ii = next) {
|
||||
++next;
|
||||
@ -67,18 +67,11 @@ bool LocalSingleBlockLoadStoreElimPass::LocalSingleBlockLoadStoreElim(
|
||||
if (!HasOnlySupportedRefs(varId)) continue;
|
||||
// Register the store
|
||||
if (ptrInst->opcode() == SpvOpVariable) {
|
||||
// if not pinned, look for WAW
|
||||
if (pinned_vars_.find(varId) == pinned_vars_.end()) {
|
||||
auto si = var2store_.find(varId);
|
||||
if (si != var2store_.end()) {
|
||||
}
|
||||
}
|
||||
var2store_[varId] = &*ii;
|
||||
} else {
|
||||
assert(IsNonPtrAccessChain(ptrInst->opcode()));
|
||||
var2store_.erase(varId);
|
||||
}
|
||||
pinned_vars_.erase(varId);
|
||||
var2load_.erase(varId);
|
||||
} break;
|
||||
case SpvOpLoad: {
|
||||
@ -104,11 +97,11 @@ bool LocalSingleBlockLoadStoreElimPass::LocalSingleBlockLoadStoreElim(
|
||||
// replace load's result id and delete load
|
||||
context()->KillNamesAndDecorates(&*ii);
|
||||
context()->ReplaceAllUsesWith(ii->result_id(), replId);
|
||||
instructions_to_kill.push_back(&*ii);
|
||||
modified = true;
|
||||
} else {
|
||||
if (ptrInst->opcode() == SpvOpVariable)
|
||||
var2load_[varId] = &*ii; // register load
|
||||
pinned_vars_.insert(varId);
|
||||
}
|
||||
} break;
|
||||
case SpvOpFunctionCall: {
|
||||
@ -116,13 +109,17 @@ bool LocalSingleBlockLoadStoreElimPass::LocalSingleBlockLoadStoreElim(
|
||||
// TODO(): Handle more optimally
|
||||
var2store_.clear();
|
||||
var2load_.clear();
|
||||
pinned_vars_.clear();
|
||||
} break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (ir::Instruction* inst : instructions_to_kill) {
|
||||
context()->KillInst(inst);
|
||||
}
|
||||
|
||||
return modified;
|
||||
}
|
||||
|
||||
@ -167,6 +164,7 @@ Pass::Status LocalSingleBlockLoadStoreElimPass::ProcessImpl() {
|
||||
ProcessFunction pfn = [this](ir::Function* fp) {
|
||||
return LocalSingleBlockLoadStoreElim(fp);
|
||||
};
|
||||
|
||||
bool modified = ProcessEntryPointCallTree(pfn, get_module());
|
||||
return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange;
|
||||
}
|
||||
|
@ -30,218 +30,24 @@ const uint32_t kVariableInitIdInIdx = 1;
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
bool LocalSingleStoreElimPass::HasOnlySupportedRefs(uint32_t ptrId) {
|
||||
if (supported_ref_ptrs_.find(ptrId) != supported_ref_ptrs_.end()) return true;
|
||||
if (get_def_use_mgr()->WhileEachUser(ptrId, [this](ir::Instruction* user) {
|
||||
SpvOp op = user->opcode();
|
||||
if (IsNonPtrAccessChain(op) || op == SpvOpCopyObject) {
|
||||
if (!HasOnlySupportedRefs(user->result_id())) {
|
||||
return false;
|
||||
}
|
||||
} else if (op != SpvOpStore && op != SpvOpLoad && op != SpvOpName &&
|
||||
!IsNonTypeDecorate(op)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
})) {
|
||||
supported_ref_ptrs_.insert(ptrId);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Scans |func| and records, for each variable that is written exactly once
// through the variable itself (an OpStore, or an OpVariable initializer), the
// writing instruction in |ssa_var2store_| together with its ordinal position
// (|store2idx_|) and enclosing block (|store2blk_|). Variables that fail any
// check are placed in |non_ssa_vars_| and are not reconsidered.
void LocalSingleStoreElimPass::SingleStoreAnalyze(ir::Function* func) {
  ssa_var2store_.clear();
  non_ssa_vars_.clear();
  store2idx_.clear();
  store2blk_.clear();

  for (ir::BasicBlock& block : *func) {
    uint32_t next_idx = 0;
    for (ir::Instruction& inst : block) {
      // Ordinal position of |inst| in its block; advances for every
      // instruction, including the ones skipped below.
      const uint32_t inst_idx = next_idx++;

      uint32_t var_id = 0;
      ir::Instruction* ptr_inst = nullptr;
      const SpvOp opcode = inst.opcode();
      if (opcode == SpvOpStore) {
        ptr_inst = GetPtr(&inst, &var_id);
      } else if (opcode == SpvOpVariable && inst.NumInOperands() > 1) {
        // A variable with an initializer is treated like a store.
        var_id = inst.result_id();
        ptr_inst = &inst;
      }
      if (var_id == 0) continue;

      // Already rejected; nothing more to learn.
      if (non_ssa_vars_.count(var_id) != 0) continue;

      // A store that does not go directly through the variable
      // disqualifies it.
      if (ptr_inst->opcode() != SpvOpVariable) {
        non_ssa_vars_.insert(var_id);
        ssa_var2store_.erase(var_id);
        continue;
      }

      // The variable must be a target variable and must only be referenced
      // in supported ways.
      if (!IsTargetVar(var_id) || !HasOnlySupportedRefs(var_id)) {
        non_ssa_vars_.insert(var_id);
        continue;
      }

      // A second store means the variable is not single-store.
      const auto previous = ssa_var2store_.find(var_id);
      if (previous != ssa_var2store_.end()) {
        non_ssa_vars_.insert(var_id);
        ssa_var2store_.erase(previous);
        continue;
      }

      // Remember the variable's store, its ordinal position in the block,
      // and the block itself.
      ssa_var2store_[var_id] = &inst;
      store2idx_[&inst] = inst_idx;
      store2blk_[&inst] = &block;
    }
  }
}
|
||||
|
||||
// Returns a callable that maps a block to its successor list. The entry in
// |augmented_successors_map_| is used when one exists; otherwise the entry
// in |successors_map_| is used. The callable does not check that the block is
// present in |successors_map_|.
LocalSingleStoreElimPass::GetBlocksFunction
LocalSingleStoreElimPass::AugmentedCFGSuccessorsFunction() const {
  return [this](const ir::BasicBlock* block) {
    auto entry = augmented_successors_map_.find(block);
    if (entry == augmented_successors_map_.end()) {
      entry = successors_map_.find(block);
    }
    return &entry->second;
  };
}
|
||||
|
||||
// Returns a callable that maps a block to its predecessor list. The entry in
// |augmented_predecessors_map_| is used when one exists; otherwise the entry
// in |predecessors_map_| is used. The callable does not check that the block
// is present in |predecessors_map_|.
LocalSingleStoreElimPass::GetBlocksFunction
LocalSingleStoreElimPass::AugmentedCFGPredecessorsFunction() const {
  return [this](const ir::BasicBlock* block) {
    auto entry = augmented_predecessors_map_.find(block);
    if (entry == augmented_predecessors_map_.end()) {
      entry = predecessors_map_.find(block);
    }
    return &entry->second;
  };
}
|
||||
|
||||
// Rebuilds the CFG edge maps for |func|, derives the augmented CFG from them,
// and stores the dominator edges computed over the augmented CFG in |idom_|.
void LocalSingleStoreElimPass::CalculateImmediateDominators(
    ir::Function* func) {
  // Step 1: plain CFG. Record every block in function order together with
  // its successor and predecessor edges.
  predecessors_map_.clear();
  successors_map_.clear();
  vector<ir::BasicBlock*> blocks_in_order;
  for (auto& block : *func) {
    ir::BasicBlock* const from = &block;
    blocks_in_order.push_back(from);
    const auto& read_only_block = block;
    read_only_block.ForEachSuccessorLabel(
        [from, this](const uint32_t label_id) {
          ir::BasicBlock* const to = label2block_[label_id];
          successors_map_[from].push_back(to);
          predecessors_map_[to].push_back(from);
        });
  }

  // Step 2: augmented CFG. The pseudo exit/entry blocks get explicit empty
  // edge lists before the augmented maps are computed.
  augmented_successors_map_.clear();
  augmented_predecessors_map_.clear();
  successors_map_[cfg()->pseudo_exit_block()] = {};
  predecessors_map_[cfg()->pseudo_entry_block()] = {};
  auto successors_of = [this](const ir::BasicBlock* b) {
    return &successors_map_[b];
  };
  auto predecessors_of = [this](const ir::BasicBlock* b) {
    return &predecessors_map_[b];
  };
  CFA<ir::BasicBlock>::ComputeAugmentedCFG(
      blocks_in_order, cfg()->pseudo_entry_block(), cfg()->pseudo_exit_block(),
      &augmented_successors_map_, &augmented_predecessors_map_, successors_of,
      predecessors_of);

  // Step 3: dominators. Collect a post-order of the augmented CFG starting
  // at the first block, then compute the dominator edges from it.
  vector<const ir::BasicBlock*> post_order;
  auto skip_block = [](cbb_ptr) {};
  auto skip_edge = [](cbb_ptr, cbb_ptr) {};
  spvtools::CFA<ir::BasicBlock>::DepthFirstTraversal(
      blocks_in_order[0], AugmentedCFGSuccessorsFunction(), skip_block,
      [&](cbb_ptr b) { post_order.push_back(b); }, skip_edge);
  auto dominator_edges = spvtools::CFA<ir::BasicBlock>::CalculateDominators(
      post_order, AugmentedCFGPredecessorsFunction());

  idom_.clear();
  for (const auto& edge : dominator_edges) {
    idom_[edge.first] = edge.second;
  }
}
|
||||
|
||||
// Returns true if the instruction at position |idx0| of |blk0| dominates the
// instruction at position |idx1| of |blk1|. Within a single block this is an
// ordinal comparison; across blocks, |idom_| is followed upward from |blk1|
// until |blk0| is reached or a block that is its own |idom_| entry ends the
// walk.
bool LocalSingleStoreElimPass::Dominates(ir::BasicBlock* blk0, uint32_t idx0,
                                         ir::BasicBlock* blk1, uint32_t idx1) {
  if (blk0 == blk1) return idx0 <= idx1;

  for (ir::BasicBlock* cursor = blk1;;) {
    ir::BasicBlock* const parent = idom_[cursor];
    // A block that is its own immediate dominator ends the chain.
    if (parent == cursor) return false;
    if (parent == blk0) return true;
    cursor = parent;
  }
}
|
||||
|
||||
// For every load in |func| that reads a variable recorded in
// |ssa_var2store_| directly, and that is dominated by that variable's single
// store, replaces all uses of the load's result with the stored value.
// Recomputes immediate dominators first. Returns true if any use was
// replaced. The load instruction itself is left in place.
bool LocalSingleStoreElimPass::SingleStoreProcess(ir::Function* func) {
  CalculateImmediateDominators(func);

  bool changed = false;
  for (ir::BasicBlock& block : *func) {
    uint32_t next_idx = 0;
    for (ir::Instruction& inst : block) {
      // Ordinal position of |inst|; advances for skipped instructions too.
      const uint32_t load_idx = next_idx++;
      if (inst.opcode() != SpvOpLoad) continue;

      uint32_t var_id = 0;
      ir::Instruction* const ptr_inst = GetPtr(&inst, &var_id);
      // Loads through access chains are not handled here.
      if (ptr_inst->opcode() != SpvOpVariable) continue;

      const auto entry = ssa_var2store_.find(var_id);
      if (entry == ssa_var2store_.end()) continue;
      if (non_ssa_vars_.count(var_id) != 0) continue;

      // The single store must dominate this load.
      ir::Instruction* const store = entry->second;
      if (!Dominates(store2blk_[store], store2idx_[store], &block, load_idx)) {
        continue;
      }

      // The replacement value is the stored operand of an OpStore, or the
      // initializer operand otherwise.
      const uint32_t value_operand = (store->opcode() == SpvOpStore)
                                         ? kStoreValIdInIdx
                                         : kVariableInitIdInIdx;
      const uint32_t repl_id = store->GetSingleWordInOperand(value_operand);

      // Redirect every use of the load's result to the stored value.
      context()->KillNamesAndDecorates(&inst);
      context()->ReplaceAllUsesWith(inst.result_id(), repl_id);
      changed = true;
    }
  }
  return changed;
}
|
||||
|
||||
bool LocalSingleStoreElimPass::LocalSingleStoreElim(ir::Function* func) {
|
||||
bool modified = false;
|
||||
SingleStoreAnalyze(func);
|
||||
if (ssa_var2store_.empty()) return false;
|
||||
modified |= SingleStoreProcess(func);
|
||||
|
||||
// Check all function scope variables in |func|.
|
||||
ir::BasicBlock* entry_block = &*func->begin();
|
||||
for (ir::Instruction& inst : *entry_block) {
|
||||
if (inst.opcode() != SpvOpVariable) {
|
||||
break;
|
||||
}
|
||||
|
||||
modified |= ProcessVariable(&inst);
|
||||
}
|
||||
return modified;
|
||||
}
|
||||
|
||||
void LocalSingleStoreElimPass::Initialize(ir::IRContext* irContext) {
|
||||
InitializeProcessing(irContext);
|
||||
|
||||
// Initialize function and block maps
|
||||
label2block_.clear();
|
||||
for (auto& fn : *get_module()) {
|
||||
for (auto& blk : fn) {
|
||||
uint32_t bid = blk.id();
|
||||
label2block_[bid] = &blk;
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize Target Type Caches
|
||||
seen_target_vars_.clear();
|
||||
seen_non_target_vars_.clear();
|
||||
|
||||
// Initialize Supported Ref Pointer Cache
|
||||
supported_ref_ptrs_.clear();
|
||||
|
||||
// Initialize extension whitelist
|
||||
InitExtensions();
|
||||
InitExtensionWhiteList();
|
||||
}
|
||||
|
||||
bool LocalSingleStoreElimPass::AllExtensionsSupported() const {
|
||||
@ -259,11 +65,7 @@ Pass::Status LocalSingleStoreElimPass::ProcessImpl() {
|
||||
// Assumes relaxed logical addressing only (see instruction.h)
|
||||
if (context()->get_feature_mgr()->HasCapability(SpvCapabilityAddresses))
|
||||
return Status::SuccessWithoutChange;
|
||||
// Do not process if module contains OpGroupDecorate. Additional
|
||||
// support required in KillNamesAndDecorates().
|
||||
// TODO(greg-lunarg): Add support for OpGroupDecorate
|
||||
for (auto& ai : get_module()->annotations())
|
||||
if (ai.opcode() == SpvOpGroupDecorate) return Status::SuccessWithoutChange;
|
||||
|
||||
// Do not process if any disallowed extensions are enabled
|
||||
if (!AllExtensionsSupported()) return Status::SuccessWithoutChange;
|
||||
// Process all entry point functions
|
||||
@ -281,8 +83,7 @@ Pass::Status LocalSingleStoreElimPass::Process(ir::IRContext* irContext) {
|
||||
return ProcessImpl();
|
||||
}
|
||||
|
||||
void LocalSingleStoreElimPass::InitExtensions() {
|
||||
extensions_whitelist_.clear();
|
||||
void LocalSingleStoreElimPass::InitExtensionWhiteList() {
|
||||
extensions_whitelist_.insert({
|
||||
"SPV_AMD_shader_explicit_vertex_parameter",
|
||||
"SPV_AMD_shader_trinary_minmax",
|
||||
@ -319,6 +120,128 @@ void LocalSingleStoreElimPass::InitExtensions() {
|
||||
"SPV_EXT_descriptor_indexing",
|
||||
});
|
||||
}
|
||||
// Gathers the uses of |var_inst|, looks for its single qualifying store, and
// if one exists rewrites the loads among those uses. Returns the result of
// RewriteLoads(), or false when no qualifying store is found.
bool LocalSingleStoreElimPass::ProcessVariable(ir::Instruction* var_inst) {
  vector<ir::Instruction*> users;
  FindUses(var_inst, &users);

  if (ir::Instruction* const single_store =
          FindSingleStoreAndCheckUses(var_inst, users)) {
    return RewriteLoads(single_store, users);
  }
  return false;
}
|
||||
|
||||
// Returns the one instruction that writes |var_inst|: either |var_inst|
// itself when it has an initializer, or the only OpStore in |users|. Returns
// nullptr when there is more than one such write, when an access chain in
// |users| feeds a store, or when |users| contains an instruction that is
// neither recognized below nor a decoration. May also return nullptr when
// there is no write at all.
ir::Instruction* LocalSingleStoreElimPass::FindSingleStoreAndCheckUses(
    ir::Instruction* var_inst, const vector<ir::Instruction*>& users) const {
  // An initializer on |var_inst| counts as the first store.
  ir::Instruction* store_inst =
      (var_inst->NumInOperands() > 1) ? var_inst : nullptr;

  for (ir::Instruction* user : users) {
    const SpvOp opcode = user->opcode();

    if (opcode == SpvOpStore) {
      // Since we are in the relaxed addressing mode, the use has to be the
      // base address of the store, and not the value being stored. Otherwise,
      // we would have a pointer to a pointer to function scope memory, which
      // is not allowed.
      if (store_inst != nullptr) {
        // More than 1 store.
        return nullptr;
      }
      store_inst = user;
      continue;
    }

    if (opcode == SpvOpAccessChain || opcode == SpvOpInBoundsAccessChain) {
      // Has a partial store. Cannot propagate that.
      if (FeedsAStore(user)) return nullptr;
      continue;
    }

    const bool is_harmless = opcode == SpvOpLoad ||
                             opcode == SpvOpImageTexelPointer ||
                             opcode == SpvOpName || opcode == SpvOpCopyObject;
    if (is_harmless) continue;

    // Don't know if this instruction modifies the variable.
    // Conservatively assume it is a store.
    if (!user->IsDecoration()) return nullptr;
  }
  return store_inst;
}
|
||||
|
||||
void LocalSingleStoreElimPass::FindUses(
|
||||
const ir::Instruction* var_inst,
|
||||
std::vector<ir::Instruction*>* users) const {
|
||||
analysis::DefUseManager* def_use_mgr = context()->get_def_use_mgr();
|
||||
def_use_mgr->ForEachUser(var_inst, [users, this](ir::Instruction* user) {
|
||||
users->push_back(user);
|
||||
if (user->opcode() == SpvOpCopyObject) {
|
||||
FindUses(user, users);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Returns true if some user of |inst| is an OpStore, is an access chain or
// OpCopyObject that itself feeds a store, or is an instruction that is
// neither recognized below nor a decoration (conservatively assumed to
// write).
bool LocalSingleStoreElimPass::FeedsAStore(ir::Instruction* inst) const {
  // Yields true for users that are known not to lead to a store.
  const auto is_safe_user = [this](ir::Instruction* user) -> bool {
    const SpvOp opcode = user->opcode();
    if (opcode == SpvOpStore) return false;
    if (opcode == SpvOpAccessChain || opcode == SpvOpInBoundsAccessChain ||
        opcode == SpvOpCopyObject) {
      return !FeedsAStore(user);
    }
    if (opcode == SpvOpLoad || opcode == SpvOpImageTexelPointer ||
        opcode == SpvOpName) {
      return true;
    }
    // Don't know if this instruction modifies the variable.
    // Conservatively assume it is a store.
    return user->IsDecoration();
  };

  analysis::DefUseManager* const def_use = context()->get_def_use_mgr();
  // WhileEachUser reports false as soon as one user is not safe.
  const bool every_user_safe = def_use->WhileEachUser(inst, is_safe_user);
  return !every_user_safe;
}
|
||||
|
||||
// Replaces each OpLoad in |uses| that is dominated by |store_inst| with the
// value written by |store_inst|, then kills the load. Loads that are not
// dominated by |store_inst|, and non-load uses, are left untouched.
//
// |store_inst| is read as an OpStore when its opcode is SpvOpStore; any other
// opcode is read as a variable with an initializer (presumably always
// OpVariable; verify against callers).
//
// Returns true if at least one load was replaced.
bool LocalSingleStoreElimPass::RewriteLoads(
    ir::Instruction* store_inst, const std::vector<ir::Instruction*>& uses) {
  ir::BasicBlock* store_block = context()->get_instr_block(store_inst);
  opt::DominatorAnalysis* dominator_analysis =
      context()->GetDominatorAnalysis(store_block->GetParent(), *cfg());

  // Id of the value every dominated load will be replaced with.
  uint32_t stored_id;
  if (store_inst->opcode() == SpvOpStore)
    stored_id = store_inst->GetSingleWordInOperand(kStoreValIdInIdx);
  else
    stored_id = store_inst->GetSingleWordInOperand(kVariableInitIdInIdx);

  bool modified = false;
  for (ir::Instruction* use : uses) {
    if (use->opcode() != SpvOpLoad) continue;
    // A load the store does not dominate may not observe the stored value.
    if (!dominator_analysis->Dominates(store_inst, use)) continue;

    modified = true;
    context()->KillNamesAndDecorates(use->result_id());
    context()->ReplaceAllUsesWith(use->result_id(), stored_id);
    // The load has no remaining uses; remove it so that later passes do not
    // have to look at it.
    context()->KillInst(use);
  }

  return modified;
}
|
||||
|
||||
} // namespace opt
|
||||
} // namespace spvtools
|
||||
|
@ -33,7 +33,7 @@ namespace spvtools {
|
||||
namespace opt {
|
||||
|
||||
// See optimizer.hpp for documentation.
|
||||
class LocalSingleStoreElimPass : public MemPass {
|
||||
class LocalSingleStoreElimPass : public Pass {
|
||||
using cbb_ptr = const ir::BasicBlock*;
|
||||
|
||||
public:
|
||||
@ -46,45 +46,6 @@ class LocalSingleStoreElimPass : public MemPass {
|
||||
}
|
||||
|
||||
private:
|
||||
// Return true if all refs through |ptrId| are only loads or stores and
|
||||
// cache ptrId in supported_ref_ptrs_. TODO(dnovillo): This function is
|
||||
// replicated in other passes and it's slightly different in every pass. Is it
|
||||
// possible to make one common implementation?
|
||||
bool HasOnlySupportedRefs(uint32_t ptrId);
|
||||
|
||||
// Find all function scope variables in |func| that are stored to
|
||||
// only once (SSA) and map to their stored value id. Only analyze
|
||||
// variables of scalar, vector, matrix types and struct and array
|
||||
// types comprising only these types. Currently this analysis is
|
||||
// not done in the presence of function calls. TODO(): Allow
|
||||
// analysis in the presence of function calls.
|
||||
void SingleStoreAnalyze(ir::Function* func);
|
||||
|
||||
using GetBlocksFunction =
|
||||
std::function<const std::vector<ir::BasicBlock*>*(const ir::BasicBlock*)>;
|
||||
|
||||
/// Returns the block successors function for the augmented CFG.
|
||||
GetBlocksFunction AugmentedCFGSuccessorsFunction() const;
|
||||
|
||||
/// Returns the block predecessors function for the augmented CFG.
|
||||
GetBlocksFunction AugmentedCFGPredecessorsFunction() const;
|
||||
|
||||
// Calculate immediate dominators for |func|'s CFG. Leaves result
|
||||
// in idom_. Entries for augmented CFG (pseudo blocks) are not created.
|
||||
// TODO(dnovillo): Move to new CFG class.
|
||||
void CalculateImmediateDominators(ir::Function* func);
|
||||
|
||||
// Return true if instruction in |blk0| at ordinal position |idx0|
|
||||
// dominates instruction in |blk1| at position |idx1|.
|
||||
bool Dominates(ir::BasicBlock* blk0, uint32_t idx0, ir::BasicBlock* blk1,
|
||||
uint32_t idx1);
|
||||
|
||||
// For each load of an SSA variable in |func|, replace all uses of
|
||||
// the load with the value stored if the store dominates the load.
|
||||
// Assumes that SingleStoreAnalyze() has just been run. Return true
|
||||
// if any instructions are modified.
|
||||
bool SingleStoreProcess(ir::Function* func);
|
||||
|
||||
// Do "single-store" optimization of function variables defined only
|
||||
// with a single non-access-chain store in |func|. Replace all their
|
||||
// non-access-chain loads with the value that is stored and eliminate
|
||||
@ -92,7 +53,7 @@ class LocalSingleStoreElimPass : public MemPass {
|
||||
bool LocalSingleStoreElim(ir::Function* func);
|
||||
|
||||
// Initialize extensions whitelist
|
||||
void InitExtensions();
|
||||
void InitExtensionWhiteList();
|
||||
|
||||
// Return true if all extensions in this module are allowed by this pass.
|
||||
bool AllExtensionsSupported() const;
|
||||
@ -100,44 +61,34 @@ class LocalSingleStoreElimPass : public MemPass {
|
||||
void Initialize(ir::IRContext* irContext);
|
||||
Pass::Status ProcessImpl();
|
||||
|
||||
// Map from block's label id to block
|
||||
std::unordered_map<uint32_t, ir::BasicBlock*> label2block_;
|
||||
// If there is a single store to |var_inst|, and it covers the entire
|
||||
// variable, then replace all of the loads of the entire variable that are
|
||||
// dominated by the store by the value that was stored. Returns true if the
|
||||
// module was changed.
|
||||
bool ProcessVariable(ir::Instruction* var_inst);
|
||||
|
||||
// Map from SSA Variable to its single store
|
||||
std::unordered_map<uint32_t, ir::Instruction*> ssa_var2store_;
|
||||
// Collects all of the uses of |var_inst| into |uses|. This looks through
|
||||
// OpCopyObject instructions that copy the address of the variable, and collects those
|
||||
// uses as well.
|
||||
void FindUses(const ir::Instruction* var_inst,
|
||||
std::vector<ir::Instruction*>* uses) const;
|
||||
|
||||
// Map from store to its ordinal position in its block.
|
||||
std::unordered_map<ir::Instruction*, uint32_t> store2idx_;
|
||||
// Returns a store to |var_inst| if
|
||||
// - it is a store to the entire variable,
|
||||
// - and there are no other instructions that may modify |var_inst|.
|
||||
ir::Instruction* FindSingleStoreAndCheckUses(
|
||||
ir::Instruction* var_inst,
|
||||
const std::vector<ir::Instruction*>& users) const;
|
||||
|
||||
// Map from store to its block.
|
||||
std::unordered_map<ir::Instruction*, ir::BasicBlock*> store2blk_;
|
||||
// Returns true if the address that results from |inst| may be used as a base
|
||||
// address in a store instruction or may be used to compute the base address
|
||||
// of a store instruction.
|
||||
bool FeedsAStore(ir::Instruction* inst) const;
|
||||
|
||||
// Set of non-SSA Variables
|
||||
std::unordered_set<uint32_t> non_ssa_vars_;
|
||||
|
||||
// Variables with only supported references, ie. loads and stores using
|
||||
// variable directly or through non-ptr access chains.
|
||||
std::unordered_set<uint32_t> supported_ref_ptrs_;
|
||||
|
||||
// CFG Predecessors
|
||||
std::unordered_map<const ir::BasicBlock*, std::vector<ir::BasicBlock*>>
|
||||
predecessors_map_;
|
||||
|
||||
// CFG Successors
|
||||
std::unordered_map<const ir::BasicBlock*, std::vector<ir::BasicBlock*>>
|
||||
successors_map_;
|
||||
|
||||
// CFG Augmented Predecessors
|
||||
std::unordered_map<const ir::BasicBlock*, std::vector<ir::BasicBlock*>>
|
||||
augmented_predecessors_map_;
|
||||
|
||||
// CFG Augmented Successors
|
||||
std::unordered_map<const ir::BasicBlock*, std::vector<ir::BasicBlock*>>
|
||||
augmented_successors_map_;
|
||||
|
||||
// Immediate Dominator Map
|
||||
// If block has no idom it points to itself.
|
||||
std::unordered_map<ir::BasicBlock*, ir::BasicBlock*> idom_;
|
||||
// Replaces all of the loads in |uses| by the value stored in |store_inst|.
|
||||
// The load instructions are then killed.
|
||||
bool RewriteLoads(ir::Instruction* store_inst,
|
||||
const std::vector<ir::Instruction*>& uses);
|
||||
|
||||
// Extensions supported by this pass.
|
||||
std::unordered_set<std::string> extensions_whitelist_;
|
||||
|
@ -73,7 +73,6 @@ OpFunctionEnd
|
||||
%v = OpVariable %_ptr_Function_v4float Function
|
||||
%14 = OpLoad %v4float %BaseColor
|
||||
OpStore %v %14
|
||||
%15 = OpLoad %v4float %v
|
||||
OpStore %gl_FragColor %14
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
@ -174,7 +173,6 @@ OpBranch %29
|
||||
%31 = OpLoad %v4float %v
|
||||
%32 = OpAccessChain %_ptr_Output_v4float %gl_FragData %int_0
|
||||
OpStore %32 %31
|
||||
%33 = OpLoad %v4float %v
|
||||
%34 = OpAccessChain %_ptr_Output_v4float %gl_FragData %int_1
|
||||
OpStore %34 %31
|
||||
OpReturn
|
||||
@ -263,8 +261,6 @@ OpStore %v %19
|
||||
%21 = OpAccessChain %_ptr_Function_float %v %20
|
||||
%22 = OpLoad %float %21
|
||||
OpStore %f %22
|
||||
%23 = OpLoad %v4float %v
|
||||
%24 = OpLoad %float %f
|
||||
%25 = OpCompositeConstruct %v4float %22 %22 %22 %22
|
||||
%26 = OpFDiv %v4float %19 %25
|
||||
OpStore %gl_FragColor %26
|
||||
@ -464,14 +460,12 @@ OpFunctionEnd
|
||||
%v2 = OpVariable %_ptr_Function_v4float Function
|
||||
%23 = OpLoad %v4float %BaseColor
|
||||
OpStore %v1 %23
|
||||
%24 = OpLoad %v4float %v1
|
||||
%25 = OpAccessChain %_ptr_Output_v4float %gl_FragData %int_0
|
||||
OpStore %25 %23
|
||||
%26 = OpLoad %v4float %BaseColor
|
||||
%27 = OpVectorTimesScalar %v4float %26 %float_0_5
|
||||
%28 = OpCopyObject %_ptr_Function_v4float %v2
|
||||
OpStore %28 %27
|
||||
%29 = OpLoad %v4float %28
|
||||
%30 = OpAccessChain %_ptr_Output_v4float %gl_FragData %int_1
|
||||
OpStore %30 %27
|
||||
OpReturn
|
||||
@ -562,14 +556,10 @@ OpFunctionEnd
|
||||
%31 = OpCompositeInsert %S_t %29 %30 0
|
||||
OpStore %s0 %31
|
||||
%32 = OpLoad %18 %sampler15
|
||||
%33 = OpLoad %S_t %s0
|
||||
%34 = OpCompositeInsert %S_t %32 %31 2
|
||||
OpStore %s0 %34
|
||||
%35 = OpLoad %S_t %s0
|
||||
OpStore %param %34
|
||||
%36 = OpLoad %S_t %param
|
||||
%37 = OpCompositeExtract %18 %34 2
|
||||
%38 = OpLoad %S_t %param
|
||||
%39 = OpCompositeExtract %v2float %34 0
|
||||
%40 = OpImageSampleImplicitLod %v4float %37 %39
|
||||
OpStore %outColor %40
|
||||
@ -681,7 +671,6 @@ OpFunctionEnd
|
||||
%t_0 = OpVariable %_ptr_Function_v4float Function
|
||||
%27 = OpLoad %v4float %v1_0
|
||||
OpStore %t_0 %27
|
||||
%28 = OpLoad %v4float %t_0
|
||||
OpReturnValue %27
|
||||
OpFunctionEnd
|
||||
)";
|
||||
@ -778,7 +767,6 @@ OpDecorate %7 Binding 0
|
||||
%23 = OpLabel
|
||||
%24 = OpVariable %_ptr_Function__ptr_Uniform__struct_5 Function
|
||||
OpStore %24 %7
|
||||
%26 = OpLoad %_ptr_Uniform__struct_5 %24
|
||||
%27 = OpAccessChain %_ptr_Uniform_v4float %7 %int_0 %uint_0 %int_0
|
||||
%28 = OpLoad %v4float %27
|
||||
%29 = OpCopyObject %v4float %28
|
||||
|
@ -112,7 +112,6 @@ OpBranchConditional %23 %25 %24
|
||||
OpStore %f %float_0
|
||||
OpBranch %24
|
||||
%24 = OpLabel
|
||||
%26 = OpLoad %v4float %v
|
||||
%27 = OpLoad %float %f
|
||||
%28 = OpCompositeConstruct %v4float %27 %27 %27 %27
|
||||
%29 = OpFAdd %v4float %20 %28
|
||||
@ -125,6 +124,72 @@ OpFunctionEnd
|
||||
predefs + before, predefs + after, true, true);
|
||||
}
|
||||
|
||||
TEST_F(LocalSingleStoreElimTest, ThreeStores) {
  // %v is stored three times (once in the entry block and once in each
  // branch), so none of its loads may be rewritten. The pass is expected to
  // leave the module unchanged.

  const std::string preamble =
      R"(OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %main "main" %BaseColor %fi %gl_FragColor
OpExecutionMode %main OriginUpperLeft
OpSource GLSL 140
OpName %main "main"
OpName %v "v"
OpName %BaseColor "BaseColor"
OpName %fi "fi"
OpName %r "r"
OpName %gl_FragColor "gl_FragColor"
%void = OpTypeVoid
%9 = OpTypeFunction %void
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%_ptr_Function_v4float = OpTypePointer Function %v4float
%_ptr_Input_v4float = OpTypePointer Input %v4float
%BaseColor = OpVariable %_ptr_Input_v4float Input
%_ptr_Input_float = OpTypePointer Input %float
%fi = OpVariable %_ptr_Input_float Input
%float_0 = OpConstant %float 0
%bool = OpTypeBool
%float_1 = OpConstant %float 1
%_ptr_Output_v4float = OpTypePointer Output %v4float
%gl_FragColor = OpVariable %_ptr_Output_v4float Output
)";

  const std::string function_text =
      R"(%main = OpFunction %void None %9
%19 = OpLabel
%v = OpVariable %_ptr_Function_v4float Function
%r = OpVariable %_ptr_Function_v4float Function
%20 = OpLoad %v4float %BaseColor
OpStore %v %20
%21 = OpLoad %float %fi
%22 = OpFOrdLessThan %bool %21 %float_0
OpSelectionMerge %23 None
OpBranchConditional %22 %24 %25
%24 = OpLabel
%26 = OpLoad %v4float %v
OpStore %v %26
OpStore %r %26
OpBranch %23
%25 = OpLabel
%27 = OpLoad %v4float %v
%28 = OpCompositeConstruct %v4float %float_1 %float_1 %float_1 %float_1
OpStore %v %28
%29 = OpFSub %v4float %28 %27
OpStore %r %29
OpBranch %23
%23 = OpLabel
%30 = OpLoad %v4float %r
OpStore %gl_FragColor %30
OpReturn
OpFunctionEnd
)";

  // Input and expected output are the same text.
  const std::string module_text = preamble + function_text;
  SinglePassRunAndCheck<opt::LocalSingleStoreElimPass>(module_text, module_text,
                                                       true, true);
}
|
||||
|
||||
TEST_F(LocalSingleStoreElimTest, MultipleLoads) {
|
||||
// Single store to multiple loads of v is optimized.
|
||||
//
|
||||
@ -211,11 +276,9 @@ OpStore %v %20
|
||||
OpSelectionMerge %23 None
|
||||
OpBranchConditional %22 %24 %25
|
||||
%24 = OpLabel
|
||||
%26 = OpLoad %v4float %v
|
||||
OpStore %r %20
|
||||
OpBranch %23
|
||||
%25 = OpLabel
|
||||
%27 = OpLoad %v4float %v
|
||||
%28 = OpCompositeConstruct %v4float %float_1 %float_1 %float_1 %float_1
|
||||
%29 = OpFSub %v4float %28 %20
|
||||
OpStore %r %29
|
||||
@ -299,8 +362,6 @@ OpStore %v %18
|
||||
%19 = OpAccessChain %_ptr_Function_float %v %uint_3
|
||||
%20 = OpLoad %float %19
|
||||
OpStore %f %20
|
||||
%21 = OpLoad %v4float %v
|
||||
%22 = OpLoad %float %f
|
||||
%23 = OpVectorTimesScalar %v4float %18 %20
|
||||
OpStore %gl_FragColor %23
|
||||
OpReturn
|
||||
@ -457,7 +518,6 @@ OpStore %f %float_0
|
||||
OpBranch %24
|
||||
%24 = OpLabel
|
||||
%26 = OpCopyObject %_ptr_Function_v4float %v
|
||||
%27 = OpLoad %v4float %26
|
||||
%28 = OpLoad %float %f
|
||||
%29 = OpCompositeConstruct %v4float %28 %28 %28 %28
|
||||
%30 = OpFAdd %v4float %20 %29
|
||||
@ -599,7 +659,6 @@ OpFunctionEnd
|
||||
R"(%main = OpFunction %void None %6
|
||||
%12 = OpLabel
|
||||
%f = OpVariable %_ptr_Function_float Function %float_0
|
||||
%13 = OpLoad %float %f
|
||||
%14 = OpCompositeConstruct %v4float %float_0 %float_0 %float_0 %float_0
|
||||
OpStore %gl_FragColor %14
|
||||
OpReturn
|
||||
@ -698,7 +757,6 @@ OpDecorate %7 Binding 0
|
||||
%23 = OpLabel
|
||||
%24 = OpVariable %_ptr_Function__ptr_Uniform__struct_5 Function
|
||||
OpStore %24 %7
|
||||
%26 = OpLoad %_ptr_Uniform__struct_5 %24
|
||||
%27 = OpAccessChain %_ptr_Uniform_v4float %7 %int_0 %uint_0 %int_0
|
||||
%28 = OpLoad %v4float %27
|
||||
%29 = OpCopyObject %v4float %28
|
||||
|
Loading…
Reference in New Issue
Block a user