Initial patch for scalar evolution analysis

This patch adds support for the analysis of scalars in loops. It works
by traversing the defuse chain to build a DAG of scalar operations and
then simplifies the DAG by folding constants and grouping like terms.
It represents induction variables as recurrent expressions with respect
to a given loop and can simplify DAGs containing recurrent expression by
rewritting the entire DAG to be a recurrent expression with respect to
the same loop.
This commit is contained in:
Stephen McGroarty 2018-03-28 14:19:55 +01:00 committed by Diego Novillo
parent c26866ee74
commit ad7e4b8401
10 changed files with 2909 additions and 2 deletions

View File

@ -118,6 +118,8 @@ SPVTOOLS_OPT_SRC_FILES := \
source/opt/redundancy_elimination.cpp \
source/opt/remove_duplicates_pass.cpp \
source/opt/replace_invalid_opc.cpp \
source/opt/scalar_analysis.cpp \
source/opt/scalar_analysis_simplification.cpp \
source/opt/scalar_replacement_pass.cpp \
source/opt/set_spec_constant_default_value_pass.cpp \
source/opt/simplification_pass.cpp \

View File

@ -77,6 +77,8 @@ add_library(SPIRV-Tools-opt
reflect.h
remove_duplicates_pass.h
replace_invalid_opc.h
scalar_analysis.h
scalar_analysis_nodes.h
scalar_replacement_pass.h
set_spec_constant_default_value_pass.h
simplification_pass.h
@ -151,6 +153,8 @@ add_library(SPIRV-Tools-opt
redundancy_elimination.cpp
remove_duplicates_pass.cpp
replace_invalid_opc.cpp
scalar_analysis.cpp
scalar_analysis_simplification.cpp
scalar_replacement_pass.cpp
set_spec_constant_default_value_pass.cpp
simplification_pass.cpp

View File

@ -45,6 +45,9 @@ void IRContext::BuildInvalidAnalyses(IRContext::Analysis set) {
if (set & kAnalysisNameMap) {
BuildIdToNameMap();
}
if (set & kAnalysisScalarEvolution) {
BuildScalarEvolutionAnalysis();
}
}
void IRContext::InvalidateAnalysesExceptFor(

View File

@ -24,6 +24,7 @@
#include "feature_manager.h"
#include "loop_descriptor.h"
#include "module.h"
#include "scalar_analysis.h"
#include "type_manager.h"
#include <algorithm>
@ -58,7 +59,8 @@ class IRContext {
kAnalysisDominatorAnalysis = 1 << 5,
kAnalysisLoopAnalysis = 1 << 6,
kAnalysisNameMap = 1 << 7,
kAnalysisEnd = 1 << 8
kAnalysisScalarEvolution = 1 << 8,
kAnalysisEnd = 1 << 9
};
friend inline Analysis operator|(Analysis lhs, Analysis rhs);
@ -258,6 +260,15 @@ class IRContext {
return type_mgr_.get();
}
// Returns a pointer to the scalar evolution analysis. If it is invalid it
// will be rebuilt first.
opt::ScalarEvolutionAnalysis* GetScalarEvolutionAnalysis() {
if (!AreAnalysesValid(kAnalysisScalarEvolution)) {
BuildScalarEvolutionAnalysis();
}
return scalar_evolution_analysis_.get();
}
// Build the map from the ids to the OpName and OpMemberName instruction
// associated with it.
inline void BuildIdToNameMap();
@ -444,6 +455,11 @@ class IRContext {
valid_analyses_ = valid_analyses_ | kAnalysisCFG;
}
void BuildScalarEvolutionAnalysis() {
scalar_evolution_analysis_.reset(new opt::ScalarEvolutionAnalysis(this));
valid_analyses_ = valid_analyses_ | kAnalysisScalarEvolution;
}
// Removes all computed dominator and post-dominator trees. This will force
// the context to rebuild the trees on demand.
void ResetDominatorAnalysis() {
@ -544,6 +560,9 @@ class IRContext {
// A map from an id to its corresponding OpName and OpMemberName instructions.
std::unique_ptr<std::multimap<uint32_t, Instruction*>> id_to_name_;
// The cache scalar evolution analysis node.
std::unique_ptr<opt::ScalarEvolutionAnalysis> scalar_evolution_analysis_;
};
inline ir::IRContext::Analysis operator|(ir::IRContext::Analysis lhs,

View File

@ -0,0 +1,638 @@
// Copyright (c) 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "opt/scalar_analysis.h"
#include <algorithm>
#include <functional>
#include <string>
#include <utility>
#include "opt/ir_context.h"
// Transforms a given scalar operation instruction into a DAG representation.
//
// 1. Take an instruction and traverse its operands until we reach a
// constant node or an instruction which we do not know how to compute the
// value, such as a load.
//
// 2. Create a new node for each instruction traversed and build the nodes for
// the in operands of that instruction as well.
//
// 3. Add the operand nodes as children of the first and hash the node. Use the
// hash to see if the node is already in the cache. We ensure the children are
// always in sorted order so that two nodes with the same children but inserted
// in a different order have the same hash and so that the overloaded operator==
// will return true. If the node is already in the cache return the cached
// version instead.
//
// 4. The created DAG can then be simplified by
// ScalarAnalysis::SimplifyExpression, implemented in
// scalar_analysis_simplification.cpp. See that file for further information on
// the simplification process.
//
namespace spvtools {
namespace opt {
uint32_t SENode::NumberOfNodes = 0;
ScalarEvolutionAnalysis::ScalarEvolutionAnalysis(ir::IRContext* context)
: context_(context) {
// Create and cached the CantComputeNode.
cached_cant_compute_ =
GetCachedOrAdd(std::unique_ptr<SECantCompute>(new SECantCompute(this)));
}
SENode* ScalarEvolutionAnalysis::CreateNegation(SENode* operand) {
// If operand is can't compute then the whole graph is can't compute.
if (operand->IsCantCompute()) return CreateCantComputeNode();
if (operand->GetType() == SENode::Constant) {
return CreateConstant(-operand->AsSEConstantNode()->FoldToSingleValue());
}
std::unique_ptr<SENode> negation_node{new SENegative(this)};
negation_node->AddChild(operand);
return GetCachedOrAdd(std::move(negation_node));
}
SENode* ScalarEvolutionAnalysis::CreateConstant(int64_t integer) {
return GetCachedOrAdd(
std::unique_ptr<SENode>(new SEConstantNode(this, integer)));
}
SENode* ScalarEvolutionAnalysis::CreateRecurrentExpression(
const ir::Loop* loop, SENode* offset, SENode* coefficient) {
assert(loop && "Recurrent add expressions must have a valid loop.");
// If operands are can't compute then the whole graph is can't compute.
if (offset->IsCantCompute() || coefficient->IsCantCompute())
return CreateCantComputeNode();
std::unique_ptr<SERecurrentNode> phi_node{new SERecurrentNode(this, loop)};
phi_node->AddOffset(offset);
phi_node->AddCoefficient(coefficient);
return GetCachedOrAdd(std::move(phi_node));
}
SENode* ScalarEvolutionAnalysis::AnalyzeMultiplyOp(
const ir::Instruction* multiply) {
assert(multiply->opcode() == SpvOp::SpvOpIMul &&
"Multiply node did not come from a multiply instruction");
opt::analysis::DefUseManager* def_use = context_->get_def_use_mgr();
SENode* op1 =
AnalyzeInstruction(def_use->GetDef(multiply->GetSingleWordInOperand(0)));
SENode* op2 =
AnalyzeInstruction(def_use->GetDef(multiply->GetSingleWordInOperand(1)));
return CreateMultiplyNode(op1, op2);
}
SENode* ScalarEvolutionAnalysis::CreateMultiplyNode(SENode* operand_1,
SENode* operand_2) {
// If operands are can't compute then the whole graph is can't compute.
if (operand_1->IsCantCompute() || operand_2->IsCantCompute())
return CreateCantComputeNode();
if (operand_1->GetType() == SENode::Constant &&
operand_2->GetType() == SENode::Constant) {
return CreateConstant(operand_1->AsSEConstantNode()->FoldToSingleValue() *
operand_2->AsSEConstantNode()->FoldToSingleValue());
}
std::unique_ptr<SENode> multiply_node{new SEMultiplyNode(this)};
multiply_node->AddChild(operand_1);
multiply_node->AddChild(operand_2);
return GetCachedOrAdd(std::move(multiply_node));
}
SENode* ScalarEvolutionAnalysis::CreateSubtraction(SENode* operand_1,
SENode* operand_2) {
// Fold if both operands are constant.
if (operand_1->GetType() == SENode::Constant &&
operand_2->GetType() == SENode::Constant) {
return CreateConstant(operand_1->AsSEConstantNode()->FoldToSingleValue() -
operand_2->AsSEConstantNode()->FoldToSingleValue());
}
return CreateAddNode(operand_1, CreateNegation(operand_2));
}
SENode* ScalarEvolutionAnalysis::CreateAddNode(SENode* operand_1,
SENode* operand_2) {
// Fold if both operands are constant and the |simplify| flag is true.
if (operand_1->GetType() == SENode::Constant &&
operand_2->GetType() == SENode::Constant) {
return CreateConstant(operand_1->AsSEConstantNode()->FoldToSingleValue() +
operand_2->AsSEConstantNode()->FoldToSingleValue());
}
// If operands are can't compute then the whole graph is can't compute.
if (operand_1->IsCantCompute() || operand_2->IsCantCompute())
return CreateCantComputeNode();
std::unique_ptr<SENode> add_node{new SEAddNode(this)};
add_node->AddChild(operand_1);
add_node->AddChild(operand_2);
return GetCachedOrAdd(std::move(add_node));
}
SENode* ScalarEvolutionAnalysis::AnalyzeInstruction(
const ir::Instruction* inst) {
auto itr = recurrent_node_map_.find(inst);
if (itr != recurrent_node_map_.end()) return itr->second;
SENode* output = nullptr;
switch (inst->opcode()) {
case SpvOp::SpvOpPhi: {
output = AnalyzePhiInstruction(inst);
break;
}
case SpvOp::SpvOpConstant:
case SpvOp::SpvOpConstantNull: {
output = AnalyzeConstant(inst);
break;
}
case SpvOp::SpvOpISub:
case SpvOp::SpvOpIAdd: {
output = AnalyzeAddOp(inst);
break;
}
case SpvOp::SpvOpIMul: {
output = AnalyzeMultiplyOp(inst);
break;
}
default: {
output = CreateValueUnknownNode(inst);
break;
}
}
return output;
}
SENode* ScalarEvolutionAnalysis::AnalyzeConstant(const ir::Instruction* inst) {
if (inst->opcode() == SpvOp::SpvOpConstantNull) return CreateConstant(0);
assert(inst->opcode() == SpvOp::SpvOpConstant);
assert(inst->NumInOperands() == 1);
int64_t value = 0;
// Look up the instruction in the constant manager.
const opt::analysis::Constant* constant =
context_->get_constant_mgr()->FindDeclaredConstant(inst->result_id());
if (!constant) return CreateCantComputeNode();
const opt::analysis::IntConstant* int_constant = constant->AsIntConstant();
// Exit out if it is a 64 bit integer.
if (!int_constant || int_constant->words().size() != 1)
return CreateCantComputeNode();
if (int_constant->type()->AsInteger()->IsSigned()) {
value = int_constant->GetS32BitValue();
} else {
value = int_constant->GetU32BitValue();
}
return CreateConstant(value);
}
// Handles both addition and subtraction. If the |sub| flag is set then the
// addition will be op1+(-op2) otherwise op1+op2.
SENode* ScalarEvolutionAnalysis::AnalyzeAddOp(const ir::Instruction* inst) {
assert((inst->opcode() == SpvOp::SpvOpIAdd ||
inst->opcode() == SpvOp::SpvOpISub) &&
"Add node must be created from a OpIAdd or OpISub instruction");
opt::analysis::DefUseManager* def_use = context_->get_def_use_mgr();
SENode* op1 =
AnalyzeInstruction(def_use->GetDef(inst->GetSingleWordInOperand(0)));
SENode* op2 =
AnalyzeInstruction(def_use->GetDef(inst->GetSingleWordInOperand(1)));
// To handle subtraction we wrap the second operand in a unary negation node.
if (inst->opcode() == SpvOp::SpvOpISub) {
op2 = CreateNegation(op2);
}
return CreateAddNode(op1, op2);
}
SENode* ScalarEvolutionAnalysis::AnalyzePhiInstruction(
const ir::Instruction* phi) {
// The phi should only have two incoming value pairs.
if (phi->NumInOperands() != 4) {
return CreateCantComputeNode();
}
opt::analysis::DefUseManager* def_use = context_->get_def_use_mgr();
// Get the basic block this instruction belongs to.
ir::BasicBlock* basic_block =
context_->get_instr_block(const_cast<ir::Instruction*>(phi));
// And then the function that the basic blocks belongs to.
ir::Function* function = basic_block->GetParent();
// Use the function to get the loop descriptor.
ir::LoopDescriptor* loop_descriptor = context_->GetLoopDescriptor(function);
// We only handle phis in loops at the moment.
if (!loop_descriptor) return CreateCantComputeNode();
// Get the innermost loop which this block belongs to.
ir::Loop* loop = (*loop_descriptor)[basic_block->id()];
// If the loop doesn't exist or doesn't have a preheader or latch block, exit
// out.
if (!loop || !loop->GetLatchBlock() || !loop->GetPreHeaderBlock() ||
loop->GetHeaderBlock() != basic_block)
return recurrent_node_map_[phi] = CreateCantComputeNode();
std::unique_ptr<SERecurrentNode> phi_node{new SERecurrentNode(this, loop)};
// We add the node to this map to allow it to be returned before the node is
// fully built. This is needed as the subsequent call to AnalyzeInstruction
// could lead back to this |phi| instruction so we return the pointer
// immediately in AnalyzeInstruction to break the recursion.
recurrent_node_map_[phi] = phi_node.get();
// Traverse the operands of the instruction an create new nodes for each one.
for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
uint32_t value_id = phi->GetSingleWordInOperand(i);
uint32_t incoming_label_id = phi->GetSingleWordInOperand(i + 1);
ir::Instruction* value_inst = def_use->GetDef(value_id);
SENode* value_node = AnalyzeInstruction(value_inst);
// If any operand is CantCompute then the whole graph is CantCompute.
if (value_node->IsCantCompute())
return recurrent_node_map_[phi] = CreateCantComputeNode();
// If the value is coming from the preheader block then the value is the
// initial value of the phi.
if (incoming_label_id == loop->GetPreHeaderBlock()->id()) {
phi_node->AddOffset(value_node);
} else if (incoming_label_id == loop->GetLatchBlock()->id()) {
// Assumed to be in the form of step + phi.
if (value_node->GetType() != SENode::Add)
return recurrent_node_map_[phi] = CreateCantComputeNode();
SENode* step_node = nullptr;
SENode* phi_operand = nullptr;
SENode* operand_1 = value_node->GetChild(0);
SENode* operand_2 = value_node->GetChild(1);
// Find which node is the step term.
if (!operand_1->AsSERecurrentNode())
step_node = operand_1;
else if (!operand_2->AsSERecurrentNode())
step_node = operand_2;
// Find which node is the recurrent expression.
if (operand_1->AsSERecurrentNode())
phi_operand = operand_1;
else if (operand_2->AsSERecurrentNode())
phi_operand = operand_2;
// If it is not in the form step + phi exit out.
if (!(step_node && phi_operand))
return recurrent_node_map_[phi] = CreateCantComputeNode();
// If the phi operand is not the same phi node exit out.
if (phi_operand != phi_node.get())
return recurrent_node_map_[phi] = CreateCantComputeNode();
if (!IsLoopInvariant(loop, step_node))
return recurrent_node_map_[phi] = CreateCantComputeNode();
phi_node->AddCoefficient(step_node);
}
}
// Once the node is fully built we update the map with the version from the
// cache (if it has already been added to the cache).
return recurrent_node_map_[phi] = GetCachedOrAdd(std::move(phi_node));
}
SENode* ScalarEvolutionAnalysis::CreateValueUnknownNode(
const ir::Instruction* inst) {
std::unique_ptr<SEValueUnknown> load_node{
new SEValueUnknown(this, inst->result_id())};
return GetCachedOrAdd(std::move(load_node));
}
SENode* ScalarEvolutionAnalysis::CreateCantComputeNode() {
return cached_cant_compute_;
}
// Add the created node into the cache of nodes. If it already exists return it.
SENode* ScalarEvolutionAnalysis::GetCachedOrAdd(
std::unique_ptr<SENode> prospective_node) {
auto itr = node_cache_.find(prospective_node);
if (itr != node_cache_.end()) {
return (*itr).get();
}
SENode* raw_ptr_to_node = prospective_node.get();
node_cache_.insert(std::move(prospective_node));
return raw_ptr_to_node;
}
bool ScalarEvolutionAnalysis::IsLoopInvariant(const ir::Loop* loop,
const SENode* node) const {
for (auto itr = node->graph_cbegin(); itr != node->graph_cend(); ++itr) {
if (const SERecurrentNode* rec = itr->AsSERecurrentNode()) {
const ir::BasicBlock* header = rec->GetLoop()->GetHeaderBlock();
// If the loop which the recurrent expression belongs to is either |loop
// or a nested loop inside |loop| then we assume it is variant.
if (loop->IsInsideLoop(header)) {
return false;
}
} else if (const SEValueUnknown* unknown = itr->AsSEValueUnknown()) {
// If the instruction is inside the loop we conservatively assume it is
// loop variant.
if (loop->IsInsideLoop(unknown->ResultId())) return false;
}
}
return true;
}
SENode* ScalarEvolutionAnalysis::GetCoefficientFromRecurrentTerm(
SENode* node, const ir::Loop* loop) {
// Traverse the DAG to find the recurrent expression belonging to |loop|.
for (auto itr = node->graph_begin(); itr != node->graph_end(); ++itr) {
SERecurrentNode* rec = itr->AsSERecurrentNode();
if (rec && rec->GetLoop() == loop) {
return rec->GetCoefficient();
}
}
return CreateConstant(0);
}
SENode* ScalarEvolutionAnalysis::UpdateChildNode(SENode* parent,
SENode* old_child,
SENode* new_child) {
// Only handles add.
if (parent->GetType() != SENode::Add) return parent;
std::vector<SENode*> new_children;
for (SENode* child : *parent) {
if (child == old_child) {
new_children.push_back(new_child);
} else {
new_children.push_back(child);
}
}
std::unique_ptr<SENode> add_node{new SEAddNode(this)};
for (SENode* child : new_children) {
add_node->AddChild(child);
}
return SimplifyExpression(GetCachedOrAdd(std::move(add_node)));
}
// Rebuild the |node| eliminating, if it exists, the recurrent term which
// belongs to the |loop|.
SENode* ScalarEvolutionAnalysis::BuildGraphWithoutRecurrentTerm(
SENode* node, const ir::Loop* loop) {
// If the node is already a recurrent expression belonging to loop then just
// return the offset.
SERecurrentNode* recurrent = node->AsSERecurrentNode();
if (recurrent) {
if (recurrent->GetLoop() == loop) {
return recurrent->GetOffset();
} else {
return node;
}
}
std::vector<SENode*> new_children;
// Otherwise find the recurrent node in the children of this node.
for (auto itr : *node) {
recurrent = itr->AsSERecurrentNode();
if (recurrent && recurrent->GetLoop() == loop) {
new_children.push_back(recurrent->GetOffset());
} else {
new_children.push_back(itr);
}
}
std::unique_ptr<SENode> add_node{new SEAddNode(this)};
for (SENode* child : new_children) {
add_node->AddChild(child);
}
return SimplifyExpression(GetCachedOrAdd(std::move(add_node)));
}
// Return the recurrent term belonging to |loop| if it appears in the graph
// starting at |node| or null if it doesn't.
SERecurrentNode* ScalarEvolutionAnalysis::GetRecurrentTerm(
SENode* node, const ir::Loop* loop) {
for (auto itr = node->graph_begin(); itr != node->graph_end(); ++itr) {
SERecurrentNode* rec = itr->AsSERecurrentNode();
if (rec && rec->GetLoop() == loop) {
return rec;
}
}
return nullptr;
}
std::string SENode::AsString() const {
switch (GetType()) {
case Constant:
return "Constant";
case RecurrentAddExpr:
return "RecurrentAddExpr";
case Add:
return "Add";
case Negative:
return "Negative";
case Multiply:
return "Multiply";
case ValueUnknown:
return "Value Unknown";
case CanNotCompute:
return "Can not compute";
}
return "NULL";
}
bool SENode::operator==(const SENode& other) const {
if (GetType() != other.GetType()) return false;
if (other.GetChildren().size() != children_.size()) return false;
const SERecurrentNode* this_as_recurrent = AsSERecurrentNode();
// Check the children are the same, for SERecurrentNodes we need to check the
// offset and coefficient manually as the child vector is sorted by ids so the
// offset/coefficient information is lost.
if (!this_as_recurrent) {
for (size_t index = 0; index < children_.size(); ++index) {
if (other.GetChildren()[index] != children_[index]) return false;
}
} else {
const SERecurrentNode* other_as_recurrent = other.AsSERecurrentNode();
// We've already checked the types are the same, this should not fail if
// this->AsSERecurrentNode() succeeded.
assert(other_as_recurrent);
if (this_as_recurrent->GetCoefficient() !=
other_as_recurrent->GetCoefficient())
return false;
if (this_as_recurrent->GetOffset() != other_as_recurrent->GetOffset())
return false;
if (this_as_recurrent->GetLoop() != other_as_recurrent->GetLoop())
return false;
}
// If we're dealing with a value unknown node check both nodes were created by
// the same instruction.
if (GetType() == SENode::ValueUnknown) {
if (AsSEValueUnknown()->ResultId() !=
other.AsSEValueUnknown()->ResultId()) {
return false;
}
}
if (AsSEConstantNode()) {
if (AsSEConstantNode()->FoldToSingleValue() !=
other.AsSEConstantNode()->FoldToSingleValue())
return false;
}
return true;
}
bool SENode::operator!=(const SENode& other) const { return !(*this == other); }
namespace {
// Helper functions to insert 32/64 bit values into the 32 bit hash string. This
// allows us to add pointers to the string by reinterpreting the pointers as
// uintptr_t. PushToString will deduce the type, call sizeof on it and use
// that size to call into the correct PushToStringImpl functor depending on
// whether it is 32 or 64 bit.
template <typename T, size_t size_of_t>
struct PushToStringImpl;
template <typename T>
struct PushToStringImpl<T, 8> {
void operator()(T id, std::u32string* str) {
str->push_back(static_cast<uint32_t>(id >> 32));
str->push_back(static_cast<uint32_t>(id));
}
};
template <typename T>
struct PushToStringImpl<T, 4> {
void operator()(T id, std::u32string* str) {
str->push_back(static_cast<uint32_t>(id));
}
};
template <typename T>
static void PushToString(T id, std::u32string* str) {
PushToStringImpl<T, sizeof(T)>{}(id, str);
}
} // namespace
// Implements the hashing of SENodes.
size_t SENodeHash::operator()(const SENode* node) const {
// Concatinate the terms into a string which we can hash.
std::u32string hash_string{};
// Hashing the type as a string is safer than hashing the enum as the enum is
// very likely to collide with constants.
for (char ch : node->AsString()) {
hash_string.push_back(static_cast<char32_t>(ch));
}
// We just ignore the literal value unless it is a constant.
if (node->GetType() == SENode::Constant)
PushToString(node->AsSEConstantNode()->FoldToSingleValue(), &hash_string);
const SERecurrentNode* recurrent = node->AsSERecurrentNode();
// If we're dealing with a recurrent expression hash the loop as well so that
// nested inductions like i=0,i++ and j=0,j++ correspond to different nodes.
if (recurrent) {
PushToString(reinterpret_cast<uintptr_t>(recurrent->GetLoop()),
&hash_string);
// Recurrent expressions can't be hashed using the normal method as the
// order of coefficient and offset matters to the hash.
PushToString(reinterpret_cast<uintptr_t>(recurrent->GetCoefficient()),
&hash_string);
PushToString(reinterpret_cast<uintptr_t>(recurrent->GetOffset()),
&hash_string);
return std::hash<std::u32string>{}(hash_string);
}
// Hash the result id of the original instruction which created this node if
// it is a value unknown node.
if (node->GetType() == SENode::ValueUnknown) {
PushToString(node->AsSEValueUnknown()->ResultId(), &hash_string);
}
// Hash the pointers of the child nodes, each SENode has a unique pointer
// associated with it.
const std::vector<SENode*>& children = node->GetChildren();
for (const SENode* child : children) {
PushToString(reinterpret_cast<uintptr_t>(child), &hash_string);
}
return std::hash<std::u32string>{}(hash_string);
}
// This overload is the actual overload used by the node_cache_ set.
size_t SENodeHash::operator()(const std::unique_ptr<SENode>& node) const {
return this->operator()(node.get());
}
void SENode::DumpDot(std::ostream& out, bool recurse) const {
size_t unique_id = std::hash<const SENode*>{}(this);
out << unique_id << " [label=\"" << AsString() << " ";
if (GetType() == SENode::Constant) {
out << "\nwith value: " << this->AsSEConstantNode()->FoldToSingleValue();
}
out << "\"]\n";
for (const SENode* child : children_) {
size_t child_unique_id = std::hash<const SENode*>{}(child);
out << unique_id << " -> " << child_unique_id << " \n";
if (recurse) child->DumpDot(out, true);
}
}
} // namespace opt
} // namespace spvtools

View File

@ -0,0 +1,156 @@
// Copyright (c) 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SOURCE_OPT_SCALAR_ANALYSIS_H_
#define SOURCE_OPT_SCALAR_ANALYSIS_H_
#include <algorithm>
#include <cstdint>
#include <map>
#include <memory>
#include <unordered_set>
#include <vector>
#include "opt/basic_block.h"
#include "opt/instruction.h"
#include "opt/scalar_analysis_nodes.h"
namespace spvtools {
namespace ir {
class IRContext;
class Loop;
} // namespace ir
namespace opt {
// Manager for the Scalar Evolution analysis. Creates and maintains a DAG of
// scalar operations generated from analysing the use def graph from incoming
// instructions. Each node is hashed as it is added so like node (for instance,
// two induction variables i=0,i++ and j=0,j++) become the same node. After
// creating a DAG with AnalyzeInstruction it can the be simplified into a more
// usable form with SimplifyExpression.
class ScalarEvolutionAnalysis {
public:
explicit ScalarEvolutionAnalysis(ir::IRContext* context);
// Create a unary negative node on |operand|.
SENode* CreateNegation(SENode* operand);
// Creates a subtraction between the two operands by adding |operand_1| to the
// negation of |operand_2|.
SENode* CreateSubtraction(SENode* operand_1, SENode* operand_2);
// Create an addition node between two operands. The |simplify| when set will
// allow the function to return an SEConstant instead of an addition if the
// two input operands are also constant.
SENode* CreateAddNode(SENode* operand_1, SENode* operand_2);
// Create a multiply node between two operands.
SENode* CreateMultiplyNode(SENode* operand_1, SENode* operand_2);
// Create a node representing a constant integer.
SENode* CreateConstant(int64_t integer);
// Create a value unknown node, such as a load.
SENode* CreateValueUnknownNode(const ir::Instruction* inst);
// Create a CantComputeNode. Used to exit out of analysis.
SENode* CreateCantComputeNode();
// Create a new recurrent node with |offset| and |coefficient|, with respect
// to |loop|.
SENode* CreateRecurrentExpression(const ir::Loop* loop, SENode* offset,
SENode* coefficient);
// Construct the DAG by traversing use def chain of |inst|.
SENode* AnalyzeInstruction(const ir::Instruction* inst);
// Simplify the |node| by grouping like terms or if contains a recurrent
// expression, rewrite the graph so the whole DAG (from |node| down) is in
// terms of that recurrent expression.
//
// For example.
// Induction variable i=0, i++ would produce Rec(0,1) so i+1 could be
// transformed into Rec(1,1).
//
// X+X*2+Y-Y+34-17 would be transformed into 3*X + 17, where X and Y are
// ValueUnknown nodes (such as a load instruction).
SENode* SimplifyExpression(SENode* node);
// Add |prospective_node| into the cache and return a raw pointer to it. If
// |prospective_node| is already in the cache just return the raw pointer.
SENode* GetCachedOrAdd(std::unique_ptr<SENode> prospective_node);
// Checks that the graph starting from |node| is invariant to the |loop|.
bool IsLoopInvariant(const ir::Loop* loop, const SENode* node) const;
// Find the recurrent term belonging to |loop| in the graph starting from
// |node| and return the coefficient of that recurrent term. Constant zero
// will be returned if no recurrent could be found. |node| should be in
// simplest form.
SENode* GetCoefficientFromRecurrentTerm(SENode* node, const ir::Loop* loop);
// Return a rebuilt graph starting from |node| with the recurrent expression
// belonging to |loop| being zeroed out. Returned node will be simplified.
SENode* BuildGraphWithoutRecurrentTerm(SENode* node, const ir::Loop* loop);
// Return the recurrent term belonging to |loop| if it appears in the graph
// starting at |node| or null if it doesn't.
SERecurrentNode* GetRecurrentTerm(SENode* node, const ir::Loop* loop);
SENode* UpdateChildNode(SENode* parent, SENode* child, SENode* new_child);
private:
SENode* AnalyzeConstant(const ir::Instruction* inst);
// Handles both addition and subtraction. If the |instruction| is OpISub
// then the resulting node will be op1+(-op2) otherwise if it is OpIAdd then
// the result will be op1+op2. |instruction| must be OpIAdd or OpISub.
SENode* AnalyzeAddOp(const ir::Instruction* instruction);
SENode* AnalyzeMultiplyOp(const ir::Instruction* multiply);
SENode* AnalyzePhiInstruction(const ir::Instruction* phi);
ir::IRContext* context_;
// A map of instructions to SENodes. This is used to track recurrent
// expressions as they are added when analyzing instructions. Recurrent
// expressions come from phi nodes which by nature can include recursion so we
// check if nodes have already been built when analyzing instructions.
std::map<const ir::Instruction*, SENode*> recurrent_node_map_;
// On creation we create and cache the CantCompute node so we not need to
// perform a needless create step.
SENode* cached_cant_compute_;
// Helper functor to allow two unique_ptr to nodes to be compare. Only
// needed
// for the unordered_set implementation.
struct NodePointersEquality {
bool operator()(const std::unique_ptr<SENode>& lhs,
const std::unique_ptr<SENode>& rhs) const {
return *lhs == *rhs;
}
};
// Cache of nodes. All pointers to the nodes are references to the memory
// managed by they set.
std::unordered_set<std::unique_ptr<SENode>, SENodeHash, NodePointersEquality>
node_cache_;
};
} // namespace opt
} // namespace spvtools
#endif // SOURCE_OPT_SCALAR_ANALYSIS_H__

View File

@ -0,0 +1,313 @@
// Copyright (c) 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASI,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SOURCE_OPT_SCALAR_ANALYSIS_NODES_H_
#define SOURCE_OPT_SCALAR_ANALYSIS_NODES_H_
#include <algorithm>
#include <memory>
#include <string>
#include <vector>
#include "opt/tree_iterator.h"
namespace spvtools {
namespace ir {
class Loop;
} // namespace ir
namespace opt {
class ScalarEvolutionAnalysis;
class SEConstantNode;
class SERecurrentNode;
class SEAddNode;
class SEMultiplyNode;
class SENegative;
class SEValueUnknown;
class SECantCompute;
// Abstract class representing a node in the scalar evolution DAG. Each node
// contains a vector of pointers to its children and each subclass of SENode
// implements GetType and an As method to allow casting. SENodes can be hashed
// using the SENodeHash functor. The vector of children is sorted when a node is
// added. This is important as it allows the hash of X+Y to be the same as Y+X.
class SENode {
public:
enum SENodeType {
Constant,
RecurrentAddExpr,
Add,
Multiply,
Negative,
ValueUnknown,
CanNotCompute
};
using ChildContainerType = std::vector<SENode*>;
explicit SENode(opt::ScalarEvolutionAnalysis* parent_analysis)
: parent_analysis_(parent_analysis), unique_id_(++NumberOfNodes) {}
virtual SENodeType GetType() const = 0;
virtual ~SENode() {}
virtual inline void AddChild(SENode* child) {
// If this is a constant node, assert.
if (AsSEConstantNode()) {
assert(false && "Trying to add a child node to a constant!");
}
// Find the first point in the vector where |child| is greater than the node
// currently in the vector.
auto find_first_less_than = [child](const SENode* node) {
return child->unique_id_ <= node->unique_id_;
};
auto position = std::find_if_not(children_.begin(), children_.end(),
find_first_less_than);
// Children are sorted so the hashing and equality operator will be the same
// for a node with the same children. X+Y should be the same as Y+X.
children_.insert(position, child);
}
// Get the type as an std::string. This is used to represent the node in the
// dot output and is used to hash the type as well.
std::string AsString() const;
// Dump the SENode and its immediate children, if |recurse| is true then it
// will recurse through all children to print the DAG starting from this node
// as a root.
void DumpDot(std::ostream& out, bool recurse = false) const;
// Checks if two nodes are the same by hashing them.
bool operator==(const SENode& other) const;
// Checks if two nodes are not the same by comparing the hashes.
bool operator!=(const SENode& other) const;
// Return the child node at |index|.
inline SENode* GetChild(size_t index) { return children_[index]; }
inline const SENode* GetChild(size_t index) const { return children_[index]; }
// Iterator to iterate over the child nodes.
using iterator = ChildContainerType::iterator;
using const_iterator = ChildContainerType::const_iterator;
// Iterate over immediate child nodes.
iterator begin() { return children_.begin(); }
iterator end() { return children_.end(); }
// Constant overloads for iterating over immediate child nodes.
const_iterator begin() const { return children_.cbegin(); }
const_iterator end() const { return children_.cend(); }
const_iterator cbegin() { return children_.cbegin(); }
const_iterator cend() { return children_.cend(); }
// Iterator to iterate over the entire DAG. Even though we are using the tree
// iterator it should still be safe to iterate over. However, nodes with
// multiple parents will be visited multiple times, unlike in a tree.
using dag_iterator = TreeDFIterator<SENode>;
using const_dag_iterator = TreeDFIterator<const SENode>;
// Iterate over all child nodes in the graph.
dag_iterator graph_begin() { return dag_iterator(this); }
dag_iterator graph_end() { return dag_iterator(); }
const_dag_iterator graph_begin() const { return graph_cbegin(); }
const_dag_iterator graph_end() const { return graph_cend(); }
const_dag_iterator graph_cbegin() const { return const_dag_iterator(this); }
const_dag_iterator graph_cend() const { return const_dag_iterator(); }
// Return the vector of immediate children.
const ChildContainerType& GetChildren() const { return children_; }
ChildContainerType& GetChildren() { return children_; }
// Return true if this node is a cant compute node.
bool IsCantCompute() const { return GetType() == CanNotCompute; }
// Implements a casting method for each type.
#define DeclareCastMethod(target) \
virtual target* As##target() { return nullptr; } \
virtual const target* As##target() const { return nullptr; }
DeclareCastMethod(SEConstantNode);
DeclareCastMethod(SERecurrentNode);
DeclareCastMethod(SEAddNode);
DeclareCastMethod(SEMultiplyNode);
DeclareCastMethod(SENegative);
DeclareCastMethod(SEValueUnknown);
DeclareCastMethod(SECantCompute);
#undef DeclareCastMethod
// Get the analysis which has this node in its cache.
inline opt::ScalarEvolutionAnalysis* GetParentAnalysis() const {
return parent_analysis_;
}
protected:
ChildContainerType children_;
opt::ScalarEvolutionAnalysis* parent_analysis_;
// The unique id of this node, assigned on creation by incrementing the static
// node count.
uint32_t unique_id_;
// The number of nodes created.
static uint32_t NumberOfNodes;
};
// Function object to handle the hashing of SENodes. Hashing algorithm hashes
// the type (as a string), the literal value of any constants, and the child
// pointers which are assumed to be unique.
struct SENodeHash {
size_t operator()(const std::unique_ptr<SENode>& node) const;
size_t operator()(const SENode* node) const;
};
// A node representing a constant integer.
class SEConstantNode : public SENode {
public:
SEConstantNode(opt::ScalarEvolutionAnalysis* parent_analysis, int64_t value)
: SENode(parent_analysis), literal_value_(value) {}
SENodeType GetType() const final { return Constant; }
int64_t FoldToSingleValue() const { return literal_value_; }
SEConstantNode* AsSEConstantNode() override { return this; }
const SEConstantNode* AsSEConstantNode() const override { return this; }
inline void AddChild(SENode*) final {
assert(false && "Attempting to add a child to a constant node!");
}
protected:
int64_t literal_value_;
};
// A node representing a recurrent expression in the code. A recurrent
// expression is an expression whose value can be expressed as a linear
// expression of the loop iterations. Such as an induction variable. The actual
// value of a recurrent expression is coefficent_ * iteration + offset_, hence
// an induction variable i=0, i++ becomes a recurrent expression with an offset
// of zero and a coefficient of one.
class SERecurrentNode : public SENode {
public:
SERecurrentNode(opt::ScalarEvolutionAnalysis* parent_analysis,
const ir::Loop* loop)
: SENode(parent_analysis), loop_(loop) {}
SENodeType GetType() const final { return RecurrentAddExpr; }
inline void AddCoefficient(SENode* child) {
coefficient_ = child;
SENode::AddChild(child);
}
inline void AddOffset(SENode* child) {
offset_ = child;
SENode::AddChild(child);
}
inline const SENode* GetCoefficient() const { return coefficient_; }
inline SENode* GetCoefficient() { return coefficient_; }
inline const SENode* GetOffset() const { return offset_; }
inline SENode* GetOffset() { return offset_; }
// Return the loop which this recurrent expression is recurring within.
const ir::Loop* GetLoop() const { return loop_; }
SERecurrentNode* AsSERecurrentNode() override { return this; }
const SERecurrentNode* AsSERecurrentNode() const override { return this; }
private:
SENode* coefficient_;
SENode* offset_;
const ir::Loop* loop_;
};
// A node representing an addition operation between child nodes.
class SEAddNode : public SENode {
public:
explicit SEAddNode(opt::ScalarEvolutionAnalysis* parent_analysis)
: SENode(parent_analysis) {}
SENodeType GetType() const final { return Add; }
SEAddNode* AsSEAddNode() override { return this; }
const SEAddNode* AsSEAddNode() const override { return this; }
};
// A node representing a multiply operation between child nodes.
class SEMultiplyNode : public SENode {
public:
explicit SEMultiplyNode(opt::ScalarEvolutionAnalysis* parent_analysis)
: SENode(parent_analysis) {}
SENodeType GetType() const final { return Multiply; }
SEMultiplyNode* AsSEMultiplyNode() override { return this; }
const SEMultiplyNode* AsSEMultiplyNode() const override { return this; }
};
// A node representing a unary negative operation.
class SENegative : public SENode {
public:
explicit SENegative(opt::ScalarEvolutionAnalysis* parent_analysis)
: SENode(parent_analysis) {}
SENodeType GetType() const final { return Negative; }
SENegative* AsSENegative() override { return this; }
const SENegative* AsSENegative() const override { return this; }
};
// A node representing a value which we do not know the value of, such as a load
// instruction.
class SEValueUnknown : public SENode {
public:
// SEValueUnknowns must come from an instruction |unique_id| is the unique id
// of that instruction. This is so we cancompare value unknowns and have a
// unique value unknown for each instruction.
SEValueUnknown(opt::ScalarEvolutionAnalysis* parent_analysis,
uint32_t result_id)
: SENode(parent_analysis), result_id_(result_id) {}
SENodeType GetType() const final { return ValueUnknown; }
SEValueUnknown* AsSEValueUnknown() override { return this; }
const SEValueUnknown* AsSEValueUnknown() const override { return this; }
inline uint32_t ResultId() const { return result_id_; }
private:
uint32_t result_id_;
};
// A node which we cannot reason about at all.
class SECantCompute : public SENode {
public:
explicit SECantCompute(opt::ScalarEvolutionAnalysis* parent_analysis)
: SENode(parent_analysis) {}
SENodeType GetType() const final { return CanNotCompute; }
SECantCompute* AsSECantCompute() override { return this; }
const SECantCompute* AsSECantCompute() const override { return this; }
};
} // namespace opt
} // namespace spvtools
#endif // SOURCE_OPT_SCALAR_ANALYSIS_NODES_H_

View File

@ -0,0 +1,539 @@
// Copyright (c) 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "opt/scalar_analysis.h"
#include <functional>
#include <map>
#include <memory>
#include <set>
#include <unordered_set>
#include <utility>
#include <vector>
// Simplifies scalar analysis DAGs.
//
// 1. Given a node passed to SimplifyExpression we first simplify the graph by
// calling SimplifyPolynomial. This groups like nodes following basic arithmetic
// rules, so multiple adds of the same load instruction could be grouped into a
// single multiply of that instruction. SimplifyPolynomial will traverse the DAG
// and build up an accumulator buffer for each class of instruction it finds.
// For example take the loop:
// for (i=0, i<N; i++) { i+B+23+4+B+C; }
// In this example the expression "i+B+23+4+B+C" has four classes of
// instruction, induction variable i, the two value unknowns B and C, and the
// constants. The accumulator buffer is then used to rebuild the graph using
// the accumulation of each type. This example would then be folded into
// i+2*B+C+27.
//
// This new graph contains a single add node (or if only one type found then
// just that node) with each of the like terms (or multiplication node) as a
// child.
//
// 2. FoldRecurrentAddExpressions is then called on this new DAG. This will take
// RecurrentAddExpressions which are with respect to the same loop and fold them
// into a single new RecurrentAddExpression with respect to that same loop. An
// expression can have multiple RecurrentAddExpression's with respect to
// different loops in the case of nested loops. These expressions cannot be
// folded further. For example:
//
// for (i=0; i<N;i++) for(j=0,k=1; j<N;++j,++k)
//
// The 'j' and 'k' are RecurrentAddExpression with respect to the second loop
// and 'i' to the first. If 'j' and 'k' are used in an expression together then
// they will be folded into a new RecurrentAddExpression with respect to the
// second loop in that expression.
//
//
// 3. If the DAG now only contains a single RecurrentAddExpression we can now
// perform a final optimization SimplifyRecurrentAddExpression. This will
// transform the entire DAG into a RecurrentAddExpression. Additions to the
// RecurrentAddExpression are added to the offset field and multiplications to
// the coefficient.
//
namespace spvtools {
namespace opt {
// Implementation of the functions which are used to simplify the graph. Graphs
// of unknowns, multiplies, additions, and constants can be turned into a linear
// add node with each term as a child. For instance a large graph built from, X
// + X*2 + Y - Y*3 + 4 - 1, would become a single add expression with the
// children X*3, -Y*2, and the constant 3. Graphs containing a recurrent
// expression will be simplified to represent the entire graph around a single
// recurrent expression. So for an induction variable (i=0, i++) if you add 1 to
// i in an expression we can rewrite the graph of that expression to be a single
// recurrent expression of (i=1,i++).
class SENodeSimplifyImpl {
public:
SENodeSimplifyImpl(ScalarEvolutionAnalysis* analysis,
SENode* node_to_simplify)
: analysis_(*analysis),
node_(node_to_simplify),
constant_accumulator_(0) {}
// Return the result of the simplification.
SENode* Simplify();
private:
// Recursively descend through the graph to build up the accumulator objects
// which are used to flatten the graph. |child| is the node currenty being
// traversed and the |negation| flag is used to signify that this operation
// was preceded by a unary negative operation and as such the result should be
// negated.
void GatherAccumulatorsFromChildNodes(SENode* new_node, SENode* child,
bool negation);
// Given a |multiply| node add to the accumulators for the term type within
// the |multiply| expression. Will return true if the accumulators could be
// calculated successfully. If the |multiply| is in any form other than
// unknown*constant then we return false. |negation| signifies that the
// operation was preceded by a unary negative.
bool AccumulatorsFromMultiply(SENode* multiply, bool negation);
SERecurrentNode* UpdateCoefficient(SERecurrentNode* recurrent,
int64_t coefficient_update) const;
// If the graph contains a recurrent expression, ie, an expression with the
// loop iterations as a term in the expression, then the whole expression
// can be rewritten to be a recurrent expression.
SENode* SimplifyRecurrentAddExpression(SERecurrentNode* node);
// Simplify the whole graph by linking like terms together in a single flat
// add node. So X*2 + Y -Y + 3 +6 would become X*2 + 9. Where X and Y are a
// ValueUnknown node (i.e, a load) or a recurrent expression.
SENode* SimplifyPolynomial();
// Each recurrent expression is an expression with respect to a specific loop.
// If we have two different recurrent terms with respect to the same loop in a
// single expression then we can fold those terms into a single new term.
// For instance:
//
// induction i = 0, i++
// temp = i*10
// array[i+temp]
//
// We can fold the i + temp into a single expression. Rec(0,1) + Rec(0,10) can
// become Rec(0,11).
SENode* FoldRecurrentAddExpressions(SENode*);
// We can eliminate recurrent expressions which have a coefficient of zero by
// replacing them with their offset value. We are able to do this because a
// recurrent expression represents the equation coefficient*iterations +
// offset.
SENode* EliminateZeroCoefficientRecurrents(SENode* node);
// A reference the the analysis which requested the simplification.
ScalarEvolutionAnalysis& analysis_;
// The node being simplified.
SENode* node_;
// An accumulator of the net result of all the constant operations performed
// in a graph.
int64_t constant_accumulator_;
// An accumulator for each of the non constant terms in the graph.
std::map<SENode*, int64_t> accumulators_;
};
// From a |multiply| build up the accumulator objects.
bool SENodeSimplifyImpl::AccumulatorsFromMultiply(SENode* multiply,
bool negation) {
if (multiply->GetChildren().size() != 2 ||
multiply->GetType() != SENode::Multiply)
return false;
SENode* operand_1 = multiply->GetChild(0);
SENode* operand_2 = multiply->GetChild(1);
SENode* value_unknown = nullptr;
SENode* constant = nullptr;
// Work out which operand is the unknown value.
if (operand_1->GetType() == SENode::ValueUnknown ||
operand_1->GetType() == SENode::RecurrentAddExpr)
value_unknown = operand_1;
else if (operand_2->GetType() == SENode::ValueUnknown ||
operand_2->GetType() == SENode::RecurrentAddExpr)
value_unknown = operand_2;
// Work out which operand is the constant coefficient.
if (operand_1->GetType() == SENode::Constant)
constant = operand_1;
else if (operand_2->GetType() == SENode::Constant)
constant = operand_2;
// If the expression is not a variable multiplied by a constant coefficient,
// exit out.
if (!(value_unknown && constant)) {
return false;
}
int64_t sign = negation ? -1 : 1;
auto iterator = accumulators_.find(value_unknown);
int64_t new_value = constant->AsSEConstantNode()->FoldToSingleValue() * sign;
// Add the result of the multiplication to the accumulators.
if (iterator != accumulators_.end()) {
(*iterator).second += new_value;
} else {
accumulators_.insert({value_unknown, new_value});
}
return true;
}
SENode* SENodeSimplifyImpl::Simplify() {
// We only handle graphs with an addition, multiplication, or negation, at the
// root.
if (node_->GetType() != SENode::Add && node_->GetType() != SENode::Multiply &&
node_->GetType() != SENode::Negative)
return node_;
SENode* simplified_polynomial = SimplifyPolynomial();
SERecurrentNode* recurrent_expr = nullptr;
node_ = simplified_polynomial;
// Fold recurrent expressions which are with respect to the same loop into a
// single recurrent expression.
simplified_polynomial = FoldRecurrentAddExpressions(simplified_polynomial);
simplified_polynomial =
EliminateZeroCoefficientRecurrents(simplified_polynomial);
// Traverse the immediate children of the new node to find the recurrent
// expression. If there is more than one there is nothing further we can do.
for (SENode* child : simplified_polynomial->GetChildren()) {
if (child->GetType() == SENode::RecurrentAddExpr) {
recurrent_expr = child->AsSERecurrentNode();
}
}
// We need to count the number of unique recurrent expressions in the DAG to
// ensure there is only one.
for (auto child_iterator = simplified_polynomial->graph_begin();
child_iterator != simplified_polynomial->graph_end(); ++child_iterator) {
if (child_iterator->GetType() == SENode::RecurrentAddExpr &&
recurrent_expr != child_iterator->AsSERecurrentNode()) {
return simplified_polynomial;
}
}
if (recurrent_expr) {
return SimplifyRecurrentAddExpression(recurrent_expr);
}
return simplified_polynomial;
}
// Traverse the graph to build up the accumulator objects.
void SENodeSimplifyImpl::GatherAccumulatorsFromChildNodes(SENode* new_node,
SENode* child,
bool negation) {
int32_t sign = negation ? -1 : 1;
if (child->GetType() == SENode::Constant) {
// Collect all the constants and add them together.
constant_accumulator_ +=
child->AsSEConstantNode()->FoldToSingleValue() * sign;
} else if (child->GetType() == SENode::ValueUnknown ||
child->GetType() == SENode::RecurrentAddExpr) {
// To rebuild the graph of X+X+X*2 into 4*X we count the occurrences of X
// and create a new node of count*X after. X can either be a ValueUnknown or
// a RecurrentAddExpr. The count for each X is stored in the accumulators_
// map.
auto iterator = accumulators_.find(child);
// If we've encountered this term before add to the accumulator for it.
if (iterator == accumulators_.end())
accumulators_.insert({child, sign});
else
iterator->second += sign;
} else if (child->GetType() == SENode::Multiply) {
if (!AccumulatorsFromMultiply(child, negation)) {
new_node->AddChild(child);
}
} else if (child->GetType() == SENode::Add) {
for (SENode* next_child : *child) {
GatherAccumulatorsFromChildNodes(new_node, next_child, negation);
}
} else if (child->GetType() == SENode::Negative) {
SENode* negated_node = child->GetChild(0);
GatherAccumulatorsFromChildNodes(new_node, negated_node, !negation);
} else {
// If we can't work out how to fold the expression just add it back into
// the graph.
new_node->AddChild(child);
}
}
SERecurrentNode* SENodeSimplifyImpl::UpdateCoefficient(
SERecurrentNode* recurrent, int64_t coefficient_update) const {
std::unique_ptr<SERecurrentNode> new_recurrent_node{new SERecurrentNode(
recurrent->GetParentAnalysis(), recurrent->GetLoop())};
SENode* new_coefficient = analysis_.CreateMultiplyNode(
recurrent->GetCoefficient(),
analysis_.CreateConstant(coefficient_update));
// See if the node can be simplified.
SENode* simplified = analysis_.SimplifyExpression(new_coefficient);
if (simplified->GetType() != SENode::CanNotCompute)
new_coefficient = simplified;
if (coefficient_update < 0) {
new_recurrent_node->AddOffset(
analysis_.CreateNegation(recurrent->GetOffset()));
} else {
new_recurrent_node->AddOffset(recurrent->GetOffset());
}
new_recurrent_node->AddCoefficient(new_coefficient);
return analysis_.GetCachedOrAdd(std::move(new_recurrent_node))
->AsSERecurrentNode();
}
// Simplify all the terms in the polynomial function.
SENode* SENodeSimplifyImpl::SimplifyPolynomial() {
std::unique_ptr<SENode> new_add{new SEAddNode(node_->GetParentAnalysis())};
// Traverse the graph and gather the accumulators from it.
GatherAccumulatorsFromChildNodes(new_add.get(), node_, false);
// Fold all the constants into a single constant node.
if (constant_accumulator_ != 0) {
new_add->AddChild(analysis_.CreateConstant(constant_accumulator_));
}
for (auto& pair : accumulators_) {
SENode* term = pair.first;
int64_t count = pair.second;
// We can eliminate the term completely.
if (count == 0) continue;
if (count == 1) {
new_add->AddChild(term);
} else if (count == -1 && term->GetType() != SENode::RecurrentAddExpr) {
// If the count is -1 we can just add a negative version of that node,
// unless it is a recurrent expression as we would rather the negative
// goes on the recurrent expressions children. This makes it easier to
// work with in other places.
new_add->AddChild(analysis_.CreateNegation(term));
} else {
// Output value unknown terms as count*term and output recurrent
// expression terms as rec(offset, coefficient + count) offset and
// coefficient are the same as in the original expression.
if (term->GetType() == SENode::ValueUnknown) {
SENode* count_as_constant = analysis_.CreateConstant(count);
new_add->AddChild(
analysis_.CreateMultiplyNode(count_as_constant, term));
} else {
assert(term->GetType() == SENode::RecurrentAddExpr &&
"We only handle value unknowns or recurrent expressions");
// Create a new recurrent expression by adding the count to the
// coefficient of the old one.
new_add->AddChild(UpdateCoefficient(term->AsSERecurrentNode(), count));
}
}
}
// If there is only one term in the addition left just return that term.
if (new_add->GetChildren().size() == 1) {
return new_add->GetChild(0);
}
// If there are no terms left in the addition just return 0.
if (new_add->GetChildren().size() == 0) {
return analysis_.CreateConstant(0);
}
return analysis_.GetCachedOrAdd(std::move(new_add));
}
SENode* SENodeSimplifyImpl::FoldRecurrentAddExpressions(SENode* root) {
std::unique_ptr<SEAddNode> new_node{new SEAddNode(&analysis_)};
// A mapping of loops to the list of recurrent expressions which are with
// respect to those loops.
std::map<const ir::Loop*, std::vector<std::pair<SERecurrentNode*, bool>>>
loops_to_recurrent{};
bool has_multiple_same_loop_recurrent_terms = false;
for (SENode* child : *root) {
bool negation = false;
if (child->GetType() == SENode::Negative) {
child = child->GetChild(0);
negation = true;
}
if (child->GetType() == SENode::RecurrentAddExpr) {
const ir::Loop* loop = child->AsSERecurrentNode()->GetLoop();
SERecurrentNode* rec = child->AsSERecurrentNode();
if (loops_to_recurrent.find(loop) == loops_to_recurrent.end()) {
loops_to_recurrent[loop] = {std::make_pair(rec, negation)};
} else {
loops_to_recurrent[loop].push_back(std::make_pair(rec, negation));
has_multiple_same_loop_recurrent_terms = true;
}
} else {
new_node->AddChild(child);
}
}
if (!has_multiple_same_loop_recurrent_terms) return root;
for (auto pair : loops_to_recurrent) {
std::vector<std::pair<SERecurrentNode*, bool>>& recurrent_expressions =
pair.second;
const ir::Loop* loop = pair.first;
std::unique_ptr<SENode> new_coefficient{new SEAddNode(&analysis_)};
std::unique_ptr<SENode> new_offset{new SEAddNode(&analysis_)};
for (auto node_pair : recurrent_expressions) {
SERecurrentNode* node = node_pair.first;
bool negative = node_pair.second;
if (!negative) {
new_coefficient->AddChild(node->GetCoefficient());
new_offset->AddChild(node->GetOffset());
} else {
new_coefficient->AddChild(
analysis_.CreateNegation(node->GetCoefficient()));
new_offset->AddChild(analysis_.CreateNegation(node->GetOffset()));
}
}
std::unique_ptr<SERecurrentNode> new_recurrent{
new SERecurrentNode(&analysis_, loop)};
SENode* new_coefficient_simplified =
analysis_.SimplifyExpression(new_coefficient.get());
SENode* new_offset_simplified =
analysis_.SimplifyExpression(new_offset.get());
if (new_coefficient_simplified->GetType() == SENode::Constant &&
new_coefficient_simplified->AsSEConstantNode()->FoldToSingleValue() ==
0) {
return new_offset_simplified;
}
new_recurrent->AddCoefficient(new_coefficient_simplified);
new_recurrent->AddOffset(new_offset_simplified);
new_node->AddChild(analysis_.GetCachedOrAdd(std::move(new_recurrent)));
}
// If we only have one child in the add just return that.
if (new_node->GetChildren().size() == 1) {
return new_node->GetChild(0);
}
return analysis_.GetCachedOrAdd(std::move(new_node));
}
SENode* SENodeSimplifyImpl::EliminateZeroCoefficientRecurrents(SENode* node) {
if (node->GetType() != SENode::Add) return node;
bool has_change = false;
std::vector<SENode*> new_children{};
for (SENode* child : *node) {
if (child->GetType() == SENode::RecurrentAddExpr) {
SENode* coefficient = child->AsSERecurrentNode()->GetCoefficient();
// If coefficient is zero then we can eliminate the recurrent expression
// entirely and just return the offset as the recurrent expression is
// representing the equation coefficient*iterations + offset.
if (coefficient->GetType() == SENode::Constant &&
coefficient->AsSEConstantNode()->FoldToSingleValue() == 0) {
new_children.push_back(child->AsSERecurrentNode()->GetOffset());
has_change = true;
} else {
new_children.push_back(child);
}
} else {
new_children.push_back(child);
}
}
if (!has_change) return node;
std::unique_ptr<SENode> new_add{new SEAddNode(node_->GetParentAnalysis())};
for (SENode* child : new_children) {
new_add->AddChild(child);
}
return analysis_.GetCachedOrAdd(std::move(new_add));
}
SENode* SENodeSimplifyImpl::SimplifyRecurrentAddExpression(
SERecurrentNode* recurrent_expr) {
const std::vector<SENode*>& children = node_->GetChildren();
std::unique_ptr<SERecurrentNode> recurrent_node{new SERecurrentNode(
recurrent_expr->GetParentAnalysis(), recurrent_expr->GetLoop())};
// Create and simplify the new offset node.
std::unique_ptr<SENode> new_offset{
new SEAddNode(recurrent_expr->GetParentAnalysis())};
new_offset->AddChild(recurrent_expr->GetOffset());
for (SENode* child : children) {
if (child->GetType() != SENode::RecurrentAddExpr) {
new_offset->AddChild(child);
}
}
// Simplify the new offset.
SENode* simplified_child = analysis_.SimplifyExpression(new_offset.get());
// If the child can be simplified, add the simplified form otherwise, add it
// via the usual caching mechanism.
if (simplified_child->GetType() != SENode::CanNotCompute) {
recurrent_node->AddOffset(simplified_child);
} else {
recurrent_expr->AddOffset(analysis_.GetCachedOrAdd(std::move(new_offset)));
}
recurrent_node->AddCoefficient(recurrent_expr->GetCoefficient());
return analysis_.GetCachedOrAdd(std::move(recurrent_node));
}
/*
* Scalar Analysis simplification public methods.
*/
SENode* ScalarEvolutionAnalysis::SimplifyExpression(SENode* node) {
SENodeSimplifyImpl impl{this, node};
return impl.Simplify();
}
} // namespace opt
} // namespace spvtools

View File

@ -301,8 +301,13 @@ add_spvtools_unittest(TARGET simplification
SRCS simplification_test.cpp pass_utils.cpp
LIBS SPIRV-Tools-opt
)
add_spvtools_unittest(TARGET copy_prop_array
SRCS copy_prop_array_test.cpp pass_utils.cpp
LIBS SPIRV-Tools-opt
)
add_spvtools_unittest(TARGET scalar_analysis
SRCS scalar_analysis.cpp pass_utils.cpp
LIBS SPIRV-Tools-opt
)

1228
test/opt/scalar_analysis.cpp Normal file

File diff suppressed because it is too large Load Diff