SPIRV-Tools/source/opt/loop_peeling.h
Victor Lomuller 10e5d7cf13 Add a loop peeling pass.
For each loop in a function, the pass walks the loops from inner to outer most loop
and tries to peel loop for which a certain amount of iteration can be done before or after the loop.

To limit code growth, peeling will not happen if the growth in code size goes above a configurable threshold.
2018-04-11 15:41:29 +01:00

338 lines
13 KiB
C++

// Copyright (c) 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SOURCE_OPT_LOOP_PEELING_H_
#define SOURCE_OPT_LOOP_PEELING_H_
#include <algorithm>
#include <limits>
#include <memory>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "opt/ir_context.h"
#include "opt/loop_descriptor.h"
#include "opt/loop_utils.h"
#include "opt/pass.h"
#include "opt/scalar_analysis.h"
namespace spvtools {
namespace opt {
// Utility class to perform the peeling of a given loop.
// The loop peeling transformation make a certain amount of a loop iterations to
// be executed either before (peel before) or after (peel after) the transformed
// loop.
//
// For peeling cases the transformation does the following steps:
// - It clones the loop and inserts the cloned loop before the original loop;
// - It connects all iterating values of the cloned loop with the
// corresponding original loop values so that the second loop starts with
// the appropriate values.
// - It inserts a new induction variable "i" is inserted into the cloned that
// starts with the value 0 and increment by step of one.
//
// The last step is specific to each case:
// - Peel before: the transformation is to peel the "N" first iterations.
// The exit condition of the cloned loop is changed so that the loop
// exits when "i < N" becomes false. The original loop is then protected to
// only execute if there is any iteration left to do.
// - Peel after: the transformation is to peel the "N" last iterations,
// then the exit condition of the cloned loop is changed so that the loop
// exits when "i + N < max_iteration" becomes false, where "max_iteration"
// is the upper bound of the loop. The cloned loop is then protected to
// only execute if there is any iteration left to do no covered by the
// second.
//
// To be peelable:
// - The loop must be in LCSSA form;
// - The loop must not contain any breaks;
// - The loop must not have any ambiguous iterators updates (see
// "CanPeelLoop").
// The method "CanPeelLoop" checks that those constrained are met.
class LoopPeeling {
public:
// LoopPeeling constructor.
// |loop| is the loop to peel.
// |loop_iteration_count| is the instruction holding the |loop| iteration
// count, must be invariant for |loop| and must be of an int 32 type (signed
// or unsigned).
// |canonical_induction_variable| is an induction variable that can be used to
// count the number of iterations, must be of the same type as
// |loop_iteration_count| and start at 0 and increase by step of one at each
// iteration. The value nullptr is interpreted as no suitable variable exists
// and one will be created.
LoopPeeling(ir::Loop* loop, ir::Instruction* loop_iteration_count,
ir::Instruction* canonical_induction_variable = nullptr)
: context_(loop->GetContext()),
loop_utils_(loop->GetContext(), loop),
loop_(loop),
loop_iteration_count_(!loop->IsInsideLoop(loop_iteration_count)
? loop_iteration_count
: nullptr),
int_type_(nullptr),
original_loop_canonical_induction_variable_(
canonical_induction_variable),
canonical_induction_variable_(nullptr) {
if (loop_iteration_count_) {
int_type_ = context_->get_type_mgr()
->GetType(loop_iteration_count_->type_id())
->AsInteger();
if (canonical_induction_variable_) {
assert(canonical_induction_variable_->type_id() ==
loop_iteration_count_->type_id() &&
"loop_iteration_count and canonical_induction_variable do not "
"have the same type");
}
}
GetIteratingExitValues();
}
// Returns true if the loop can be peeled.
// To be peelable, all operation involved in the update of the loop iterators
// must not dominates the exit condition. This restriction is a work around to
// not miss compile code like:
//
// for (int i = 0; i + 1 < N; i++) {}
// for (int i = 0; ++i < N; i++) {}
//
// The increment will happen before the test on the exit condition leading to
// very look-a-like code.
//
// This restriction will not apply if a loop rotate is applied before (i.e.
// becomes a do-while loop).
bool CanPeelLoop() const {
ir::CFG& cfg = *context_->cfg();
if (!loop_iteration_count_) {
return false;
}
if (!int_type_) {
return false;
}
if (int_type_->width() != 32) {
return false;
}
if (!loop_->IsLCSSA()) {
return false;
}
if (!loop_->GetMergeBlock()) {
return false;
}
if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) {
return false;
}
if (!IsConditionCheckSideEffectFree()) {
return false;
}
return !std::any_of(exit_value_.cbegin(), exit_value_.cend(),
[](std::pair<uint32_t, ir::Instruction*> it) {
return it.second == nullptr;
});
}
// Moves the execution of the |factor| first iterations of the loop into a
// dedicated loop.
void PeelBefore(uint32_t factor);
// Moves the execution of the |factor| last iterations of the loop into a
// dedicated loop.
void PeelAfter(uint32_t factor);
// Returns the cloned loop.
ir::Loop* GetClonedLoop() { return cloned_loop_; }
// Returns the original loop.
ir::Loop* GetOriginalLoop() { return loop_; }
private:
ir::IRContext* context_;
LoopUtils loop_utils_;
// The original loop.
ir::Loop* loop_;
// The initial |loop_| upper bound.
ir::Instruction* loop_iteration_count_;
// The int type to use for the canonical_induction_variable_.
analysis::Integer* int_type_;
// The cloned loop.
ir::Loop* cloned_loop_;
// This is set to true when the exit and back-edge branch instruction is the
// same.
bool do_while_form_;
// The canonical induction variable from the original loop if it exists.
ir::Instruction* original_loop_canonical_induction_variable_;
// The canonical induction variable of the cloned loop. The induction variable
// is initialized to 0 and incremented by step of 1.
ir::Instruction* canonical_induction_variable_;
// Map between loop iterators and exit values. Loop iterators
std::unordered_map<uint32_t, ir::Instruction*> exit_value_;
// Duplicate |loop_| and place the new loop before the cloned loop. Iterating
// values from the cloned loop are then connected to the original loop as
// initializer.
void DuplicateAndConnectLoop(LoopUtils::LoopCloningResult* clone_results);
// Insert the canonical induction variable into the first loop as a simplified
// counter.
void InsertCanonicalInductionVariable(
LoopUtils::LoopCloningResult* clone_results);
// Fixes the exit condition of the before loop. The function calls
// |condition_builder| to get the condition to use in the conditional branch
// of the loop exit. The loop will be exited if the condition evaluate to
// true. |condition_builder| takes an ir::Instruction* that represent the
// insertion point.
void FixExitCondition(
const std::function<uint32_t(ir::Instruction*)>& condition_builder);
// Gathers all operations involved in the update of |iterator| into
// |operations|.
void GetIteratorUpdateOperations(
const ir::Loop* loop, ir::Instruction* iterator,
std::unordered_set<ir::Instruction*>* operations);
// Gathers exiting iterator values. The function builds a map between each
// iterating value in the loop (a phi instruction in the loop header) and its
// SSA value when it exit the loop. If no exit value can be accurately found,
// it is map to nullptr (see comment on CanPeelLoop).
void GetIteratingExitValues();
// Returns true if a for-loop has no instruction with effects before the
// condition check.
bool IsConditionCheckSideEffectFree() const;
// Creates a new basic block and insert it between |bb| and the predecessor of
// |bb|.
ir::BasicBlock* CreateBlockBefore(ir::BasicBlock* bb);
// Inserts code to only execute |loop| only if the given |condition| is true.
// |if_merge| is a suitable basic block to be used by the if condition as
// merge block.
// The function returns the if block protecting the loop.
ir::BasicBlock* ProtectLoop(ir::Loop* loop, ir::Instruction* condition,
ir::BasicBlock* if_merge);
};
// Implements a loop peeling optimization.
// For each loop, the pass will try to peel it if there is conditions that
// are true for the "N" first or last iterations of the loop.
// To avoid code size explosion, too large loops will not be peeled.
class LoopPeelingPass : public Pass {
public:
// Describes the peeling direction.
enum class PeelDirection {
kNone, // Cannot peel
kBefore, // Can peel before
kAfter // Can peel last
};
// Holds some statistics about peeled function.
struct LoopPeelingStats {
std::vector<std::tuple<const ir::Loop*, PeelDirection, uint32_t>>
peeled_loops_;
};
LoopPeelingPass(LoopPeelingStats* stats = nullptr) : stats_(stats) {}
// Sets the loop peeling growth threshold. If the code size increase is above
// |code_grow_threshold|, the loop will not be peeled. The code size is
// measured in terms of SPIR-V instructions.
static void SetLoopPeelingThreshold(size_t code_grow_threshold) {
code_grow_threshold_ = code_grow_threshold;
}
// Returns the loop peeling code growth threshold.
static size_t GetLoopPeelingThreshold() { return code_grow_threshold_; }
const char* name() const override { return "loop-peeling"; }
// Processes the given |module|. Returns Status::Failure if errors occur when
// processing. Returns the corresponding Status::Success if processing is
// succesful to indicate whether changes have been made to the modue.
Pass::Status Process(ir::IRContext* context) override;
private:
// Describes the peeling direction.
enum class CmpOperator {
kLT, // less than
kGT, // greater than
kLE, // less than or equal
kGE, // greater than or equal
};
class LoopPeelingInfo {
public:
using Direction = std::pair<PeelDirection, uint32_t>;
LoopPeelingInfo(ir::Loop* loop, size_t loop_max_iterations,
opt::ScalarEvolutionAnalysis* scev_analysis)
: context_(loop->GetContext()),
loop_(loop),
scev_analysis_(scev_analysis),
loop_max_iterations_(loop_max_iterations) {}
// Returns by how much and to which direction a loop should be peeled to
// make the conditional branch of the basic block |bb| an unconditional
// branch. If |bb|'s terminator is not a conditional branch or the condition
// is not workable then it returns PeelDirection::kNone and a 0 factor.
Direction GetPeelingInfo(ir::BasicBlock* bb) const;
private:
// Returns the id of the loop invariant operand of the conditional
// expression |condition|. It returns if no operand is invariant.
uint32_t GetFirstLoopInvariantOperand(ir::Instruction* condition) const;
// Returns the id of the non loop invariant operand of the conditional
// expression |condition|. It returns if all operands are invariant.
uint32_t GetFirstNonLoopInvariantOperand(ir::Instruction* condition) const;
// Returns the value of |rec| at the first loop iteration.
SExpression GetValueAtFirstIteration(SERecurrentNode* rec) const;
// Returns the value of |rec| at the given |iteration|.
SExpression GetValueAtIteration(SERecurrentNode* rec,
int64_t iteration) const;
// Returns the value of |rec| at the last loop iteration.
SExpression GetValueAtLastIteration(SERecurrentNode* rec) const;
bool EvalOperator(CmpOperator cmp_op, SExpression lhs, SExpression rhs,
bool* result) const;
Direction HandleEquality(SExpression lhs, SExpression rhs) const;
Direction HandleInequality(CmpOperator cmp_op, SExpression lhs,
SERecurrentNode* rhs) const;
static Direction GetNoneDirection() {
return Direction{LoopPeelingPass::PeelDirection::kNone, 0};
}
ir::IRContext* context_;
ir::Loop* loop_;
opt::ScalarEvolutionAnalysis* scev_analysis_;
size_t loop_max_iterations_;
};
// Peel profitable loops in |f|.
bool ProcessFunction(ir::Function* f);
// Peel |loop| if profitable.
std::pair<bool, ir::Loop*> ProcessLoop(ir::Loop* loop,
CodeMetrics* loop_size);
static size_t code_grow_threshold_;
LoopPeelingStats* stats_;
};
} // namespace opt
} // namespace spvtools
#endif // SOURCE_OPT_LOOP_PEELING_H_