// Copyright (c) 2018 Google LLC. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef SOURCE_OPT_LOOP_PEELING_H_ #define SOURCE_OPT_LOOP_PEELING_H_ #include #include #include #include #include #include #include #include "opt/ir_context.h" #include "opt/loop_descriptor.h" #include "opt/loop_utils.h" #include "opt/pass.h" #include "opt/scalar_analysis.h" namespace spvtools { namespace opt { // Utility class to perform the peeling of a given loop. // The loop peeling transformation make a certain amount of a loop iterations to // be executed either before (peel before) or after (peel after) the transformed // loop. // // For peeling cases the transformation does the following steps: // - It clones the loop and inserts the cloned loop before the original loop; // - It connects all iterating values of the cloned loop with the // corresponding original loop values so that the second loop starts with // the appropriate values. // - It inserts a new induction variable "i" is inserted into the cloned that // starts with the value 0 and increment by step of one. // // The last step is specific to each case: // - Peel before: the transformation is to peel the "N" first iterations. // The exit condition of the cloned loop is changed so that the loop // exits when "i < N" becomes false. The original loop is then protected to // only execute if there is any iteration left to do. // - Peel after: the transformation is to peel the "N" last iterations, // then the exit condition of the cloned loop is changed so that the loop // exits when "i + N < max_iteration" becomes false, where "max_iteration" // is the upper bound of the loop. The cloned loop is then protected to // only execute if there is any iteration left to do no covered by the // second. // // To be peelable: // - The loop must be in LCSSA form; // - The loop must not contain any breaks; // - The loop must not have any ambiguous iterators updates (see // "CanPeelLoop"). // The method "CanPeelLoop" checks that those constrained are met. class LoopPeeling { public: // LoopPeeling constructor. // |loop| is the loop to peel. // |loop_iteration_count| is the instruction holding the |loop| iteration // count, must be invariant for |loop| and must be of an int 32 type (signed // or unsigned). // |canonical_induction_variable| is an induction variable that can be used to // count the number of iterations, must be of the same type as // |loop_iteration_count| and start at 0 and increase by step of one at each // iteration. The value nullptr is interpreted as no suitable variable exists // and one will be created. LoopPeeling(ir::Loop* loop, ir::Instruction* loop_iteration_count, ir::Instruction* canonical_induction_variable = nullptr) : context_(loop->GetContext()), loop_utils_(loop->GetContext(), loop), loop_(loop), loop_iteration_count_(!loop->IsInsideLoop(loop_iteration_count) ? loop_iteration_count : nullptr), int_type_(nullptr), original_loop_canonical_induction_variable_( canonical_induction_variable), canonical_induction_variable_(nullptr) { if (loop_iteration_count_) { int_type_ = context_->get_type_mgr() ->GetType(loop_iteration_count_->type_id()) ->AsInteger(); if (canonical_induction_variable_) { assert(canonical_induction_variable_->type_id() == loop_iteration_count_->type_id() && "loop_iteration_count and canonical_induction_variable do not " "have the same type"); } } GetIteratingExitValues(); } // Returns true if the loop can be peeled. // To be peelable, all operation involved in the update of the loop iterators // must not dominates the exit condition. This restriction is a work around to // not miss compile code like: // // for (int i = 0; i + 1 < N; i++) {} // for (int i = 0; ++i < N; i++) {} // // The increment will happen before the test on the exit condition leading to // very look-a-like code. // // This restriction will not apply if a loop rotate is applied before (i.e. // becomes a do-while loop). bool CanPeelLoop() const { ir::CFG& cfg = *context_->cfg(); if (!loop_iteration_count_) { return false; } if (!int_type_) { return false; } if (int_type_->width() != 32) { return false; } if (!loop_->IsLCSSA()) { return false; } if (!loop_->GetMergeBlock()) { return false; } if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) { return false; } if (!IsConditionCheckSideEffectFree()) { return false; } return !std::any_of(exit_value_.cbegin(), exit_value_.cend(), [](std::pair it) { return it.second == nullptr; }); } // Moves the execution of the |factor| first iterations of the loop into a // dedicated loop. void PeelBefore(uint32_t factor); // Moves the execution of the |factor| last iterations of the loop into a // dedicated loop. void PeelAfter(uint32_t factor); // Returns the cloned loop. ir::Loop* GetClonedLoop() { return cloned_loop_; } // Returns the original loop. ir::Loop* GetOriginalLoop() { return loop_; } private: ir::IRContext* context_; LoopUtils loop_utils_; // The original loop. ir::Loop* loop_; // The initial |loop_| upper bound. ir::Instruction* loop_iteration_count_; // The int type to use for the canonical_induction_variable_. analysis::Integer* int_type_; // The cloned loop. ir::Loop* cloned_loop_; // This is set to true when the exit and back-edge branch instruction is the // same. bool do_while_form_; // The canonical induction variable from the original loop if it exists. ir::Instruction* original_loop_canonical_induction_variable_; // The canonical induction variable of the cloned loop. The induction variable // is initialized to 0 and incremented by step of 1. ir::Instruction* canonical_induction_variable_; // Map between loop iterators and exit values. Loop iterators std::unordered_map exit_value_; // Duplicate |loop_| and place the new loop before the cloned loop. Iterating // values from the cloned loop are then connected to the original loop as // initializer. void DuplicateAndConnectLoop(LoopUtils::LoopCloningResult* clone_results); // Insert the canonical induction variable into the first loop as a simplified // counter. void InsertCanonicalInductionVariable( LoopUtils::LoopCloningResult* clone_results); // Fixes the exit condition of the before loop. The function calls // |condition_builder| to get the condition to use in the conditional branch // of the loop exit. The loop will be exited if the condition evaluate to // true. |condition_builder| takes an ir::Instruction* that represent the // insertion point. void FixExitCondition( const std::function& condition_builder); // Gathers all operations involved in the update of |iterator| into // |operations|. void GetIteratorUpdateOperations( const ir::Loop* loop, ir::Instruction* iterator, std::unordered_set* operations); // Gathers exiting iterator values. The function builds a map between each // iterating value in the loop (a phi instruction in the loop header) and its // SSA value when it exit the loop. If no exit value can be accurately found, // it is map to nullptr (see comment on CanPeelLoop). void GetIteratingExitValues(); // Returns true if a for-loop has no instruction with effects before the // condition check. bool IsConditionCheckSideEffectFree() const; // Creates a new basic block and insert it between |bb| and the predecessor of // |bb|. ir::BasicBlock* CreateBlockBefore(ir::BasicBlock* bb); // Inserts code to only execute |loop| only if the given |condition| is true. // |if_merge| is a suitable basic block to be used by the if condition as // merge block. // The function returns the if block protecting the loop. ir::BasicBlock* ProtectLoop(ir::Loop* loop, ir::Instruction* condition, ir::BasicBlock* if_merge); }; // Implements a loop peeling optimization. // For each loop, the pass will try to peel it if there is conditions that // are true for the "N" first or last iterations of the loop. // To avoid code size explosion, too large loops will not be peeled. class LoopPeelingPass : public Pass { public: // Describes the peeling direction. enum class PeelDirection { kNone, // Cannot peel kBefore, // Can peel before kAfter // Can peel last }; // Holds some statistics about peeled function. struct LoopPeelingStats { std::vector> peeled_loops_; }; LoopPeelingPass(LoopPeelingStats* stats = nullptr) : stats_(stats) {} // Sets the loop peeling growth threshold. If the code size increase is above // |code_grow_threshold|, the loop will not be peeled. The code size is // measured in terms of SPIR-V instructions. static void SetLoopPeelingThreshold(size_t code_grow_threshold) { code_grow_threshold_ = code_grow_threshold; } // Returns the loop peeling code growth threshold. static size_t GetLoopPeelingThreshold() { return code_grow_threshold_; } const char* name() const override { return "loop-peeling"; } // Processes the given |module|. Returns Status::Failure if errors occur when // processing. Returns the corresponding Status::Success if processing is // succesful to indicate whether changes have been made to the modue. Pass::Status Process(ir::IRContext* context) override; private: // Describes the peeling direction. enum class CmpOperator { kLT, // less than kGT, // greater than kLE, // less than or equal kGE, // greater than or equal }; class LoopPeelingInfo { public: using Direction = std::pair; LoopPeelingInfo(ir::Loop* loop, size_t loop_max_iterations, opt::ScalarEvolutionAnalysis* scev_analysis) : context_(loop->GetContext()), loop_(loop), scev_analysis_(scev_analysis), loop_max_iterations_(loop_max_iterations) {} // Returns by how much and to which direction a loop should be peeled to // make the conditional branch of the basic block |bb| an unconditional // branch. If |bb|'s terminator is not a conditional branch or the condition // is not workable then it returns PeelDirection::kNone and a 0 factor. Direction GetPeelingInfo(ir::BasicBlock* bb) const; private: // Returns the id of the loop invariant operand of the conditional // expression |condition|. It returns if no operand is invariant. uint32_t GetFirstLoopInvariantOperand(ir::Instruction* condition) const; // Returns the id of the non loop invariant operand of the conditional // expression |condition|. It returns if all operands are invariant. uint32_t GetFirstNonLoopInvariantOperand(ir::Instruction* condition) const; // Returns the value of |rec| at the first loop iteration. SExpression GetValueAtFirstIteration(SERecurrentNode* rec) const; // Returns the value of |rec| at the given |iteration|. SExpression GetValueAtIteration(SERecurrentNode* rec, int64_t iteration) const; // Returns the value of |rec| at the last loop iteration. SExpression GetValueAtLastIteration(SERecurrentNode* rec) const; bool EvalOperator(CmpOperator cmp_op, SExpression lhs, SExpression rhs, bool* result) const; Direction HandleEquality(SExpression lhs, SExpression rhs) const; Direction HandleInequality(CmpOperator cmp_op, SExpression lhs, SERecurrentNode* rhs) const; static Direction GetNoneDirection() { return Direction{LoopPeelingPass::PeelDirection::kNone, 0}; } ir::IRContext* context_; ir::Loop* loop_; opt::ScalarEvolutionAnalysis* scev_analysis_; size_t loop_max_iterations_; }; // Peel profitable loops in |f|. bool ProcessFunction(ir::Function* f); // Peel |loop| if profitable. std::pair ProcessLoop(ir::Loop* loop, CodeMetrics* loop_size); static size_t code_grow_threshold_; LoopPeelingStats* stats_; }; } // namespace opt } // namespace spvtools #endif // SOURCE_OPT_LOOP_PEELING_H_