mirror of
https://github.com/KhronosGroup/SPIRV-Tools
synced 2024-10-19 03:20:14 +00:00
224 lines
8.4 KiB
C
224 lines
8.4 KiB
C
|
// Copyright (c) 2018 Google LLC.
|
||
|
//
|
||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
// you may not use this file except in compliance with the License.
|
||
|
// You may obtain a copy of the License at
|
||
|
//
|
||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||
|
//
|
||
|
// Unless required by applicable law or agreed to in writing, software
|
||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
// See the License for the specific language governing permissions and
|
||
|
// limitations under the License.
|
||
|
|
||
|
#ifndef SOURCE_OPT_LOOP_PEELING_H_
|
||
|
#define SOURCE_OPT_LOOP_PEELING_H_
|
||
|
|
||
|
#include <algorithm>
#include <cstdint>
#include <functional>
#include <limits>
#include <memory>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
|
||
|
|
||
|
#include "opt/ir_context.h"
|
||
|
#include "opt/loop_descriptor.h"
|
||
|
#include "opt/loop_utils.h"
|
||
|
|
||
|
namespace spvtools {
|
||
|
namespace opt {
|
||
|
|
||
|
// Utility class to perform the peeling of a given loop.
|
||
|
// The loop peeling transformation make a certain amount of a loop iterations to
|
||
|
// be executed either before (peel before) or after (peel after) the transformed
|
||
|
// loop.
|
||
|
//
|
||
|
// For peeling cases the transformation does the following steps:
|
||
|
// - It clones the loop and inserts the cloned loop before the original loop;
|
||
|
// - It connects all iterating values of the cloned loop with the
|
||
|
// corresponding original loop values so that the second loop starts with
|
||
|
// the appropriate values.
|
||
|
// - It inserts a new induction variable "i" is inserted into the cloned that
|
||
|
// starts with the value 0 and increment by step of one.
|
||
|
//
|
||
|
// The last step is specific to each case:
|
||
|
// - Peel before: the transformation is to peel the "N" first iterations.
|
||
|
// The exit condition of the cloned loop is changed so that the loop
|
||
|
// exits when "i < N" becomes false. The original loop is then protected to
|
||
|
// only execute if there is any iteration left to do.
|
||
|
// - Peel after: the transformation is to peel the "N" last iterations,
|
||
|
// then the exit condition of the cloned loop is changed so that the loop
|
||
|
// exits when "i + N < max_iteration" becomes false, where "max_iteration"
|
||
|
// is the upper bound of the loop. The cloned loop is then protected to
|
||
|
// only execute if there is any iteration left to do no covered by the
|
||
|
// second.
|
||
|
//
|
||
|
// To be peelable:
|
||
|
// - The loop must be in LCSSA form;
|
||
|
// - The loop must not contain any breaks;
|
||
|
// - The loop must not have any ambiguous iterators updates (see
|
||
|
// "CanPeelLoop").
|
||
|
// The method "CanPeelLoop" checks that those constrained are met.
|
||
|
//
|
||
|
// FIXME(Victor): Allow the utility it accept an canonical induction variable
|
||
|
// rather than automatically create one.
|
||
|
// FIXME(Victor): When possible, evaluate the initial value of the second loop
|
||
|
// iterating values rather than using the exit value of the first loop.
|
||
|
// FIXME(Victor): Make the utility work-out the upper bound without having to
|
||
|
// provide it. This should become easy once the scalar evolution is in.
|
||
|
class LoopPeeling {
|
||
|
public:
|
||
|
// LoopPeeling constructor.
|
||
|
// |loop| is the loop to peel.
|
||
|
// |loop_iteration_count| is the instruction holding the |loop| iteration
|
||
|
// count, must be invariant for |loop| and must be of an int 32 type (signed
|
||
|
// or unsigned).
|
||
|
LoopPeeling(ir::IRContext* context, ir::Loop* loop,
|
||
|
ir::Instruction* loop_iteration_count)
|
||
|
: context_(context),
|
||
|
loop_utils_(context, loop),
|
||
|
loop_(loop),
|
||
|
loop_iteration_count_(!loop->IsInsideLoop(loop_iteration_count)
|
||
|
? loop_iteration_count
|
||
|
: nullptr),
|
||
|
int_type_(nullptr),
|
||
|
canonical_induction_variable_(nullptr) {
|
||
|
if (loop_iteration_count_) {
|
||
|
int_type_ = context_->get_type_mgr()
|
||
|
->GetType(loop_iteration_count_->type_id())
|
||
|
->AsInteger();
|
||
|
}
|
||
|
GetIteratingExitValues();
|
||
|
}
|
||
|
|
||
|
// Returns true if the loop can be peeled.
|
||
|
// To be peelable, all operation involved in the update of the loop iterators
|
||
|
// must not dominates the exit condition. This restriction is a work around to
|
||
|
// not miss compile code like:
|
||
|
//
|
||
|
// for (int i = 0; i + 1 < N; i++) {}
|
||
|
// for (int i = 0; ++i < N; i++) {}
|
||
|
//
|
||
|
// The increment will happen before the test on the exit condition leading to
|
||
|
// very look-a-like code.
|
||
|
//
|
||
|
// This restriction will not apply if a loop rotate is applied before (i.e.
|
||
|
// becomes a do-while loop).
|
||
|
bool CanPeelLoop() const {
|
||
|
ir::CFG& cfg = *context_->cfg();
|
||
|
|
||
|
if (!loop_iteration_count_) {
|
||
|
return false;
|
||
|
}
|
||
|
if (!int_type_) {
|
||
|
return false;
|
||
|
}
|
||
|
if (int_type_->width() != 32) {
|
||
|
return false;
|
||
|
}
|
||
|
if (!loop_->IsLCSSA()) {
|
||
|
return false;
|
||
|
}
|
||
|
if (!loop_->GetMergeBlock()) {
|
||
|
return false;
|
||
|
}
|
||
|
if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) {
|
||
|
return false;
|
||
|
}
|
||
|
if (!IsConditionCheckSideEffectFree()) {
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
return !std::any_of(exit_value_.cbegin(), exit_value_.cend(),
|
||
|
[](std::pair<uint32_t, ir::Instruction*> it) {
|
||
|
return it.second == nullptr;
|
||
|
});
|
||
|
}
|
||
|
|
||
|
// Moves the execution of the |factor| first iterations of the loop into a
|
||
|
// dedicated loop.
|
||
|
void PeelBefore(uint32_t factor);
|
||
|
|
||
|
// Moves the execution of the |factor| last iterations of the loop into a
|
||
|
// dedicated loop.
|
||
|
void PeelAfter(uint32_t factor);
|
||
|
|
||
|
// Returns the cloned loop.
|
||
|
ir::Loop* GetClonedLoop() { return cloned_loop_; }
|
||
|
// Returns the original loop.
|
||
|
ir::Loop* GetOriginalLoop() { return loop_; }
|
||
|
|
||
|
private:
|
||
|
ir::IRContext* context_;
|
||
|
LoopUtils loop_utils_;
|
||
|
// The original loop.
|
||
|
ir::Loop* loop_;
|
||
|
// The initial |loop_| upper bound.
|
||
|
ir::Instruction* loop_iteration_count_;
|
||
|
// The int type to use for the canonical_induction_variable_.
|
||
|
analysis::Integer* int_type_;
|
||
|
// The cloned loop.
|
||
|
ir::Loop* cloned_loop_;
|
||
|
// This is set to true when the exit and back-edge branch instruction is the
|
||
|
// same.
|
||
|
bool do_while_form_;
|
||
|
|
||
|
// The canonical induction variable of the cloned loop. The induction variable
|
||
|
// is initialized to 0 and incremented by step of 1.
|
||
|
ir::Instruction* canonical_induction_variable_;
|
||
|
|
||
|
// Map between loop iterators and exit values. Loop iterators
|
||
|
std::unordered_map<uint32_t, ir::Instruction*> exit_value_;
|
||
|
|
||
|
// Duplicate |loop_| and place the new loop before the cloned loop. Iterating
|
||
|
// values from the cloned loop are then connected to the original loop as
|
||
|
// initializer.
|
||
|
void DuplicateAndConnectLoop(LoopUtils::LoopCloningResult* clone_results);
|
||
|
|
||
|
// Insert the canonical induction variable into the first loop as a simplified
|
||
|
// counter.
|
||
|
void InsertCanonicalInductionVariable();
|
||
|
|
||
|
// Fixes the exit condition of the before loop. The function calls
|
||
|
// |condition_builder| to get the condition to use in the conditional branch
|
||
|
// of the loop exit. The loop will be exited if the condition evaluate to
|
||
|
// true. |condition_builder| takes an ir::Instruction* that represent the
|
||
|
// insertion point.
|
||
|
void FixExitCondition(
|
||
|
const std::function<uint32_t(ir::Instruction*)>& condition_builder);
|
||
|
|
||
|
// Gathers all operations involved in the update of |iterator| into
|
||
|
// |operations|.
|
||
|
void GetIteratorUpdateOperations(
|
||
|
const ir::Loop* loop, ir::Instruction* iterator,
|
||
|
std::unordered_set<ir::Instruction*>* operations);
|
||
|
|
||
|
// Gathers exiting iterator values. The function builds a map between each
|
||
|
// iterating value in the loop (a phi instruction in the loop header) and its
|
||
|
// SSA value when it exit the loop. If no exit value can be accurately found,
|
||
|
// it is map to nullptr (see comment on CanPeelLoop).
|
||
|
void GetIteratingExitValues();
|
||
|
|
||
|
// Returns true if a for-loop has no instruction with effects before the
|
||
|
// condition check.
|
||
|
bool IsConditionCheckSideEffectFree() const;
|
||
|
|
||
|
// Creates a new basic block and insert it between |bb| and the predecessor of
|
||
|
// |bb|.
|
||
|
ir::BasicBlock* CreateBlockBefore(ir::BasicBlock* bb);
|
||
|
|
||
|
// Inserts code to only execute |loop| only if the given |condition| is true.
|
||
|
// |if_merge| is a suitable basic block to be used by the if condition as
|
||
|
// merge block.
|
||
|
// The function returns the if block protecting the loop.
|
||
|
ir::BasicBlock* ProtectLoop(ir::Loop* loop, ir::Instruction* condition,
|
||
|
ir::BasicBlock* if_merge);
|
||
|
};
|
||
|
|
||
|
} // namespace opt
|
||
|
} // namespace spvtools
|
||
|
|
||
|
#endif // SOURCE_OPT_LOOP_PEELING_H_
|