SPIRV-Tools/source/opt/loop_peeling.h

// Copyright (c) 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef SOURCE_OPT_LOOP_PEELING_H_
#define SOURCE_OPT_LOOP_PEELING_H_

#include <algorithm>
#include <limits>
#include <memory>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "opt/ir_context.h"
#include "opt/loop_descriptor.h"
#include "opt/loop_utils.h"

namespace spvtools {
namespace opt {

// Utility class to perform the peeling of a given loop.
// The loop peeling transformation make a certain amount of a loop iterations to
// be executed either before (peel before) or after (peel after) the transformed
// loop.
//
// For peeling cases the transformation does the following steps:
//   - It clones the loop and inserts the cloned loop before the original loop;
//   - It connects all iterating values of the cloned loop with the
//     corresponding original loop values so that the second loop starts with
//     the appropriate values.
//   - It inserts a new induction variable "i" is inserted into the cloned that
//     starts with the value 0 and increment by step of one.
//
// The last step is specific to each case:
//   - Peel before: the transformation is to peel the "N" first iterations.
//     The exit condition of the cloned loop is changed so that the loop
//     exits when "i < N" becomes false. The original loop is then protected to
//     only execute if there is any iteration left to do.
//   - Peel after: the transformation is to peel the "N" last iterations,
//     then the exit condition of the cloned loop is changed so that the loop
//     exits when "i + N < max_iteration" becomes false, where "max_iteration"
//     is the upper bound of the loop. The cloned loop is then protected to
//     only execute if there is any iteration left to do no covered by the
//     second.
//
// To be peelable:
//   - The loop must be in LCSSA form;
//   - The loop must not contain any breaks;
//   - The loop must not have any ambiguous iterators updates (see
//     "CanPeelLoop").
// The method "CanPeelLoop" checks that those constrained are met.
//
// FIXME(Victor): Allow the utility it accept an canonical induction variable
// rather than automatically create one.
// FIXME(Victor): When possible, evaluate the initial value of the second loop
// iterating values rather than using the exit value of the first loop.
// FIXME(Victor): Make the utility work-out the upper bound without having to
// provide it. This should become easy once the scalar evolution is in.
class LoopPeeling {
 public:
  // LoopPeeling constructor.
  // |loop| is the loop to peel.
  // |loop_iteration_count| is the instruction holding the |loop| iteration
  // count, must be invariant for |loop| and must be of an int 32 type (signed
  // or unsigned).
  LoopPeeling(ir::IRContext* context, ir::Loop* loop,
              ir::Instruction* loop_iteration_count)
      : context_(context),
        loop_utils_(context, loop),
        loop_(loop),
        loop_iteration_count_(!loop->IsInsideLoop(loop_iteration_count)
                                  ? loop_iteration_count
                                  : nullptr),
        int_type_(nullptr),
        canonical_induction_variable_(nullptr) {
    if (loop_iteration_count_) {
      int_type_ = context_->get_type_mgr()
                      ->GetType(loop_iteration_count_->type_id())
                      ->AsInteger();
    }
    GetIteratingExitValues();
  }

  // Returns true if the loop can be peeled.
  // To be peelable, all operation involved in the update of the loop iterators
  // must not dominates the exit condition. This restriction is a work around to
  // not miss compile code like:
  //
  //   for (int i = 0; i + 1 < N; i++) {}
  //   for (int i = 0; ++i < N; i++) {}
  //
  // The increment will happen before the test on the exit condition leading to
  // very look-a-like code.
  //
  // This restriction will not apply if a loop rotate is applied before (i.e.
  // becomes a do-while loop).
  bool CanPeelLoop() const {
    ir::CFG& cfg = *context_->cfg();

    if (!loop_iteration_count_) {
      return false;
    }
    if (!int_type_) {
      return false;
    }
    if (int_type_->width() != 32) {
      return false;
    }
    if (!loop_->IsLCSSA()) {
      return false;
    }
    if (!loop_->GetMergeBlock()) {
      return false;
    }
    if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) {
      return false;
    }
    if (!IsConditionCheckSideEffectFree()) {
      return false;
    }

    return !std::any_of(exit_value_.cbegin(), exit_value_.cend(),
                        [](std::pair<uint32_t, ir::Instruction*> it) {
                          return it.second == nullptr;
                        });
  }

  // Moves the execution of the |factor| first iterations of the loop into a
  // dedicated loop.
  void PeelBefore(uint32_t factor);

  // Moves the execution of the |factor| last iterations of the loop into a
  // dedicated loop.
  void PeelAfter(uint32_t factor);

  // Returns the cloned loop.
  ir::Loop* GetClonedLoop() { return cloned_loop_; }
  // Returns the original loop.
  ir::Loop* GetOriginalLoop() { return loop_; }

 private:
  ir::IRContext* context_;
  LoopUtils loop_utils_;
  // The original loop.
  ir::Loop* loop_;
  // The initial |loop_| upper bound.
  ir::Instruction* loop_iteration_count_;
  // The int type to use for the canonical_induction_variable_.
  analysis::Integer* int_type_;
  // The cloned loop.
  ir::Loop* cloned_loop_;
  // This is set to true when the exit and back-edge branch instruction is the
  // same.
  bool do_while_form_;

  // The canonical induction variable of the cloned loop. The induction variable
  // is initialized to 0 and incremented by step of 1.
  ir::Instruction* canonical_induction_variable_;

  // Map between loop iterators and exit values. Loop iterators
  std::unordered_map<uint32_t, ir::Instruction*> exit_value_;

  // Duplicate |loop_| and place the new loop before the cloned loop. Iterating
  // values from the cloned loop are then connected to the original loop as
  // initializer.
  void DuplicateAndConnectLoop(LoopUtils::LoopCloningResult* clone_results);

  // Insert the canonical induction variable into the first loop as a simplified
  // counter.
  void InsertCanonicalInductionVariable();

  // Fixes the exit condition of the before loop. The function calls
  // |condition_builder| to get the condition to use in the conditional branch
  // of the loop exit. The loop will be exited if the condition evaluate to
  // true. |condition_builder| takes an ir::Instruction* that represent the
  // insertion point.
  void FixExitCondition(
      const std::function<uint32_t(ir::Instruction*)>& condition_builder);

  // Gathers all operations involved in the update of |iterator| into
  // |operations|.
  void GetIteratorUpdateOperations(
      const ir::Loop* loop, ir::Instruction* iterator,
      std::unordered_set<ir::Instruction*>* operations);

  // Gathers exiting iterator values. The function builds a map between each
  // iterating value in the loop (a phi instruction in the loop header) and its
  // SSA value when it exit the loop. If no exit value can be accurately found,
  // it is map to nullptr (see comment on CanPeelLoop).
  void GetIteratingExitValues();

  // Returns true if a for-loop has no instruction with effects before the
  // condition check.
  bool IsConditionCheckSideEffectFree() const;

  // Creates a new basic block and insert it between |bb| and the predecessor of
  // |bb|.
  ir::BasicBlock* CreateBlockBefore(ir::BasicBlock* bb);

  // Inserts code to only execute |loop| only if the given |condition| is true.
  // |if_merge| is a suitable basic block to be used by the if condition as
  // merge block.
  // The function returns the if block protecting the loop.
  ir::BasicBlock* ProtectLoop(ir::Loop* loop, ir::Instruction* condition,
                              ir::BasicBlock* if_merge);
};

}  // namespace opt
}  // namespace spvtools

#endif  // SOURCE_OPT_LOOP_PEELING_H_