[wasm] Avoid taking a mutex during Liftoff compilation

The mutex is there to protect against concurrent compilation of the same
function. It can be avoided by accumulating the vector of call targets
locally in the LiftoffCompiler, and only transferring it to the stored
type feedback once at the end (under the mutex).

Also, choose slightly better data structures: base::OwnedVector uses
less memory than std::vector (because it never over-reserves), and
std::unordered_map is more memory efficient and faster to look up than
{std::map}.

R=jkummerow@chromium.org

Bug: v8:13209
Change-Id: I7aa82560a83cbac5c019effc7fd38c9b1495a42c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3840294
Commit-Queue: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/main@{#82602}
This commit is contained in:
Clemens Backes 2022-08-19 15:15:59 +02:00 committed by V8 LUCI CQ
parent df59f2176e
commit 660938b7b1
3 changed files with 37 additions and 25 deletions

View File

@ -558,9 +558,9 @@ class LiftoffCompiler {
}
int GetFeedbackVectorSlots() const {
// The number of instructions is capped by max function size.
// The number of call instructions is capped by max function size.
static_assert(kV8MaxWasmFunctionSize < std::numeric_limits<int>::max() / 2);
return static_cast<int>(num_call_instructions_) * 2;
return static_cast<int>(encountered_call_instructions_.size()) * 2;
}
void unsupported(FullDecoder* decoder, LiftoffBailoutReason reason,
@ -1058,6 +1058,22 @@ class LiftoffCompiler {
// The previous calls may have also generated a bailout.
DidAssemblerBailout(decoder);
DCHECK_EQ(num_exceptions_, 0);
if (FLAG_wasm_speculative_inlining &&
!encountered_call_instructions_.empty()) {
// Update the call targets stored in the WasmModule.
TypeFeedbackStorage& type_feedback = env_->module->type_feedback;
base::MutexGuard mutex_guard(&type_feedback.mutex);
base::OwnedVector<uint32_t>& call_targets =
type_feedback.feedback_for_function[func_index_].call_targets;
if (call_targets.empty()) {
call_targets =
base::OwnedVector<uint32_t>::Of(encountered_call_instructions_);
} else {
DCHECK_EQ(call_targets.as_vector(),
base::VectorOf(encountered_call_instructions_));
}
}
}
void OnFirstError(FullDecoder* decoder) {
@ -6986,12 +7002,9 @@ class LiftoffCompiler {
// One slot would be enough for call_direct, but would make index
// computations much more complicated.
uintptr_t vector_slot = num_call_instructions_ * 2;
size_t vector_slot = encountered_call_instructions_.size() * 2;
if (FLAG_wasm_speculative_inlining) {
base::MutexGuard mutex_guard(&decoder->module_->type_feedback.mutex);
decoder->module_->type_feedback.feedback_for_function[func_index_]
.call_targets.push_back(imm.index);
num_call_instructions_++;
encountered_call_instructions_.push_back(imm.index);
}
if (imm.index < env_->module->num_imported_functions) {
@ -7234,13 +7247,9 @@ class LiftoffCompiler {
__ Fill(vector, liftoff::kFeedbackVectorOffset, kPointerKind);
LiftoffAssembler::VarState vector_var(kPointerKind, vector, 0);
LiftoffRegister index = pinned.set(__ GetUnusedRegister(kGpReg, pinned));
uintptr_t vector_slot = num_call_instructions_ * 2;
{
base::MutexGuard mutex_guard(&decoder->module_->type_feedback.mutex);
decoder->module_->type_feedback.feedback_for_function[func_index_]
.call_targets.push_back(FunctionTypeFeedback::kNonDirectCall);
}
num_call_instructions_++;
size_t vector_slot = encountered_call_instructions_.size() * 2;
encountered_call_instructions_.push_back(
FunctionTypeFeedback::kNonDirectCall);
__ LoadConstant(index, WasmValue::ForUintPtr(vector_slot));
LiftoffAssembler::VarState index_var(kIntPtrKind, index, 0);
@ -7574,10 +7583,11 @@ class LiftoffCompiler {
// Current number of exception refs on the stack.
int num_exceptions_ = 0;
// Number of feedback-collecting call instructions encountered. While
// compiling, also index of the next such instruction. Used for indexing type
// feedback.
uintptr_t num_call_instructions_ = 0;
// Updated during compilation on every "call" or "call_ref" instruction.
// Holds the call target, or {FunctionTypeFeedback::kNonDirectCall} for
// "call_ref".
// After compilation, this is transferred into {WasmModule::type_feedback}.
std::vector<uint32_t> encountered_call_instructions_;
int32_t* max_steps_;
int32_t* nondeterminism_;

View File

@ -1285,8 +1285,8 @@ class TransitiveTypeFeedbackProcessor {
TransitiveTypeFeedbackProcessor(WasmInstanceObject instance, int func_index)
: instance_(instance),
module_(instance.module()),
mutex_guard(&module_->type_feedback.mutex),
feedback_for_function_(module_->type_feedback.feedback_for_function) {
base::MutexGuard mutex_guard(&module_->type_feedback.mutex);
queue_.insert(func_index);
while (!queue_.empty()) {
auto next = queue_.cbegin();
@ -1319,8 +1319,9 @@ class TransitiveTypeFeedbackProcessor {
DisallowGarbageCollection no_gc_scope_;
WasmInstanceObject instance_;
const WasmModule* const module_;
std::map<uint32_t, FunctionTypeFeedback>& feedback_for_function_;
std::unordered_set<int> queue_;
base::MutexGuard mutex_guard;
std::unordered_map<uint32_t, FunctionTypeFeedback>& feedback_for_function_;
std::set<int> queue_;
};
class FeedbackMaker {
@ -1407,8 +1408,9 @@ void TransitiveTypeFeedbackProcessor::Process(int func_index) {
Object maybe_feedback = instance_.feedback_vectors().get(which_vector);
if (!maybe_feedback.IsFixedArray()) return;
FixedArray feedback = FixedArray::cast(maybe_feedback);
const std::vector<uint32_t>& call_direct_targets =
module_->type_feedback.feedback_for_function[func_index].call_targets;
base::Vector<uint32_t> call_direct_targets =
module_->type_feedback.feedback_for_function[func_index]
.call_targets.as_vector();
FeedbackMaker fm(instance_, func_index, feedback.length() / 2);
for (int i = 0; i < feedback.length(); i += 2) {
Object value = feedback.get(i);

View File

@ -467,7 +467,7 @@ struct FunctionTypeFeedback {
// {call_targets} has one entry per "call" and "call_ref" in the function.
// For "call", it holds the index of the called function, for "call_ref" the
// value will be {kNonDirectCall}.
std::vector<uint32_t> call_targets;
base::OwnedVector<uint32_t> call_targets;
// {tierup_priority} is updated and used when triggering tier-up.
// TODO(clemensb): This does not belong here; find a better place.
@ -477,7 +477,7 @@ struct FunctionTypeFeedback {
};
struct TypeFeedbackStorage {
std::map<uint32_t, FunctionTypeFeedback> feedback_for_function;
std::unordered_map<uint32_t, FunctionTypeFeedback> feedback_for_function;
// Accesses to {feedback_for_function} are guarded by this mutex.
base::Mutex mutex;
};