[wasm-hints] Resolve Performance Problem

Locks for compilation state callbacks and for the native module are again taken one after the other. As a consequence, publishing compiled Wasm code again happens in parallel. Compile times are now comparable to before lazy hints were enabled. Bug: chromium:949050 Change-Id: I45c52254d046de080938bd131fd3ed8116660bef Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1552787 Commit-Queue: Frederik Gossen <frgossen@google.com> Reviewed-by: Michael Starzinger <mstarzinger@chromium.org> Reviewed-by: Clemens Hammacher <clemensh@chromium.org> Cr-Commit-Position: refs/heads/master@{#60646}
2019-04-05 12:36:06 +02:00 · 2019-04-05 12:36:06 +02:00 · fa7c0ed288
commit fa7c0ed288
parent 7d714b61b4
9 changed files with 85 additions and 89 deletions
--- a/src/compiler/wasm-compiler.cc
+++ b/src/compiler/wasm-compiler.cc
@ -5948,7 +5948,7 @@ wasm::WasmCode* CompileWasmMathIntrinsic(wasm::WasmEngine* wasm_engine,
      std::move(result.source_positions), wasm::WasmCode::kFunction,
      wasm::ExecutionTier::kNone);
  // TODO(titzer): add counters for math intrinsic code size / allocation
-  return native_module->PublishCode(std::move(wasm_code)).code;
+  return native_module->PublishCode(std::move(wasm_code));
 }

 wasm::WasmCode* CompileWasmImportCallWrapper(wasm::WasmEngine* wasm_engine,
@ -6012,7 +6012,7 @@ wasm::WasmCode* CompileWasmImportCallWrapper(wasm::WasmEngine* wasm_engine,
      std::move(result.protected_instructions),
      std::move(result.source_positions), wasm::WasmCode::kWasmToJsWrapper,
      wasm::ExecutionTier::kNone);
-  return native_module->PublishCode(std::move(wasm_code)).code;
+  return native_module->PublishCode(std::move(wasm_code));
 }

 wasm::WasmCompilationResult CompileWasmInterpreterEntry(
--- a/src/wasm/compilation-environment.h
+++ b/src/wasm/compilation-environment.h
@ -21,7 +21,6 @@ namespace wasm {

 class NativeModule;
 class WasmCode;
-struct WasmCompilationResult;
 class WasmError;

 enum RuntimeExceptionSupport : bool {
@ -121,8 +120,8 @@ class CompilationState {

  bool failed() const;

-  void FinishUnit(WasmCompilationResult);
-  void FinishUnits(Vector<WasmCompilationResult>);
+  void OnFinishedUnit(WasmCode*);
+  void OnFinishedUnits(Vector<WasmCode*>);

 private:
  friend class NativeModule;
--- a/src/wasm/module-compiler.cc
+++ b/src/wasm/module-compiler.cc
@ -142,8 +142,8 @@ class CompilationStateImpl {
  void AddTopTierCompilationUnit(std::unique_ptr<WasmCompilationUnit>);
  std::unique_ptr<WasmCompilationUnit> GetNextCompilationUnit();

-  void FinishUnit(WasmCompilationResult);
-  void FinishUnits(Vector<WasmCompilationResult>);
+  void OnFinishedUnit(WasmCode*);
+  void OnFinishedUnits(Vector<WasmCode*>);

  void ReportDetectedFeatures(const WasmFeatures& detected);
  void OnBackgroundTaskStopped(const WasmFeatures& detected);
@ -225,6 +225,7 @@ class CompilationStateImpl {

  int outstanding_baseline_functions_ = 0;
  int outstanding_top_tier_functions_ = 0;
+  std::vector<ExecutionTier> highest_execution_tier_;

  // End of fields protected by {callbacks_mutex_}.
  //////////////////////////////////////////////////////////////////////////////
@ -276,12 +277,12 @@ void CompilationState::AddCallback(CompilationState::callback_t callback) {

 bool CompilationState::failed() const { return Impl(this)->failed(); }

-void CompilationState::FinishUnit(WasmCompilationResult result) {
-  Impl(this)->FinishUnit(std::move(result));
+void CompilationState::OnFinishedUnit(WasmCode* code) {
+  Impl(this)->OnFinishedUnit(code);
 }

-void CompilationState::FinishUnits(Vector<WasmCompilationResult> results) {
-  Impl(this)->FinishUnits(results);
+void CompilationState::OnFinishedUnits(Vector<WasmCode*> code_vector) {
+  Impl(this)->OnFinishedUnits(code_vector);
 }

 // static
@ -473,8 +474,7 @@ void CompileLazy(Isolate* isolate, NativeModule* native_module,
      &env, compilation_state->GetWireBytesStorage(), isolate->counters(),
      compilation_state->detected_features());
  WasmCodeRefScope code_ref_scope;
-  WasmCodeUpdate update = native_module->AddCompiledCode(std::move(result));
-  WasmCode* code = update.code;
+  WasmCode* code = native_module->AddCompiledCode(std::move(result));

  if (tiers.baseline_tier < tiers.top_tier) {
    auto tiering_unit = base::make_unique<WasmCompilationUnit>(
@ -533,7 +533,9 @@ bool FetchAndExecuteCompilationUnit(CompilationEnv* env,
      env, compilation_state->GetWireBytesStorage(), counters, detected);

  if (result.succeeded()) {
-    compilation_state->FinishUnit(std::move(result));
+    WasmCodeRefScope code_ref_scope;
+    WasmCode* code = native_module->AddCompiledCode(std::move(result));
+    compilation_state->OnFinishedUnit(code);
  } else {
    compilation_state->SetError();
  }
@ -791,8 +793,12 @@ class BackgroundCompileTask : public CancelableTask {
    auto publish_results =
        [&results_to_publish](BackgroundCompileScope* compile_scope) {
          if (results_to_publish.empty()) return;
-          compile_scope->compilation_state()->FinishUnits(
-              VectorOf(results_to_publish));
+          WasmCodeRefScope code_ref_scope;
+          std::vector<WasmCode*> code_vector =
+              compile_scope->native_module()->AddCompiledCode(
+                  VectorOf(results_to_publish));
+          compile_scope->compilation_state()->OnFinishedUnits(
+              VectorOf(code_vector));
          results_to_publish.clear();
        };

@ -1687,6 +1693,7 @@ void CompilationStateImpl::SetNumberOfFunctionsToCompile(
  int num_functions_to_compile = num_functions - num_lazy_functions;
  outstanding_baseline_functions_ = num_functions_to_compile;
  outstanding_top_tier_functions_ = num_functions_to_compile;
+  highest_execution_tier_.assign(num_functions, ExecutionTier::kNone);
 }

 void CompilationStateImpl::AddCallback(CompilationState::callback_t callback) {
@ -1754,65 +1761,64 @@ CompilationStateImpl::GetNextCompilationUnit() {
  return unit;
 }

-void CompilationStateImpl::FinishUnit(WasmCompilationResult result) {
-  FinishUnits({&result, 1});
+void CompilationStateImpl::OnFinishedUnit(WasmCode* code) {
+  OnFinishedUnits({&code, 1});
 }

-void CompilationStateImpl::FinishUnits(
-    Vector<WasmCompilationResult> compilation_results) {
+void CompilationStateImpl::OnFinishedUnits(Vector<WasmCode*> code_vector) {
  base::MutexGuard guard(&callbacks_mutex_);

  // Assume an order of execution tiers that represents the quality of their
  // generated code.
-  static_assert(ExecutionTier::kInterpreter < ExecutionTier::kLiftoff &&
+  static_assert(ExecutionTier::kNone < ExecutionTier::kInterpreter &&
+                    ExecutionTier::kInterpreter < ExecutionTier::kLiftoff &&
                    ExecutionTier::kLiftoff < ExecutionTier::kTurbofan,
                "Assume an order on execution tiers");

  auto module = native_module_->module();
  auto enabled_features = native_module_->enabled_features();
-  WasmCodeRefScope code_ref_scope;
-  std::vector<WasmCodeUpdate> code_update_vector =
-      native_module_->AddCompiledCode(compilation_results);
+  for (WasmCode* code : code_vector) {
+    DCHECK_NOT_NULL(code);
+    DCHECK_NE(code->tier(), ExecutionTier::kNone);
+    native_module_->engine()->LogCode(code);

-  for (WasmCodeUpdate& code_update : code_update_vector) {
-    DCHECK_NOT_NULL(code_update.code);
-    DCHECK(code_update.tier.has_value());
-    native_module_->engine()->LogCode(code_update.code);
+    // Skip lazily compiled code as we do not consider this for the completion
+    // of baseline respectively top tier compilation.
+    int func_index = code->index();
+    if (IsLazyCompilation(module, native_module_, enabled_features,
+                          func_index)) {
+      continue;
+    }

-    uint32_t func_index = code_update.code->index();
+    // Determine whether we are reaching baseline or top tier with the given
+    // code.
+    uint32_t slot_index = code->index() - module->num_imported_functions;
    ExecutionTierPair requested_tiers = GetRequestedExecutionTiers(
        module, compile_mode(), enabled_features, func_index);
-
-    // Reconstruct state before code update.
-    bool had_reached_baseline = code_update.prior_tier.has_value();
-    bool had_reached_top_tier =
-        code_update.prior_tier.has_value() &&
-        code_update.prior_tier.value() >= requested_tiers.top_tier;
-    DCHECK_IMPLIES(had_reached_baseline, code_update.prior_tier.has_value() &&
-                                             code_update.prior_tier.value() >=
-                                                 requested_tiers.baseline_tier);
-
-    // Conclude whether we are reaching baseline or top tier.
+    DCHECK_EQ(highest_execution_tier_.size(), module->num_declared_functions);
+    ExecutionTier prior_tier = highest_execution_tier_[slot_index];
+    bool had_reached_baseline = prior_tier >= requested_tiers.baseline_tier;
+    bool had_reached_top_tier = prior_tier >= requested_tiers.top_tier;
+    DCHECK_IMPLIES(had_reached_baseline, prior_tier > ExecutionTier::kNone);
    bool reaches_baseline = !had_reached_baseline;
    bool reaches_top_tier =
-        !had_reached_top_tier &&
-        code_update.tier.value() >= requested_tiers.top_tier;
+        !had_reached_top_tier && code->tier() >= requested_tiers.top_tier;
    DCHECK_IMPLIES(reaches_baseline,
-                   code_update.tier.value() >= requested_tiers.baseline_tier);
+                   code->tier() >= requested_tiers.baseline_tier);
    DCHECK_IMPLIES(reaches_top_tier, had_reached_baseline || reaches_baseline);

-    // Remember state before update.
+    // Remember compilation state before update.
    bool had_completed_baseline_compilation =
        outstanding_baseline_functions_ == 0;
    bool had_completed_top_tier_compilation =
        outstanding_top_tier_functions_ == 0;

-    // Update state.
-    if (!IsLazyCompilation(module, native_module_, enabled_features,
-                           func_index)) {
-      if (reaches_baseline) outstanding_baseline_functions_--;
-      if (reaches_top_tier) outstanding_top_tier_functions_--;
+    // Update compilation state.
+    if (code->tier() > prior_tier) {
+      highest_execution_tier_[slot_index] = code->tier();
    }
+    if (reaches_baseline) outstanding_baseline_functions_--;
+    if (reaches_top_tier) outstanding_top_tier_functions_--;
    DCHECK_LE(0, outstanding_baseline_functions_);
    DCHECK_LE(outstanding_baseline_functions_, outstanding_top_tier_functions_);

@ -1827,12 +1833,14 @@ void CompilationStateImpl::FinishUnits(

    // Trigger callbacks.
    if (completes_baseline_compilation) {
-      for (auto& callback : callbacks_)
+      for (auto& callback : callbacks_) {
        callback(CompilationEvent::kFinishedBaselineCompilation);
+      }
    }
    if (completes_top_tier_compilation) {
-      for (auto& callback : callbacks_)
+      for (auto& callback : callbacks_) {
        callback(CompilationEvent::kFinishedTopTierCompilation);
+      }
      // Clear the callbacks because no more events will be delivered.
      callbacks_.clear();
    }
--- a/src/wasm/wasm-code-manager.cc
+++ b/src/wasm/wasm-code-manager.cc
@ -624,7 +624,7 @@ WasmCode* NativeModule::AddAndPublishAnonymousCode(Handle<Code> code,
  new_code->MaybePrint(name);
  new_code->Validate();

-  return PublishCode(std::move(new_code)).code;
+  return PublishCode(std::move(new_code));
 }

 std::unique_ptr<WasmCode> NativeModule::AddCode(
@ -710,7 +710,7 @@ std::unique_ptr<WasmCode> NativeModule::AddCodeWithCodeSpace(
  return code;
 }

-WasmCodeUpdate NativeModule::PublishCode(std::unique_ptr<WasmCode> code) {
+WasmCode* NativeModule::PublishCode(std::unique_ptr<WasmCode> code) {
  base::MutexGuard lock(&allocation_mutex_);
  return PublishCodeLocked(std::move(code));
 }
@ -729,10 +729,9 @@ WasmCode::Kind GetCodeKindForExecutionTier(ExecutionTier tier) {
 }
 }  // namespace

-WasmCodeUpdate NativeModule::PublishCodeLocked(std::unique_ptr<WasmCode> code) {
+WasmCode* NativeModule::PublishCodeLocked(std::unique_ptr<WasmCode> code) {
  // The caller must hold the {allocation_mutex_}, thus we fail to lock it here.
  DCHECK(!allocation_mutex_.TryLock());
-  WasmCodeUpdate update;

  if (!code->IsAnonymous()) {
    DCHECK_LT(code->index(), num_functions());
@ -740,20 +739,18 @@ WasmCodeUpdate NativeModule::PublishCodeLocked(std::unique_ptr<WasmCode> code) {

    // Assume an order of execution tiers that represents the quality of their
    // generated code.
-    static_assert(ExecutionTier::kInterpreter < ExecutionTier::kLiftoff &&
+    static_assert(ExecutionTier::kNone < ExecutionTier::kInterpreter &&
+                      ExecutionTier::kInterpreter < ExecutionTier::kLiftoff &&
                      ExecutionTier::kLiftoff < ExecutionTier::kTurbofan,
                  "Assume an order on execution tiers");

    // Update code table but avoid to fall back to less optimized code. We use
-    // the new code if it was compiled with a higher tier and also if we cannot
-    // determine the tier.
+    // the new code if it was compiled with a higher tier.
    uint32_t slot_idx = code->index() - module_->num_imported_functions;
    WasmCode* prior_code = code_table_[slot_idx];
-    if (prior_code) update.prior_tier = prior_code->tier();
-    update.tier = code->tier();
-    bool update_code_table = !update.prior_tier.has_value() ||
-                             !update.tier.has_value() ||
-                             update.prior_tier.value() < update.tier.value();
+    ExecutionTier prior_tier =
+        prior_code == nullptr ? ExecutionTier::kNone : prior_code->tier();
+    bool update_code_table = prior_tier < code->tier();
    if (update_code_table) {
      code_table_[slot_idx] = code.get();
    }
@ -776,9 +773,9 @@ WasmCodeUpdate NativeModule::PublishCodeLocked(std::unique_ptr<WasmCode> code) {
    }
  }
  WasmCodeRefScope::AddRef(code.get());
-  update.code = code.get();
+  WasmCode* result = code.get();
  owned_code_.emplace_back(std::move(code));
-  return update;
+  return result;
 }

 WasmCode* NativeModule::AddDeserializedCode(
@ -805,7 +802,7 @@ WasmCode* NativeModule::AddDeserializedCode(
  // Note: we do not flush the i-cache here, since the code needs to be
  // relocated anyway. The caller is responsible for flushing the i-cache later.

-  return PublishCode(std::move(code)).code;
+  return PublishCode(std::move(code));
 }

 std::vector<WasmCode*> NativeModule::SnapshotCodeTable() const {
@ -837,7 +834,7 @@ WasmCode* NativeModule::CreateEmptyJumpTable(uint32_t jump_table_size) {
      OwnedVector<const uint8_t>{},             // source_pos
      WasmCode::kJumpTable,                     // kind
      ExecutionTier::kNone}};                   // tier
-  return PublishCode(std::move(code)).code;
+  return PublishCode(std::move(code));
 }

 Vector<byte> NativeModule::AllocateForCode(size_t size) {
@ -1296,11 +1293,11 @@ void NativeModule::SampleCodeSize(
  histogram->AddSample(code_size_mb);
 }

-WasmCodeUpdate NativeModule::AddCompiledCode(WasmCompilationResult result) {
+WasmCode* NativeModule::AddCompiledCode(WasmCompilationResult result) {
  return AddCompiledCode({&result, 1})[0];
 }

-std::vector<WasmCodeUpdate> NativeModule::AddCompiledCode(
+std::vector<WasmCode*> NativeModule::AddCompiledCode(
    Vector<WasmCompilationResult> results) {
  DCHECK(!results.empty());
  // First, allocate code space for all the results.
@ -1331,15 +1328,15 @@ std::vector<WasmCodeUpdate> NativeModule::AddCompiledCode(

  // Under the {allocation_mutex_}, publish the code. The published code is put
  // into the top-most surrounding {WasmCodeRefScope} by {PublishCodeLocked}.
-  std::vector<WasmCodeUpdate> code_updates;
-  code_updates.reserve(results.size());
+  std::vector<WasmCode*> code_vector;
+  code_vector.reserve(results.size());
  {
    base::MutexGuard lock(&allocation_mutex_);
    for (auto& result : generated_code)
-      code_updates.push_back(PublishCodeLocked(std::move(result)));
+      code_vector.push_back(PublishCodeLocked(std::move(result)));
  }

-  return code_updates;
+  return code_vector;
 }

 void NativeModule::FreeCode(Vector<WasmCode* const> codes) {
--- a/src/wasm/wasm-code-manager.h
+++ b/src/wasm/wasm-code-manager.h
@ -74,12 +74,6 @@ class V8_EXPORT_PRIVATE DisjointAllocationPool final {
  DISALLOW_COPY_AND_ASSIGN(DisjointAllocationPool);
 };

-struct WasmCodeUpdate {
-  WasmCode* code = nullptr;
-  base::Optional<ExecutionTier> tier;
-  base::Optional<ExecutionTier> prior_tier;
-};
-
 class V8_EXPORT_PRIVATE WasmCode final {
 public:
  enum Kind {
@ -278,9 +272,9 @@ class V8_EXPORT_PRIVATE NativeModule final {
  // {PublishCode} makes the code available to the system by entering it into
  // the code table and patching the jump table. It returns a raw pointer to the
  // given {WasmCode} object.
-  WasmCodeUpdate PublishCode(std::unique_ptr<WasmCode>);
+  WasmCode* PublishCode(std::unique_ptr<WasmCode>);
  // Hold the {allocation_mutex_} when calling {PublishCodeLocked}.
-  WasmCodeUpdate PublishCodeLocked(std::unique_ptr<WasmCode>);
+  WasmCode* PublishCodeLocked(std::unique_ptr<WasmCode>);

  WasmCode* AddDeserializedCode(
      uint32_t index, Vector<const byte> instructions, uint32_t stack_slots,
@ -406,8 +400,8 @@ class V8_EXPORT_PRIVATE NativeModule final {
  enum CodeSamplingTime : int8_t { kAfterBaseline, kAfterTopTier, kSampling };
  void SampleCodeSize(Counters*, CodeSamplingTime) const;

-  WasmCodeUpdate AddCompiledCode(WasmCompilationResult);
-  std::vector<WasmCodeUpdate> AddCompiledCode(Vector<WasmCompilationResult>);
+  WasmCode* AddCompiledCode(WasmCompilationResult);
+  std::vector<WasmCode*> AddCompiledCode(Vector<WasmCompilationResult>);

  // Free a set of functions of this module. Uncommits whole pages if possible.
  // The given vector must be ordered by the instruction start address, and all
--- a/src/wasm/wasm-tier.h
+++ b/src/wasm/wasm-tier.h
@ -13,10 +13,10 @@ namespace wasm {

 // All the tiers of WASM execution.
 enum class ExecutionTier : int8_t {
+  kNone,
  kInterpreter,
  kLiftoff,
  kTurbofan,
-  kNone,
 };

 }  // namespace wasm
--- a/test/cctest/wasm/test-run-wasm.cc
+++ b/test/cctest/wasm/test-run-wasm.cc
@ -3723,7 +3723,7 @@ TEST(Liftoff_tier_up) {
    desc.instr_size = static_cast<int>(sub_size);
    std::unique_ptr<WasmCode> new_code = native_module->AddCode(
        add.function_index(), desc, 0, 0, {}, OwnedVector<byte>(),
-        WasmCode::kFunction, ExecutionTier::kNone);
+        WasmCode::kFunction, ExecutionTier::kTurbofan);
    native_module->PublishCode(std::move(new_code));

    // Second run should now execute {sub}.
--- a/test/cctest/wasm/wasm-run-utils.cc
+++ b/test/cctest/wasm/wasm-run-utils.cc
@ -501,9 +501,7 @@ void WasmFunctionCompiler::Build(const byte* start, const byte* end) {
  WasmCompilationResult result = unit.ExecuteCompilation(
      &env, native_module->compilation_state()->GetWireBytesStorage(),
      isolate()->counters(), &unused_detected_features);
-  WasmCodeUpdate code_update =
-      native_module->AddCompiledCode(std::move(result));
-  WasmCode* code = code_update.code;
+  WasmCode* code = native_module->AddCompiledCode(std::move(result));
  DCHECK_NOT_NULL(code);
  if (WasmCode::ShouldBeLogged(isolate())) code->LogCode(isolate());
 }
--- a/test/unittests/wasm/wasm-code-manager-unittest.cc
+++ b/test/unittests/wasm/wasm-code-manager-unittest.cc
@ -177,7 +177,7 @@ class WasmCodeManagerTest : public TestWithContext,
    desc.instr_size = static_cast<int>(size);
    std::unique_ptr<WasmCode> code = native_module->AddCode(
        index, desc, 0, 0, {}, {}, WasmCode::kFunction, ExecutionTier::kNone);
-    return native_module->PublishCode(std::move(code)).code;
+    return native_module->PublishCode(std::move(code));
  }

  size_t page() const { return AllocatePageSize(); }