[wasm][serialization] Allocate code in large chunks

On most platforms, we can do a single allocation for all code. On platforms where this is not possible (e.g. ARM64 has a 128MB code space limit), we will at least allocate big chunks instead of one chunk per function. This reduces overhead in {WasmCodeAllocator} for maintaining sets of used and available code space, and reduces locking during deserialization. In order to know how much code space to pre-allocate, the serializer writes out the total code space size. This is then used during deserialization to know how much code to expect. R=thibaudm@chromium.org Bug: v8:11164 Change-Id: If3846292544c7b6832b7a0b56357b74310f6fb23 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2644942 Commit-Queue: Clemens Backes <clemensb@chromium.org> Reviewed-by: Thibaud Michaud <thibaudm@chromium.org> Cr-Commit-Position: refs/heads/master@{#72317}
2021-01-26 12:58:50 +01:00 · 2021-01-26 12:58:50 +01:00 · e284517ba8
commit e284517ba8
parent 2145c6c7b3
3 changed files with 79 additions and 25 deletions
--- a/src/wasm/wasm-code-manager.cc
+++ b/src/wasm/wasm-code-manager.cc
@ -1185,20 +1185,23 @@ WasmCode* NativeModule::PublishCodeLocked(std::unique_ptr<WasmCode> code) {
  return result;
 }

-std::unique_ptr<WasmCode> NativeModule::AllocateDeserializedCode(
-    int index, Vector<const byte> instructions, int stack_slots,
+Vector<uint8_t> NativeModule::AllocateForDeserializedCode(
+    size_t total_code_size) {
+  return code_allocator_.AllocateForCode(this, total_code_size);
+}
+
+std::unique_ptr<WasmCode> NativeModule::AddDeserializedCode(
+    int index, Vector<byte> instructions, int stack_slots,
    int tagged_parameter_slots, int safepoint_table_offset,
    int handler_table_offset, int constant_pool_offset,
    int code_comments_offset, int unpadded_binary_size,
    Vector<const byte> protected_instructions_data,
    Vector<const byte> reloc_info, Vector<const byte> source_position_table,
    WasmCode::Kind kind, ExecutionTier tier) {
-  Vector<uint8_t> dst_code_bytes =
-      code_allocator_.AllocateForCode(this, instructions.size());
-  UpdateCodeSize(dst_code_bytes.size(), tier, kNoDebugging);
+  UpdateCodeSize(instructions.size(), tier, kNoDebugging);

  return std::unique_ptr<WasmCode>{new WasmCode{
-      this, index, dst_code_bytes, stack_slots, tagged_parameter_slots,
+      this, index, instructions, stack_slots, tagged_parameter_slots,
      safepoint_table_offset, handler_table_offset, constant_pool_offset,
      code_comments_offset, unpadded_binary_size, protected_instructions_data,
      reloc_info, source_position_table, kind, tier, kNoDebugging}};
--- a/src/wasm/wasm-code-manager.h
+++ b/src/wasm/wasm-code-manager.h
@ -500,8 +500,10 @@ class V8_EXPORT_PRIVATE NativeModule final {
  WasmCode* PublishCode(std::unique_ptr<WasmCode>);
  std::vector<WasmCode*> PublishCode(Vector<std::unique_ptr<WasmCode>>);

-  std::unique_ptr<WasmCode> AllocateDeserializedCode(
-      int index, Vector<const byte> instructions, int stack_slots,
+  Vector<uint8_t> AllocateForDeserializedCode(size_t total_code_size);
+
+  std::unique_ptr<WasmCode> AddDeserializedCode(
+      int index, Vector<byte> instructions, int stack_slots,
      int tagged_parameter_slots, int safepoint_table_offset,
      int handler_table_offset, int constant_pool_offset,
      int code_comments_offset, int unpadded_binary_size,
--- a/src/wasm/wasm-serialization.cc
+++ b/src/wasm/wasm-serialization.cc
@ -188,7 +188,8 @@ uint32_t GetWasmCalleeTag(RelocInfo* rinfo) {

 constexpr size_t kHeaderSize =
    sizeof(uint32_t) +  // total wasm function count
-    sizeof(uint32_t);   // imported functions (index of first wasm function)
+    sizeof(uint32_t) +  // imported functions (index of first wasm function)
+    sizeof(size_t);     // total code size

 constexpr size_t kCodeHeaderSize = sizeof(bool) +  // whether code is present
                                   sizeof(int) +   // offset of constant pool
@ -285,17 +286,18 @@ class V8_EXPORT_PRIVATE NativeModuleSerializer {

 private:
  size_t MeasureCode(const WasmCode*) const;
-  void WriteHeader(Writer*);
+  void WriteHeader(Writer*, size_t total_code_size);
  bool WriteCode(const WasmCode*, Writer*);

  const NativeModule* const native_module_;
-  Vector<WasmCode* const> code_table_;
-  bool write_called_;
+  const Vector<WasmCode* const> code_table_;
+  bool write_called_ = false;
+  size_t total_written_code_ = 0;
 };

 NativeModuleSerializer::NativeModuleSerializer(
    const NativeModule* module, Vector<WasmCode* const> code_table)
-    : native_module_(module), code_table_(code_table), write_called_(false) {
+    : native_module_(module), code_table_(code_table) {
  DCHECK_NOT_NULL(native_module_);
  // TODO(mtrofin): persist the export wrappers. Ideally, we'd only persist
  // the unique ones, i.e. the cache.
@ -320,12 +322,14 @@ size_t NativeModuleSerializer::Measure() const {
  return size;
 }

-void NativeModuleSerializer::WriteHeader(Writer* writer) {
+void NativeModuleSerializer::WriteHeader(Writer* writer,
+                                         size_t total_code_size) {
  // TODO(eholk): We need to properly preserve the flag whether the trap
  // handler was used or not when serializing.

  writer->Write(native_module_->num_functions());
  writer->Write(native_module_->num_imported_functions());
+  writer->Write(total_code_size);
 }

 bool NativeModuleSerializer::WriteCode(const WasmCode* code, Writer* writer) {
@ -431,6 +435,7 @@ bool NativeModuleSerializer::WriteCode(const WasmCode* code, Writer* writer) {
  if (code_start != serialized_code_start) {
    base::Memcpy(serialized_code_start, code_start, code_size);
  }
+  total_written_code_ += code_size;
  return true;
 }

@ -438,11 +443,22 @@ bool NativeModuleSerializer::Write(Writer* writer) {
  DCHECK(!write_called_);
  write_called_ = true;

-  WriteHeader(writer);
+  size_t total_code_size = 0;
+  for (WasmCode* code : code_table_) {
+    if (code && code->tier() == ExecutionTier::kTurbofan) {
+      DCHECK(IsAligned(code->instructions().size(), kCodeAlignment));
+      total_code_size += code->instructions().size();
+    }
+  }
+  WriteHeader(writer, total_code_size);

  for (WasmCode* code : code_table_) {
    if (!WriteCode(code, writer)) return false;
  }
+
+  // Make sure that the serialized total code size was correct.
+  CHECK_EQ(total_written_code_, total_code_size);
+
  return true;
 }

@ -512,12 +528,18 @@ class V8_EXPORT_PRIVATE NativeModuleDeserializer {
  friend class PublishTask;

  bool ReadHeader(Reader* reader);
-  DeserializationUnit ReadCodeAndAlloc(int fn_index, Reader* reader);
+  DeserializationUnit ReadCode(int fn_index, Reader* reader);
  void CopyAndRelocate(const DeserializationUnit& unit);
  void Publish(std::vector<DeserializationUnit> batch);

  NativeModule* const native_module_;
-  bool read_called_;
+#ifdef DEBUG
+  bool read_called_ = false;
+#endif
+
+  // Updated in {ReadCode}.
+  size_t remaining_code_size_ = 0;
+  Vector<byte> current_code_space_;
 };

 class CopyAndRelocTask : public JobTask {
@ -586,15 +608,18 @@ class PublishTask : public JobTask {
 };

 NativeModuleDeserializer::NativeModuleDeserializer(NativeModule* native_module)
-    : native_module_(native_module), read_called_(false) {}
+    : native_module_(native_module) {}

 bool NativeModuleDeserializer::Read(Reader* reader) {
  DCHECK(!read_called_);
+#ifdef DEBUG
  read_called_ = true;
+#endif

  if (!ReadHeader(reader)) return false;
  uint32_t total_fns = native_module_->num_functions();
  uint32_t first_wasm_fn = native_module_->num_imported_functions();
+
  WasmCodeRefScope wasm_code_ref_scope;

  DeserializationQueue reloc_queue;
@ -613,7 +638,7 @@ bool NativeModuleDeserializer::Read(Reader* reader) {
  std::vector<DeserializationUnit> batch;
  const byte* batch_start = reader->current_location();
  for (uint32_t i = first_wasm_fn; i < total_fns; ++i) {
-    DeserializationUnit unit = ReadCodeAndAlloc(i, reader);
+    DeserializationUnit unit = ReadCode(i, reader);
    if (!unit.code) continue;
    batch.emplace_back(std::move(unit));
    uint64_t batch_size_in_bytes = reader->current_location() - batch_start;
@ -626,6 +651,11 @@ bool NativeModuleDeserializer::Read(Reader* reader) {
    }
  }

+  // We should have read the expected amount of code now, and should have fully
+  // utilized the allocated code space.
+  DCHECK_EQ(0, remaining_code_size_);
+  DCHECK_EQ(0, current_code_space_.size());
+
  if (!batch.empty()) {
    reloc_queue.Add(std::move(batch));
    copy_and_reloc_handle->NotifyConcurrencyIncrease();
@ -639,14 +669,15 @@ bool NativeModuleDeserializer::Read(Reader* reader) {
 }

 bool NativeModuleDeserializer::ReadHeader(Reader* reader) {
-  size_t functions = reader->Read<uint32_t>();
-  size_t imports = reader->Read<uint32_t>();
+  uint32_t functions = reader->Read<uint32_t>();
+  uint32_t imports = reader->Read<uint32_t>();
+  remaining_code_size_ = reader->Read<size_t>();
  return functions == native_module_->num_functions() &&
         imports == native_module_->num_imported_functions();
 }

-DeserializationUnit NativeModuleDeserializer::ReadCodeAndAlloc(int fn_index,
-                                                               Reader* reader) {
+DeserializationUnit NativeModuleDeserializer::ReadCode(int fn_index,
+                                                       Reader* reader) {
  bool has_code = reader->Read<bool>();
  if (!has_code) {
    DCHECK(FLAG_wasm_lazy_compilation ||
@ -668,14 +699,32 @@ DeserializationUnit NativeModuleDeserializer::ReadCodeAndAlloc(int fn_index,
  WasmCode::Kind kind = reader->Read<WasmCode::Kind>();
  ExecutionTier tier = reader->Read<ExecutionTier>();

+  DCHECK(IsAligned(code_size, kCodeAlignment));
+  DCHECK_GE(remaining_code_size_, code_size);
+  if (current_code_space_.size() < static_cast<size_t>(code_size)) {
+    // Allocate the next code space. Don't allocate more than 90% of
+    // {kMaxCodeSpaceSize}, to leave some space for jump tables.
+    constexpr size_t kMaxReservation =
+        RoundUp<kCodeAlignment>(WasmCodeAllocator::kMaxCodeSpaceSize * 9 / 10);
+    size_t code_space_size = std::min(kMaxReservation, remaining_code_size_);
+    current_code_space_ =
+        native_module_->AllocateForDeserializedCode(code_space_size);
+    DCHECK_EQ(current_code_space_.size(), code_space_size);
+  }
+
  DeserializationUnit unit;
  unit.src_code_buffer = reader->ReadVector<byte>(code_size);
  auto reloc_info = reader->ReadVector<byte>(reloc_size);
  auto source_pos = reader->ReadVector<byte>(source_position_size);
  auto protected_instructions =
      reader->ReadVector<byte>(protected_instructions_size);
-  unit.code = native_module_->AllocateDeserializedCode(
-      fn_index, unit.src_code_buffer, stack_slot_count, tagged_parameter_slots,
+
+  Vector<uint8_t> instructions = current_code_space_.SubVector(0, code_size);
+  current_code_space_ += code_size;
+  remaining_code_size_ -= code_size;
+
+  unit.code = native_module_->AddDeserializedCode(
+      fn_index, instructions, stack_slot_count, tagged_parameter_slots,
      safepoint_table_offset, handler_table_offset, constant_pool_offset,
      code_comment_offset, unpadded_binary_size, protected_instructions,
      reloc_info, source_pos, kind, tier);