[wasm][serialization] Allocate code in large chunks

On most platforms, we can do a single allocation for all code. On
platforms where this is not possible (e.g. ARM64 has a 128MB code space
limit), we will at least allocate big chunks instead of one chunk per
function. This reduces overhead in {WasmCodeAllocator} for maintaining
sets of used and available code space, and reduces locking during
deserialization.

In order to know how much code space to pre-allocate, the serializer
writes out the total code space size. This is then used during
deserialization to know how much code to expect.

R=thibaudm@chromium.org

Bug: v8:11164
Change-Id: If3846292544c7b6832b7a0b56357b74310f6fb23
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2644942
Commit-Queue: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Thibaud Michaud <thibaudm@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72317}
This commit is contained in:
Clemens Backes 2021-01-26 12:58:50 +01:00 committed by Commit Bot
parent 2145c6c7b3
commit e284517ba8
3 changed files with 79 additions and 25 deletions

View File

@ -1185,20 +1185,23 @@ WasmCode* NativeModule::PublishCodeLocked(std::unique_ptr<WasmCode> code) {
return result;
}
std::unique_ptr<WasmCode> NativeModule::AllocateDeserializedCode(
int index, Vector<const byte> instructions, int stack_slots,
Vector<uint8_t> NativeModule::AllocateForDeserializedCode(
size_t total_code_size) {
return code_allocator_.AllocateForCode(this, total_code_size);
}
std::unique_ptr<WasmCode> NativeModule::AddDeserializedCode(
int index, Vector<byte> instructions, int stack_slots,
int tagged_parameter_slots, int safepoint_table_offset,
int handler_table_offset, int constant_pool_offset,
int code_comments_offset, int unpadded_binary_size,
Vector<const byte> protected_instructions_data,
Vector<const byte> reloc_info, Vector<const byte> source_position_table,
WasmCode::Kind kind, ExecutionTier tier) {
Vector<uint8_t> dst_code_bytes =
code_allocator_.AllocateForCode(this, instructions.size());
UpdateCodeSize(dst_code_bytes.size(), tier, kNoDebugging);
UpdateCodeSize(instructions.size(), tier, kNoDebugging);
return std::unique_ptr<WasmCode>{new WasmCode{
this, index, dst_code_bytes, stack_slots, tagged_parameter_slots,
this, index, instructions, stack_slots, tagged_parameter_slots,
safepoint_table_offset, handler_table_offset, constant_pool_offset,
code_comments_offset, unpadded_binary_size, protected_instructions_data,
reloc_info, source_position_table, kind, tier, kNoDebugging}};

View File

@ -500,8 +500,10 @@ class V8_EXPORT_PRIVATE NativeModule final {
WasmCode* PublishCode(std::unique_ptr<WasmCode>);
std::vector<WasmCode*> PublishCode(Vector<std::unique_ptr<WasmCode>>);
std::unique_ptr<WasmCode> AllocateDeserializedCode(
int index, Vector<const byte> instructions, int stack_slots,
Vector<uint8_t> AllocateForDeserializedCode(size_t total_code_size);
std::unique_ptr<WasmCode> AddDeserializedCode(
int index, Vector<byte> instructions, int stack_slots,
int tagged_parameter_slots, int safepoint_table_offset,
int handler_table_offset, int constant_pool_offset,
int code_comments_offset, int unpadded_binary_size,

View File

@ -188,7 +188,8 @@ uint32_t GetWasmCalleeTag(RelocInfo* rinfo) {
constexpr size_t kHeaderSize =
sizeof(uint32_t) + // total wasm function count
sizeof(uint32_t); // imported functions (index of first wasm function)
sizeof(uint32_t) + // imported functions (index of first wasm function)
sizeof(size_t); // total code size
constexpr size_t kCodeHeaderSize = sizeof(bool) + // whether code is present
sizeof(int) + // offset of constant pool
@ -285,17 +286,18 @@ class V8_EXPORT_PRIVATE NativeModuleSerializer {
private:
size_t MeasureCode(const WasmCode*) const;
void WriteHeader(Writer*);
void WriteHeader(Writer*, size_t total_code_size);
bool WriteCode(const WasmCode*, Writer*);
const NativeModule* const native_module_;
Vector<WasmCode* const> code_table_;
bool write_called_;
const Vector<WasmCode* const> code_table_;
bool write_called_ = false;
size_t total_written_code_ = 0;
};
NativeModuleSerializer::NativeModuleSerializer(
const NativeModule* module, Vector<WasmCode* const> code_table)
: native_module_(module), code_table_(code_table), write_called_(false) {
: native_module_(module), code_table_(code_table) {
DCHECK_NOT_NULL(native_module_);
// TODO(mtrofin): persist the export wrappers. Ideally, we'd only persist
// the unique ones, i.e. the cache.
@ -320,12 +322,14 @@ size_t NativeModuleSerializer::Measure() const {
return size;
}
void NativeModuleSerializer::WriteHeader(Writer* writer) {
void NativeModuleSerializer::WriteHeader(Writer* writer,
size_t total_code_size) {
// TODO(eholk): We need to properly preserve the flag whether the trap
// handler was used or not when serializing.
writer->Write(native_module_->num_functions());
writer->Write(native_module_->num_imported_functions());
writer->Write(total_code_size);
}
bool NativeModuleSerializer::WriteCode(const WasmCode* code, Writer* writer) {
@ -431,6 +435,7 @@ bool NativeModuleSerializer::WriteCode(const WasmCode* code, Writer* writer) {
if (code_start != serialized_code_start) {
base::Memcpy(serialized_code_start, code_start, code_size);
}
total_written_code_ += code_size;
return true;
}
@ -438,11 +443,22 @@ bool NativeModuleSerializer::Write(Writer* writer) {
DCHECK(!write_called_);
write_called_ = true;
WriteHeader(writer);
size_t total_code_size = 0;
for (WasmCode* code : code_table_) {
if (code && code->tier() == ExecutionTier::kTurbofan) {
DCHECK(IsAligned(code->instructions().size(), kCodeAlignment));
total_code_size += code->instructions().size();
}
}
WriteHeader(writer, total_code_size);
for (WasmCode* code : code_table_) {
if (!WriteCode(code, writer)) return false;
}
// Make sure that the serialized total code size was correct.
CHECK_EQ(total_written_code_, total_code_size);
return true;
}
@ -512,12 +528,18 @@ class V8_EXPORT_PRIVATE NativeModuleDeserializer {
friend class PublishTask;
bool ReadHeader(Reader* reader);
DeserializationUnit ReadCodeAndAlloc(int fn_index, Reader* reader);
DeserializationUnit ReadCode(int fn_index, Reader* reader);
void CopyAndRelocate(const DeserializationUnit& unit);
void Publish(std::vector<DeserializationUnit> batch);
NativeModule* const native_module_;
bool read_called_;
#ifdef DEBUG
bool read_called_ = false;
#endif
// Updated in {ReadCode}.
size_t remaining_code_size_ = 0;
Vector<byte> current_code_space_;
};
class CopyAndRelocTask : public JobTask {
@ -586,15 +608,18 @@ class PublishTask : public JobTask {
};
NativeModuleDeserializer::NativeModuleDeserializer(NativeModule* native_module)
: native_module_(native_module), read_called_(false) {}
: native_module_(native_module) {}
bool NativeModuleDeserializer::Read(Reader* reader) {
DCHECK(!read_called_);
#ifdef DEBUG
read_called_ = true;
#endif
if (!ReadHeader(reader)) return false;
uint32_t total_fns = native_module_->num_functions();
uint32_t first_wasm_fn = native_module_->num_imported_functions();
WasmCodeRefScope wasm_code_ref_scope;
DeserializationQueue reloc_queue;
@ -613,7 +638,7 @@ bool NativeModuleDeserializer::Read(Reader* reader) {
std::vector<DeserializationUnit> batch;
const byte* batch_start = reader->current_location();
for (uint32_t i = first_wasm_fn; i < total_fns; ++i) {
DeserializationUnit unit = ReadCodeAndAlloc(i, reader);
DeserializationUnit unit = ReadCode(i, reader);
if (!unit.code) continue;
batch.emplace_back(std::move(unit));
uint64_t batch_size_in_bytes = reader->current_location() - batch_start;
@ -626,6 +651,11 @@ bool NativeModuleDeserializer::Read(Reader* reader) {
}
}
// We should have read the expected amount of code now, and should have fully
// utilized the allocated code space.
DCHECK_EQ(0, remaining_code_size_);
DCHECK_EQ(0, current_code_space_.size());
if (!batch.empty()) {
reloc_queue.Add(std::move(batch));
copy_and_reloc_handle->NotifyConcurrencyIncrease();
@ -639,13 +669,14 @@ bool NativeModuleDeserializer::Read(Reader* reader) {
}
bool NativeModuleDeserializer::ReadHeader(Reader* reader) {
size_t functions = reader->Read<uint32_t>();
size_t imports = reader->Read<uint32_t>();
uint32_t functions = reader->Read<uint32_t>();
uint32_t imports = reader->Read<uint32_t>();
remaining_code_size_ = reader->Read<size_t>();
return functions == native_module_->num_functions() &&
imports == native_module_->num_imported_functions();
}
DeserializationUnit NativeModuleDeserializer::ReadCodeAndAlloc(int fn_index,
DeserializationUnit NativeModuleDeserializer::ReadCode(int fn_index,
Reader* reader) {
bool has_code = reader->Read<bool>();
if (!has_code) {
@ -668,14 +699,32 @@ DeserializationUnit NativeModuleDeserializer::ReadCodeAndAlloc(int fn_index,
WasmCode::Kind kind = reader->Read<WasmCode::Kind>();
ExecutionTier tier = reader->Read<ExecutionTier>();
DCHECK(IsAligned(code_size, kCodeAlignment));
DCHECK_GE(remaining_code_size_, code_size);
if (current_code_space_.size() < static_cast<size_t>(code_size)) {
// Allocate the next code space. Don't allocate more than 90% of
// {kMaxCodeSpaceSize}, to leave some space for jump tables.
constexpr size_t kMaxReservation =
RoundUp<kCodeAlignment>(WasmCodeAllocator::kMaxCodeSpaceSize * 9 / 10);
size_t code_space_size = std::min(kMaxReservation, remaining_code_size_);
current_code_space_ =
native_module_->AllocateForDeserializedCode(code_space_size);
DCHECK_EQ(current_code_space_.size(), code_space_size);
}
DeserializationUnit unit;
unit.src_code_buffer = reader->ReadVector<byte>(code_size);
auto reloc_info = reader->ReadVector<byte>(reloc_size);
auto source_pos = reader->ReadVector<byte>(source_position_size);
auto protected_instructions =
reader->ReadVector<byte>(protected_instructions_size);
unit.code = native_module_->AllocateDeserializedCode(
fn_index, unit.src_code_buffer, stack_slot_count, tagged_parameter_slots,
Vector<uint8_t> instructions = current_code_space_.SubVector(0, code_size);
current_code_space_ += code_size;
remaining_code_size_ -= code_size;
unit.code = native_module_->AddDeserializedCode(
fn_index, instructions, stack_slot_count, tagged_parameter_slots,
safepoint_table_offset, handler_table_offset, constant_pool_offset,
code_comment_offset, unpadded_binary_size, protected_instructions,
reloc_info, source_pos, kind, tier);