[wasm][serialization] Allocate code in large chunks

On most platforms, we can do a single allocation for all code. On
platforms where this is not possible (e.g. ARM64 has a 128MB code space
limit), we will at least allocate big chunks instead of one chunk per
function.
This reduces overhead in {WasmCodeAllocator} for maintaining sets of
used and available code space, and reduces locking during
deserialization.

In order to know how much code space to pre-allocate, the serializer
writes out the total code space size. This is then used during
deserialization to know how much code to expect.

R=thibaudm@chromium.org

Bug: v8:11164
Change-Id: If3846292544c7b6832b7a0b56357b74310f6fb23
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2644942
Commit-Queue: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Thibaud Michaud <thibaudm@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72317}
This commit is contained in:
parent
2145c6c7b3
commit
e284517ba8
@ -1185,20 +1185,23 @@ WasmCode* NativeModule::PublishCodeLocked(std::unique_ptr<WasmCode> code) {
|
||||
return result;
|
||||
}
|
||||
|
||||
std::unique_ptr<WasmCode> NativeModule::AllocateDeserializedCode(
|
||||
int index, Vector<const byte> instructions, int stack_slots,
|
||||
Vector<uint8_t> NativeModule::AllocateForDeserializedCode(
|
||||
size_t total_code_size) {
|
||||
return code_allocator_.AllocateForCode(this, total_code_size);
|
||||
}
|
||||
|
||||
std::unique_ptr<WasmCode> NativeModule::AddDeserializedCode(
|
||||
int index, Vector<byte> instructions, int stack_slots,
|
||||
int tagged_parameter_slots, int safepoint_table_offset,
|
||||
int handler_table_offset, int constant_pool_offset,
|
||||
int code_comments_offset, int unpadded_binary_size,
|
||||
Vector<const byte> protected_instructions_data,
|
||||
Vector<const byte> reloc_info, Vector<const byte> source_position_table,
|
||||
WasmCode::Kind kind, ExecutionTier tier) {
|
||||
Vector<uint8_t> dst_code_bytes =
|
||||
code_allocator_.AllocateForCode(this, instructions.size());
|
||||
UpdateCodeSize(dst_code_bytes.size(), tier, kNoDebugging);
|
||||
UpdateCodeSize(instructions.size(), tier, kNoDebugging);
|
||||
|
||||
return std::unique_ptr<WasmCode>{new WasmCode{
|
||||
this, index, dst_code_bytes, stack_slots, tagged_parameter_slots,
|
||||
this, index, instructions, stack_slots, tagged_parameter_slots,
|
||||
safepoint_table_offset, handler_table_offset, constant_pool_offset,
|
||||
code_comments_offset, unpadded_binary_size, protected_instructions_data,
|
||||
reloc_info, source_position_table, kind, tier, kNoDebugging}};
|
||||
|
@ -500,8 +500,10 @@ class V8_EXPORT_PRIVATE NativeModule final {
|
||||
WasmCode* PublishCode(std::unique_ptr<WasmCode>);
|
||||
std::vector<WasmCode*> PublishCode(Vector<std::unique_ptr<WasmCode>>);
|
||||
|
||||
std::unique_ptr<WasmCode> AllocateDeserializedCode(
|
||||
int index, Vector<const byte> instructions, int stack_slots,
|
||||
Vector<uint8_t> AllocateForDeserializedCode(size_t total_code_size);
|
||||
|
||||
std::unique_ptr<WasmCode> AddDeserializedCode(
|
||||
int index, Vector<byte> instructions, int stack_slots,
|
||||
int tagged_parameter_slots, int safepoint_table_offset,
|
||||
int handler_table_offset, int constant_pool_offset,
|
||||
int code_comments_offset, int unpadded_binary_size,
|
||||
|
@ -188,7 +188,8 @@ uint32_t GetWasmCalleeTag(RelocInfo* rinfo) {
|
||||
|
||||
constexpr size_t kHeaderSize =
|
||||
sizeof(uint32_t) + // total wasm function count
|
||||
sizeof(uint32_t); // imported functions (index of first wasm function)
|
||||
sizeof(uint32_t) + // imported functions (index of first wasm function)
|
||||
sizeof(size_t); // total code size
|
||||
|
||||
constexpr size_t kCodeHeaderSize = sizeof(bool) + // whether code is present
|
||||
sizeof(int) + // offset of constant pool
|
||||
@ -285,17 +286,18 @@ class V8_EXPORT_PRIVATE NativeModuleSerializer {
|
||||
|
||||
private:
|
||||
size_t MeasureCode(const WasmCode*) const;
|
||||
void WriteHeader(Writer*);
|
||||
void WriteHeader(Writer*, size_t total_code_size);
|
||||
bool WriteCode(const WasmCode*, Writer*);
|
||||
|
||||
const NativeModule* const native_module_;
|
||||
Vector<WasmCode* const> code_table_;
|
||||
bool write_called_;
|
||||
const Vector<WasmCode* const> code_table_;
|
||||
bool write_called_ = false;
|
||||
size_t total_written_code_ = 0;
|
||||
};
|
||||
|
||||
NativeModuleSerializer::NativeModuleSerializer(
|
||||
const NativeModule* module, Vector<WasmCode* const> code_table)
|
||||
: native_module_(module), code_table_(code_table), write_called_(false) {
|
||||
: native_module_(module), code_table_(code_table) {
|
||||
DCHECK_NOT_NULL(native_module_);
|
||||
// TODO(mtrofin): persist the export wrappers. Ideally, we'd only persist
|
||||
// the unique ones, i.e. the cache.
|
||||
@ -320,12 +322,14 @@ size_t NativeModuleSerializer::Measure() const {
|
||||
return size;
|
||||
}
|
||||
|
||||
void NativeModuleSerializer::WriteHeader(Writer* writer) {
|
||||
void NativeModuleSerializer::WriteHeader(Writer* writer,
|
||||
size_t total_code_size) {
|
||||
// TODO(eholk): We need to properly preserve the flag whether the trap
|
||||
// handler was used or not when serializing.
|
||||
|
||||
writer->Write(native_module_->num_functions());
|
||||
writer->Write(native_module_->num_imported_functions());
|
||||
writer->Write(total_code_size);
|
||||
}
|
||||
|
||||
bool NativeModuleSerializer::WriteCode(const WasmCode* code, Writer* writer) {
|
||||
@ -431,6 +435,7 @@ bool NativeModuleSerializer::WriteCode(const WasmCode* code, Writer* writer) {
|
||||
if (code_start != serialized_code_start) {
|
||||
base::Memcpy(serialized_code_start, code_start, code_size);
|
||||
}
|
||||
total_written_code_ += code_size;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -438,11 +443,22 @@ bool NativeModuleSerializer::Write(Writer* writer) {
|
||||
DCHECK(!write_called_);
|
||||
write_called_ = true;
|
||||
|
||||
WriteHeader(writer);
|
||||
size_t total_code_size = 0;
|
||||
for (WasmCode* code : code_table_) {
|
||||
if (code && code->tier() == ExecutionTier::kTurbofan) {
|
||||
DCHECK(IsAligned(code->instructions().size(), kCodeAlignment));
|
||||
total_code_size += code->instructions().size();
|
||||
}
|
||||
}
|
||||
WriteHeader(writer, total_code_size);
|
||||
|
||||
for (WasmCode* code : code_table_) {
|
||||
if (!WriteCode(code, writer)) return false;
|
||||
}
|
||||
|
||||
// Make sure that the serialized total code size was correct.
|
||||
CHECK_EQ(total_written_code_, total_code_size);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -512,12 +528,18 @@ class V8_EXPORT_PRIVATE NativeModuleDeserializer {
|
||||
friend class PublishTask;
|
||||
|
||||
bool ReadHeader(Reader* reader);
|
||||
DeserializationUnit ReadCodeAndAlloc(int fn_index, Reader* reader);
|
||||
DeserializationUnit ReadCode(int fn_index, Reader* reader);
|
||||
void CopyAndRelocate(const DeserializationUnit& unit);
|
||||
void Publish(std::vector<DeserializationUnit> batch);
|
||||
|
||||
NativeModule* const native_module_;
|
||||
bool read_called_;
|
||||
#ifdef DEBUG
|
||||
bool read_called_ = false;
|
||||
#endif
|
||||
|
||||
// Updated in {ReadCode}.
|
||||
size_t remaining_code_size_ = 0;
|
||||
Vector<byte> current_code_space_;
|
||||
};
|
||||
|
||||
class CopyAndRelocTask : public JobTask {
|
||||
@ -586,15 +608,18 @@ class PublishTask : public JobTask {
|
||||
};
|
||||
|
||||
NativeModuleDeserializer::NativeModuleDeserializer(NativeModule* native_module)
|
||||
: native_module_(native_module), read_called_(false) {}
|
||||
: native_module_(native_module) {}
|
||||
|
||||
bool NativeModuleDeserializer::Read(Reader* reader) {
|
||||
DCHECK(!read_called_);
|
||||
#ifdef DEBUG
|
||||
read_called_ = true;
|
||||
#endif
|
||||
|
||||
if (!ReadHeader(reader)) return false;
|
||||
uint32_t total_fns = native_module_->num_functions();
|
||||
uint32_t first_wasm_fn = native_module_->num_imported_functions();
|
||||
|
||||
WasmCodeRefScope wasm_code_ref_scope;
|
||||
|
||||
DeserializationQueue reloc_queue;
|
||||
@ -613,7 +638,7 @@ bool NativeModuleDeserializer::Read(Reader* reader) {
|
||||
std::vector<DeserializationUnit> batch;
|
||||
const byte* batch_start = reader->current_location();
|
||||
for (uint32_t i = first_wasm_fn; i < total_fns; ++i) {
|
||||
DeserializationUnit unit = ReadCodeAndAlloc(i, reader);
|
||||
DeserializationUnit unit = ReadCode(i, reader);
|
||||
if (!unit.code) continue;
|
||||
batch.emplace_back(std::move(unit));
|
||||
uint64_t batch_size_in_bytes = reader->current_location() - batch_start;
|
||||
@ -626,6 +651,11 @@ bool NativeModuleDeserializer::Read(Reader* reader) {
|
||||
}
|
||||
}
|
||||
|
||||
// We should have read the expected amount of code now, and should have fully
|
||||
// utilized the allocated code space.
|
||||
DCHECK_EQ(0, remaining_code_size_);
|
||||
DCHECK_EQ(0, current_code_space_.size());
|
||||
|
||||
if (!batch.empty()) {
|
||||
reloc_queue.Add(std::move(batch));
|
||||
copy_and_reloc_handle->NotifyConcurrencyIncrease();
|
||||
@ -639,14 +669,15 @@ bool NativeModuleDeserializer::Read(Reader* reader) {
|
||||
}
|
||||
|
||||
bool NativeModuleDeserializer::ReadHeader(Reader* reader) {
|
||||
size_t functions = reader->Read<uint32_t>();
|
||||
size_t imports = reader->Read<uint32_t>();
|
||||
uint32_t functions = reader->Read<uint32_t>();
|
||||
uint32_t imports = reader->Read<uint32_t>();
|
||||
remaining_code_size_ = reader->Read<size_t>();
|
||||
return functions == native_module_->num_functions() &&
|
||||
imports == native_module_->num_imported_functions();
|
||||
}
|
||||
|
||||
DeserializationUnit NativeModuleDeserializer::ReadCodeAndAlloc(int fn_index,
|
||||
Reader* reader) {
|
||||
DeserializationUnit NativeModuleDeserializer::ReadCode(int fn_index,
|
||||
Reader* reader) {
|
||||
bool has_code = reader->Read<bool>();
|
||||
if (!has_code) {
|
||||
DCHECK(FLAG_wasm_lazy_compilation ||
|
||||
@ -668,14 +699,32 @@ DeserializationUnit NativeModuleDeserializer::ReadCodeAndAlloc(int fn_index,
|
||||
WasmCode::Kind kind = reader->Read<WasmCode::Kind>();
|
||||
ExecutionTier tier = reader->Read<ExecutionTier>();
|
||||
|
||||
DCHECK(IsAligned(code_size, kCodeAlignment));
|
||||
DCHECK_GE(remaining_code_size_, code_size);
|
||||
if (current_code_space_.size() < static_cast<size_t>(code_size)) {
|
||||
// Allocate the next code space. Don't allocate more than 90% of
|
||||
// {kMaxCodeSpaceSize}, to leave some space for jump tables.
|
||||
constexpr size_t kMaxReservation =
|
||||
RoundUp<kCodeAlignment>(WasmCodeAllocator::kMaxCodeSpaceSize * 9 / 10);
|
||||
size_t code_space_size = std::min(kMaxReservation, remaining_code_size_);
|
||||
current_code_space_ =
|
||||
native_module_->AllocateForDeserializedCode(code_space_size);
|
||||
DCHECK_EQ(current_code_space_.size(), code_space_size);
|
||||
}
|
||||
|
||||
DeserializationUnit unit;
|
||||
unit.src_code_buffer = reader->ReadVector<byte>(code_size);
|
||||
auto reloc_info = reader->ReadVector<byte>(reloc_size);
|
||||
auto source_pos = reader->ReadVector<byte>(source_position_size);
|
||||
auto protected_instructions =
|
||||
reader->ReadVector<byte>(protected_instructions_size);
|
||||
unit.code = native_module_->AllocateDeserializedCode(
|
||||
fn_index, unit.src_code_buffer, stack_slot_count, tagged_parameter_slots,
|
||||
|
||||
Vector<uint8_t> instructions = current_code_space_.SubVector(0, code_size);
|
||||
current_code_space_ += code_size;
|
||||
remaining_code_size_ -= code_size;
|
||||
|
||||
unit.code = native_module_->AddDeserializedCode(
|
||||
fn_index, instructions, stack_slot_count, tagged_parameter_slots,
|
||||
safepoint_table_offset, handler_table_offset, constant_pool_offset,
|
||||
code_comment_offset, unpadded_binary_size, protected_instructions,
|
||||
reloc_info, source_pos, kind, tier);
|
||||
|
Loading…
Reference in New Issue
Block a user