[wasm] Refactor runtime stub table to far jump table

This CL renames the runtime stub table to far jump table, and changes
the implementation so it can later be patched concurrently.
A follow-up CL will extend this table to also contain slots for wasm
functions (if needed).

R=mstarzinger@chromium.org

Bug: v8:9477
Change-Id: I20bf0a0bb66dc0333f794761c1506b27137b53e7
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1789159
Commit-Queue: Clemens Hammacher <clemensh@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Cr-Commit-Position: refs/heads/master@{#63679}
Clemens Hammacher authored on 2019-09-11 15:13:19 +02:00, committed by Commit Bot
parent c8880a232b
commit 8734a486f4
3 changed files with 82 additions and 80 deletions
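For orientation (editor's sketch, reconstructed from the jump-table-assembler.h changes below, not text from the CL): the far jump table consists of one fixed-size slot per runtime stub, each of which branches through a data word it carries.

// Layout of the far jump table introduced by this CL:
//   base + 0 * kFarJumpTableSlotSize  -> far jump to runtime stub #0
//   base + 1 * kFarJumpTableSlotSize  -> far jump to runtime stub #1
//   ...                                  (one slot per WasmCode::RuntimeStubId)
// Because every slot has the same size and jumps via an embedded data word,
// an individual slot can later be re-pointed by patching only that word.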

src/wasm/jump-table-assembler.cc

@@ -21,12 +21,6 @@ void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
EmitJumpSlot(lazy_compile_target); // 5 bytes
}
void JumpTableAssembler::EmitRuntimeStubSlot(Address builtin_target) {
movq_imm64(kScratchRegister, builtin_target); // 10 bytes
jmp(kScratchRegister); // 3 bytes
STATIC_ASSERT(kJumpTableStubSlotSize == 13);
}
void JumpTableAssembler::EmitJumpSlot(Address target) {
// On x64, all code is allocated within a single code section, so we can use
// relative jumps.
@@ -36,6 +30,18 @@ void JumpTableAssembler::EmitJumpSlot(Address target) {
near_jmp(displacement, RelocInfo::NONE);
}
void JumpTableAssembler::EmitFarJumpSlot(Address target) {
Label data;
int start_offset = pc_offset();
jmp(Operand(&data)); // 6 bytes
Nop(2); // 2 bytes
// The data must be properly aligned, so it can be patched atomically.
DCHECK_EQ(start_offset + 8, pc_offset());
USE(start_offset);
bind(&data);
dq(target); // 8 bytes
}
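The 8-byte data word ends up at offset 8 of the 16-byte slot, so it is naturally aligned as long as the table base is at least 8-byte aligned (which holds for V8's code allocations). Below is a minimal sketch of how a follow-up could re-point such a slot concurrently; the helper name and the use of C++20 std::atomic_ref are editor's assumptions, not part of this CL.

#include <atomic>
#include <cstdint>

using Address = uintptr_t;  // stand-in for V8's Address

// Re-points one x64 far jump slot by overwriting only its 8-byte target word.
// The jmp instruction itself never changes, so a concurrently executing thread
// observes either the old or the new target, never a torn value.
void PatchFarJumpSlotSketch(Address slot_start, Address new_target) {
  auto* word = reinterpret_cast<uint64_t*>(slot_start + 8);
  std::atomic_ref<uint64_t>(*word).store(static_cast<uint64_t>(new_target),
                                         std::memory_order_relaxed);
}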
void JumpTableAssembler::NopBytes(int bytes) {
DCHECK_LE(0, bytes);
Nop(bytes);
@@ -48,11 +54,11 @@ void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
jmp(lazy_compile_target, RelocInfo::NONE); // 5 bytes
}
void JumpTableAssembler::EmitRuntimeStubSlot(Address builtin_target) {
jmp(builtin_target, RelocInfo::NONE);
void JumpTableAssembler::EmitJumpSlot(Address target) {
jmp(target, RelocInfo::NONE);
}
void JumpTableAssembler::EmitJumpSlot(Address target) {
void JumpTableAssembler::EmitFarJumpSlot(Address target) {
jmp(target, RelocInfo::NONE);
}
@@ -76,15 +82,6 @@ void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
EmitJumpSlot(lazy_compile_target);
}
void JumpTableAssembler::EmitRuntimeStubSlot(Address builtin_target) {
// Load from [pc + kInstrSize] to pc. Note that {pc} points two instructions
// after the currently executing one.
ldr_pcrel(pc, -kInstrSize); // 1 instruction
dd(builtin_target); // 4 bytes (== 1 instruction)
STATIC_ASSERT(kInstrSize == kInt32Size);
STATIC_ASSERT(kJumpTableStubSlotSize == 2 * kInstrSize);
}
void JumpTableAssembler::EmitJumpSlot(Address target) {
// Note that {Move32BitImmediate} emits [ldr, constant] for the relocation
// mode used below, we need this to allow concurrent patching of this slot.
@@ -92,6 +89,15 @@ void JumpTableAssembler::EmitJumpSlot(Address target) {
CheckConstPool(true, false); // force emit of const pool
}
void JumpTableAssembler::EmitFarJumpSlot(Address target) {
// Load from [pc + kInstrSize] to pc. Note that {pc} points two instructions
// after the currently executing one.
ldr_pcrel(pc, -kInstrSize); // 1 instruction
dd(target); // 4 bytes (== 1 instruction)
STATIC_ASSERT(kInstrSize == kInt32Size);
STATIC_ASSERT(kFarJumpTableSlotSize == 2 * kInstrSize);
}
void JumpTableAssembler::NopBytes(int bytes) {
DCHECK_LE(0, bytes);
DCHECK_EQ(0, bytes % kInstrSize);
@@ -111,7 +117,16 @@ void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
if (nop_bytes) nop();
}
void JumpTableAssembler::EmitRuntimeStubSlot(Address builtin_target) {
void JumpTableAssembler::EmitJumpSlot(Address target) {
// TODO(wasm): Currently this is guaranteed to be a {near_call} and hence is
// patchable concurrently. Once {kMaxWasmCodeMemory} is raised on ARM64, make
// sure concurrent patching is still supported.
DCHECK(TurboAssembler::IsNearCallOffset(
(reinterpret_cast<byte*>(target) - pc_) / kInstrSize));
Jump(target, RelocInfo::NONE);
}
void JumpTableAssembler::EmitFarJumpSlot(Address target) {
// This code uses hard-coded registers and instructions (and avoids
// {UseScratchRegisterScope} or {InstructionAccurateScope}) because this code
// will only be called for the very specific runtime slot table, and we want
@@ -122,19 +137,9 @@ void JumpTableAssembler::EmitRuntimeStubSlot(Address builtin_target) {
// Load from [pc + 2 * kInstrSize] to {kTmpReg}, then branch there.
ldr_pcrel(kTmpReg, 2); // 1 instruction
br(kTmpReg); // 1 instruction
dq(builtin_target); // 8 bytes (== 2 instructions)
dq(target); // 8 bytes (== 2 instructions)
STATIC_ASSERT(kInstrSize == kInt32Size);
STATIC_ASSERT(kJumpTableStubSlotSize == 4 * kInstrSize);
}
void JumpTableAssembler::EmitJumpSlot(Address target) {
// TODO(wasm): Currently this is guaranteed to be a {near_call} and hence is
// patchable concurrently. Once {kMaxWasmCodeMemory} is raised on ARM64, make
// sure concurrent patching is still supported.
DCHECK(TurboAssembler::IsNearCallOffset(
(reinterpret_cast<byte*>(target) - pc_) / kInstrSize));
Jump(target, RelocInfo::NONE);
STATIC_ASSERT(kFarJumpTableSlotSize == 4 * kInstrSize);
}
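For readability, the 16-byte slot emitted above lays out as follows (editor's sketch; kTmpReg is the hard-coded scratch register used by this code):

//   slot + 0 : ldr kTmpReg, pc+8  ; pc-relative literal load, two instructions ahead
//   slot + 4 : br  kTmpReg        ; indirect branch to the loaded target
//   slot + 8 : target             ; 8-byte literal, 8-byte aligned, so a follow-up
//                                 ; can overwrite it with a single atomic store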
void JumpTableAssembler::NopBytes(int bytes) {
@@ -155,15 +160,15 @@ void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
b(r1); // 2 bytes
}
void JumpTableAssembler::EmitRuntimeStubSlot(Address builtin_target) {
JumpToInstructionStream(builtin_target);
}
void JumpTableAssembler::EmitJumpSlot(Address target) {
mov(r1, Operand(target));
b(r1);
}
void JumpTableAssembler::EmitFarJumpSlot(Address target) {
JumpToInstructionStream(target);
}
void JumpTableAssembler::NopBytes(int bytes) {
DCHECK_LE(0, bytes);
DCHECK_EQ(0, bytes % 2);
@@ -185,14 +190,14 @@ void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
for (int i = 0; i < nop_bytes; i += kInstrSize) nop();
}
void JumpTableAssembler::EmitRuntimeStubSlot(Address builtin_target) {
JumpToInstructionStream(builtin_target);
}
void JumpTableAssembler::EmitJumpSlot(Address target) {
Jump(target, RelocInfo::NONE);
}
void JumpTableAssembler::EmitFarJumpSlot(Address target) {
JumpToInstructionStream(target);
}
void JumpTableAssembler::NopBytes(int bytes) {
DCHECK_LE(0, bytes);
DCHECK_EQ(0, bytes % kInstrSize);
@@ -216,16 +221,16 @@ void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
for (int i = 0; i < nop_bytes; i += kInstrSize) nop();
}
void JumpTableAssembler::EmitRuntimeStubSlot(Address builtin_target) {
JumpToInstructionStream(builtin_target);
}
void JumpTableAssembler::EmitJumpSlot(Address target) {
mov(r0, Operand(target));
mtctr(r0);
bctr();
}
void JumpTableAssembler::EmitFarJumpSlot(Address target) {
JumpToInstructionStream(target);
}
void JumpTableAssembler::NopBytes(int bytes) {
DCHECK_LE(0, bytes);
DCHECK_EQ(0, bytes % 4);
@@ -240,12 +245,10 @@ void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
UNIMPLEMENTED();
}
void JumpTableAssembler::EmitRuntimeStubSlot(Address builtin_target) {
UNIMPLEMENTED();
}
void JumpTableAssembler::EmitJumpSlot(Address target) { UNIMPLEMENTED(); }
void JumpTableAssembler::EmitFarJumpSlot(Address target) { UNIMPLEMENTED(); }
void JumpTableAssembler::NopBytes(int bytes) {
DCHECK_LE(0, bytes);
UNIMPLEMENTED();

src/wasm/jump-table-assembler.h

@@ -19,7 +19,7 @@ namespace wasm {
//
// Additionally to this main jump table, there exist special jump tables for
// other purposes:
// - the runtime stub table contains one entry per wasm runtime stub (see
// - the far stub table contains one entry per wasm runtime stub (see
// {WasmCode::RuntimeStubId}), which jumps to the corresponding embedded
// builtin.
// - the lazy compile table contains one entry per wasm function which jumps to
@@ -73,16 +73,20 @@ class V8_EXPORT_PRIVATE JumpTableAssembler : public MacroAssembler {
// Determine the size of a jump table containing the given number of slots.
static constexpr uint32_t SizeForNumberOfSlots(uint32_t slot_count) {
// TODO(wasm): Once the {RoundUp} utility handles non-powers of two values,
// use: {RoundUp<kJumpTableSlotsPerLine>(slot_count) * kJumpTableLineSize}
return ((slot_count + kJumpTableSlotsPerLine - 1) /
kJumpTableSlotsPerLine) *
kJumpTableLineSize;
}
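As a quick sanity check of the rounding (editor's example using the x64 constants further down: 64-byte lines, 5-byte slots, hence 12 slots per line):

// SizeForNumberOfSlots(13) on x64:
//   kJumpTableSlotsPerLine = 64 / 5 = 12
//   ((13 + 12 - 1) / 12) * 64 = (24 / 12) * 64 = 128 bytes
// i.e. the thirteenth slot spills onto a second 64-byte line.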
// Translate a stub slot index to an offset into the continuous jump table.
static uint32_t StubSlotIndexToOffset(uint32_t slot_index) {
return slot_index * kJumpTableStubSlotSize;
// Translate a far jump table index to an offset into the table.
static uint32_t FarJumpSlotIndexToOffset(uint32_t slot_index) {
return slot_index * kFarJumpTableSlotSize;
}
// Determine the size of a far jump table containing the given number of
// slots.
static constexpr uint32_t SizeForNumberOfFarJumpSlots(int num_stubs) {
return num_stubs * kFarJumpTableSlotSize;
}
// Translate a slot index to an offset into the lazy compile table.
@@ -90,11 +94,6 @@ class V8_EXPORT_PRIVATE JumpTableAssembler : public MacroAssembler {
return slot_index * kLazyCompileTableSlotSize;
}
// Determine the size of a jump table containing only runtime stub slots.
static constexpr uint32_t SizeForNumberOfStubSlots(uint32_t slot_count) {
return slot_count * kJumpTableStubSlotSize;
}
// Determine the size of a lazy compile table.
static constexpr uint32_t SizeForNumberOfLazyFunctions(uint32_t slot_count) {
return slot_count * kLazyCompileTableSlotSize;
@@ -115,18 +114,17 @@ class V8_EXPORT_PRIVATE JumpTableAssembler : public MacroAssembler {
FlushInstructionCache(base, lazy_compile_table_size);
}
static void GenerateRuntimeStubTable(Address base, Address* targets,
int num_stubs) {
uint32_t table_size = num_stubs * kJumpTableStubSlotSize;
static void GenerateFarJumpTable(Address base, Address* stub_targets,
int num_stubs) {
uint32_t table_size = num_stubs * kFarJumpTableSlotSize;
// Assume enough space, so the Assembler does not try to grow the buffer.
JumpTableAssembler jtasm(base, table_size + 256);
int offset = 0;
for (int index = 0; index < num_stubs; ++index) {
DCHECK_EQ(offset, StubSlotIndexToOffset(index));
DCHECK_EQ(offset, FarJumpSlotIndexToOffset(index));
jtasm.EmitFarJumpSlot(stub_targets[index]);
offset += kFarJumpTableSlotSize;
DCHECK_EQ(offset, jtasm.pc_offset());
jtasm.EmitRuntimeStubSlot(targets[index]);
offset += kJumpTableStubSlotSize;
jtasm.NopBytes(offset - jtasm.pc_offset());
}
FlushInstructionCache(base, table_size);
}
@@ -157,48 +155,48 @@ class V8_EXPORT_PRIVATE JumpTableAssembler : public MacroAssembler {
#if V8_TARGET_ARCH_X64
static constexpr int kJumpTableLineSize = 64;
static constexpr int kJumpTableSlotSize = 5;
static constexpr int kFarJumpTableSlotSize = 16;
static constexpr int kLazyCompileTableSlotSize = 10;
static constexpr int kJumpTableStubSlotSize = 13;
#elif V8_TARGET_ARCH_IA32
static constexpr int kJumpTableLineSize = 64;
static constexpr int kJumpTableSlotSize = 5;
static constexpr int kFarJumpTableSlotSize = 5;
static constexpr int kLazyCompileTableSlotSize = 10;
static constexpr int kJumpTableStubSlotSize = 5;
#elif V8_TARGET_ARCH_ARM
static constexpr int kJumpTableLineSize = 3 * kInstrSize;
static constexpr int kJumpTableSlotSize = 3 * kInstrSize;
static constexpr int kFarJumpTableSlotSize = 2 * kInstrSize;
static constexpr int kLazyCompileTableSlotSize = 5 * kInstrSize;
static constexpr int kJumpTableStubSlotSize = 2 * kInstrSize;
#elif V8_TARGET_ARCH_ARM64
static constexpr int kJumpTableLineSize = 1 * kInstrSize;
static constexpr int kJumpTableSlotSize = 1 * kInstrSize;
static constexpr int kFarJumpTableSlotSize = 4 * kInstrSize;
static constexpr int kLazyCompileTableSlotSize = 3 * kInstrSize;
static constexpr int kJumpTableStubSlotSize = 4 * kInstrSize;
#elif V8_TARGET_ARCH_S390X
static constexpr int kJumpTableLineSize = 128;
static constexpr int kJumpTableSlotSize = 14;
static constexpr int kFarJumpTableSlotSize = 14;
static constexpr int kLazyCompileTableSlotSize = 20;
static constexpr int kJumpTableStubSlotSize = 14;
#elif V8_TARGET_ARCH_PPC64
static constexpr int kJumpTableLineSize = 64;
static constexpr int kJumpTableSlotSize = 7 * kInstrSize;
static constexpr int kFarJumpTableSlotSize = 7 * kInstrSize;
static constexpr int kLazyCompileTableSlotSize = 12 * kInstrSize;
static constexpr int kJumpTableStubSlotSize = 7 * kInstrSize;
#elif V8_TARGET_ARCH_MIPS
static constexpr int kJumpTableLineSize = 6 * kInstrSize;
static constexpr int kJumpTableSlotSize = 4 * kInstrSize;
static constexpr int kFarJumpTableSlotSize = 4 * kInstrSize;
static constexpr int kLazyCompileTableSlotSize = 6 * kInstrSize;
static constexpr int kJumpTableStubSlotSize = 4 * kInstrSize;
#elif V8_TARGET_ARCH_MIPS64
static constexpr int kJumpTableLineSize = 8 * kInstrSize;
static constexpr int kJumpTableSlotSize = 6 * kInstrSize;
static constexpr int kFarJumpTableSlotSize = 6 * kInstrSize;
static constexpr int kLazyCompileTableSlotSize = 8 * kInstrSize;
static constexpr int kJumpTableStubSlotSize = 6 * kInstrSize;
#else
static constexpr int kJumpTableLineSize = 1;
static constexpr int kJumpTableSlotSize = 1;
static constexpr int kFarJumpTableSlotSize = 1;
static constexpr int kLazyCompileTableSlotSize = 1;
static constexpr int kJumpTableStubSlotSize = 1;
#endif
static constexpr int kJumpTableSlotsPerLine =
@@ -218,10 +216,10 @@ class V8_EXPORT_PRIVATE JumpTableAssembler : public MacroAssembler {
void EmitLazyCompileJumpSlot(uint32_t func_index,
Address lazy_compile_target);
void EmitRuntimeStubSlot(Address builtin_target);
void EmitJumpSlot(Address target);
void EmitFarJumpSlot(Address target);
void NopBytes(int bytes);
};

src/wasm/wasm-code-manager.cc

@@ -861,7 +861,8 @@ void NativeModule::SetRuntimeStubs(Isolate* isolate) {
WasmCodeRefScope code_ref_scope;
DCHECK_EQ(1, code_space_data_.size());
WasmCode* jump_table = CreateEmptyJumpTableInRegion(
JumpTableAssembler::SizeForNumberOfStubSlots(WasmCode::kRuntimeStubCount),
JumpTableAssembler::SizeForNumberOfFarJumpSlots(
WasmCode::kRuntimeStubCount),
code_space_data_[0].region);
Address base = jump_table->instruction_start();
EmbeddedData embedded_data = EmbeddedData::FromBlob();
@@ -877,10 +878,10 @@ void NativeModule::SetRuntimeStubs(Isolate* isolate) {
CHECK(embedded_data.ContainsBuiltin(builtin));
builtin_address[i] = embedded_data.InstructionStartOfBuiltin(builtin);
runtime_stub_entries_[i] =
base + JumpTableAssembler::StubSlotIndexToOffset(i);
base + JumpTableAssembler::FarJumpSlotIndexToOffset(i);
}
JumpTableAssembler::GenerateRuntimeStubTable(base, builtin_address,
WasmCode::kRuntimeStubCount);
JumpTableAssembler::GenerateFarJumpTable(base, builtin_address,
WasmCode::kRuntimeStubCount);
DCHECK_NULL(runtime_stub_table_);
runtime_stub_table_ = jump_table;
DCHECK_NE(kNullAddress, runtime_stub_entries_[0]);
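Putting the numbers together (editor's example for x64, where kFarJumpTableSlotSize is 16 bytes per jump-table-assembler.h above):

// runtime_stub_entries_[i] == base + JumpTableAssembler::FarJumpSlotIndexToOffset(i)
//                          == base + i * 16                      (on x64)
// and the table created above occupies
//   SizeForNumberOfFarJumpSlots(WasmCode::kRuntimeStubCount)
//       == WasmCode::kRuntimeStubCount * 16 bytes.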