MIPS[64]: Port '[Deopt] Remove jump table in prologue of deopt entries.'
Port commit 4ab96a9a81
Original message:
> Remove the use of a jump table in the prologue of the deopt entries
> and instead pass the bailout id explicitly in a register when calling
> the deopt entry routine from optimized code. This unifies the logic
> with the way the Arm64 code works. It saves the following amount of
> memory in code stubs:
>
> - arm: 384KB
> - ia32: 480KB
> - x64: 240KB
>
> This could be offset by a slight increase in the size of optimized code
> for loading the immediate, however this impact should be minimal and
> will scale with the maximum number of bailout ids (e.g., the size of
> code will increase by one instruction per bailout id on Arm, therefore
> ~98,000 bailouts will be needed before the overhead is greater than
> the current fixed table size).
>
> Change-Id: I838604b48fa04cbd45320c7b9dac0de08fd8eb25
> Reviewed-on: https://chromium-review.googlesource.com/c/1398224
> Commit-Queue: Ross McIlroy <rmcilroy@chromium.org>
> Reviewed-by: Jaroslav Sevcik <jarin@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#58636}
Change-Id: I4d070b90ebd4f9d4e82eaa74fe6d41c3a39d93e8
Reviewed-on: https://chromium-review.googlesource.com/c/1400848
Reviewed-by: Sreten Kovacevic <skovacevic@wavecomp.com>
Commit-Queue: Sreten Kovacevic <skovacevic@wavecomp.com>
Cr-Commit-Position: refs/heads/master@{#58655}
This commit is contained in:
parent
ba712bf89f
commit
b0dc60f6b3
@ -202,11 +202,6 @@ bool RelocInfo::IsInConstantPool() {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
int RelocInfo::GetDeoptimizationId(Isolate* isolate, DeoptimizeKind kind) {
|
|
||||||
DCHECK(IsRuntimeEntry(rmode_));
|
|
||||||
return Deoptimizer::GetDeoptimizationId(isolate, target_address(), kind);
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t RelocInfo::wasm_call_tag() const {
|
uint32_t RelocInfo::wasm_call_tag() const {
|
||||||
DCHECK(rmode_ == WASM_CALL || rmode_ == WASM_STUB_CALL);
|
DCHECK(rmode_ == WASM_CALL || rmode_ == WASM_STUB_CALL);
|
||||||
return static_cast<uint32_t>(
|
return static_cast<uint32_t>(
|
||||||
|
@ -15,10 +15,9 @@ namespace internal {
|
|||||||
// This code tries to be close to ia32 code so that any changes can be
|
// This code tries to be close to ia32 code so that any changes can be
|
||||||
// easily ported.
|
// easily ported.
|
||||||
void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
|
void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
|
||||||
Isolate* isolate, int count,
|
Isolate* isolate,
|
||||||
DeoptimizeKind deopt_kind) {
|
DeoptimizeKind deopt_kind) {
|
||||||
NoRootArrayScope no_root_array(masm);
|
NoRootArrayScope no_root_array(masm);
|
||||||
GenerateDeoptimizationEntriesPrologue(masm, count);
|
|
||||||
|
|
||||||
// Unlike on ARM we don't save all the registers, just the useful ones.
|
// Unlike on ARM we don't save all the registers, just the useful ones.
|
||||||
// For the rest, there are gaps on the stack, so the offsets remain the same.
|
// For the rest, there are gaps on the stack, so the offsets remain the same.
|
||||||
@ -64,16 +63,14 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
|
|||||||
const int kSavedRegistersAreaSize =
|
const int kSavedRegistersAreaSize =
|
||||||
(kNumberOfRegisters * kPointerSize) + kDoubleRegsSize + kFloatRegsSize;
|
(kNumberOfRegisters * kPointerSize) + kDoubleRegsSize + kFloatRegsSize;
|
||||||
|
|
||||||
// Get the bailout id from the stack.
|
// The bailout id is passed in kRootRegister by the caller.
|
||||||
__ lw(a2, MemOperand(sp, kSavedRegistersAreaSize));
|
__ mov(a2, kRootRegister);
|
||||||
|
|
||||||
// Get the address of the location in the code object (a3) (return
|
// Get the address of the location in the code object (a3) (return
|
||||||
// address for lazy deoptimization) and compute the fp-to-sp delta in
|
// address for lazy deoptimization) and compute the fp-to-sp delta in
|
||||||
// register t0.
|
// register t0.
|
||||||
__ mov(a3, ra);
|
__ mov(a3, ra);
|
||||||
// Correct one word for bailout id.
|
__ Addu(t0, sp, Operand(kSavedRegistersAreaSize));
|
||||||
__ Addu(t0, sp, Operand(kSavedRegistersAreaSize + (1 * kPointerSize)));
|
|
||||||
|
|
||||||
__ Subu(t0, fp, t0);
|
__ Subu(t0, fp, t0);
|
||||||
|
|
||||||
// Allocate a new deoptimizer object.
|
// Allocate a new deoptimizer object.
|
||||||
@ -139,8 +136,8 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
|
|||||||
__ swc1(f0, MemOperand(a1, dst_offset));
|
__ swc1(f0, MemOperand(a1, dst_offset));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove the bailout id and the saved registers from the stack.
|
// Remove the saved registers from the stack.
|
||||||
__ Addu(sp, sp, Operand(kSavedRegistersAreaSize + (1 * kPointerSize)));
|
__ Addu(sp, sp, Operand(kSavedRegistersAreaSize));
|
||||||
|
|
||||||
// Compute a pointer to the unwinding limit in register a2; that is
|
// Compute a pointer to the unwinding limit in register a2; that is
|
||||||
// the first stack slot not part of the input frame.
|
// the first stack slot not part of the input frame.
|
||||||
@ -240,73 +237,6 @@ const int Deoptimizer::table_entry_size_ = 2 * kInstrSize;
|
|||||||
const int Deoptimizer::table_entry_size_ = 3 * kInstrSize;
|
const int Deoptimizer::table_entry_size_ = 3 * kInstrSize;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void Deoptimizer::GenerateDeoptimizationEntriesPrologue(MacroAssembler* masm,
|
|
||||||
int count) {
|
|
||||||
Assembler::BlockTrampolinePoolScope block_trampoline_pool(masm);
|
|
||||||
|
|
||||||
// Create a sequence of deoptimization entries.
|
|
||||||
// Note that registers are still live when jumping to an entry.
|
|
||||||
Label table_start, done, trampoline_jump;
|
|
||||||
__ bind(&table_start);
|
|
||||||
|
|
||||||
#ifdef _MIPS_ARCH_MIPS32R6
|
|
||||||
int kMaxEntriesBranchReach =
|
|
||||||
(1 << (kImm26Bits - 2)) / (table_entry_size_ / kInstrSize);
|
|
||||||
#else
|
|
||||||
int kMaxEntriesBranchReach =
|
|
||||||
(1 << (kImm16Bits - 2)) / (table_entry_size_ / kInstrSize);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (count <= kMaxEntriesBranchReach) {
|
|
||||||
// Common case.
|
|
||||||
for (int i = 0; i < count; i++) {
|
|
||||||
Label start;
|
|
||||||
__ bind(&start);
|
|
||||||
DCHECK(is_int16(i));
|
|
||||||
if (IsMipsArchVariant(kMips32r6)) {
|
|
||||||
__ li(kScratchReg, i);
|
|
||||||
__ BranchShort(PROTECT, &done);
|
|
||||||
} else {
|
|
||||||
__ BranchShort(USE_DELAY_SLOT, &done); // Expose delay slot.
|
|
||||||
__ li(kScratchReg, i); // In the delay slot.
|
|
||||||
__ nop();
|
|
||||||
}
|
|
||||||
DCHECK_EQ(table_entry_size_, masm->SizeOfCodeGeneratedSince(&start));
|
|
||||||
}
|
|
||||||
|
|
||||||
DCHECK_EQ(masm->SizeOfCodeGeneratedSince(&table_start),
|
|
||||||
count * table_entry_size_);
|
|
||||||
__ bind(&done);
|
|
||||||
__ Push(kScratchReg);
|
|
||||||
} else {
|
|
||||||
DCHECK(!IsMipsArchVariant(kMips32r6));
|
|
||||||
// Uncommon case, the branch cannot reach.
|
|
||||||
// Create mini trampoline to reach the end of the table
|
|
||||||
for (int i = 0, j = 0; i < count; i++, j++) {
|
|
||||||
Label start;
|
|
||||||
__ bind(&start);
|
|
||||||
DCHECK(is_int16(i));
|
|
||||||
if (j >= kMaxEntriesBranchReach) {
|
|
||||||
j = 0;
|
|
||||||
__ li(kScratchReg, i);
|
|
||||||
__ bind(&trampoline_jump);
|
|
||||||
trampoline_jump = Label();
|
|
||||||
__ BranchShort(USE_DELAY_SLOT, &trampoline_jump);
|
|
||||||
__ nop();
|
|
||||||
} else {
|
|
||||||
__ BranchShort(USE_DELAY_SLOT, &trampoline_jump); // Expose delay slot.
|
|
||||||
__ li(kScratchReg, i); // In the delay slot.
|
|
||||||
__ nop();
|
|
||||||
}
|
|
||||||
DCHECK_EQ(table_entry_size_, masm->SizeOfCodeGeneratedSince(&start));
|
|
||||||
}
|
|
||||||
|
|
||||||
DCHECK_EQ(masm->SizeOfCodeGeneratedSince(&table_start),
|
|
||||||
count * table_entry_size_);
|
|
||||||
__ bind(&trampoline_jump);
|
|
||||||
__ Push(kScratchReg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Deoptimizer::PadTopOfStackRegister() { return false; }
|
bool Deoptimizer::PadTopOfStackRegister() { return false; }
|
||||||
|
|
||||||
|
@ -5519,6 +5519,17 @@ void TurboAssembler::ResetSpeculationPoisonRegister() {
|
|||||||
li(kSpeculationPoisonRegister, -1);
|
li(kSpeculationPoisonRegister, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void TurboAssembler::CallForDeoptimization(Address target, int deopt_id) {
|
||||||
|
NoRootArrayScope no_root_array(this);
|
||||||
|
|
||||||
|
// Save the deopt id in kRootRegister (we don't need the roots array from now
|
||||||
|
// on).
|
||||||
|
DCHECK_LE(deopt_id, 0xFFFF);
|
||||||
|
li(kRootRegister, deopt_id);
|
||||||
|
|
||||||
|
Call(target, RelocInfo::RUNTIME_ENTRY);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace internal
|
} // namespace internal
|
||||||
} // namespace v8
|
} // namespace v8
|
||||||
|
|
||||||
|
@ -252,11 +252,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
|||||||
// The return address on the stack is used by frame iteration.
|
// The return address on the stack is used by frame iteration.
|
||||||
void StoreReturnAddressAndCall(Register target);
|
void StoreReturnAddressAndCall(Register target);
|
||||||
|
|
||||||
void CallForDeoptimization(Address target, int deopt_id,
|
void CallForDeoptimization(Address target, int deopt_id);
|
||||||
RelocInfo::Mode rmode) {
|
|
||||||
USE(deopt_id);
|
|
||||||
Call(target, rmode);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Ret(COND_ARGS);
|
void Ret(COND_ARGS);
|
||||||
inline void Ret(BranchDelaySlot bd, Condition cond = al,
|
inline void Ret(BranchDelaySlot bd, Condition cond = al,
|
||||||
|
@ -179,11 +179,6 @@ bool RelocInfo::IsInConstantPool() {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
int RelocInfo::GetDeoptimizationId(Isolate* isolate, DeoptimizeKind kind) {
|
|
||||||
DCHECK(IsRuntimeEntry(rmode_));
|
|
||||||
return Deoptimizer::GetDeoptimizationId(isolate, target_address(), kind);
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t RelocInfo::wasm_call_tag() const {
|
uint32_t RelocInfo::wasm_call_tag() const {
|
||||||
DCHECK(rmode_ == WASM_CALL || rmode_ == WASM_STUB_CALL);
|
DCHECK(rmode_ == WASM_CALL || rmode_ == WASM_STUB_CALL);
|
||||||
return static_cast<uint32_t>(
|
return static_cast<uint32_t>(
|
||||||
|
@ -15,10 +15,9 @@ namespace internal {
|
|||||||
// This code tries to be close to ia32 code so that any changes can be
|
// This code tries to be close to ia32 code so that any changes can be
|
||||||
// easily ported.
|
// easily ported.
|
||||||
void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
|
void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
|
||||||
Isolate* isolate, int count,
|
Isolate* isolate,
|
||||||
DeoptimizeKind deopt_kind) {
|
DeoptimizeKind deopt_kind) {
|
||||||
NoRootArrayScope no_root_array(masm);
|
NoRootArrayScope no_root_array(masm);
|
||||||
GenerateDeoptimizationEntriesPrologue(masm, count);
|
|
||||||
|
|
||||||
// Unlike on ARM we don't save all the registers, just the useful ones.
|
// Unlike on ARM we don't save all the registers, just the useful ones.
|
||||||
// For the rest, there are gaps on the stack, so the offsets remain the same.
|
// For the rest, there are gaps on the stack, so the offsets remain the same.
|
||||||
@ -65,15 +64,14 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
|
|||||||
const int kSavedRegistersAreaSize =
|
const int kSavedRegistersAreaSize =
|
||||||
(kNumberOfRegisters * kPointerSize) + kDoubleRegsSize + kFloatRegsSize;
|
(kNumberOfRegisters * kPointerSize) + kDoubleRegsSize + kFloatRegsSize;
|
||||||
|
|
||||||
// Get the bailout id from the stack.
|
// The bailout id is passed in kRootRegister by the caller.
|
||||||
__ Ld(a2, MemOperand(sp, kSavedRegistersAreaSize));
|
__ mov(a2, kRootRegister);
|
||||||
|
|
||||||
// Get the address of the location in the code object (a3) (return
|
// Get the address of the location in the code object (a3) (return
|
||||||
// address for lazy deoptimization) and compute the fp-to-sp delta in
|
// address for lazy deoptimization) and compute the fp-to-sp delta in
|
||||||
// register a4.
|
// register a4.
|
||||||
__ mov(a3, ra);
|
__ mov(a3, ra);
|
||||||
// Correct one word for bailout id.
|
__ Daddu(a4, sp, Operand(kSavedRegistersAreaSize));
|
||||||
__ Daddu(a4, sp, Operand(kSavedRegistersAreaSize + (1 * kPointerSize)));
|
|
||||||
|
|
||||||
__ Dsubu(a4, fp, a4);
|
__ Dsubu(a4, fp, a4);
|
||||||
|
|
||||||
@ -140,8 +138,8 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
|
|||||||
__ Swc1(f0, MemOperand(a1, dst_offset));
|
__ Swc1(f0, MemOperand(a1, dst_offset));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove the bailout id and the saved registers from the stack.
|
// Remove the saved registers from the stack.
|
||||||
__ Daddu(sp, sp, Operand(kSavedRegistersAreaSize + (1 * kPointerSize)));
|
__ Daddu(sp, sp, Operand(kSavedRegistersAreaSize));
|
||||||
|
|
||||||
// Compute a pointer to the unwinding limit in register a2; that is
|
// Compute a pointer to the unwinding limit in register a2; that is
|
||||||
// the first stack slot not part of the input frame.
|
// the first stack slot not part of the input frame.
|
||||||
@ -239,74 +237,6 @@ const int Deoptimizer::table_entry_size_ = 2 * kInstrSize;
|
|||||||
const int Deoptimizer::table_entry_size_ = 3 * kInstrSize;
|
const int Deoptimizer::table_entry_size_ = 3 * kInstrSize;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void Deoptimizer::GenerateDeoptimizationEntriesPrologue(MacroAssembler* masm,
|
|
||||||
int count) {
|
|
||||||
Assembler::BlockTrampolinePoolScope block_trampoline_pool(masm);
|
|
||||||
|
|
||||||
// Create a sequence of deoptimization entries.
|
|
||||||
// Note that registers are still live when jumping to an entry.
|
|
||||||
Label table_start, done, trampoline_jump;
|
|
||||||
__ bind(&table_start);
|
|
||||||
#ifdef _MIPS_ARCH_MIPS64R6
|
|
||||||
int kMaxEntriesBranchReach =
|
|
||||||
(1 << (kImm26Bits - 2)) / (table_entry_size_ / kInstrSize);
|
|
||||||
#else
|
|
||||||
int kMaxEntriesBranchReach =
|
|
||||||
(1 << (kImm16Bits - 2)) / (table_entry_size_ / kInstrSize);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (count <= kMaxEntriesBranchReach) {
|
|
||||||
// Common case.
|
|
||||||
for (int i = 0; i < count; i++) {
|
|
||||||
Label start;
|
|
||||||
__ bind(&start);
|
|
||||||
DCHECK(is_int16(i));
|
|
||||||
if (kArchVariant == kMips64r6) {
|
|
||||||
__ li(kScratchReg, i);
|
|
||||||
__ BranchShort(PROTECT, &done);
|
|
||||||
} else {
|
|
||||||
__ BranchShort(USE_DELAY_SLOT, &done); // Expose delay slot.
|
|
||||||
__ li(kScratchReg, i); // In the delay slot.
|
|
||||||
__ nop();
|
|
||||||
}
|
|
||||||
|
|
||||||
DCHECK_EQ(table_entry_size_, masm->SizeOfCodeGeneratedSince(&start));
|
|
||||||
}
|
|
||||||
|
|
||||||
DCHECK_EQ(masm->SizeOfCodeGeneratedSince(&table_start),
|
|
||||||
count * table_entry_size_);
|
|
||||||
__ bind(&done);
|
|
||||||
__ Push(kScratchReg);
|
|
||||||
} else {
|
|
||||||
DCHECK_NE(kArchVariant, kMips64r6);
|
|
||||||
// Uncommon case, the branch cannot reach.
|
|
||||||
// Create mini trampoline to reach the end of the table
|
|
||||||
for (int i = 0, j = 0; i < count; i++, j++) {
|
|
||||||
Label start;
|
|
||||||
__ bind(&start);
|
|
||||||
DCHECK(is_int16(i));
|
|
||||||
if (j >= kMaxEntriesBranchReach) {
|
|
||||||
j = 0;
|
|
||||||
__ li(kScratchReg, i);
|
|
||||||
__ bind(&trampoline_jump);
|
|
||||||
trampoline_jump = Label();
|
|
||||||
__ BranchShort(USE_DELAY_SLOT, &trampoline_jump);
|
|
||||||
__ nop();
|
|
||||||
} else {
|
|
||||||
__ BranchShort(USE_DELAY_SLOT, &trampoline_jump); // Expose delay slot.
|
|
||||||
__ li(kScratchReg, i); // In the delay slot.
|
|
||||||
__ nop();
|
|
||||||
}
|
|
||||||
DCHECK_EQ(table_entry_size_, masm->SizeOfCodeGeneratedSince(&start));
|
|
||||||
}
|
|
||||||
|
|
||||||
DCHECK_EQ(masm->SizeOfCodeGeneratedSince(&table_start),
|
|
||||||
count * table_entry_size_);
|
|
||||||
__ bind(&trampoline_jump);
|
|
||||||
__ Push(kScratchReg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Deoptimizer::PadTopOfStackRegister() { return false; }
|
bool Deoptimizer::PadTopOfStackRegister() { return false; }
|
||||||
|
|
||||||
void FrameDescription::SetCallerPc(unsigned offset, intptr_t value) {
|
void FrameDescription::SetCallerPc(unsigned offset, intptr_t value) {
|
||||||
|
@ -5886,6 +5886,16 @@ void TurboAssembler::ResetSpeculationPoisonRegister() {
|
|||||||
li(kSpeculationPoisonRegister, -1);
|
li(kSpeculationPoisonRegister, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void TurboAssembler::CallForDeoptimization(Address target, int deopt_id) {
|
||||||
|
NoRootArrayScope no_root_array(this);
|
||||||
|
|
||||||
|
// Save the deopt id in kRootRegister (we don't need the roots array from now
|
||||||
|
// on).
|
||||||
|
DCHECK_LE(deopt_id, 0xFFFF);
|
||||||
|
li(kRootRegister, deopt_id);
|
||||||
|
Call(target, RelocInfo::RUNTIME_ENTRY);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace internal
|
} // namespace internal
|
||||||
} // namespace v8
|
} // namespace v8
|
||||||
|
|
||||||
|
@ -277,11 +277,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
|||||||
// The return address on the stack is used by frame iteration.
|
// The return address on the stack is used by frame iteration.
|
||||||
void StoreReturnAddressAndCall(Register target);
|
void StoreReturnAddressAndCall(Register target);
|
||||||
|
|
||||||
void CallForDeoptimization(Address target, int deopt_id,
|
void CallForDeoptimization(Address target, int deopt_id);
|
||||||
RelocInfo::Mode rmode) {
|
|
||||||
USE(deopt_id);
|
|
||||||
Call(target, rmode);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Ret(COND_ARGS);
|
void Ret(COND_ARGS);
|
||||||
inline void Ret(BranchDelaySlot bd, Condition cond = al,
|
inline void Ret(BranchDelaySlot bd, Condition cond = al,
|
||||||
|
Loading…
Reference in New Issue
Block a user