MIPS[64]: Port '[Deopt] Remove jump table in prologue of deopt entries.'
Port commit 4ab96a9a81
Original message:
> Remove the use of a jump table in the prologue of the deopt entries
> and instead pass the bailout id explicitly in a register when calling
> the deopt entry routine from optimized code. This unifies the logic
> with the way the Arm64 code works. It saves the following amount of
> memory in code stubs:
>
> - arm: 384KB
> - ia32: 480KB
> - x64: 240KB
>
> This could be offset by a slight increase in the size of optimized code
> for loading the immediate, however this impact should be minimal and
> will scale with the maximum number of bailout ids (e.g., the size of
> code will increase by one instruction per bailout id on Arm, therefore
> ~98,000 bailouts will be needed before the overhead is greater than
> the current fixed table size).
>
> Change-Id: I838604b48fa04cbd45320c7b9dac0de08fd8eb25
> Reviewed-on: https://chromium-review.googlesource.com/c/1398224
> Commit-Queue: Ross McIlroy <rmcilroy@chromium.org>
> Reviewed-by: Jaroslav Sevcik <jarin@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#58636}
Change-Id: I4d070b90ebd4f9d4e82eaa74fe6d41c3a39d93e8
Reviewed-on: https://chromium-review.googlesource.com/c/1400848
Reviewed-by: Sreten Kovacevic <skovacevic@wavecomp.com>
Commit-Queue: Sreten Kovacevic <skovacevic@wavecomp.com>
Cr-Commit-Position: refs/heads/master@{#58655}
This commit is contained in:
parent
ba712bf89f
commit
b0dc60f6b3
@ -202,11 +202,6 @@ bool RelocInfo::IsInConstantPool() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns the deoptimization id associated with this relocation entry.
// The lookup is delegated to Deoptimizer::GetDeoptimizationId, which is given
// this entry's target_address() and the requested deoptimization kind.
int RelocInfo::GetDeoptimizationId(Isolate* isolate, DeoptimizeKind kind) {
|
||||
// Only relocation entries whose mode is a runtime entry may be queried.
DCHECK(IsRuntimeEntry(rmode_));
|
||||
return Deoptimizer::GetDeoptimizationId(isolate, target_address(), kind);
|
||||
}
|
||||
|
||||
uint32_t RelocInfo::wasm_call_tag() const {
|
||||
DCHECK(rmode_ == WASM_CALL || rmode_ == WASM_STUB_CALL);
|
||||
return static_cast<uint32_t>(
|
||||
|
@ -15,10 +15,9 @@ namespace internal {
|
||||
// This code tries to be close to ia32 code so that any changes can be
|
||||
// easily ported.
|
||||
void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
|
||||
Isolate* isolate, int count,
|
||||
Isolate* isolate,
|
||||
DeoptimizeKind deopt_kind) {
|
||||
NoRootArrayScope no_root_array(masm);
|
||||
GenerateDeoptimizationEntriesPrologue(masm, count);
|
||||
|
||||
// Unlike on ARM we don't save all the registers, just the useful ones.
|
||||
// For the rest, there are gaps on the stack, so the offsets remain the same.
|
||||
@ -64,16 +63,14 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
|
||||
const int kSavedRegistersAreaSize =
|
||||
(kNumberOfRegisters * kPointerSize) + kDoubleRegsSize + kFloatRegsSize;
|
||||
|
||||
// Get the bailout id from the stack.
|
||||
__ lw(a2, MemOperand(sp, kSavedRegistersAreaSize));
|
||||
// The bailout id is passed in kRootRegister by the caller.
|
||||
__ mov(a2, kRootRegister);
|
||||
|
||||
// Get the address of the location in the code object (a3) (return
|
||||
// address for lazy deoptimization) and compute the fp-to-sp delta in
|
||||
// register t0.
|
||||
__ mov(a3, ra);
|
||||
// Correct one word for bailout id.
|
||||
__ Addu(t0, sp, Operand(kSavedRegistersAreaSize + (1 * kPointerSize)));
|
||||
|
||||
__ Addu(t0, sp, Operand(kSavedRegistersAreaSize));
|
||||
__ Subu(t0, fp, t0);
|
||||
|
||||
// Allocate a new deoptimizer object.
|
||||
@ -139,8 +136,8 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
|
||||
__ swc1(f0, MemOperand(a1, dst_offset));
|
||||
}
|
||||
|
||||
// Remove the bailout id and the saved registers from the stack.
|
||||
__ Addu(sp, sp, Operand(kSavedRegistersAreaSize + (1 * kPointerSize)));
|
||||
// Remove the saved registers from the stack.
|
||||
__ Addu(sp, sp, Operand(kSavedRegistersAreaSize));
|
||||
|
||||
// Compute a pointer to the unwinding limit in register a2; that is
|
||||
// the first stack slot not part of the input frame.
|
||||
@ -240,73 +237,6 @@ const int Deoptimizer::table_entry_size_ = 2 * kInstrSize;
|
||||
const int Deoptimizer::table_entry_size_ = 3 * kInstrSize;
|
||||
#endif
|
||||
|
||||
// Emits a table of `count` deoptimization entries. Each entry loads its own
// index into kScratchReg and branches towards the end of the table, where
// kScratchReg is pushed onto the stack. Every entry is asserted to be exactly
// table_entry_size_ bytes long.
void Deoptimizer::GenerateDeoptimizationEntriesPrologue(MacroAssembler* masm,
|
||||
int count) {
|
||||
// NOTE(review): presumably keeps the assembler from emitting a trampoline
// pool inside the table, which would break the fixed entry size -- confirm.
Assembler::BlockTrampolinePoolScope block_trampoline_pool(masm);
|
||||
|
||||
// Create a sequence of deoptimization entries.
|
||||
// Note that registers are still live when jumping to an entry.
|
||||
Label table_start, done, trampoline_jump;
|
||||
__ bind(&table_start);
|
||||
|
||||
// kMaxEntriesBranchReach is the number of table entries a short branch can
// span. It is derived from the branch immediate width (26 bits on r6, 16 bits
// otherwise) and the number of instructions per table entry.
#ifdef _MIPS_ARCH_MIPS32R6
|
||||
int kMaxEntriesBranchReach =
|
||||
(1 << (kImm26Bits - 2)) / (table_entry_size_ / kInstrSize);
|
||||
#else
|
||||
int kMaxEntriesBranchReach =
|
||||
(1 << (kImm16Bits - 2)) / (table_entry_size_ / kInstrSize);
|
||||
#endif
|
||||
|
||||
if (count <= kMaxEntriesBranchReach) {
|
||||
// Common case.
|
||||
// Every entry can branch directly to `done`.
for (int i = 0; i < count; i++) {
|
||||
Label start;
|
||||
__ bind(&start);
|
||||
DCHECK(is_int16(i));
|
||||
if (IsMipsArchVariant(kMips32r6)) {
|
||||
// On r6 the index is loaded first and the branch is emitted with PROTECT.
__ li(kScratchReg, i);
|
||||
__ BranchShort(PROTECT, &done);
|
||||
} else {
|
||||
__ BranchShort(USE_DELAY_SLOT, &done); // Expose delay slot.
|
||||
__ li(kScratchReg, i); // In the delay slot.
|
||||
__ nop();
|
||||
}
|
||||
// Each entry must occupy exactly table_entry_size_ bytes.
DCHECK_EQ(table_entry_size_, masm->SizeOfCodeGeneratedSince(&start));
|
||||
}
|
||||
|
||||
// The whole table must be exactly count entries long.
DCHECK_EQ(masm->SizeOfCodeGeneratedSince(&table_start),
|
||||
count * table_entry_size_);
|
||||
__ bind(&done);
|
||||
// Push the entry index selected by the entry that was jumped to.
__ Push(kScratchReg);
|
||||
} else {
|
||||
DCHECK(!IsMipsArchVariant(kMips32r6));
|
||||
// Uncommon case, the branch cannot reach.
|
||||
// Create mini trampoline to reach the end of the table
|
||||
// `j` counts the entries emitted since the last trampoline entry.
for (int i = 0, j = 0; i < count; i++, j++) {
|
||||
Label start;
|
||||
__ bind(&start);
|
||||
DCHECK(is_int16(i));
|
||||
if (j >= kMaxEntriesBranchReach) {
|
||||
// This entry doubles as a trampoline: the pending trampoline_jump label
// is bound at its branch, the label is replaced by a fresh one, and the
// branch targets that fresh label, so earlier entries hop forward
// through this entry.
j = 0;
|
||||
__ li(kScratchReg, i);
|
||||
__ bind(&trampoline_jump);
|
||||
trampoline_jump = Label();
|
||||
__ BranchShort(USE_DELAY_SLOT, &trampoline_jump);
|
||||
__ nop();
|
||||
} else {
|
||||
__ BranchShort(USE_DELAY_SLOT, &trampoline_jump); // Expose delay slot.
|
||||
__ li(kScratchReg, i); // In the delay slot.
|
||||
__ nop();
|
||||
}
|
||||
// Each entry must occupy exactly table_entry_size_ bytes.
DCHECK_EQ(table_entry_size_, masm->SizeOfCodeGeneratedSince(&start));
|
||||
}
|
||||
|
||||
DCHECK_EQ(masm->SizeOfCodeGeneratedSince(&table_start),
|
||||
count * table_entry_size_);
|
||||
// The last pending trampoline label is bound at the end of the table.
__ bind(&trampoline_jump);
|
||||
__ Push(kScratchReg);
|
||||
}
|
||||
}
|
||||
|
||||
// Always returns false in this file.
// NOTE(review): presumably tells the deoptimizer that no padding slot is
// needed for the top-of-stack register on this architecture -- confirm
// against the callers.
bool Deoptimizer::PadTopOfStackRegister() { return false; }
|
||||
|
||||
|
@ -5519,6 +5519,17 @@ void TurboAssembler::ResetSpeculationPoisonRegister() {
|
||||
li(kSpeculationPoisonRegister, -1);
|
||||
}
|
||||
|
||||
// Emits the call to the deoptimization entry at `target`.
// The deopt id is loaded into kRootRegister before the call is emitted, and
// the call itself uses the RUNTIME_ENTRY relocation mode.
void TurboAssembler::CallForDeoptimization(Address target, int deopt_id) {
|
||||
// NOTE(review): presumably disables root-array-relative addressing while
// kRootRegister holds the deopt id instead of the roots pointer -- confirm.
NoRootArrayScope no_root_array(this);
|
||||
|
||||
// Save the deopt id in kRootRegister (we don't need the roots array from now
|
||||
// on).
|
||||
// The id is asserted to be at most 0xFFFF.
DCHECK_LE(deopt_id, 0xFFFF);
|
||||
li(kRootRegister, deopt_id);
|
||||
|
||||
Call(target, RelocInfo::RUNTIME_ENTRY);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
||||
|
@ -252,11 +252,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
// The return address on the stack is used by frame iteration.
|
||||
void StoreReturnAddressAndCall(Register target);
|
||||
|
||||
// Calls `target` using relocation mode `rmode`.
// `deopt_id` is accepted but only handed to USE(); it does not influence the
// emitted call in this version.
void CallForDeoptimization(Address target, int deopt_id,
|
||||
RelocInfo::Mode rmode) {
|
||||
USE(deopt_id);
|
||||
Call(target, rmode);
|
||||
}
|
||||
void CallForDeoptimization(Address target, int deopt_id);
|
||||
|
||||
void Ret(COND_ARGS);
|
||||
inline void Ret(BranchDelaySlot bd, Condition cond = al,
|
||||
|
@ -179,11 +179,6 @@ bool RelocInfo::IsInConstantPool() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns the deoptimization id associated with this relocation entry.
// The lookup is delegated to Deoptimizer::GetDeoptimizationId, which is given
// this entry's target_address() and the requested deoptimization kind.
int RelocInfo::GetDeoptimizationId(Isolate* isolate, DeoptimizeKind kind) {
|
||||
// Only relocation entries whose mode is a runtime entry may be queried.
DCHECK(IsRuntimeEntry(rmode_));
|
||||
return Deoptimizer::GetDeoptimizationId(isolate, target_address(), kind);
|
||||
}
|
||||
|
||||
uint32_t RelocInfo::wasm_call_tag() const {
|
||||
DCHECK(rmode_ == WASM_CALL || rmode_ == WASM_STUB_CALL);
|
||||
return static_cast<uint32_t>(
|
||||
|
@ -15,10 +15,9 @@ namespace internal {
|
||||
// This code tries to be close to ia32 code so that any changes can be
|
||||
// easily ported.
|
||||
void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
|
||||
Isolate* isolate, int count,
|
||||
Isolate* isolate,
|
||||
DeoptimizeKind deopt_kind) {
|
||||
NoRootArrayScope no_root_array(masm);
|
||||
GenerateDeoptimizationEntriesPrologue(masm, count);
|
||||
|
||||
// Unlike on ARM we don't save all the registers, just the useful ones.
|
||||
// For the rest, there are gaps on the stack, so the offsets remain the same.
|
||||
@ -65,15 +64,14 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
|
||||
const int kSavedRegistersAreaSize =
|
||||
(kNumberOfRegisters * kPointerSize) + kDoubleRegsSize + kFloatRegsSize;
|
||||
|
||||
// Get the bailout id from the stack.
|
||||
__ Ld(a2, MemOperand(sp, kSavedRegistersAreaSize));
|
||||
// The bailout id is passed in kRootRegister by the caller.
|
||||
__ mov(a2, kRootRegister);
|
||||
|
||||
// Get the address of the location in the code object (a3) (return
|
||||
// address for lazy deoptimization) and compute the fp-to-sp delta in
|
||||
// register a4.
|
||||
__ mov(a3, ra);
|
||||
// Correct one word for bailout id.
|
||||
__ Daddu(a4, sp, Operand(kSavedRegistersAreaSize + (1 * kPointerSize)));
|
||||
__ Daddu(a4, sp, Operand(kSavedRegistersAreaSize));
|
||||
|
||||
__ Dsubu(a4, fp, a4);
|
||||
|
||||
@ -140,8 +138,8 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
|
||||
__ Swc1(f0, MemOperand(a1, dst_offset));
|
||||
}
|
||||
|
||||
// Remove the bailout id and the saved registers from the stack.
|
||||
__ Daddu(sp, sp, Operand(kSavedRegistersAreaSize + (1 * kPointerSize)));
|
||||
// Remove the saved registers from the stack.
|
||||
__ Daddu(sp, sp, Operand(kSavedRegistersAreaSize));
|
||||
|
||||
// Compute a pointer to the unwinding limit in register a2; that is
|
||||
// the first stack slot not part of the input frame.
|
||||
@ -239,74 +237,6 @@ const int Deoptimizer::table_entry_size_ = 2 * kInstrSize;
|
||||
const int Deoptimizer::table_entry_size_ = 3 * kInstrSize;
|
||||
#endif
|
||||
|
||||
// Emits a table of `count` deoptimization entries. Each entry loads its own
// index into kScratchReg and branches towards the end of the table, where
// kScratchReg is pushed onto the stack. Every entry is asserted to be exactly
// table_entry_size_ bytes long.
void Deoptimizer::GenerateDeoptimizationEntriesPrologue(MacroAssembler* masm,
|
||||
int count) {
|
||||
// NOTE(review): presumably keeps the assembler from emitting a trampoline
// pool inside the table, which would break the fixed entry size -- confirm.
Assembler::BlockTrampolinePoolScope block_trampoline_pool(masm);
|
||||
|
||||
// Create a sequence of deoptimization entries.
|
||||
// Note that registers are still live when jumping to an entry.
|
||||
Label table_start, done, trampoline_jump;
|
||||
__ bind(&table_start);
|
||||
// kMaxEntriesBranchReach is the number of table entries a short branch can
// span. It is derived from the branch immediate width (26 bits on r6, 16 bits
// otherwise) and the number of instructions per table entry.
#ifdef _MIPS_ARCH_MIPS64R6
|
||||
int kMaxEntriesBranchReach =
|
||||
(1 << (kImm26Bits - 2)) / (table_entry_size_ / kInstrSize);
|
||||
#else
|
||||
int kMaxEntriesBranchReach =
|
||||
(1 << (kImm16Bits - 2)) / (table_entry_size_ / kInstrSize);
|
||||
#endif
|
||||
|
||||
if (count <= kMaxEntriesBranchReach) {
|
||||
// Common case.
|
||||
// Every entry can branch directly to `done`.
for (int i = 0; i < count; i++) {
|
||||
Label start;
|
||||
__ bind(&start);
|
||||
DCHECK(is_int16(i));
|
||||
if (kArchVariant == kMips64r6) {
|
||||
// On r6 the index is loaded first and the branch is emitted with PROTECT.
__ li(kScratchReg, i);
|
||||
__ BranchShort(PROTECT, &done);
|
||||
} else {
|
||||
__ BranchShort(USE_DELAY_SLOT, &done); // Expose delay slot.
|
||||
__ li(kScratchReg, i); // In the delay slot.
|
||||
__ nop();
|
||||
}
|
||||
|
||||
// Each entry must occupy exactly table_entry_size_ bytes.
DCHECK_EQ(table_entry_size_, masm->SizeOfCodeGeneratedSince(&start));
|
||||
}
|
||||
|
||||
// The whole table must be exactly count entries long.
DCHECK_EQ(masm->SizeOfCodeGeneratedSince(&table_start),
|
||||
count * table_entry_size_);
|
||||
__ bind(&done);
|
||||
// Push the entry index selected by the entry that was jumped to.
__ Push(kScratchReg);
|
||||
} else {
|
||||
DCHECK_NE(kArchVariant, kMips64r6);
|
||||
// Uncommon case, the branch cannot reach.
|
||||
// Create mini trampoline to reach the end of the table
|
||||
// `j` counts the entries emitted since the last trampoline entry.
for (int i = 0, j = 0; i < count; i++, j++) {
|
||||
Label start;
|
||||
__ bind(&start);
|
||||
DCHECK(is_int16(i));
|
||||
if (j >= kMaxEntriesBranchReach) {
|
||||
// This entry doubles as a trampoline: the pending trampoline_jump label
// is bound at its branch, the label is replaced by a fresh one, and the
// branch targets that fresh label, so earlier entries hop forward
// through this entry.
j = 0;
|
||||
__ li(kScratchReg, i);
|
||||
__ bind(&trampoline_jump);
|
||||
trampoline_jump = Label();
|
||||
__ BranchShort(USE_DELAY_SLOT, &trampoline_jump);
|
||||
__ nop();
|
||||
} else {
|
||||
__ BranchShort(USE_DELAY_SLOT, &trampoline_jump); // Expose delay slot.
|
||||
__ li(kScratchReg, i); // In the delay slot.
|
||||
__ nop();
|
||||
}
|
||||
// Each entry must occupy exactly table_entry_size_ bytes.
DCHECK_EQ(table_entry_size_, masm->SizeOfCodeGeneratedSince(&start));
|
||||
}
|
||||
|
||||
DCHECK_EQ(masm->SizeOfCodeGeneratedSince(&table_start),
|
||||
count * table_entry_size_);
|
||||
// The last pending trampoline label is bound at the end of the table.
__ bind(&trampoline_jump);
|
||||
__ Push(kScratchReg);
|
||||
}
|
||||
}
|
||||
|
||||
// Always returns false in this file.
// NOTE(review): presumably tells the deoptimizer that no padding slot is
// needed for the top-of-stack register on this architecture -- confirm
// against the callers.
bool Deoptimizer::PadTopOfStackRegister() { return false; }
|
||||
|
||||
void FrameDescription::SetCallerPc(unsigned offset, intptr_t value) {
|
||||
|
@ -5886,6 +5886,16 @@ void TurboAssembler::ResetSpeculationPoisonRegister() {
|
||||
li(kSpeculationPoisonRegister, -1);
|
||||
}
|
||||
|
||||
// Emits the call to the deoptimization entry at `target`.
// The deopt id is loaded into kRootRegister before the call is emitted, and
// the call itself uses the RUNTIME_ENTRY relocation mode.
void TurboAssembler::CallForDeoptimization(Address target, int deopt_id) {
|
||||
// NOTE(review): presumably disables root-array-relative addressing while
// kRootRegister holds the deopt id instead of the roots pointer -- confirm.
NoRootArrayScope no_root_array(this);
|
||||
|
||||
// Save the deopt id in kRootRegister (we don't need the roots array from now
|
||||
// on).
|
||||
// The id is asserted to be at most 0xFFFF.
DCHECK_LE(deopt_id, 0xFFFF);
|
||||
li(kRootRegister, deopt_id);
|
||||
Call(target, RelocInfo::RUNTIME_ENTRY);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
||||
|
@ -277,11 +277,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
// The return address on the stack is used by frame iteration.
|
||||
void StoreReturnAddressAndCall(Register target);
|
||||
|
||||
// Calls `target` using relocation mode `rmode`.
// `deopt_id` is accepted but only handed to USE(); it does not influence the
// emitted call in this version.
void CallForDeoptimization(Address target, int deopt_id,
|
||||
RelocInfo::Mode rmode) {
|
||||
USE(deopt_id);
|
||||
Call(target, rmode);
|
||||
}
|
||||
void CallForDeoptimization(Address target, int deopt_id);
|
||||
|
||||
void Ret(COND_ARGS);
|
||||
inline void Ret(BranchDelaySlot bd, Condition cond = al,
|
||||
|
Loading…
Reference in New Issue
Block a user