MIPS[64]: Port '[Deopt] Remove jump table in prologue of deopt entries.'

Port commit 4ab96a9a81

Original message:
> Remove the use of a jump table in the prologue of the deopt entries
> and instead pass the bailout id explicitly in a register when calling
> the deopt entry routine from optimized code. This unifies the logic
> with the way the Arm64 code works. It saves the following amount of
> memory in code stubs:
>
>  - arm:  384KB
>  - ia32: 480KB
>  - x64:  240KB
>
> This could be offset by a slight increase in the size of optimized code
> for loading the immediate, however this impact should be minimal and
> will scale with the maximum number of bailout ids (e.g., the size of
> code will increase by one instruction per bailout id on Arm, therefore
> ~98,000 bailouts will be needed before the overhead is greater than
> the current fixed table size).
>
> Change-Id: I838604b48fa04cbd45320c7b9dac0de08fd8eb25
> Reviewed-on: https://chromium-review.googlesource.com/c/1398224
> Commit-Queue: Ross McIlroy <rmcilroy@chromium.org>
> Reviewed-by: Jaroslav Sevcik <jarin@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#58636}

Change-Id: I4d070b90ebd4f9d4e82eaa74fe6d41c3a39d93e8
Reviewed-on: https://chromium-review.googlesource.com/c/1400848
Reviewed-by: Sreten Kovacevic <skovacevic@wavecomp.com>
Commit-Queue: Sreten Kovacevic <skovacevic@wavecomp.com>
Cr-Commit-Position: refs/heads/master@{#58655}
Author: Predrag Rudic, 2019-01-08 18:07:30 +01:00 (committed by Commit Bot)
Parent: ba712bf89f
Commit: b0dc60f6b3
8 changed files with 35 additions and 172 deletions
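For intuition on the trade-off quoted in the message above: the fixed jump table is replaced by one extra immediate-load per deopt call site. A minimal standalone sketch of the break-even arithmetic, assuming 4-byte instructions as on Arm — the constants below are illustrative stand-ins taken only from the figures quoted above, not from V8 itself:

#include <cstdio>

int main() {
  // Memory formerly spent on the fixed deopt jump table on arm, per the
  // original commit message.
  const int kTableBytesSaved = 384 * 1024;
  // New per-call-site cost: one extra instruction to load the bailout id.
  const int kArmInstrSize = 4;  // bytes

  // Bailout ids needed before per-site overhead outgrows the old table.
  printf("break-even: ~%d bailout ids\n", kTableBytesSaved / kArmInstrSize);
  // Prints 98304, matching the "~98,000 bailouts" figure quoted above.
  return 0;
}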


@@ -202,11 +202,6 @@ bool RelocInfo::IsInConstantPool() {
   return false;
 }
 
-int RelocInfo::GetDeoptimizationId(Isolate* isolate, DeoptimizeKind kind) {
-  DCHECK(IsRuntimeEntry(rmode_));
-  return Deoptimizer::GetDeoptimizationId(isolate, target_address(), kind);
-}
-
 uint32_t RelocInfo::wasm_call_tag() const {
   DCHECK(rmode_ == WASM_CALL || rmode_ == WASM_STUB_CALL);
   return static_cast<uint32_t>(


@@ -15,10 +15,9 @@ namespace internal {
 // This code tries to be close to ia32 code so that any changes can be
 // easily ported.
 void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
-                                                Isolate* isolate, int count,
+                                                Isolate* isolate,
                                                 DeoptimizeKind deopt_kind) {
   NoRootArrayScope no_root_array(masm);
-  GenerateDeoptimizationEntriesPrologue(masm, count);
 
   // Unlike on ARM we don't save all the registers, just the useful ones.
   // For the rest, there are gaps on the stack, so the offsets remain the same.
@@ -64,16 +63,14 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
   const int kSavedRegistersAreaSize =
       (kNumberOfRegisters * kPointerSize) + kDoubleRegsSize + kFloatRegsSize;
 
-  // Get the bailout id from the stack.
-  __ lw(a2, MemOperand(sp, kSavedRegistersAreaSize));
+  // The bailout id is passed in kRootRegister by the caller.
+  __ mov(a2, kRootRegister);
 
   // Get the address of the location in the code object (a3) (return
   // address for lazy deoptimization) and compute the fp-to-sp delta in
   // register t0.
   __ mov(a3, ra);
-  // Correct one word for bailout id.
-  __ Addu(t0, sp, Operand(kSavedRegistersAreaSize + (1 * kPointerSize)));
+  __ Addu(t0, sp, Operand(kSavedRegistersAreaSize));
   __ Subu(t0, fp, t0);
 
   // Allocate a new deoptimizer object.
@@ -139,8 +136,8 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
     __ swc1(f0, MemOperand(a1, dst_offset));
   }
 
-  // Remove the bailout id and the saved registers from the stack.
-  __ Addu(sp, sp, Operand(kSavedRegistersAreaSize + (1 * kPointerSize)));
+  // Remove the saved registers from the stack.
+  __ Addu(sp, sp, Operand(kSavedRegistersAreaSize));
 
   // Compute a pointer to the unwinding limit in register a2; that is
   // the first stack slot not part of the input frame.
@@ -240,73 +237,6 @@ const int Deoptimizer::table_entry_size_ = 2 * kInstrSize;
 const int Deoptimizer::table_entry_size_ = 3 * kInstrSize;
 #endif
 
-void Deoptimizer::GenerateDeoptimizationEntriesPrologue(MacroAssembler* masm,
-                                                        int count) {
-  Assembler::BlockTrampolinePoolScope block_trampoline_pool(masm);
-
-  // Create a sequence of deoptimization entries.
-  // Note that registers are still live when jumping to an entry.
-  Label table_start, done, trampoline_jump;
-  __ bind(&table_start);
-#ifdef _MIPS_ARCH_MIPS32R6
-  int kMaxEntriesBranchReach =
-      (1 << (kImm26Bits - 2)) / (table_entry_size_ / kInstrSize);
-#else
-  int kMaxEntriesBranchReach =
-      (1 << (kImm16Bits - 2)) / (table_entry_size_ / kInstrSize);
-#endif
-
-  if (count <= kMaxEntriesBranchReach) {
-    // Common case.
-    for (int i = 0; i < count; i++) {
-      Label start;
-      __ bind(&start);
-      DCHECK(is_int16(i));
-      if (IsMipsArchVariant(kMips32r6)) {
-        __ li(kScratchReg, i);
-        __ BranchShort(PROTECT, &done);
-      } else {
-        __ BranchShort(USE_DELAY_SLOT, &done);  // Expose delay slot.
-        __ li(kScratchReg, i);                  // In the delay slot.
-        __ nop();
-      }
-
-      DCHECK_EQ(table_entry_size_, masm->SizeOfCodeGeneratedSince(&start));
-    }
-
-    DCHECK_EQ(masm->SizeOfCodeGeneratedSince(&table_start),
-              count * table_entry_size_);
-    __ bind(&done);
-    __ Push(kScratchReg);
-  } else {
-    DCHECK(!IsMipsArchVariant(kMips32r6));
-    // Uncommon case, the branch cannot reach.
-    // Create mini trampoline to reach the end of the table
-    for (int i = 0, j = 0; i < count; i++, j++) {
-      Label start;
-      __ bind(&start);
-      DCHECK(is_int16(i));
-      if (j >= kMaxEntriesBranchReach) {
-        j = 0;
-        __ li(kScratchReg, i);
-        __ bind(&trampoline_jump);
-        trampoline_jump = Label();
-        __ BranchShort(USE_DELAY_SLOT, &trampoline_jump);
-        __ nop();
-      } else {
-        __ BranchShort(USE_DELAY_SLOT, &trampoline_jump);  // Expose delay slot.
-        __ li(kScratchReg, i);                             // In the delay slot.
-        __ nop();
-      }
-
-      DCHECK_EQ(table_entry_size_, masm->SizeOfCodeGeneratedSince(&start));
-    }
-
-    DCHECK_EQ(masm->SizeOfCodeGeneratedSince(&table_start),
-              count * table_entry_size_);
-    __ bind(&trampoline_jump);
-    __ Push(kScratchReg);
-  }
-}
-
 bool Deoptimizer::PadTopOfStackRegister() { return false; }
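A note on the deleted prologue above: its table layout was constrained by MIPS short-branch reach, which is why it needed the mini-trampoline fallback at all. A minimal sketch of that constant computation, using the kImm16Bits/kImm26Bits widths and table_entry_size_ values visible in the removed code (the names are re-declared locally here purely for illustration):

#include <cstdio>

int main() {
  // Local stand-ins for the V8 constants used by the removed prologue.
  const int kImm16Bits = 16;  // pre-R6 short-branch immediate width
  const int kImm26Bits = 26;  // R6 compact-branch immediate width

  // Entries were 3 instructions pre-R6 and 2 on R6 (see table_entry_size_).
  const int kEntryInstrsPreR6 = 3;
  const int kEntryInstrsR6 = 2;

  // Same formula as the removed code: entries reachable by one short branch.
  printf("pre-R6 max entries: %d\n",
         (1 << (kImm16Bits - 2)) / kEntryInstrsPreR6);  // 5461
  printf("R6 max entries:     %d\n",
         (1 << (kImm26Bits - 2)) / kEntryInstrsR6);     // 8388608
  // Past the pre-R6 limit, the removed code fell back to mini trampolines.
  return 0;
}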


@@ -5519,6 +5519,17 @@ void TurboAssembler::ResetSpeculationPoisonRegister() {
   li(kSpeculationPoisonRegister, -1);
 }
 
+void TurboAssembler::CallForDeoptimization(Address target, int deopt_id) {
+  NoRootArrayScope no_root_array(this);
+
+  // Save the deopt id in kRootRegister (we don't need the roots array from now
+  // on).
+  DCHECK_LE(deopt_id, 0xFFFF);
+  li(kRootRegister, deopt_id);
+  Call(target, RelocInfo::RUNTIME_ENTRY);
+}
+
 }  // namespace internal
 }  // namespace v8
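Taken together with the deoptimizer change above, the handoff is: the optimized-code caller loads the bailout id into kRootRegister (safe because the roots array is no longer needed past this point, hence the NoRootArrayScope), and the shared deopt entry copies it into a2. A toy mock of that protocol — not V8 code, all names hypothetical:

#include <cassert>
#include <cstdint>

// Hypothetical register file standing in for the MIPS registers involved.
struct Regs {
  uint32_t root;  // kRootRegister, repurposed to carry the bailout id
  uint32_t a2;    // where GenerateDeoptimizationEntries expects the id
};

// Caller side, mirroring TurboAssembler::CallForDeoptimization above.
void call_for_deopt(Regs& r, int deopt_id) {
  assert(deopt_id <= 0xFFFF);  // same bound as the DCHECK_LE above
  r.root = static_cast<uint32_t>(deopt_id);
  // ... branch to the single shared deopt entry ...
}

// Entry side, mirroring "__ mov(a2, kRootRegister)" in the diff above.
void deopt_entry(Regs& r) { r.a2 = r.root; }

int main() {
  Regs r{};
  call_for_deopt(r, 42);
  deopt_entry(r);
  assert(r.a2 == 42);  // the id arrives with no stack slot and no jump table
  return 0;
}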


@@ -252,11 +252,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   // The return address on the stack is used by frame iteration.
   void StoreReturnAddressAndCall(Register target);
 
-  void CallForDeoptimization(Address target, int deopt_id,
-                             RelocInfo::Mode rmode) {
-    USE(deopt_id);
-    Call(target, rmode);
-  }
+  void CallForDeoptimization(Address target, int deopt_id);
 
   void Ret(COND_ARGS);
   inline void Ret(BranchDelaySlot bd, Condition cond = al,


@@ -179,11 +179,6 @@ bool RelocInfo::IsInConstantPool() {
   return false;
 }
 
-int RelocInfo::GetDeoptimizationId(Isolate* isolate, DeoptimizeKind kind) {
-  DCHECK(IsRuntimeEntry(rmode_));
-  return Deoptimizer::GetDeoptimizationId(isolate, target_address(), kind);
-}
-
 uint32_t RelocInfo::wasm_call_tag() const {
   DCHECK(rmode_ == WASM_CALL || rmode_ == WASM_STUB_CALL);
   return static_cast<uint32_t>(


@@ -15,10 +15,9 @@ namespace internal {
 // This code tries to be close to ia32 code so that any changes can be
 // easily ported.
 void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
-                                                Isolate* isolate, int count,
+                                                Isolate* isolate,
                                                 DeoptimizeKind deopt_kind) {
   NoRootArrayScope no_root_array(masm);
-  GenerateDeoptimizationEntriesPrologue(masm, count);
 
   // Unlike on ARM we don't save all the registers, just the useful ones.
   // For the rest, there are gaps on the stack, so the offsets remain the same.
@@ -65,15 +64,14 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
   const int kSavedRegistersAreaSize =
       (kNumberOfRegisters * kPointerSize) + kDoubleRegsSize + kFloatRegsSize;
 
-  // Get the bailout id from the stack.
-  __ Ld(a2, MemOperand(sp, kSavedRegistersAreaSize));
+  // The bailout id is passed in kRootRegister by the caller.
+  __ mov(a2, kRootRegister);
 
   // Get the address of the location in the code object (a3) (return
   // address for lazy deoptimization) and compute the fp-to-sp delta in
   // register a4.
   __ mov(a3, ra);
-  // Correct one word for bailout id.
-  __ Daddu(a4, sp, Operand(kSavedRegistersAreaSize + (1 * kPointerSize)));
+  __ Daddu(a4, sp, Operand(kSavedRegistersAreaSize));
   __ Dsubu(a4, fp, a4);
@@ -140,8 +138,8 @@ void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm,
     __ Swc1(f0, MemOperand(a1, dst_offset));
   }
 
-  // Remove the bailout id and the saved registers from the stack.
-  __ Daddu(sp, sp, Operand(kSavedRegistersAreaSize + (1 * kPointerSize)));
+  // Remove the saved registers from the stack.
+  __ Daddu(sp, sp, Operand(kSavedRegistersAreaSize));
 
   // Compute a pointer to the unwinding limit in register a2; that is
   // the first stack slot not part of the input frame.
@@ -239,74 +237,6 @@ const int Deoptimizer::table_entry_size_ = 2 * kInstrSize;
 const int Deoptimizer::table_entry_size_ = 3 * kInstrSize;
 #endif
 
-void Deoptimizer::GenerateDeoptimizationEntriesPrologue(MacroAssembler* masm,
-                                                        int count) {
-  Assembler::BlockTrampolinePoolScope block_trampoline_pool(masm);
-
-  // Create a sequence of deoptimization entries.
-  // Note that registers are still live when jumping to an entry.
-  Label table_start, done, trampoline_jump;
-  __ bind(&table_start);
-#ifdef _MIPS_ARCH_MIPS64R6
-  int kMaxEntriesBranchReach =
-      (1 << (kImm26Bits - 2)) / (table_entry_size_ / kInstrSize);
-#else
-  int kMaxEntriesBranchReach =
-      (1 << (kImm16Bits - 2)) / (table_entry_size_ / kInstrSize);
-#endif
-
-  if (count <= kMaxEntriesBranchReach) {
-    // Common case.
-    for (int i = 0; i < count; i++) {
-      Label start;
-      __ bind(&start);
-      DCHECK(is_int16(i));
-      if (kArchVariant == kMips64r6) {
-        __ li(kScratchReg, i);
-        __ BranchShort(PROTECT, &done);
-      } else {
-        __ BranchShort(USE_DELAY_SLOT, &done);  // Expose delay slot.
-        __ li(kScratchReg, i);                  // In the delay slot.
-        __ nop();
-      }
-
-      DCHECK_EQ(table_entry_size_, masm->SizeOfCodeGeneratedSince(&start));
-    }
-
-    DCHECK_EQ(masm->SizeOfCodeGeneratedSince(&table_start),
-              count * table_entry_size_);
-    __ bind(&done);
-    __ Push(kScratchReg);
-  } else {
-    DCHECK_NE(kArchVariant, kMips64r6);
-    // Uncommon case, the branch cannot reach.
-    // Create mini trampoline to reach the end of the table
-    for (int i = 0, j = 0; i < count; i++, j++) {
-      Label start;
-      __ bind(&start);
-      DCHECK(is_int16(i));
-      if (j >= kMaxEntriesBranchReach) {
-        j = 0;
-        __ li(kScratchReg, i);
-        __ bind(&trampoline_jump);
-        trampoline_jump = Label();
-        __ BranchShort(USE_DELAY_SLOT, &trampoline_jump);
-        __ nop();
-      } else {
-        __ BranchShort(USE_DELAY_SLOT, &trampoline_jump);  // Expose delay slot.
-        __ li(kScratchReg, i);                             // In the delay slot.
-        __ nop();
-      }
-
-      DCHECK_EQ(table_entry_size_, masm->SizeOfCodeGeneratedSince(&start));
-    }
-
-    DCHECK_EQ(masm->SizeOfCodeGeneratedSince(&table_start),
-              count * table_entry_size_);
-    __ bind(&trampoline_jump);
-    __ Push(kScratchReg);
-  }
-}
-
 bool Deoptimizer::PadTopOfStackRegister() { return false; }
 
 void FrameDescription::SetCallerPc(unsigned offset, intptr_t value) {


@@ -5886,6 +5886,16 @@ void TurboAssembler::ResetSpeculationPoisonRegister() {
   li(kSpeculationPoisonRegister, -1);
 }
 
+void TurboAssembler::CallForDeoptimization(Address target, int deopt_id) {
+  NoRootArrayScope no_root_array(this);
+
+  // Save the deopt id in kRootRegister (we don't need the roots array from now
+  // on).
+  DCHECK_LE(deopt_id, 0xFFFF);
+  li(kRootRegister, deopt_id);
+  Call(target, RelocInfo::RUNTIME_ENTRY);
+}
+
 }  // namespace internal
 }  // namespace v8


@@ -277,11 +277,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   // The return address on the stack is used by frame iteration.
   void StoreReturnAddressAndCall(Register target);
 
-  void CallForDeoptimization(Address target, int deopt_id,
-                             RelocInfo::Mode rmode) {
-    USE(deopt_id);
-    Call(target, rmode);
-  }
+  void CallForDeoptimization(Address target, int deopt_id);
 
   void Ret(COND_ARGS);
   inline void Ret(BranchDelaySlot bd, Condition cond = al,