[wasm] Fix tier-up budget tracking for recursive calls

In the previous implementation, functions overwrote any budget
decrements caused by recursive invocations of themselves, which
could cause tier-up decisions for certain unlucky functions to
get delayed unreasonably long.
This patch avoids this by working with the on-instance value
directly instead of caching it in a stack slot. That generates
the same amount of Liftoff code as the status quo, but handles
recursive functions properly.
The "barista3" benchmark's peak performance improves by almost 20%.

Bug: v8:12281
Change-Id: I8b487a88da99c2d22e132f2cc72bdf36aa5f6e63
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3693710
Commit-Queue: Jakob Kummerow <jkummerow@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/main@{#81249}
This commit is contained in:
Jakob Kummerow 2022-06-20 13:43:46 +02:00 committed by V8 LUCI CQ
parent 3c34137ad2
commit 15f372afaf
13 changed files with 26 additions and 51 deletions

View File

@@ -332,6 +332,9 @@ RUNTIME_FUNCTION(Runtime_WasmTriggerTierUp) {
DCHECK_EQ(instance, frame_finder.frame()->wasm_instance());
wasm::TriggerTierUp(instance, func_index);
int array_index =
wasm::declared_function_index(instance.module(), func_index);
instance.tiering_budget_array()[array_index] = FLAG_wasm_tiering_budget;
return ReadOnlyRoots(isolate).undefined_value();
}

View File

@@ -71,7 +71,6 @@ static_assert(2 * kSystemPointerSize == LiftoffAssembler::kStackSlotSize,
"Slot size should be twice the size of the 32 bit pointer.");
constexpr int kInstanceOffset = 2 * kSystemPointerSize;
constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;
// kPatchInstructionsRequired sets a maximum limit of how many instructions that
// PatchPrepareStackFrame will use in order to increase the stack appropriately.
// Three instructions are required to sub a large constant, movw + movt + sub.
@@ -562,7 +561,7 @@ void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); }
// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
return liftoff::kTierupBudgetOffset;
return liftoff::kFeedbackVectorOffset;
}
int LiftoffAssembler::SlotSizeForType(ValueKind kind) {

View File

@@ -71,7 +71,6 @@ inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
constexpr int kInstanceOffset = 2 * kSystemPointerSize;
constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;
inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }
@@ -387,7 +386,7 @@ void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); }
// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
return liftoff::kTierupBudgetOffset;
return liftoff::kFeedbackVectorOffset;
}
int LiftoffAssembler::SlotSizeForType(ValueKind kind) {

View File

@@ -52,7 +52,6 @@ inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
// ebp-4 holds the stack marker, ebp-8 is the instance parameter.
constexpr int kInstanceOffset = 8;
constexpr int kFeedbackVectorOffset = 12; // ebp-12 is the feedback vector.
constexpr int kTierupBudgetOffset = 16; // ebp-16 is the tiering budget.
inline Operand GetStackSlot(int offset) { return Operand(ebp, -offset); }
@@ -314,7 +313,7 @@ void LiftoffAssembler::AbortCompilation() {}
// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
return liftoff::kTierupBudgetOffset;
return liftoff::kFeedbackVectorOffset;
}
int LiftoffAssembler::SlotSizeForType(ValueKind kind) {

View File

@@ -742,8 +742,17 @@ class LiftoffCompiler {
const int kMax = FLAG_wasm_tiering_budget / 4;
if (budget_used > kMax) budget_used = kMax;
LiftoffRegister budget_reg = __ GetUnusedRegister(kGpReg, {});
__ Fill(budget_reg, liftoff::kTierupBudgetOffset, ValueKind::kI32);
LiftoffRegList pinned;
LiftoffRegister budget_reg =
pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LiftoffRegister array_reg =
pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LOAD_INSTANCE_FIELD(array_reg.gp(), TieringBudgetArray, kSystemPointerSize,
pinned);
uint32_t offset =
kInt32Size * declared_function_index(env_->module, func_index_);
__ Load(budget_reg, array_reg.gp(), no_reg, offset, LoadType::kI32Load,
pinned);
LiftoffRegList regs_to_save = __ cache_state()->used_registers;
// The cached instance will be reloaded separately.
if (__ cache_state()->cached_instance != no_reg) {
@@ -763,7 +772,8 @@ class LiftoffCompiler {
OutOfLineCode& ool = out_of_line_code_.back();
__ emit_i32_subi_jump_negative(budget_reg.gp(), budget_used,
ool.label.get());
__ Spill(liftoff::kTierupBudgetOffset, budget_reg, ValueKind::kI32);
__ Store(array_reg.gp(), no_reg, offset, budget_reg, StoreType::kI32Store,
pinned);
__ bind(ool.continuation.get());
}
@@ -848,17 +858,6 @@ class LiftoffCompiler {
pinned);
__ Spill(liftoff::kFeedbackVectorOffset, tmp, kPointerKind);
}
if (dynamic_tiering()) {
CODE_COMMENT("load tier up budget");
LiftoffRegList pinned = kGpParamRegisters;
LiftoffRegister tmp = pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LOAD_INSTANCE_FIELD(tmp.gp(), TieringBudgetArray, kSystemPointerSize,
pinned);
uint32_t offset =
kInt32Size * declared_function_index(env_->module, func_index_);
__ Load(tmp, tmp.gp(), no_reg, offset, LoadType::kI32Load, pinned);
__ Spill(liftoff::kTierupBudgetOffset, tmp, ValueKind::kI32);
}
if (for_debugging_) __ ResetOSRTarget();
// Process parameters.
@@ -986,11 +985,6 @@ class LiftoffCompiler {
__ RecordSpillsInSafepoint(safepoint, gp_regs,
ool->safepoint_info->spills, index);
}
if (is_tierup) {
// Reset the budget.
__ Spill(liftoff::kTierupBudgetOffset,
WasmValue(FLAG_wasm_tiering_budget));
}
DCHECK_EQ(!debug_sidetable_builder_, !ool->debug_sidetable_entry_builder);
if (V8_UNLIKELY(ool->debug_sidetable_entry_builder)) {
@@ -2246,16 +2240,6 @@ class LiftoffCompiler {
void TierupCheckOnExit(FullDecoder* decoder) {
if (!dynamic_tiering()) return;
TierupCheck(decoder, decoder->position(), __ pc_offset());
CODE_COMMENT("update tiering budget");
LiftoffRegList pinned;
LiftoffRegister budget = pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LiftoffRegister array = pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LOAD_INSTANCE_FIELD(array.gp(), TieringBudgetArray, kSystemPointerSize,
pinned);
uint32_t offset =
kInt32Size * declared_function_index(env_->module, func_index_);
__ Fill(budget, liftoff::kTierupBudgetOffset, ValueKind::kI32);
__ Store(array.gp(), no_reg, offset, budget, StoreType::kI32Store, pinned);
}
void DoReturn(FullDecoder* decoder, uint32_t /* drop_values */) {

View File

@@ -72,7 +72,6 @@ inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
constexpr int kInstanceOffset = 2 * kSystemPointerSize;
constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;
inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }
@@ -293,7 +292,7 @@ void LiftoffAssembler::AbortCompilation() {}
// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
return liftoff::kTierupBudgetOffset;
return liftoff::kFeedbackVectorOffset;
}
int LiftoffAssembler::SlotSizeForType(ValueKind kind) {

View File

@@ -75,7 +75,6 @@ constexpr int32_t kHighWordOffset = 4;
constexpr int kInstanceOffset = 2 * kSystemPointerSize;
constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;
inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }
@@ -422,7 +421,7 @@ void LiftoffAssembler::AbortCompilation() {}
// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
return liftoff::kTierupBudgetOffset;
return liftoff::kFeedbackVectorOffset;
}
int LiftoffAssembler::SlotSizeForType(ValueKind kind) {

View File

@@ -72,7 +72,6 @@ inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
constexpr int kInstanceOffset = 2 * kSystemPointerSize;
constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;
inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }
@@ -409,7 +408,7 @@ void LiftoffAssembler::AbortCompilation() {}
// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
return liftoff::kTierupBudgetOffset;
return liftoff::kFeedbackVectorOffset;
}
int LiftoffAssembler::SlotSizeForType(ValueKind kind) {

View File

@@ -49,8 +49,6 @@ constexpr int32_t kInstanceOffset =
(FLAG_enable_embedded_constant_pool.value() ? 3 : 2) * kSystemPointerSize;
constexpr int kFeedbackVectorOffset =
(FLAG_enable_embedded_constant_pool.value() ? 4 : 3) * kSystemPointerSize;
constexpr int kTierupBudgetOffset =
(FLAG_enable_embedded_constant_pool.value() ? 5 : 4) * kSystemPointerSize;
inline MemOperand GetHalfStackSlot(int offset, RegPairHalf half) {
int32_t half_offset =
@@ -222,7 +220,7 @@ void LiftoffAssembler::AbortCompilation() { FinishCode(); }
// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
return liftoff::kTierupBudgetOffset;
return liftoff::kFeedbackVectorOffset;
}
int LiftoffAssembler::SlotSizeForType(ValueKind kind) {

View File

@@ -72,7 +72,6 @@ inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
// fp-8 holds the stack marker, fp-16 is the instance parameter.
constexpr int kInstanceOffset = 2 * kSystemPointerSize;
constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;
inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }
@@ -393,7 +392,7 @@ void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); }
// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
return liftoff::kTierupBudgetOffset;
return liftoff::kFeedbackVectorOffset;
}
int LiftoffAssembler::SlotSizeForType(ValueKind kind) {

View File

@@ -85,7 +85,6 @@ inline constexpr bool UseSignedOp(LiftoffCondition liftoff_cond) {
//
constexpr int32_t kInstanceOffset = 2 * kSystemPointerSize;
constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;
inline MemOperand GetStackSlot(uint32_t offset) {
return MemOperand(fp, -offset);
}
@@ -200,7 +199,7 @@ void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); }
// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
return liftoff::kTierupBudgetOffset;
return liftoff::kFeedbackVectorOffset;
}
int LiftoffAssembler::SlotSizeForType(ValueKind kind) {

View File

@@ -69,8 +69,6 @@ static_assert((kLiftoffAssemblerFpCacheRegs &
constexpr int kInstanceOffset = 16;
// rbp-24 is the feedback vector.
constexpr int kFeedbackVectorOffset = 24;
// rbp-32 is the remaining tier-up budget.
constexpr int kTierupBudgetOffset = 32;
inline constexpr Operand GetStackSlot(int offset) {
return Operand(rbp, -offset);

View File

@@ -190,7 +190,7 @@ constexpr uint32_t kMinimumSupertypeArraySize = 3;
constexpr int kMaxPolymorphism = 4;
#if V8_TARGET_ARCH_X64
constexpr int32_t kOSRTargetOffset = 5 * kSystemPointerSize;
constexpr int32_t kOSRTargetOffset = 4 * kSystemPointerSize;
#endif
} // namespace wasm