Reland "[wasm] Fix tier-up budget tracking for recursive calls"

This is a reland of commit 15f372afaf

Change since revert: TSan fix for tier-up budget reset.

Original change's description:
> [wasm] Fix tier-up budget tracking for recursive calls
>
> In the previous implementation, functions overwrote any budget
> decrements caused by recursive invocations of themselves, which
> could cause tier-up decisions for certain unlucky functions to
> get delayed unreasonably long.
> This patch avoids this by working with the on-instance value
> directly instead of caching it in a stack slot. That generates
> the same amount of Liftoff code as the status quo, but handles
> recursive functions properly.
> The "barista3" benchmark's peak performance improves by almost 20%.
>
> Bug: v8:12281
> Change-Id: I8b487a88da99c2d22e132f2cc72bdf36aa5f6e63
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3693710
> Commit-Queue: Jakob Kummerow <jkummerow@chromium.org>
> Reviewed-by: Clemens Backes <clemensb@chromium.org>
> Cr-Commit-Position: refs/heads/main@{#81249}

Bug: v8:12281,v8:12984
Change-Id: Ia6ce776848dc86617546ec514660c9a840484cb1
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3716479
Reviewed-by: Andreas Haas <ahaas@chromium.org>
Commit-Queue: Andreas Haas <ahaas@chromium.org>
Auto-Submit: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/main@{#81281}
This commit is contained in:
Jakob Kummerow 2022-06-21 16:28:54 +02:00 committed by V8 LUCI CQ
parent d39d75b5e9
commit 6f3985534c
13 changed files with 26 additions and 52 deletions

View File

@ -71,7 +71,6 @@ static_assert(2 * kSystemPointerSize == LiftoffAssembler::kStackSlotSize,
"Slot size should be twice the size of the 32 bit pointer.");
constexpr int kInstanceOffset = 2 * kSystemPointerSize;
constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;
// kPatchInstructionsRequired sets a maximum limit of how many instructions that
// PatchPrepareStackFrame will use in order to increase the stack appropriately.
// Three instructions are required to sub a large constant, movw + movt + sub.
@ -562,7 +561,7 @@ void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); }
// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
return liftoff::kTierupBudgetOffset;
return liftoff::kFeedbackVectorOffset;
}
int LiftoffAssembler::SlotSizeForType(ValueKind kind) {

View File

@ -71,7 +71,6 @@ inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
constexpr int kInstanceOffset = 2 * kSystemPointerSize;
constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;
inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }
@ -387,7 +386,7 @@ void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); }
// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
return liftoff::kTierupBudgetOffset;
return liftoff::kFeedbackVectorOffset;
}
int LiftoffAssembler::SlotSizeForType(ValueKind kind) {

View File

@ -52,7 +52,6 @@ inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
// ebp-4 holds the stack marker, ebp-8 is the instance parameter.
constexpr int kInstanceOffset = 8;
constexpr int kFeedbackVectorOffset = 12; // ebp-12 is the feedback vector.
constexpr int kTierupBudgetOffset = 16; // ebp-16 is the tiering budget.
inline Operand GetStackSlot(int offset) { return Operand(ebp, -offset); }
@ -314,7 +313,7 @@ void LiftoffAssembler::AbortCompilation() {}
// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
return liftoff::kTierupBudgetOffset;
return liftoff::kFeedbackVectorOffset;
}
int LiftoffAssembler::SlotSizeForType(ValueKind kind) {

View File

@ -742,8 +742,17 @@ class LiftoffCompiler {
const int kMax = FLAG_wasm_tiering_budget / 4;
if (budget_used > kMax) budget_used = kMax;
LiftoffRegister budget_reg = __ GetUnusedRegister(kGpReg, {});
__ Fill(budget_reg, liftoff::kTierupBudgetOffset, ValueKind::kI32);
LiftoffRegList pinned;
LiftoffRegister budget_reg =
pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LiftoffRegister array_reg =
pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LOAD_INSTANCE_FIELD(array_reg.gp(), TieringBudgetArray, kSystemPointerSize,
pinned);
uint32_t offset =
kInt32Size * declared_function_index(env_->module, func_index_);
__ Load(budget_reg, array_reg.gp(), no_reg, offset, LoadType::kI32Load,
pinned);
LiftoffRegList regs_to_save = __ cache_state()->used_registers;
// The cached instance will be reloaded separately.
if (__ cache_state()->cached_instance != no_reg) {
@ -763,7 +772,8 @@ class LiftoffCompiler {
OutOfLineCode& ool = out_of_line_code_.back();
__ emit_i32_subi_jump_negative(budget_reg.gp(), budget_used,
ool.label.get());
__ Spill(liftoff::kTierupBudgetOffset, budget_reg, ValueKind::kI32);
__ Store(array_reg.gp(), no_reg, offset, budget_reg, StoreType::kI32Store,
pinned);
__ bind(ool.continuation.get());
}
@ -848,17 +858,6 @@ class LiftoffCompiler {
pinned);
__ Spill(liftoff::kFeedbackVectorOffset, tmp, kPointerKind);
}
if (dynamic_tiering()) {
CODE_COMMENT("load tier up budget");
LiftoffRegList pinned = kGpParamRegisters;
LiftoffRegister tmp = pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LOAD_INSTANCE_FIELD(tmp.gp(), TieringBudgetArray, kSystemPointerSize,
pinned);
uint32_t offset =
kInt32Size * declared_function_index(env_->module, func_index_);
__ Load(tmp, tmp.gp(), no_reg, offset, LoadType::kI32Load, pinned);
__ Spill(liftoff::kTierupBudgetOffset, tmp, ValueKind::kI32);
}
if (for_debugging_) __ ResetOSRTarget();
// Process parameters.
@ -986,11 +985,6 @@ class LiftoffCompiler {
__ RecordSpillsInSafepoint(safepoint, gp_regs,
ool->safepoint_info->spills, index);
}
if (is_tierup) {
// Reset the budget.
__ Spill(liftoff::kTierupBudgetOffset,
WasmValue(FLAG_wasm_tiering_budget));
}
DCHECK_EQ(!debug_sidetable_builder_, !ool->debug_sidetable_entry_builder);
if (V8_UNLIKELY(ool->debug_sidetable_entry_builder)) {
@ -2246,16 +2240,6 @@ class LiftoffCompiler {
void TierupCheckOnExit(FullDecoder* decoder) {
if (!dynamic_tiering()) return;
TierupCheck(decoder, decoder->position(), __ pc_offset());
CODE_COMMENT("update tiering budget");
LiftoffRegList pinned;
LiftoffRegister budget = pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LiftoffRegister array = pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LOAD_INSTANCE_FIELD(array.gp(), TieringBudgetArray, kSystemPointerSize,
pinned);
uint32_t offset =
kInt32Size * declared_function_index(env_->module, func_index_);
__ Fill(budget, liftoff::kTierupBudgetOffset, ValueKind::kI32);
__ Store(array.gp(), no_reg, offset, budget, StoreType::kI32Store, pinned);
}
void DoReturn(FullDecoder* decoder, uint32_t /* drop_values */) {

View File

@ -72,7 +72,6 @@ inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
constexpr int kInstanceOffset = 2 * kSystemPointerSize;
constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;
inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }
@ -293,7 +292,7 @@ void LiftoffAssembler::AbortCompilation() {}
// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
return liftoff::kTierupBudgetOffset;
return liftoff::kFeedbackVectorOffset;
}
int LiftoffAssembler::SlotSizeForType(ValueKind kind) {

View File

@ -75,7 +75,6 @@ constexpr int32_t kHighWordOffset = 4;
constexpr int kInstanceOffset = 2 * kSystemPointerSize;
constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;
inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }
@ -422,7 +421,7 @@ void LiftoffAssembler::AbortCompilation() {}
// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
return liftoff::kTierupBudgetOffset;
return liftoff::kFeedbackVectorOffset;
}
int LiftoffAssembler::SlotSizeForType(ValueKind kind) {

View File

@ -72,7 +72,6 @@ inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
constexpr int kInstanceOffset = 2 * kSystemPointerSize;
constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;
inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }
@ -409,7 +408,7 @@ void LiftoffAssembler::AbortCompilation() {}
// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
return liftoff::kTierupBudgetOffset;
return liftoff::kFeedbackVectorOffset;
}
int LiftoffAssembler::SlotSizeForType(ValueKind kind) {

View File

@ -49,8 +49,6 @@ constexpr int32_t kInstanceOffset =
(FLAG_enable_embedded_constant_pool.value() ? 3 : 2) * kSystemPointerSize;
constexpr int kFeedbackVectorOffset =
(FLAG_enable_embedded_constant_pool.value() ? 4 : 3) * kSystemPointerSize;
constexpr int kTierupBudgetOffset =
(FLAG_enable_embedded_constant_pool.value() ? 5 : 4) * kSystemPointerSize;
inline MemOperand GetHalfStackSlot(int offset, RegPairHalf half) {
int32_t half_offset =
@ -222,7 +220,7 @@ void LiftoffAssembler::AbortCompilation() { FinishCode(); }
// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
return liftoff::kTierupBudgetOffset;
return liftoff::kFeedbackVectorOffset;
}
int LiftoffAssembler::SlotSizeForType(ValueKind kind) {

View File

@ -72,7 +72,6 @@ inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
// fp-8 holds the stack marker, fp-16 is the instance parameter.
constexpr int kInstanceOffset = 2 * kSystemPointerSize;
constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;
inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }
@ -393,7 +392,7 @@ void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); }
// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
return liftoff::kTierupBudgetOffset;
return liftoff::kFeedbackVectorOffset;
}
int LiftoffAssembler::SlotSizeForType(ValueKind kind) {

View File

@ -85,7 +85,6 @@ inline constexpr bool UseSignedOp(LiftoffCondition liftoff_cond) {
//
constexpr int32_t kInstanceOffset = 2 * kSystemPointerSize;
constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;
inline MemOperand GetStackSlot(uint32_t offset) {
return MemOperand(fp, -offset);
}
@ -200,7 +199,7 @@ void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); }
// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
return liftoff::kTierupBudgetOffset;
return liftoff::kFeedbackVectorOffset;
}
int LiftoffAssembler::SlotSizeForType(ValueKind kind) {

View File

@ -69,8 +69,6 @@ static_assert((kLiftoffAssemblerFpCacheRegs &
constexpr int kInstanceOffset = 16;
// rbp-24 is the feedback vector.
constexpr int kFeedbackVectorOffset = 24;
// rbp-32 is the remaining tier-up budget.
constexpr int kTierupBudgetOffset = 32;
inline constexpr Operand GetStackSlot(int offset) {
return Operand(rbp, -offset);

View File

@ -1430,8 +1430,10 @@ void TriggerTierUp(WasmInstanceObject instance, int func_index) {
const WasmModule* module = native_module->module();
int priority;
{
// TODO(clemensb): Try to avoid the MutexGuard here.
base::MutexGuard mutex_guard(&module->type_feedback.mutex);
int array_index =
wasm::declared_function_index(instance.module(), func_index);
instance.tiering_budget_array()[array_index] = FLAG_wasm_tiering_budget;
int& stored_priority =
module->type_feedback.feedback_for_function[func_index].tierup_priority;
if (stored_priority < kMaxInt) ++stored_priority;

View File

@ -190,7 +190,7 @@ constexpr uint32_t kMinimumSupertypeArraySize = 3;
constexpr int kMaxPolymorphism = 4;
#if V8_TARGET_ARCH_X64
constexpr int32_t kOSRTargetOffset = 5 * kSystemPointerSize;
constexpr int32_t kOSRTargetOffset = 4 * kSystemPointerSize;
#endif
} // namespace wasm