Reland "[wasm] Fix tier-up budget tracking for recursive calls"
This is a reland of commit 15f372afaf
Change since revert: TSan fix for tier-up budget reset.
Original change's description:
> [wasm] Fix tier-up budget tracking for recursive calls
>
> In the previous implementation, functions overwrote any budget
> decrements caused by recursive invocations of themselves, which
> could cause tier-up decisions for certain unlucky functions to
> get delayed unreasonably long.
> This patch avoids this by working with the on-instance value
> directly instead of caching it in a stack slot. That generates
> the same amount of Liftoff code as the status quo, but handles
> recursive functions properly.
> The "barista3" benchmark's peak performance improves by almost 20%.
>
> Bug: v8:12281
> Change-Id: I8b487a88da99c2d22e132f2cc72bdf36aa5f6e63
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3693710
> Commit-Queue: Jakob Kummerow <jkummerow@chromium.org>
> Reviewed-by: Clemens Backes <clemensb@chromium.org>
> Cr-Commit-Position: refs/heads/main@{#81249}
Bug: v8:12281,v8:12984
Change-Id: Ia6ce776848dc86617546ec514660c9a840484cb1
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3716479
Reviewed-by: Andreas Haas <ahaas@chromium.org>
Commit-Queue: Andreas Haas <ahaas@chromium.org>
Auto-Submit: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/main@{#81281}
Parent: d39d75b5e9
Commit: 6f3985534c
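To make the diff below easier to follow: the original change replaces a tiering budget that each Liftoff function cached in a dedicated stack slot with direct loads and stores of the per-function entry in the instance's tiering budget array. The standalone C++ sketch below (illustrative only; the constants and names are made-up stand-ins, not V8 APIs) shows why the cached copy loses decrements under recursion:

// Standalone sketch (not V8 code) of the budget-tracking bug the original
// change fixes. All names and constants here are illustrative assumptions.
#include <cstdio>

constexpr int kInitialBudget = 16;  // stand-in for FLAG_wasm_tiering_budget
constexpr int kCostPerCheck = 1;    // stand-in for the per-check budget_used

int g_budget = kInitialBudget;  // stand-in for the function's on-instance entry

// Old scheme: the budget is loaded into a per-activation "stack slot" (a
// local), decremented there, and spilled back on exit. A recursive caller's
// stale copy overwrites every decrement its callees performed.
void RecurseCachedBudget(int depth) {
  int cached = g_budget;      // prologue: fill the stack slot
  cached -= kCostPerCheck;    // tier-up check decrements the cached copy
  if (depth > 0) RecurseCachedBudget(depth - 1);
  g_budget = cached;          // epilogue: spill back, clobbering callee updates
}

// New scheme: every check loads, decrements, and stores the shared entry.
void RecurseSharedBudget(int depth) {
  g_budget -= kCostPerCheck;
  if (depth > 0) RecurseSharedBudget(depth - 1);
}

int main() {
  RecurseCachedBudget(9);  // 10 activations, but only one decrement survives
  std::printf("cached scheme: budget = %d\n", g_budget);  // prints 15

  g_budget = kInitialBudget;
  RecurseSharedBudget(9);  // all 10 decrements are counted
  std::printf("shared scheme: budget = %d\n", g_budget);  // prints 6
}

With the cached scheme, every recursive activation reads the same stale value and the last spill wins, so deep recursion burns essentially no budget; decrementing the shared entry directly counts every check, which is what lets hot recursive functions tier up on time.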
@@ -71,7 +71,6 @@ static_assert(2 * kSystemPointerSize == LiftoffAssembler::kStackSlotSize,
               "Slot size should be twice the size of the 32 bit pointer.");
 constexpr int kInstanceOffset = 2 * kSystemPointerSize;
 constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
-constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;
 // kPatchInstructionsRequired sets a maximum limit of how many instructions that
 // PatchPrepareStackFrame will use in order to increase the stack appropriately.
 // Three instructions are required to sub a large constant, movw + movt + sub.
@@ -562,7 +561,7 @@ void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); }

 // static
 constexpr int LiftoffAssembler::StaticStackFrameSize() {
-  return liftoff::kTierupBudgetOffset;
+  return liftoff::kFeedbackVectorOffset;
 }

 int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
@@ -71,7 +71,6 @@ inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {

 constexpr int kInstanceOffset = 2 * kSystemPointerSize;
 constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
-constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;

 inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }

@@ -387,7 +386,7 @@ void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); }

 // static
 constexpr int LiftoffAssembler::StaticStackFrameSize() {
-  return liftoff::kTierupBudgetOffset;
+  return liftoff::kFeedbackVectorOffset;
 }

 int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
@@ -52,7 +52,6 @@ inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
 // ebp-4 holds the stack marker, ebp-8 is the instance parameter.
 constexpr int kInstanceOffset = 8;
 constexpr int kFeedbackVectorOffset = 12;  // ebp-12 is the feedback vector.
-constexpr int kTierupBudgetOffset = 16;    // ebp-16 is the tiering budget.

 inline Operand GetStackSlot(int offset) { return Operand(ebp, -offset); }

@@ -314,7 +313,7 @@ void LiftoffAssembler::AbortCompilation() {}

 // static
 constexpr int LiftoffAssembler::StaticStackFrameSize() {
-  return liftoff::kTierupBudgetOffset;
+  return liftoff::kFeedbackVectorOffset;
 }

 int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
@@ -742,8 +742,17 @@ class LiftoffCompiler {
     const int kMax = FLAG_wasm_tiering_budget / 4;
     if (budget_used > kMax) budget_used = kMax;

-    LiftoffRegister budget_reg = __ GetUnusedRegister(kGpReg, {});
-    __ Fill(budget_reg, liftoff::kTierupBudgetOffset, ValueKind::kI32);
+    LiftoffRegList pinned;
+    LiftoffRegister budget_reg =
+        pinned.set(__ GetUnusedRegister(kGpReg, pinned));
+    LiftoffRegister array_reg =
+        pinned.set(__ GetUnusedRegister(kGpReg, pinned));
+    LOAD_INSTANCE_FIELD(array_reg.gp(), TieringBudgetArray, kSystemPointerSize,
+                        pinned);
+    uint32_t offset =
+        kInt32Size * declared_function_index(env_->module, func_index_);
+    __ Load(budget_reg, array_reg.gp(), no_reg, offset, LoadType::kI32Load,
+            pinned);
     LiftoffRegList regs_to_save = __ cache_state()->used_registers;
     // The cached instance will be reloaded separately.
     if (__ cache_state()->cached_instance != no_reg) {
@@ -763,7 +772,8 @@ class LiftoffCompiler {
     OutOfLineCode& ool = out_of_line_code_.back();
     __ emit_i32_subi_jump_negative(budget_reg.gp(), budget_used,
                                    ool.label.get());
-    __ Spill(liftoff::kTierupBudgetOffset, budget_reg, ValueKind::kI32);
+    __ Store(array_reg.gp(), no_reg, offset, budget_reg, StoreType::kI32Store,
+             pinned);
     __ bind(ool.continuation.get());
   }

@@ -848,17 +858,6 @@ class LiftoffCompiler {
                           pinned);
       __ Spill(liftoff::kFeedbackVectorOffset, tmp, kPointerKind);
     }
-    if (dynamic_tiering()) {
-      CODE_COMMENT("load tier up budget");
-      LiftoffRegList pinned = kGpParamRegisters;
-      LiftoffRegister tmp = pinned.set(__ GetUnusedRegister(kGpReg, pinned));
-      LOAD_INSTANCE_FIELD(tmp.gp(), TieringBudgetArray, kSystemPointerSize,
-                          pinned);
-      uint32_t offset =
-          kInt32Size * declared_function_index(env_->module, func_index_);
-      __ Load(tmp, tmp.gp(), no_reg, offset, LoadType::kI32Load, pinned);
-      __ Spill(liftoff::kTierupBudgetOffset, tmp, ValueKind::kI32);
-    }
     if (for_debugging_) __ ResetOSRTarget();

     // Process parameters.
@@ -986,11 +985,6 @@ class LiftoffCompiler {
       __ RecordSpillsInSafepoint(safepoint, gp_regs,
                                  ool->safepoint_info->spills, index);
     }
-    if (is_tierup) {
-      // Reset the budget.
-      __ Spill(liftoff::kTierupBudgetOffset,
-               WasmValue(FLAG_wasm_tiering_budget));
-    }

     DCHECK_EQ(!debug_sidetable_builder_, !ool->debug_sidetable_entry_builder);
     if (V8_UNLIKELY(ool->debug_sidetable_entry_builder)) {
@@ -2246,16 +2240,6 @@ class LiftoffCompiler {
   void TierupCheckOnExit(FullDecoder* decoder) {
     if (!dynamic_tiering()) return;
     TierupCheck(decoder, decoder->position(), __ pc_offset());
-    CODE_COMMENT("update tiering budget");
-    LiftoffRegList pinned;
-    LiftoffRegister budget = pinned.set(__ GetUnusedRegister(kGpReg, pinned));
-    LiftoffRegister array = pinned.set(__ GetUnusedRegister(kGpReg, pinned));
-    LOAD_INSTANCE_FIELD(array.gp(), TieringBudgetArray, kSystemPointerSize,
-                        pinned);
-    uint32_t offset =
-        kInt32Size * declared_function_index(env_->module, func_index_);
-    __ Fill(budget, liftoff::kTierupBudgetOffset, ValueKind::kI32);
-    __ Store(array.gp(), no_reg, offset, budget, StoreType::kI32Store, pinned);
   }

   void DoReturn(FullDecoder* decoder, uint32_t /* drop_values */) {
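Summarizing the LiftoffCompiler hunks above: the prologue no longer preloads the budget into a stack slot, and each tier-up check now loads the entry from the instance's tiering budget array at the declared function's index, subtracts the accumulated cost, branches to the out-of-line tier-up path if the result is negative, and otherwise stores the new value back. A rough C++ rendering of that per-check logic (a sketch with assumed names such as FakeInstance and TriggerTierUpStub, not the Liftoff assembler API):

// Sketch of the logic the emitted check performs; FakeInstance and
// TriggerTierUpStub are illustrative stand-ins, not V8 types or functions.
#include <cstdint>
#include <cstdio>

struct FakeInstance {
  int32_t* tiering_budget_array;  // one int32 entry per declared function
};

void TriggerTierUpStub(FakeInstance*, uint32_t declared_index) {
  std::printf("requesting tier-up of declared function %u\n", declared_index);
}

void TierupCheckLogic(FakeInstance* instance, uint32_t declared_index,
                      int32_t budget_used) {
  // Load the function's budget from the shared array on the instance.
  int32_t budget = instance->tiering_budget_array[declared_index];
  // Subtract the cost accumulated since the last check; a negative result
  // corresponds to emit_i32_subi_jump_negative taking the out-of-line branch.
  budget -= budget_used;
  if (budget < 0) {
    // Out-of-line path: the runtime resets the array entry itself, so the
    // decremented value is deliberately not written back here.
    TriggerTierUpStub(instance, declared_index);
  } else {
    // Fast path: store the decremented budget back to the array.
    instance->tiering_budget_array[declared_index] = budget;
  }
  // Either way, execution of the current Liftoff frame continues afterwards.
}

int main() {
  int32_t budgets[3] = {100, 5, 50};
  FakeInstance instance{budgets};
  TierupCheckLogic(&instance, 1, 10);  // drives entry 1 negative -> tier-up
  TierupCheckLogic(&instance, 0, 10);  // fast path: 100 -> 90
  std::printf("entry 0 is now %d\n", budgets[0]);
}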
@@ -72,7 +72,6 @@ inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {

 constexpr int kInstanceOffset = 2 * kSystemPointerSize;
 constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
-constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;

 inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }

@@ -293,7 +292,7 @@ void LiftoffAssembler::AbortCompilation() {}

 // static
 constexpr int LiftoffAssembler::StaticStackFrameSize() {
-  return liftoff::kTierupBudgetOffset;
+  return liftoff::kFeedbackVectorOffset;
 }

 int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
@@ -75,7 +75,6 @@ constexpr int32_t kHighWordOffset = 4;

 constexpr int kInstanceOffset = 2 * kSystemPointerSize;
 constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
-constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;

 inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }

@@ -422,7 +421,7 @@ void LiftoffAssembler::AbortCompilation() {}

 // static
 constexpr int LiftoffAssembler::StaticStackFrameSize() {
-  return liftoff::kTierupBudgetOffset;
+  return liftoff::kFeedbackVectorOffset;
 }

 int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
@@ -72,7 +72,6 @@ inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {

 constexpr int kInstanceOffset = 2 * kSystemPointerSize;
 constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
-constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;

 inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }

@@ -409,7 +408,7 @@ void LiftoffAssembler::AbortCompilation() {}

 // static
 constexpr int LiftoffAssembler::StaticStackFrameSize() {
-  return liftoff::kTierupBudgetOffset;
+  return liftoff::kFeedbackVectorOffset;
 }

 int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
@@ -49,8 +49,6 @@ constexpr int32_t kInstanceOffset =
     (FLAG_enable_embedded_constant_pool.value() ? 3 : 2) * kSystemPointerSize;
 constexpr int kFeedbackVectorOffset =
     (FLAG_enable_embedded_constant_pool.value() ? 4 : 3) * kSystemPointerSize;
-constexpr int kTierupBudgetOffset =
-    (FLAG_enable_embedded_constant_pool.value() ? 5 : 4) * kSystemPointerSize;

 inline MemOperand GetHalfStackSlot(int offset, RegPairHalf half) {
   int32_t half_offset =
@@ -222,7 +220,7 @@ void LiftoffAssembler::AbortCompilation() { FinishCode(); }

 // static
 constexpr int LiftoffAssembler::StaticStackFrameSize() {
-  return liftoff::kTierupBudgetOffset;
+  return liftoff::kFeedbackVectorOffset;
 }

 int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
@@ -72,7 +72,6 @@ inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
 // fp-8 holds the stack marker, fp-16 is the instance parameter.
 constexpr int kInstanceOffset = 2 * kSystemPointerSize;
 constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
-constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;

 inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }

@@ -393,7 +392,7 @@ void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); }

 // static
 constexpr int LiftoffAssembler::StaticStackFrameSize() {
-  return liftoff::kTierupBudgetOffset;
+  return liftoff::kFeedbackVectorOffset;
 }

 int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
@@ -85,7 +85,6 @@ inline constexpr bool UseSignedOp(LiftoffCondition liftoff_cond) {
 //
 constexpr int32_t kInstanceOffset = 2 * kSystemPointerSize;
 constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
-constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;
 inline MemOperand GetStackSlot(uint32_t offset) {
   return MemOperand(fp, -offset);
 }
@@ -200,7 +199,7 @@ void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); }

 // static
 constexpr int LiftoffAssembler::StaticStackFrameSize() {
-  return liftoff::kTierupBudgetOffset;
+  return liftoff::kFeedbackVectorOffset;
 }

 int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
@@ -69,8 +69,6 @@ static_assert((kLiftoffAssemblerFpCacheRegs &
 constexpr int kInstanceOffset = 16;
 // rbp-24 is the feedback vector.
 constexpr int kFeedbackVectorOffset = 24;
-// rbp-32 is the remaining tier-up budget.
-constexpr int kTierupBudgetOffset = 32;

 inline constexpr Operand GetStackSlot(int offset) {
   return Operand(rbp, -offset);
@@ -1430,8 +1430,10 @@ void TriggerTierUp(WasmInstanceObject instance, int func_index) {
   const WasmModule* module = native_module->module();
   int priority;
   {
     // TODO(clemensb): Try to avoid the MutexGuard here.
     base::MutexGuard mutex_guard(&module->type_feedback.mutex);
+    int array_index =
+        wasm::declared_function_index(instance.module(), func_index);
+    instance.tiering_budget_array()[array_index] = FLAG_wasm_tiering_budget;
     int& stored_priority =
         module->type_feedback.feedback_for_function[func_index].tierup_priority;
     if (stored_priority < kMaxInt) ++stored_priority;
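This hunk moves the budget reset out of the generated code and into the runtime: when tier-up is triggered, the runtime writes FLAG_wasm_tiering_budget back into the function's array entry. The reland note mentions a TSan fix for exactly this reset, which is plausible because other threads may be running Liftoff code that decrements the same entry concurrently, and ThreadSanitizer reports such mixed plain accesses as a data race. The sketch below shows one way to express the pattern race-free with relaxed atomics in portable C++; this is an assumption about the shape of the fix, not V8's actual code (V8 has its own relaxed-access helpers), and the budget default shown is assumed.

// Sketch of a race-free reset/decrement pair using relaxed atomics.
// Illustrative only; names and the flag default are assumptions.
#include <atomic>
#include <cstdint>
#include <cstdio>

constexpr int32_t kAssumedBudget = 1'800'000;  // assumed FLAG_wasm_tiering_budget

// Runtime side: TriggerTierUp resets the entry for the hot function.
void ResetBudget(std::atomic<int32_t>* entry) {
  entry->store(kAssumedBudget, std::memory_order_relaxed);
}

// Generated-code side: decrement without an atomic read-modify-write.
// Losing an occasional update between threads is acceptable for a heuristic
// counter; what matters to TSan is that the racing accesses are atomic at all.
bool DecrementAndCheck(std::atomic<int32_t>* entry, int32_t used) {
  int32_t budget = entry->load(std::memory_order_relaxed) - used;
  entry->store(budget, std::memory_order_relaxed);
  return budget < 0;  // caller triggers tier-up when this returns true
}

int main() {
  std::atomic<int32_t> entry{kAssumedBudget};
  while (!DecrementAndCheck(&entry, 500'000)) {
  }
  ResetBudget(&entry);
  std::printf("budget after reset: %d\n",
              entry.load(std::memory_order_relaxed));
}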
@@ -190,7 +190,7 @@ constexpr uint32_t kMinimumSupertypeArraySize = 3;
 constexpr int kMaxPolymorphism = 4;

 #if V8_TARGET_ARCH_X64
-constexpr int32_t kOSRTargetOffset = 5 * kSystemPointerSize;
+constexpr int32_t kOSRTargetOffset = 4 * kSystemPointerSize;
 #endif

 }  // namespace wasm
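For reference, the x64 Liftoff frame slots implied by the constants touched in this diff can be summarized as follows (a reading aid distilled from the hunks above, not a file in the change; kStackMarkerOffset is an illustrative name inferred from the analogous ia32 comment):

// Reading aid only: x64 Liftoff frame slots implied by this diff.
// Offsets are negative from rbp.
constexpr int kSystemPointerSize = 8;                          // x64
constexpr int kStackMarkerOffset = 1 * kSystemPointerSize;     // rbp-8
constexpr int kInstanceOffset = 2 * kSystemPointerSize;        // rbp-16 ("= 16" above)
constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;  // rbp-24 ("= 24" above)
// The dedicated tier-up budget slot (rbp-32) is gone, so the OSR target moves
// up from rbp-40 (5 * kSystemPointerSize) to rbp-32:
constexpr int kOSRTargetOffset = 4 * kSystemPointerSize;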