[compiler] Don't remove OSR code cache if deoptimizing outside the loop
The main purpose of OSR compilation is to speed up inner-loop execution. The OSR code cache is still valid for the loop if the deoptimization happens outside of it, so keeping the OSR code cache avoids unnecessary slow bytecode execution with feedback collection and avoids re-OSR compilation. This CL improves the JetStream2 navier-stokes case by ~6%.

Change-Id: I9518317fb922071b131cab5b56998a0fc198804a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3494981
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Commit-Queue: Tao Pan <tao.pan@intel.com>
Cr-Commit-Position: refs/heads/main@{#79413}
commit 190b5d959b
parent 4e9ab3a0f5
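The rule this CL implements in Runtime_NotifyDeoptimized (last hunk below) is: scan the bytecode forward from the OSR'd JumpLoop; if the deopt exit offset is reached before the outermost enclosing JumpLoop (nesting level 0), the OSR code is invalidated as before, otherwise it is kept. The following is a minimal standalone sketch of that rule, not V8 code; the BytecodeEntry type and ShouldDeoptimizeOsrCode helper are hypothetical:

#include <iostream>
#include <vector>

// One entry per bytecode considered in the scan.
struct BytecodeEntry {
  int offset;         // absolute bytecode offset
  bool is_jump_loop;  // true if this bytecode is a JumpLoop
  int nesting_level;  // JumpLoop nesting-level operand (0 == outermost)
};

// Mirrors the loop added to Runtime_NotifyDeoptimized: deoptimize only if the
// deopt exit lies at or before the outermost JumpLoop enclosing the OSR'd loop.
bool ShouldDeoptimizeOsrCode(const std::vector<BytecodeEntry>& bytecodes,
                             int osr_offset, int deopt_exit_offset) {
  for (const BytecodeEntry& b : bytecodes) {
    if (b.offset < osr_offset) continue;             // start at the OSR'd JumpLoop
    if (deopt_exit_offset <= b.offset) return true;  // exit is inside the loop nest
    if (b.is_jump_loop && b.nesting_level == 0) break;  // passed the outermost loop
  }
  return false;  // exit is past the outermost loop: keep the OSR code cache
}

int main() {
  // Inner JumpLoop at offset 40 (level 1), outer JumpLoop at offset 60 (level 0).
  std::vector<BytecodeEntry> bytecodes = {
      {40, true, 1}, {50, false, 0}, {60, true, 0}, {80, false, 0}};
  std::cout << ShouldDeoptimizeOsrCode(bytecodes, 40, 55) << "\n";  // 1: deoptimize
  std::cout << ShouldDeoptimizeOsrCode(bytecodes, 40, 80) << "\n";  // 0: keep OSR code
}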
@@ -1934,7 +1934,7 @@ void BaselineCompiler::VisitJumpLoop() {
   __ LoadRegister(osr_level, interpreter::Register::bytecode_array());
   __ LoadByteField(osr_level, osr_level,
                    BytecodeArray::kOsrLoopNestingLevelOffset);
-  int loop_depth = iterator().GetImmediateOperand(1);
+  int loop_depth = iterator().GetJumpLoopNestingLevel();
   __ JumpIfByte(Condition::kUnsignedLessThanEqual, osr_level, loop_depth,
                 &osr_not_armed);
   CallBuiltin<Builtin::kBaselineOnStackReplacement>();
@@ -559,6 +559,7 @@ MaybeHandle<Code> CodeGenerator::FinalizeCode() {
           .set_is_turbofanned()
           .set_stack_slots(frame()->GetTotalFrameSlotCount())
           .set_profiler_data(info()->profiler_data())
+          .set_osr_offset(info()->osr_offset())
           .TryBuild();

   Handle<Code> code;
@@ -532,13 +532,13 @@ Deoptimizer::Deoptimizer(Isolate* isolate, JSFunction function,
       function.shared().internal_formal_parameter_count_with_receiver();
   input_ = new (size) FrameDescription(size, parameter_count);

+  DeoptimizationData deopt_data =
+      DeoptimizationData::cast(compiled_code_.deoptimization_data());
   if (kSupportsFixedDeoptExitSizes) {
     DCHECK_EQ(deopt_exit_index_, kFixedExitSizeMarker);
     // Calculate the deopt exit index from return address.
     DCHECK_GT(kNonLazyDeoptExitSize, 0);
     DCHECK_GT(kLazyDeoptExitSize, 0);
-    DeoptimizationData deopt_data =
-        DeoptimizationData::cast(compiled_code_.deoptimization_data());
     Address deopt_start = compiled_code_.raw_instruction_start() +
                           deopt_data.DeoptExitStart().value();
     int eager_soft_and_bailout_deopt_count =
@@ -580,6 +580,7 @@ Deoptimizer::Deoptimizer(Isolate* isolate, JSFunction function,
           (offset / kEagerWithResumeDeoptExitSize);
     }
   }
+  deopt_exit_bytecode_offset_ = deopt_data.GetBytecodeOffset(deopt_exit_index_);
 }

 Code Deoptimizer::FindOptimizedCode() {
@@ -713,8 +714,7 @@ int LookupCatchHandler(Isolate* isolate, TranslatedFrame* translated_frame,

 }  // namespace

-void Deoptimizer::TraceDeoptBegin(int optimization_id,
-                                  BytecodeOffset bytecode_offset) {
+void Deoptimizer::TraceDeoptBegin(int optimization_id) {
   DCHECK(tracing_enabled());
   FILE* file = trace_scope()->file();
   Deoptimizer::DeoptInfo info =
@@ -738,8 +738,9 @@ void Deoptimizer::TraceDeoptBegin(int optimization_id,
 #ifdef DEBUG
          info.node_id,
 #endif  // DEBUG
-         bytecode_offset.ToInt(), deopt_exit_index_, fp_to_sp_delta_,
-         caller_frame_top_, PointerAuthentication::StripPAC(from_));
+         deopt_exit_bytecode_offset_.ToInt(), deopt_exit_index_,
+         fp_to_sp_delta_, caller_frame_top_,
+         PointerAuthentication::StripPAC(from_));
   if (verbose_tracing_enabled() && deopt_kind_ != DeoptimizeKind::kLazy) {
     PrintF(file, " ;;; deoptimize at ");
     OFStream outstr(file);
@@ -867,15 +868,13 @@ void Deoptimizer::DoComputeOutputFrames() {
   CHECK_GT(static_cast<uintptr_t>(caller_frame_top_),
            stack_guard->real_jslimit());

-  BytecodeOffset bytecode_offset =
-      input_data.GetBytecodeOffset(deopt_exit_index_);
   ByteArray translations = input_data.TranslationByteArray();
   unsigned translation_index =
       input_data.TranslationIndex(deopt_exit_index_).value();

   if (tracing_enabled()) {
     timer.Start();
-    TraceDeoptBegin(input_data.OptimizationId().value(), bytecode_offset);
+    TraceDeoptBegin(input_data.OptimizationId().value());
   }

   FILE* trace_file =
@@ -54,6 +54,9 @@ class Deoptimizer : public Malloced {
   Handle<JSFunction> function() const;
   Handle<Code> compiled_code() const;
   DeoptimizeKind deopt_kind() const { return deopt_kind_; }
+  BytecodeOffset deopt_exit_bytecode_offset() const {
+    return deopt_exit_bytecode_offset_;
+  }

   static Deoptimizer* New(Address raw_function, DeoptimizeKind kind,
                           unsigned deopt_exit_index, Address from,
@@ -199,7 +202,7 @@ class Deoptimizer : public Malloced {
   CodeTracer::Scope* verbose_trace_scope() const {
     return FLAG_trace_deopt_verbose ? trace_scope() : nullptr;
   }
-  void TraceDeoptBegin(int optimization_id, BytecodeOffset bytecode_offset);
+  void TraceDeoptBegin(int optimization_id);
   void TraceDeoptEnd(double deopt_duration);
 #ifdef DEBUG
   static void TraceFoundActivation(Isolate* isolate, JSFunction function);
@@ -211,6 +214,7 @@ class Deoptimizer : public Malloced {
   JSFunction function_;
   Code compiled_code_;
   unsigned deopt_exit_index_;
+  BytecodeOffset deopt_exit_bytecode_offset_ = BytecodeOffset::None();
   DeoptimizeKind deopt_kind_;
   Address from_;
   int fp_to_sp_delta_;
@@ -297,7 +297,7 @@ OptimizationDecision TieringManager::ShouldOptimize(JSFunction function,
       int jump_target_offset = iterator.GetJumpTargetOffset();
       if (jump_offset >= current_offset &&
           current_offset >= jump_target_offset) {
-        bytecode.set_osr_loop_nesting_level(iterator.GetImmediateOperand(1) +
+        bytecode.set_osr_loop_nesting_level(iterator.GetJumpLoopNestingLevel() +
                                             1);
        return OptimizationDecision::TurbofanHotAndStable();
      }
@@ -180,6 +180,7 @@ MaybeHandle<Code> Factory::CodeBuilder::BuildInternal(
     // this field. We currently assume it's immutable thus a relaxed read (after
     // passing IsPendingAllocation).
     raw_code.set_inlined_bytecode_size(inlined_bytecode_size_);
+    raw_code.set_osr_offset(osr_offset_);
     raw_code.set_code_data_container(*data_container, kReleaseStore);
     if (kind_ == CodeKind::BASELINE) {
       raw_code.set_bytecode_or_interpreter_data(*interpreter_data_);
@@ -913,6 +913,12 @@ class V8_EXPORT_PRIVATE Factory : public FactoryBase<Factory> {
       return *this;
     }

+    CodeBuilder& set_osr_offset(BytecodeOffset offset) {
+      DCHECK_IMPLIES(!offset.IsNone(), CodeKindIsOptimizedJSFunction(kind_));
+      osr_offset_ = offset;
+      return *this;
+    }
+
     CodeBuilder& set_source_position_table(Handle<ByteArray> table) {
       DCHECK_NE(kind_, CodeKind::BASELINE);
       DCHECK(!table.is_null());
@@ -990,6 +996,7 @@ class V8_EXPORT_PRIVATE Factory : public FactoryBase<Factory> {
     MaybeHandle<Object> self_reference_;
     Builtin builtin_ = Builtin::kNoBuiltinId;
     uint32_t inlined_bytecode_size_ = 0;
+    BytecodeOffset osr_offset_ = BytecodeOffset::None();
     int32_t kind_specific_flags_ = 0;
     // Either source_position_table for non-baseline code
     // or bytecode_offset_table for baseline code.
@@ -244,6 +244,11 @@ int BytecodeArrayIterator::GetJumpTargetOffset() const {
   return GetAbsoluteOffset(GetRelativeJumpTargetOffset());
 }

+int BytecodeArrayIterator::GetJumpLoopNestingLevel() const {
+  DCHECK_EQ(current_bytecode(), interpreter::Bytecode::kJumpLoop);
+  return GetImmediateOperand(1);
+}
+
 JumpTableTargetOffsets BytecodeArrayIterator::GetJumpTableTargetOffsets()
     const {
   uint32_t table_start, table_size;
@@ -141,6 +141,10 @@ class V8_EXPORT_PRIVATE BytecodeArrayIterator {
   // bytecode is not a switch.
   JumpTableTargetOffsets GetJumpTableTargetOffsets() const;

+  // Returns loop nesting level of current bytecode.
+  // It is an error to call this method if the bytecode is not for a JumpLoop.
+  int GetJumpLoopNestingLevel() const;
+
   // Returns the absolute offset of the bytecode at the given relative offset
   // from the current bytecode.
   int GetAbsoluteOffset(int relative_offset) const;
@@ -694,6 +694,15 @@ void Code::set_inlined_bytecode_size(unsigned size) {
   RELAXED_WRITE_UINT_FIELD(*this, kInlinedBytecodeSizeOffset, size);
 }

+BytecodeOffset Code::osr_offset() const {
+  int32_t offset = RELAXED_READ_INT32_FIELD(*this, kOsrOffsetOffset);
+  return BytecodeOffset(offset);
+}
+
+void Code::set_osr_offset(BytecodeOffset offset) {
+  RELAXED_WRITE_INT32_FIELD(*this, kOsrOffsetOffset, offset.ToInt());
+}
+
 bool Code::uses_safepoint_table() const {
   return is_turbofanned() || is_maglevved() || is_wasm_code();
 }
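The new osr_offset field is what later lets Runtime_NotifyDeoptimized tell OSR'd code apart from regular optimized code: non-OSR code keeps the default BytecodeOffset::None(). Below is a small standalone sketch of that sentinel encoding; it is a simplified stand-in, not the real V8 class, which to my understanding uses -1 for None():

#include <cassert>
#include <cstdint>

// Simplified stand-in for v8::internal::BytecodeOffset.
class BytecodeOffset {
 public:
  explicit BytecodeOffset(int32_t id) : id_(id) {}
  static BytecodeOffset None() { return BytecodeOffset(-1); }
  bool IsNone() const { return id_ == -1; }
  int32_t ToInt() const { return id_; }

 private:
  int32_t id_;
};

int main() {
  // Regular optimized code: the int32 slot holds the None() sentinel.
  int32_t slot = BytecodeOffset::None().ToInt();
  assert(BytecodeOffset(slot).IsNone());
  // OSR'd code: the slot holds the offset of the JumpLoop that triggered OSR.
  slot = BytecodeOffset(40).ToInt();
  assert(!BytecodeOffset(slot).IsNone());
  return 0;
}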
@@ -455,6 +455,9 @@ class Code : public HeapObject {
   inline unsigned inlined_bytecode_size() const;
   inline void set_inlined_bytecode_size(unsigned size);

+  inline BytecodeOffset osr_offset() const;
+  inline void set_osr_offset(BytecodeOffset offset);
+
   // [uses_safepoint_table]: Whether this Code object uses safepoint tables
   // (note the table may still be empty, see has_safepoint_table).
   inline bool uses_safepoint_table() const;
@@ -633,6 +636,7 @@ class Code : public HeapObject {
   V(kFlagsOffset, kInt32Size)                                \
   V(kBuiltinIndexOffset, kIntSize)                           \
   V(kInlinedBytecodeSizeOffset, kIntSize)                    \
+  V(kOsrOffsetOffset, kInt32Size)                            \
   /* Offsets describing inline metadata tables, relative to MetadataStart. */ \
   V(kHandlerTableOffsetOffset, kIntSize)                     \
   V(kConstantPoolOffsetOffset,                               \
@@ -652,28 +656,28 @@ class Code : public HeapObject {
   // due to padding for code alignment.
 #if V8_TARGET_ARCH_ARM64
   static constexpr int kHeaderPaddingSize =
-      V8_EXTERNAL_CODE_SPACE_BOOL ? 8 : (COMPRESS_POINTERS_BOOL ? 12 : 24);
+      V8_EXTERNAL_CODE_SPACE_BOOL ? 4 : (COMPRESS_POINTERS_BOOL ? 8 : 20);
 #elif V8_TARGET_ARCH_MIPS64
-  static constexpr int kHeaderPaddingSize = 24;
+  static constexpr int kHeaderPaddingSize = 20;
 #elif V8_TARGET_ARCH_LOONG64
-  static constexpr int kHeaderPaddingSize = 24;
+  static constexpr int kHeaderPaddingSize = 20;
 #elif V8_TARGET_ARCH_X64
   static constexpr int kHeaderPaddingSize =
-      V8_EXTERNAL_CODE_SPACE_BOOL ? 8 : (COMPRESS_POINTERS_BOOL ? 12 : 56);
+      V8_EXTERNAL_CODE_SPACE_BOOL ? 4 : (COMPRESS_POINTERS_BOOL ? 8 : 52);
 #elif V8_TARGET_ARCH_ARM
-  static constexpr int kHeaderPaddingSize = 12;
+  static constexpr int kHeaderPaddingSize = 8;
 #elif V8_TARGET_ARCH_IA32
-  static constexpr int kHeaderPaddingSize = 12;
+  static constexpr int kHeaderPaddingSize = 8;
 #elif V8_TARGET_ARCH_MIPS
-  static constexpr int kHeaderPaddingSize = 12;
+  static constexpr int kHeaderPaddingSize = 8;
 #elif V8_TARGET_ARCH_PPC64
   static constexpr int kHeaderPaddingSize =
-      FLAG_enable_embedded_constant_pool ? (COMPRESS_POINTERS_BOOL ? 8 : 20)
-                                         : (COMPRESS_POINTERS_BOOL ? 12 : 24);
+      FLAG_enable_embedded_constant_pool ? (COMPRESS_POINTERS_BOOL ? 4 : 16)
+                                         : (COMPRESS_POINTERS_BOOL ? 8 : 20);
 #elif V8_TARGET_ARCH_S390X
-  static constexpr int kHeaderPaddingSize = COMPRESS_POINTERS_BOOL ? 12 : 24;
+  static constexpr int kHeaderPaddingSize = COMPRESS_POINTERS_BOOL ? 8 : 20;
 #elif V8_TARGET_ARCH_RISCV64
-  static constexpr int kHeaderPaddingSize = (COMPRESS_POINTERS_BOOL ? 12 : 24);
+  static constexpr int kHeaderPaddingSize = (COMPRESS_POINTERS_BOOL ? 8 : 20);
 #else
 #error Unknown architecture.
 #endif
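The padding constants above each shrink by 4 bytes because the new kOsrOffsetOffset field occupies kInt32Size bytes that previously counted as header padding, so the overall Code header size presumably stays the same. A trivial compile-time check of that arithmetic, illustrative only and not part of the CL:

#include <cstdint>

// Old padding minus the new 4-byte field equals the new padding on each arch.
static_assert(24 - sizeof(int32_t) == 20, "e.g. MIPS64 / LOONG64 / S390X");
static_assert(12 - sizeof(int32_t) == 8, "e.g. ARM / IA32 / MIPS");
static_assert(56 - sizeof(int32_t) == 52, "e.g. X64 without pointer compression");

int main() { return 0; }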
@@ -196,6 +196,8 @@ RUNTIME_FUNCTION(Runtime_NotifyDeoptimized) {

   // Make sure to materialize objects before causing any allocation.
   deoptimizer->MaterializeHeapObjects();
+  BytecodeOffset deopt_exit_bytecode_offset =
+      deoptimizer->deopt_exit_bytecode_offset();
   delete deoptimizer;

   // Ensure the context register is updated for materialized objects.
@@ -205,7 +207,33 @@ RUNTIME_FUNCTION(Runtime_NotifyDeoptimized) {

   // Invalidate the underlying optimized code on eager and soft deopts.
   if (type == DeoptimizeKind::kEager || type == DeoptimizeKind::kSoft) {
-    Deoptimizer::DeoptimizeFunction(*function, *optimized_code);
+    // Non-OSR'd code is deoptimized unconditionally.
+    //
+    // For OSR'd code, we keep the optimized code around if deoptimization
+    // occurs outside the outermost loop (containing the loop that triggered OSR
+    // compilation). The reasoning is that OSR is intended to speed up the
+    // long-running loop; so if the deoptimization occurs outside this loop it
+    // is still worth jumping to the OSR'd code on the next run. The reduced
+    // cost of the loop should pay for the deoptimization costs.
+    if (!optimized_code->osr_offset().IsNone()) {
+      interpreter::BytecodeArrayIterator iterator(
+          Handle<BytecodeArray>(function->shared().GetBytecodeArray(isolate),
+                                isolate),
+          optimized_code->osr_offset().ToInt());
+      // Iterate forward from current JumpLoop to the outermost JumpLoop.
+      for (; !iterator.done(); iterator.Advance()) {
+        if (deopt_exit_bytecode_offset.ToInt() <= iterator.current_offset()) {
+          Deoptimizer::DeoptimizeFunction(*function, *optimized_code);
+          break;
+        }
+        if (iterator.current_bytecode() == interpreter::Bytecode::kJumpLoop &&
+            iterator.GetJumpLoopNestingLevel() == 0) {
+          break;
+        }
+      }
+    } else {
+      Deoptimizer::DeoptimizeFunction(*function, *optimized_code);
+    }
   }

   return ReadOnlyRoots(isolate).undefined_value();