[maglev] Implement Maglev-to-Turbofan tiering

ML-TF tiering remains very similar to Ignition-TF tiering (see the
sketch after this list):

- When the interrupt budget is exhausted, enter the TieringManager
  which potentially decides to tier up and sets the appropriate
  TieringState on the FeedbackVector.
- The prologue on ML function entry recognizes the TieringState (and
  also available cached TF code) and starts compilation and/or jumps
  into optimized code.
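
A minimal standalone sketch of this loop (all names, the budget value, and
the instant-compilation shortcut are illustrative stand-ins, not V8's actual
TieringState machinery):

#include <cstdio>

// Toy model of the tier-up loop described above.
enum class Tier { kMaglev, kTurbofan };
enum class TieringState { kNone, kRequestTurbofan, kInProgress };

struct Function {
  Tier active_tier = Tier::kMaglev;
  TieringState tiering_state = TieringState::kNone;
  bool has_cached_turbofan_code = false;
  int interrupt_budget = 2;
};

// Runs when the interrupt budget is exhausted; may request a tier-up.
void TieringManagerOnInterrupt(Function& f) {
  if (--f.interrupt_budget > 0) return;
  if (f.tiering_state == TieringState::kNone) {
    f.tiering_state = TieringState::kRequestTurbofan;
  }
}

// Runs in the function-entry prologue on every call.
void EntryPrologue(Function& f) {
  if (f.has_cached_turbofan_code) {
    f.active_tier = Tier::kTurbofan;  // Jump into optimized code.
  } else if (f.tiering_state == TieringState::kRequestTurbofan) {
    f.tiering_state = TieringState::kInProgress;  // Start compilation...
    f.has_cached_turbofan_code = true;            // ...and pretend it's done.
  }
}

int main() {
  Function f;
  for (int call = 0; call < 4; ++call) {
    EntryPrologue(f);
    TieringManagerOnInterrupt(f);
  }
  std::printf("tier: %s\n", f.active_tier == Tier::kTurbofan ? "TF" : "ML");
}

The real prologue (see the code-generator hunk below) additionally tail-calls
already-cached TF code directly rather than re-entering the ML frame.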

TODOs:

- ML-to-TF OSR is not yet supported.
- ML code is no longer cached on the FeedbackVector.
- Tracing is rudimentary.
- The generated function-entry prologue is fairly large and must be
  either minimized or extracted into a builtin.
- Tiering involving Sparkplug is not entirely robust yet (Sparkplug
  code may be installed with unexpected timing).

Bug: v8:7700
Change-Id: I86b0692477f51b9967f318a4093bc874344120b3
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3629149
Reviewed-by: Victor Gomes <victorgomes@chromium.org>
Commit-Queue: Jakob Linke <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/main@{#82351}
Author: Jakob Linke
Date:   2022-08-10 15:15:27 +02:00 (committed by V8 LUCI CQ)
Parent: 012fa89948
Commit: 0bc4b452af

11 changed files with 179 additions and 22 deletions


@@ -1184,7 +1184,7 @@ MaybeHandle<CodeT> CompileMaglev(Isolate* isolate, Handle<JSFunction> function,
   CHECK_EQ(status, CompilationJob::SUCCEEDED);  // TODO(v8:7700): Use status.

   if (IsSynchronous(mode)) {
-    function->reset_tiering_state();
+    ResetTieringState(*job->function(), osr_offset);
     {
       // Park the main thread Isolate here, to be in the same state as
       // background threads.
@@ -1201,9 +1201,16 @@ MaybeHandle<CodeT> CompileMaglev(Isolate* isolate, Handle<JSFunction> function,
     }
     RecordMaglevFunctionCompilation(isolate, function);
-    const bool kIsContextSpecializing = false;
-    OptimizedCodeCache::Insert(isolate, *function, osr_offset, function->code(),
-                               kIsContextSpecializing);
+    // TODO(v8:7700): Re-enable caching in a separate feedback vector slot. We
+    // probably shouldn't reuse the same slot as TF since that makes tiering
+    // logic from ML to TF more involved (it'd have to check the cached code
+    // kind).
+    // const bool kIsContextSpecializing = false;
+    // OptimizedCodeCache::Insert(isolate, *function, osr_offset,
+    //                            function->code(),
+    //                            kIsContextSpecializing);
     return handle(function->code(), isolate);
   }
@@ -3974,12 +3981,37 @@ bool Compiler::FinalizeMaglevCompilationJob(maglev::MaglevCompilationJob* job,
                                             Isolate* isolate) {
 #ifdef V8_ENABLE_MAGLEV
   VMState<COMPILER> state(isolate);
-  const bool kIsContextSpecializing = false;
-  OptimizedCodeCache::Insert(isolate, *job->function(), BytecodeOffset::None(),
-                             job->function()->code(), kIsContextSpecializing);
-  RecordMaglevFunctionCompilation(isolate, job->function());
-#endif
+
+  const CompilationJob::Status status = job->FinalizeJob(isolate);
+
+  // TODO(v8:7700): Use the result and check if job succeed
+  // when all the bytecodes are implemented.
+  USE(status);
+
+  // TODO(v8:7700): Re-enable caching in a separate feedback vector slot. We
+  // probably shouldn't reuse the same slot as TF since that makes tiering
+  // logic from ML to TF more involved (it'd have to check the cached code
+  // kind).
+  // const bool kIsContextSpecializing = false;
+  // OptimizedCodeCache::Insert(isolate, *job->function(),
+  //                            BytecodeOffset::None(),
+  //                            job->function()->code(),
+  //                            kIsContextSpecializing);
+
+  static constexpr BytecodeOffset osr_offset = BytecodeOffset::None();
+  ResetTieringState(*job->function(), osr_offset);
+
+  if (status == CompilationJob::SUCCEEDED) {
+    // Note the finalized Code object has already been installed on the
+    // function by MaglevCompilationJob::FinalizeJobImpl.
+    RecordMaglevFunctionCompilation(isolate, job->function());
+  }
+
+  return status;
+#else
+  return CompilationJob::SUCCEEDED;
+#endif
 }

 // static


@@ -263,7 +263,8 @@ void TieringManager::MaybeOptimizeFrame(JSFunction function,
   const bool is_marked_for_any_optimization =
       (static_cast<uint32_t>(tiering_state) & kNoneOrInProgressMask) != 0;
-  if (is_marked_for_any_optimization || function.HasAvailableOptimizedCode()) {
+  if (is_marked_for_any_optimization ||
+      function.HasAvailableHigherTierCodeThan(code_kind)) {
     // OSR kicks in only once we've previously decided to tier up, but we are
     // still in the unoptimized frame (this implies a long-running loop).
     if (SmallEnoughForOSR(isolate_, function)) {
@@ -276,7 +277,7 @@ void TieringManager::MaybeOptimizeFrame(JSFunction function,
   }

   DCHECK(!is_marked_for_any_optimization &&
-         !function.HasAvailableOptimizedCode());
+         !function.HasAvailableHigherTierCodeThan(code_kind));

   OptimizationDecision d = ShouldOptimize(function, code_kind);
   if (d.should_optimize()) Optimize(function, d);
 }


@@ -441,6 +441,38 @@ class MaglevCodeGeneratingNodeProcessor {
     __ BailoutIfDeoptimized(rbx);

+    // Tiering support.
+    {
+      // Scratch registers. Don't clobber regs related to the calling
+      // convention (e.g. kJavaScriptCallArgCountRegister).
+      Register optimization_state = rcx;
+      Register feedback_vector = r9;
+
+      // Load the feedback vector.
+      __ LoadTaggedPointerField(
+          feedback_vector,
+          FieldOperand(kJSFunctionRegister, JSFunction::kFeedbackCellOffset));
+      __ LoadTaggedPointerField(
+          feedback_vector, FieldOperand(feedback_vector, Cell::kValueOffset));
+      __ AssertFeedbackVector(feedback_vector);
+
+      Label has_optimized_code_or_state, next;
+      __ LoadTieringStateAndJumpIfNeedsProcessing(
+          optimization_state, feedback_vector, &has_optimized_code_or_state);
+      __ jmp(&next);
+
+      __ bind(&has_optimized_code_or_state);
+      {
+        ASM_CODE_COMMENT_STRING(masm(), "Optimized marker check");
+        __ MaybeOptimizeCodeOrTailCallOptimizedCodeSlot(
+            optimization_state, feedback_vector, kJSFunctionRegister,
+            JumpMode::kJump);
+        __ Trap();
+      }
+
+      __ bind(&next);
+    }
+
     __ EnterFrame(StackFrame::MAGLEV);

     // Save arguments in frame.
@@ -450,10 +482,6 @@
     __ Push(kJSFunctionRegister);              // Callee's JS function.
     __ Push(kJavaScriptCallArgCountRegister);  // Actual argument count.

-    // TODO(v8:7700): Handle TieringState and cached optimized code. See also:
-    // LoadTieringStateAndJumpIfNeedsProcessing and
-    // MaybeOptimizeCodeOrTailCallOptimizedCodeSlot.
-
     code_gen_state_->set_untagged_slots(graph->untagged_stack_slots());
     code_gen_state_->set_tagged_slots(graph->tagged_stack_slots());


@@ -188,12 +188,7 @@ void MaglevConcurrentDispatcher::FinalizeFinishedJobs() {
   while (!outgoing_queue_.IsEmpty()) {
     std::unique_ptr<MaglevCompilationJob> job;
     outgoing_queue_.Dequeue(&job);
-    CompilationJob::Status status = job->FinalizeJob(isolate_);
-    // TODO(v8:7700): Use the result and check if job succeed
-    // when all the bytecodes are implemented.
-    if (status == CompilationJob::SUCCEEDED) {
-      Compiler::FinalizeMaglevCompilationJob(job.get(), isolate_);
-    }
+    Compiler::FinalizeMaglevCompilationJob(job.get(), isolate_);
   }
 }


@@ -90,7 +90,7 @@ inline constexpr bool CodeKindCanOSR(CodeKind kind) {
 }

 inline constexpr bool CodeKindCanTierUp(CodeKind kind) {
-  return CodeKindIsUnoptimizedJSFunction(kind);
+  return CodeKindIsUnoptimizedJSFunction(kind) || kind == CodeKind::MAGLEV;
 }

 // TODO(jgruber): Rename or remove this predicate. Currently it means 'is this


@@ -65,6 +65,14 @@ bool JSFunction::HasAttachedOptimizedCode() const {
   return (result & kOptimizedJSFunctionCodeKindsMask) != 0;
 }

+bool JSFunction::HasAvailableHigherTierCodeThan(CodeKind kind) const {
+  const int kind_as_int_flag = static_cast<int>(CodeKindToCodeKindFlag(kind));
+  DCHECK(base::bits::IsPowerOfTwo(kind_as_int_flag));
+  // Smear right - any higher present bit means we have a higher tier available.
+  const int mask = kind_as_int_flag | (kind_as_int_flag - 1);
+  return (GetAvailableCodeKinds() & static_cast<CodeKinds>(~mask)) != 0;
+}
+
 bool JSFunction::HasAvailableOptimizedCode() const {
   CodeKinds result = GetAvailableCodeKinds();
   return (result & kOptimizedJSFunctionCodeKindsMask) != 0;
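
To see the smear-right trick above in isolation, here is a standalone sketch
(the flag values are illustrative, not V8's real CodeKindFlag encoding):

#include <cassert>
#include <cstdint>

// One bit per tier, ordered from lowest to highest.
enum : uint32_t {
  kIgnition = 1u << 0,
  kSparkplug = 1u << 1,
  kMaglev = 1u << 2,
  kTurbofan = 1u << 3,
};

// True iff `available` contains a kind strictly higher than `kind`.
bool HasHigherTierThan(uint32_t available, uint32_t kind) {
  // Smear right: the mask covers `kind` itself and every lower tier.
  const uint32_t mask = kind | (kind - 1);
  return (available & ~mask) != 0;
}

int main() {
  // Maglev + Turbofan available: Turbofan sits above Maglev...
  assert(HasHigherTierThan(kMaglev | kTurbofan, kMaglev));
  // ...but nothing sits above Turbofan itself.
  assert(!HasHigherTierThan(kMaglev | kTurbofan, kTurbofan));
  return 0;
}

Because the flag bits are ordered by tier, "any bit above the current kind"
is exactly "a higher tier is available", which is what the MaybeOptimizeFrame
call sites above rely on.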


@@ -152,6 +152,8 @@ class JSFunction : public TorqueGeneratedJSFunction<
   // been already deoptimized but its code() still needs to be unlinked, which
   // will happen on its next activation.
+  bool HasAvailableHigherTierCodeThan(CodeKind kind) const;
+
   // True, iff any generated code kind is attached/available to this function.
   V8_EXPORT_PRIVATE bool HasAttachedOptimizedCode() const;
   bool HasAvailableOptimizedCode() const;


@@ -448,6 +448,20 @@ RUNTIME_FUNCTION(Runtime_BenchMaglev) {
 }
 #endif  // V8_ENABLE_MAGLEV

+RUNTIME_FUNCTION(Runtime_ActiveTierIsIgnition) {
+  HandleScope scope(isolate);
+  DCHECK_EQ(args.length(), 1);
+  Handle<JSFunction> function = args.at<JSFunction>(0);
+  return isolate->heap()->ToBoolean(function->ActiveTierIsIgnition());
+}
+
+RUNTIME_FUNCTION(Runtime_ActiveTierIsSparkplug) {
+  HandleScope scope(isolate);
+  DCHECK_EQ(args.length(), 1);
+  Handle<JSFunction> function = args.at<JSFunction>(0);
+  return isolate->heap()->ToBoolean(function->ActiveTierIsBaseline());
+}
+
 RUNTIME_FUNCTION(Runtime_ActiveTierIsMaglev) {
   HandleScope scope(isolate);
   DCHECK_EQ(args.length(), 1);
@@ -455,6 +469,28 @@ RUNTIME_FUNCTION(Runtime_ActiveTierIsMaglev) {
   return isolate->heap()->ToBoolean(function->ActiveTierIsMaglev());
 }

+RUNTIME_FUNCTION(Runtime_ActiveTierIsTurbofan) {
+  HandleScope scope(isolate);
+  DCHECK_EQ(args.length(), 1);
+  Handle<JSFunction> function = args.at<JSFunction>(0);
+  return isolate->heap()->ToBoolean(function->ActiveTierIsTurbofan());
+}
+
+RUNTIME_FUNCTION(Runtime_IsSparkplugEnabled) {
+  DCHECK_EQ(args.length(), 0);
+  return isolate->heap()->ToBoolean(FLAG_sparkplug);
+}
+
+RUNTIME_FUNCTION(Runtime_IsMaglevEnabled) {
+  DCHECK_EQ(args.length(), 0);
+  return isolate->heap()->ToBoolean(FLAG_maglev);
+}
+
+RUNTIME_FUNCTION(Runtime_IsTurbofanEnabled) {
+  DCHECK_EQ(args.length(), 0);
+  return isolate->heap()->ToBoolean(FLAG_turbofan);
+}
+
 #ifdef V8_ENABLE_MAGLEV
 RUNTIME_FUNCTION(Runtime_OptimizeMaglevOnNextCall) {
   HandleScope scope(isolate);


@@ -477,7 +477,10 @@ namespace internal {
   F(Abort, 1, 1) \
   F(AbortCSADcheck, 1, 1) \
   F(AbortJS, 1, 1) \
+  F(ActiveTierIsIgnition, 1, 1) \
+  F(ActiveTierIsSparkplug, 1, 1) \
   F(ActiveTierIsMaglev, 1, 1) \
+  F(ActiveTierIsTurbofan, 1, 1) \
   F(ArrayIteratorProtector, 0, 1) \
   F(ArraySpeciesProtector, 0, 1) \
   F(BaselineOsr, -1, 1) \
@@ -541,8 +544,11 @@ namespace internal {
   F(IsConcurrentRecompilationSupported, 0, 1) \
   F(IsDictPropertyConstTrackingEnabled, 0, 1) \
   F(IsInternalizedString, 1, 1) \
+  F(IsMaglevEnabled, 0, 1) \
   F(IsSameHeapObject, 2, 1) \
   F(IsSharedString, 1, 1) \
+  F(IsSparkplugEnabled, 0, 1) \
+  F(IsTurbofanEnabled, 0, 1) \
   F(MapIteratorProtector, 0, 1) \
   F(NeverOptimizeFunction, 1, 1) \
   F(NewRegExpWithBacktrackLimit, 3, 1) \


@@ -0,0 +1,46 @@
+// Copyright 2022 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// Flags: --allow-natives-syntax --maglev --no-stress-opt
+
+function f(x) {
+  var y = 0;
+  for (var i = 0; i < x; i++) {
+    y = 1;
+  }
+  return y;
+}
+
+let keep_going = 100000;  // A counter to avoid test hangs on failure.
+
+function g() {
+  // Test that normal tiering (without OptimizeFooOnNextCall) works.
+  // We test the entire pipeline, i.e. Ignition-SP-ML-TF.
+  f(10);
+
+  // TODO(v8:7700): Enable.
+  /*
+  if (%IsSparkplugEnabled()) {
+    while (!%ActiveTierIsSparkplug(f) && --keep_going) f(10);
+    assertTrue(%ActiveTierIsSparkplug(f));
+  }
+
+  if (%IsMaglevEnabled()) {
+    while (!%ActiveTierIsMaglev(f) && --keep_going) f(10);
+    assertTrue(%ActiveTierIsMaglev(f));
+  }
+  */
+
+  if (%IsTurbofanEnabled()) {
+    while (!%ActiveTierIsTurbofan(f) && --keep_going) f(10);
+    assertTrue(%ActiveTierIsTurbofan(f));
+
+    f(10);
+    assertTrue(%ActiveTierIsTurbofan(f));
+  }
+}
+%NeverOptimizeFunction(g);
+
+g();


@@ -607,6 +607,9 @@
   # Tests that need to run sequentially (e.g. due to memory consumption).
   'wasm/asm-wasm': [PASS, HEAVY],

+  # TODO(v8:7700): Fix leaks involving std containers in Zone objects.
+  'maglev/tier-to-ml-to-tf': [SKIP],
+
 }],  # 'asan == True'

 ##############################################################################