PPC/S390: Reland "[osr] Use the new OSR cache"

Port 9145388055

Original Commit Message:

  This is a reland of commit 91da38831d

  Original change's description:
  > Fixed: Use an X register for JumpIfCodeTIsMarkedForDeoptimization
  > on arm64.
  > Bug: v8:12161

Change-Id: I6e63bd5995340bac32654ef12c52d25b496140e3
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3607997
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#80194}
This commit is contained in:
Milad Fa 2022-04-26 14:22:23 -04:00 committed by V8 LUCI CQ
parent cd427d98e6
commit c575e8ae97
8 changed files with 193 additions and 124 deletions

View File

@ -532,6 +532,32 @@ void BaselineAssembler::StoreTaggedFieldNoWriteBarrier(Register target,
__ StoreTaggedField(value, FieldMemOperand(target, offset), r0);
}
// Tries to fetch cached optimized OSR code from the weak reference held in
// |slot| of |feedback_vector|.
//  - Branches to |on_result| (value left in |scratch_and_result|) when the
//    deoptimization-mark test leaves the "equal" condition in cr0; per the
//    inline comment below this is the "not marked for deoptimization" case.
//  - Otherwise the slot is overwritten with the cleared weak value and
//    execution falls through with |scratch_and_result| set to 0.
// LoadWeakValue receives the local label as its bail-out target (presumably
// taken when the weak reference is already cleared -- confirm against
// MacroAssembler::LoadWeakValue); that path also ends with the result zeroed.
void BaselineAssembler::TryLoadOptimizedOsrCode(Register scratch_and_result,
                                                Register feedback_vector,
                                                FeedbackSlot slot,
                                                Label* on_result,
                                                Label::Distance) {
  // The load and the clearing store both address the same vector element.
  const auto slot_offset = FeedbackVector::OffsetOfElementAt(slot.ToInt());

  Label no_code;
  LoadTaggedPointerField(scratch_and_result, feedback_vector, slot_offset);
  __ LoadWeakValue(scratch_and_result, scratch_and_result, &no_code);

  // Is it marked_for_deoptimization? If yes, clear the slot.
  {
    ScratchRegisterScope scratch_scope(this);
    Register tmp = scratch_scope.AcquireScratch();
    __ TestCodeTIsMarkedForDeoptimization(scratch_and_result, tmp, r0);
    __ beq(on_result, cr0);
    __ mov(tmp, __ ClearedValue());
    StoreTaggedFieldNoWriteBarrier(feedback_vector, slot_offset, tmp);
  }

  __ bind(&no_code);
  Move(scratch_and_result, 0);
}
void BaselineAssembler::AddToInterruptBudgetAndJumpIfNotExceeded(
int32_t weight, Label* skip_interrupt_label) {
ASM_CODE_COMMENT(masm_);

View File

@ -530,6 +530,32 @@ void BaselineAssembler::StoreTaggedFieldNoWriteBarrier(Register target,
__ StoreTaggedField(value, FieldMemOperand(target, offset), r0);
}
// Tries to fetch cached optimized OSR code from the weak reference held in
// |slot| of |feedback_vector|.
//  - Branches to |on_result| (value left in |scratch_and_result|) when the
//    deoptimization-mark test yields the "equal" condition; per the inline
//    comment below this is the "not marked for deoptimization" case.
//  - Otherwise the slot is overwritten with the cleared weak value and
//    execution falls through with |scratch_and_result| set to 0.
// LoadWeakValue receives the local label as its bail-out target (presumably
// taken when the weak reference is already cleared -- confirm against
// MacroAssembler::LoadWeakValue); that path also ends with the result zeroed.
void BaselineAssembler::TryLoadOptimizedOsrCode(Register scratch_and_result,
                                                Register feedback_vector,
                                                FeedbackSlot slot,
                                                Label* on_result,
                                                Label::Distance) {
  // The load and the clearing store both address the same vector element.
  const auto slot_offset = FeedbackVector::OffsetOfElementAt(slot.ToInt());

  Label no_code;
  LoadTaggedPointerField(scratch_and_result, feedback_vector, slot_offset);
  __ LoadWeakValue(scratch_and_result, scratch_and_result, &no_code);

  // Is it marked_for_deoptimization? If yes, clear the slot.
  {
    ScratchRegisterScope scratch_scope(this);
    Register tmp = scratch_scope.AcquireScratch();
    __ TestCodeTIsMarkedForDeoptimization(scratch_and_result, tmp);
    __ beq(on_result);
    __ mov(tmp, __ ClearedValue());
    StoreTaggedFieldNoWriteBarrier(feedback_vector, slot_offset, tmp);
  }

  __ bind(&no_code);
  Move(scratch_and_result, 0);
}
void BaselineAssembler::AddToInterruptBudgetAndJumpIfNotExceeded(
int32_t weight, Label* skip_interrupt_label) {
ASM_CODE_COMMENT(masm_);

View File

@ -79,16 +79,28 @@ void Generate_OSREntry(MacroAssembler* masm, Register entry_address,
__ Ret();
}
void ResetBytecodeAgeAndOsrState(MacroAssembler* masm, Register bytecode_array,
Register scratch) {
// Reset the bytecode age and OSR state (optimized to a single write).
static_assert(BytecodeArray::kOsrStateAndBytecodeAgeAreContiguous32Bits);
void ResetBytecodeAge(MacroAssembler* masm, Register bytecode_array,
Register scratch) {
STATIC_ASSERT(BytecodeArray::kNoAgeBytecodeAge == 0);
DCHECK(!AreAliased(bytecode_array, scratch));
__ mov(scratch, Operand(0));
__ StoreU32(scratch,
FieldMemOperand(bytecode_array,
BytecodeArray::kOsrUrgencyAndInstallTargetOffset),
r0);
__ StoreU16(
scratch,
FieldMemOperand(bytecode_array, BytecodeArray::kBytecodeAgeOffset), r0);
}
// Rewrites the OSR state byte of |feedback_vector| in place, keeping only the
// bits selected by FeedbackVector::MaybeHasOptimizedOsrCodeBit::kMask. All
// other bits of that byte (the OSR urgency, going by this helper's name) end
// up zero. |scratch1| carries the byte; |scratch2| is handed to the load/store
// helpers as their scratch register.
void ResetFeedbackVectorOsrUrgency(MacroAssembler* masm,
                                   Register feedback_vector, Register scratch1,
                                   Register scratch2) {
  DCHECK(!AreAliased(feedback_vector, scratch1));

  // One operand describes the field for both the read and the write-back.
  const auto osr_state_field =
      FieldMemOperand(feedback_vector, FeedbackVector::kOsrStateOffset);

  __ LoadU8(scratch1, osr_state_field, scratch2);
  __ andi(scratch1, scratch1,
          Operand(FeedbackVector::MaybeHasOptimizedOsrCodeBit::kMask));
  __ StoreU8(scratch1, osr_state_field, scratch2);
}
// Restarts execution either at the current or next (in execution order)
@ -216,8 +228,7 @@ void Generate_BaselineOrInterpreterEntry(MacroAssembler* masm,
if (is_osr) {
Register scratch = ip;
ResetBytecodeAgeAndOsrState(masm, kInterpreterBytecodeArrayRegister,
scratch);
ResetBytecodeAge(masm, kInterpreterBytecodeArrayRegister, scratch);
Generate_OSREntry(masm, code_obj, Code::kHeaderSize - kHeapObjectTag);
} else {
__ AddS64(code_obj, code_obj, Operand(Code::kHeaderSize - kHeapObjectTag));
@ -391,37 +402,15 @@ enum class OsrSourceTier {
};
void OnStackReplacement(MacroAssembler* masm, OsrSourceTier source,
Register current_loop_depth,
Register encoded_current_bytecode_offset,
Register osr_urgency_and_install_target) {
static constexpr Register scratch = r6;
DCHECK(!AreAliased(scratch, current_loop_depth,
encoded_current_bytecode_offset,
osr_urgency_and_install_target));
// OSR based on urgency, i.e. is the OSR urgency greater than the current
// loop depth?
Label try_osr;
STATIC_ASSERT(BytecodeArray::OsrUrgencyBits::kShift == 0);
Register urgency = scratch;
__ andi(urgency, osr_urgency_and_install_target,
Operand(BytecodeArray::OsrUrgencyBits::kMask));
__ CmpU64(urgency, current_loop_depth);
__ bgt(&try_osr);
// OSR based on the install target offset, i.e. does the current bytecode
// offset match the install target offset?
static constexpr int kMask = BytecodeArray::OsrInstallTargetBits::kMask;
Register install_target = osr_urgency_and_install_target;
__ andi(install_target, osr_urgency_and_install_target, Operand(kMask));
__ CmpU64(install_target, encoded_current_bytecode_offset);
__ beq(&try_osr);
// Neither urgency nor the install target triggered, return to the caller.
// Note: the return value must be nullptr or a valid Code object.
__ mov(r3, Operand(0));
__ Ret(0);
__ bind(&try_osr);
Register maybe_target_code) {
Label jump_to_optimized_code;
{
// If maybe_target_code is not null, no need to call into runtime. A
// precondition here is: if maybe_target_code is a Code object, it must NOT
// be marked_for_deoptimization (callers must ensure this).
__ CmpSmiLiteral(maybe_target_code, Smi::zero(), r0);
__ bne(&jump_to_optimized_code);
}
ASM_CODE_COMMENT(masm);
{
@ -430,12 +419,12 @@ void OnStackReplacement(MacroAssembler* masm, OsrSourceTier source,
}
// If the code object is null, just return to the caller.
Label skip;
__ CmpSmiLiteral(r3, Smi::zero(), r0);
__ bne(&skip);
__ bne(&jump_to_optimized_code);
__ Ret();
__ bind(&skip);
__ bind(&jump_to_optimized_code);
DCHECK_EQ(maybe_target_code, r3); // Already in the right spot.
if (source == OsrSourceTier::kInterpreter) {
// Drop the handler frame that is sitting on top of the actual
@ -1192,16 +1181,12 @@ static void TailCallOptimizedCodeSlot(MacroAssembler* masm,
// Check if the optimized code is marked for deopt. If it is, call the
// runtime to clear it.
__ LoadTaggedPointerField(
scratch,
FieldMemOperand(optimized_code_entry, Code::kCodeDataContainerOffset),
r0);
__ LoadS32(
scratch,
FieldMemOperand(scratch, CodeDataContainer::kKindSpecificFlagsOffset),
r0);
__ TestBit(scratch, Code::kMarkedForDeoptimizationBit, r0);
__ bne(&heal_optimized_code_slot, cr0);
{
UseScratchRegisterScope temps(masm);
__ TestCodeTIsMarkedForDeoptimization(optimized_code_entry, temps.Acquire(),
scratch);
__ bne(&heal_optimized_code_slot, cr0);
}
// Optimized code is good, get it into the closure and link the closure
// into the optimized functions list, then tail call the optimized code.
@ -1394,6 +1379,8 @@ void Builtins::Generate_BaselineOutOfLinePrologue(MacroAssembler* masm) {
&has_optimized_code_or_state);
}
{ ResetFeedbackVectorOsrUrgency(masm, feedback_vector, r11, r0); }
// Increment invocation count for the function.
{
Register invocation_count = r13;
@ -1429,7 +1416,7 @@ void Builtins::Generate_BaselineOutOfLinePrologue(MacroAssembler* masm) {
// the frame, so load it into a register.
Register bytecodeArray = descriptor.GetRegisterParameter(
BaselineOutOfLinePrologueDescriptor::kInterpreterBytecodeArray);
ResetBytecodeAgeAndOsrState(masm, bytecodeArray, r13);
ResetBytecodeAge(masm, bytecodeArray, r13);
__ Push(argc, bytecodeArray);
@ -1555,6 +1542,11 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) {
LoadTieringStateAndJumpIfNeedsProcessing(
masm, optimization_state, feedback_vector, &has_optimized_code_or_state);
{
UseScratchRegisterScope temps(masm);
ResetFeedbackVectorOsrUrgency(masm, feedback_vector, ip, r0);
}
Label not_optimized;
__ bind(&not_optimized);
@ -1578,7 +1570,7 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) {
FrameScope frame_scope(masm, StackFrame::MANUAL);
__ PushStandardFrame(closure);
ResetBytecodeAgeAndOsrState(masm, kInterpreterBytecodeArrayRegister, r8);
ResetBytecodeAge(masm, kInterpreterBytecodeArrayRegister, r8);
// Load initial bytecode offset.
__ mov(kInterpreterBytecodeOffsetRegister,
@ -2072,24 +2064,20 @@ void Builtins::Generate_NotifyDeoptimized(MacroAssembler* masm) {
void Builtins::Generate_InterpreterOnStackReplacement(MacroAssembler* masm) {
using D = InterpreterOnStackReplacementDescriptor;
STATIC_ASSERT(D::kParameterCount == 3);
STATIC_ASSERT(D::kParameterCount == 1);
OnStackReplacement(masm, OsrSourceTier::kInterpreter,
D::CurrentLoopDepthRegister(),
D::EncodedCurrentBytecodeOffsetRegister(),
D::OsrUrgencyAndInstallTargetRegister());
D::MaybeTargetCodeRegister());
}
#if ENABLE_SPARKPLUG
void Builtins::Generate_BaselineOnStackReplacement(MacroAssembler* masm) {
using D = BaselineOnStackReplacementDescriptor;
STATIC_ASSERT(D::kParameterCount == 3);
STATIC_ASSERT(D::kParameterCount == 1);
__ LoadU64(kContextRegister,
MemOperand(fp, BaselineFrameConstants::kContextOffset), r0);
OnStackReplacement(masm, OsrSourceTier::kBaseline,
D::CurrentLoopDepthRegister(),
D::EncodedCurrentBytecodeOffsetRegister(),
D::OsrUrgencyAndInstallTargetRegister());
D::MaybeTargetCodeRegister());
}
#endif

View File

@ -83,16 +83,25 @@ void Generate_OSREntry(MacroAssembler* masm, Register entry_address,
__ Ret();
}
void ResetBytecodeAgeAndOsrState(MacroAssembler* masm, Register bytecode_array,
Register scratch) {
// Reset the bytecode age and OSR state (optimized to a single write).
static_assert(BytecodeArray::kOsrStateAndBytecodeAgeAreContiguous32Bits);
void ResetBytecodeAge(MacroAssembler* masm, Register bytecode_array,
Register scratch) {
STATIC_ASSERT(BytecodeArray::kNoAgeBytecodeAge == 0);
DCHECK(!AreAliased(bytecode_array, scratch));
__ mov(r0, Operand(0));
__ StoreU32(r0,
FieldMemOperand(bytecode_array,
BytecodeArray::kOsrUrgencyAndInstallTargetOffset),
scratch);
__ StoreU16(
r0, FieldMemOperand(bytecode_array, BytecodeArray::kBytecodeAgeOffset),
scratch);
}
// Rewrites the OSR state byte of |feedback_vector| in place, keeping only the
// bits selected by FeedbackVector::MaybeHasOptimizedOsrCodeBit::kMask. All
// other bits of that byte (the OSR urgency, going by this helper's name) end
// up zero. |scratch| carries the byte and is clobbered.
void ResetFeedbackVectorOsrUrgency(MacroAssembler* masm,
                                   Register feedback_vector, Register scratch) {
  DCHECK(!AreAliased(feedback_vector, scratch));

  // One operand describes the field for both the read and the write-back.
  const auto osr_state_field =
      FieldMemOperand(feedback_vector, FeedbackVector::kOsrStateOffset);

  __ LoadU8(scratch, osr_state_field);
  __ AndP(scratch, scratch,
          Operand(FeedbackVector::MaybeHasOptimizedOsrCodeBit::kMask));
  __ StoreU8(scratch, osr_state_field);
}
// Restarts execution either at the current or next (in execution order)
@ -218,7 +227,7 @@ void Generate_BaselineOrInterpreterEntry(MacroAssembler* masm,
if (is_osr) {
// TODO(pthier): Separate baseline Sparkplug from TF arming and don't
// disarm Sparkplug here.
ResetBytecodeAgeAndOsrState(masm, kInterpreterBytecodeArrayRegister, r1);
ResetBytecodeAge(masm, kInterpreterBytecodeArrayRegister, r1);
Generate_OSREntry(masm, code_obj, Code::kHeaderSize - kHeapObjectTag);
} else {
__ AddS64(code_obj, code_obj, Operand(Code::kHeaderSize - kHeapObjectTag));
@ -256,37 +265,15 @@ enum class OsrSourceTier {
};
void OnStackReplacement(MacroAssembler* masm, OsrSourceTier source,
Register current_loop_depth,
Register encoded_current_bytecode_offset,
Register osr_urgency_and_install_target) {
static constexpr Register scratch = r5;
DCHECK(!AreAliased(scratch, current_loop_depth,
encoded_current_bytecode_offset,
osr_urgency_and_install_target));
// OSR based on urgency, i.e. is the OSR urgency greater than the current
// loop depth?
Label try_osr;
STATIC_ASSERT(BytecodeArray::OsrUrgencyBits::kShift == 0);
Register urgency = scratch;
__ AndP(urgency, osr_urgency_and_install_target,
Operand(BytecodeArray::OsrUrgencyBits::kMask));
__ CmpU64(urgency, current_loop_depth);
__ bgt(&try_osr);
// OSR based on the install target offset, i.e. does the current bytecode
// offset match the install target offset?
static constexpr int kMask = BytecodeArray::OsrInstallTargetBits::kMask;
Register install_target = osr_urgency_and_install_target;
__ AndP(install_target, osr_urgency_and_install_target, Operand(kMask));
__ CmpU64(install_target, encoded_current_bytecode_offset);
__ beq(&try_osr);
// Neither urgency nor the install target triggered, return to the caller.
// Note: the return value must be nullptr or a valid Code object.
__ mov(r2, Operand(0));
__ Ret(0);
__ bind(&try_osr);
Register maybe_target_code) {
Label jump_to_optimized_code;
{
// If maybe_target_code is not null, no need to call into runtime. A
// precondition here is: if maybe_target_code is a Code object, it must NOT
// be marked_for_deoptimization (callers must ensure this).
__ CmpSmiLiteral(maybe_target_code, Smi::zero(), r0);
__ bne(&jump_to_optimized_code);
}
ASM_CODE_COMMENT(masm);
{
@ -295,12 +282,12 @@ void OnStackReplacement(MacroAssembler* masm, OsrSourceTier source,
}
// If the code object is null, just return to the caller.
Label jump_to_returned_code;
__ CmpSmiLiteral(r2, Smi::zero(), r0);
__ bne(&jump_to_returned_code);
__ bne(&jump_to_optimized_code);
__ Ret();
__ bind(&jump_to_returned_code);
__ bind(&jump_to_optimized_code);
DCHECK_EQ(maybe_target_code, r2); // Already in the right spot.
if (source == OsrSourceTier::kInterpreter) {
// Drop the handler frame that is sitting on top of the actual
@ -1223,13 +1210,10 @@ static void TailCallOptimizedCodeSlot(MacroAssembler* masm,
// Check if the optimized code is marked for deopt. If it is, call the
// runtime to clear it.
__ LoadTaggedPointerField(
scratch,
FieldMemOperand(optimized_code_entry, Code::kCodeDataContainerOffset));
__ LoadS32(scratch, FieldMemOperand(
scratch, CodeDataContainer::kKindSpecificFlagsOffset));
__ TestBit(scratch, Code::kMarkedForDeoptimizationBit, r0);
__ bne(&heal_optimized_code_slot);
{
__ TestCodeTIsMarkedForDeoptimization(optimized_code_entry, scratch);
__ bne(&heal_optimized_code_slot);
}
// Optimized code is good, get it into the closure and link the closure
// into the optimized functions list, then tail call the optimized code.
@ -1423,6 +1407,11 @@ void Builtins::Generate_BaselineOutOfLinePrologue(MacroAssembler* masm) {
&has_optimized_code_or_state);
}
{
UseScratchRegisterScope temps(masm);
ResetFeedbackVectorOsrUrgency(masm, feedback_vector, r1);
}
// Increment invocation count for the function.
{
Register invocation_count = r1;
@ -1456,7 +1445,7 @@ void Builtins::Generate_BaselineOutOfLinePrologue(MacroAssembler* masm) {
// the frame, so load it into a register.
Register bytecodeArray = descriptor.GetRegisterParameter(
BaselineOutOfLinePrologueDescriptor::kInterpreterBytecodeArray);
ResetBytecodeAgeAndOsrState(masm, bytecodeArray, r1);
ResetBytecodeAge(masm, bytecodeArray, r1);
__ Push(argc, bytecodeArray);
@ -1583,6 +1572,11 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) {
LoadTieringStateAndJumpIfNeedsProcessing(
masm, optimization_state, feedback_vector, &has_optimized_code_or_state);
{
UseScratchRegisterScope temps(masm);
ResetFeedbackVectorOsrUrgency(masm, feedback_vector, r1);
}
Label not_optimized;
__ bind(&not_optimized);
@ -1600,7 +1594,7 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) {
FrameScope frame_scope(masm, StackFrame::MANUAL);
__ PushStandardFrame(closure);
ResetBytecodeAgeAndOsrState(masm, kInterpreterBytecodeArrayRegister, r1);
ResetBytecodeAge(masm, kInterpreterBytecodeArrayRegister, r1);
// Load the initial bytecode offset.
__ mov(kInterpreterBytecodeOffsetRegister,
@ -3861,24 +3855,20 @@ void Builtins::Generate_DeoptimizationEntry_Lazy(MacroAssembler* masm) {
void Builtins::Generate_InterpreterOnStackReplacement(MacroAssembler* masm) {
using D = InterpreterOnStackReplacementDescriptor;
STATIC_ASSERT(D::kParameterCount == 3);
STATIC_ASSERT(D::kParameterCount == 1);
OnStackReplacement(masm, OsrSourceTier::kInterpreter,
D::CurrentLoopDepthRegister(),
D::EncodedCurrentBytecodeOffsetRegister(),
D::OsrUrgencyAndInstallTargetRegister());
D::MaybeTargetCodeRegister());
}
#if ENABLE_SPARKPLUG
void Builtins::Generate_BaselineOnStackReplacement(MacroAssembler* masm) {
using D = BaselineOnStackReplacementDescriptor;
STATIC_ASSERT(D::kParameterCount == 3);
STATIC_ASSERT(D::kParameterCount == 1);
__ LoadU64(kContextRegister,
MemOperand(fp, BaselineFrameConstants::kContextOffset));
OnStackReplacement(masm, OsrSourceTier::kBaseline,
D::CurrentLoopDepthRegister(),
D::EncodedCurrentBytecodeOffsetRegister(),
D::OsrUrgencyAndInstallTargetRegister());
D::MaybeTargetCodeRegister());
}
#endif

View File

@ -297,6 +297,24 @@ void TurboAssembler::Drop(Register count, Register scratch) {
add(sp, sp, scratch);
}
// Emits a test of Code::kMarkedForDeoptimizationBit for |codet|: loads the
// CodeDataContainer it references, reads that container's kind-specific flags
// and runs TestBit on them. Nothing is returned; callers branch on the test
// outcome right after the call. |scratch1| is clobbered (container pointer,
// then flags word); |scratch2| is passed to each helper as its scratch
// register.
void MacroAssembler::TestCodeTIsMarkedForDeoptimization(Register codet,
                                                        Register scratch1,
                                                        Register scratch2) {
  // |scratch1| plays two roles in sequence; name them for readability.
  Register data_container = scratch1;
  Register flags = scratch1;

  LoadTaggedPointerField(
      data_container, FieldMemOperand(codet, Code::kCodeDataContainerOffset),
      scratch2);
  LoadS32(flags,
          FieldMemOperand(data_container,
                          CodeDataContainer::kKindSpecificFlagsOffset),
          scratch2);
  TestBit(flags, Code::kMarkedForDeoptimizationBit, scratch2);
}
// Returns an immediate Operand built from the pointer bits of
// HeapObjectReference::ClearedValue(isolate()), cast to int32_t.
Operand MacroAssembler::ClearedValue() const {
  const auto cleared_ref = HeapObjectReference::ClearedValue(isolate());
  return Operand(static_cast<int32_t>(cleared_ref.ptr()));
}
void TurboAssembler::Call(Label* target) { b(target, SetLK); }
void TurboAssembler::Push(Handle<HeapObject> handle) {

View File

@ -1361,6 +1361,10 @@ class V8_EXPORT_PRIVATE MacroAssembler : public TurboAssembler {
DecodeField<Field>(reg, reg, rc);
}
// Loads the CodeDataContainer referenced by |codet|, reads its kind-specific
// flags into |scratch1| and tests Code::kMarkedForDeoptimizationBit. Nothing
// is returned; callers branch on the test outcome right after the call.
// |scratch1| is clobbered; |scratch2| is passed to the load/test helpers as
// their scratch register.
void TestCodeTIsMarkedForDeoptimization(Register codet, Register scratch1,
Register scratch2);
// Returns HeapObjectReference::ClearedValue(isolate()).ptr(), cast to
// int32_t, wrapped in an Operand.
Operand ClearedValue() const;
private:
static const int kSmiShift = kSmiTagSize + kSmiShiftSize;

View File

@ -495,6 +495,20 @@ void TurboAssembler::Drop(Register count, Register scratch) {
AddS64(sp, sp, scratch);
}
// Emits a test of Code::kMarkedForDeoptimizationBit for |codet|: loads the
// CodeDataContainer it references, reads that container's kind-specific flags
// and runs TestBit on them. Nothing is returned; callers branch on the test
// outcome right after the call. |scratch| is clobbered: it holds the
// container pointer, then the flags word, and is also the register passed as
// TestBit's third argument.
void MacroAssembler::TestCodeTIsMarkedForDeoptimization(Register codet,
                                                        Register scratch) {
  // |scratch| plays two roles in sequence; name them for readability.
  Register data_container = scratch;
  Register flags = scratch;

  LoadTaggedPointerField(
      data_container, FieldMemOperand(codet, Code::kCodeDataContainerOffset));
  LoadS32(flags,
          FieldMemOperand(data_container,
                          CodeDataContainer::kKindSpecificFlagsOffset));
  TestBit(flags, Code::kMarkedForDeoptimizationBit, scratch);
}
// Returns an immediate Operand built from the pointer bits of
// HeapObjectReference::ClearedValue(isolate()), cast to int32_t.
Operand MacroAssembler::ClearedValue() const {
  const auto cleared_ref = HeapObjectReference::ClearedValue(isolate());
  return Operand(static_cast<int32_t>(cleared_ref.ptr()));
}
void TurboAssembler::Call(Label* target) { b(r14, target); }
void TurboAssembler::Push(Handle<HeapObject> handle) {

View File

@ -1773,6 +1773,9 @@ class V8_EXPORT_PRIVATE MacroAssembler : public TurboAssembler {
RememberedSetAction remembered_set_action = RememberedSetAction::kEmit,
SmiCheck smi_check = SmiCheck::kInline);
// Loads the CodeDataContainer referenced by |codet|, reads its kind-specific
// flags into |scratch| and tests Code::kMarkedForDeoptimizationBit. Nothing
// is returned; callers branch on the test outcome right after the call.
// |scratch| is clobbered.
void TestCodeTIsMarkedForDeoptimization(Register codet, Register scratch);
// Returns HeapObjectReference::ClearedValue(isolate()).ptr(), cast to
// int32_t, wrapped in an Operand.
Operand ClearedValue() const;
private:
static const int kSmiShift = kSmiTagSize + kSmiShiftSize;
// Helper functions for generating invokes.