diff --git a/src/arm/macro-assembler-arm.cc b/src/arm/macro-assembler-arm.cc
index d1896e3d6e..45ab35f0dc 100644
--- a/src/arm/macro-assembler-arm.cc
+++ b/src/arm/macro-assembler-arm.cc
@@ -2344,10 +2344,37 @@ void TurboAssembler::CallCFunctionHelper(Register function,
   }
 #endif
 
+  // Save the frame pointer and PC so that the stack layout remains iterable,
+  // even without an ExitFrame which normally exists between JS and C frames.
+  if (isolate() != nullptr) {
+    Register scratch = r4;
+    Push(scratch);
+
+    Move(scratch, ExternalReference::fast_c_call_caller_pc_address(isolate()));
+    str(pc, MemOperand(scratch));
+    Move(scratch, ExternalReference::fast_c_call_caller_fp_address(isolate()));
+    str(fp, MemOperand(scratch));
+    Pop(scratch);
+  }
+
   // Just call directly. The function called cannot cause a GC, or
   // allow preemption, so the return address in the link register
   // stays correct.
   Call(function);
+
+  if (isolate() != nullptr) {
+    // We don't unset the PC; the FP is the source of truth.
+    Register scratch1 = r4;
+    Register scratch2 = r5;
+    Push(scratch1);
+    Push(scratch2);
+    Move(scratch1, ExternalReference::fast_c_call_caller_fp_address(isolate()));
+    mov(scratch2, Operand::Zero());
+    str(scratch2, MemOperand(scratch1));
+    Pop(scratch2);
+    Pop(scratch1);
+  }
+
   int stack_passed_arguments = CalculateStackPassedWords(
       num_reg_arguments, num_double_arguments);
   if (ActivationFrameAlignment() > kPointerSize) {
diff --git a/src/arm64/macro-assembler-arm64.cc b/src/arm64/macro-assembler-arm64.cc
index 43c035e4e0..16c5303d6e 100644
--- a/src/arm64/macro-assembler-arm64.cc
+++ b/src/arm64/macro-assembler-arm64.cc
@@ -1827,10 +1827,38 @@ void TurboAssembler::CallCFunction(Register function, int num_of_reg_args,
     DCHECK_LE(num_of_double_args + num_of_reg_args, 2);
   }
 
+  // Save the frame pointer and PC so that the stack layout remains iterable,
+  // even without an ExitFrame which normally exists between JS and C frames.
+  if (isolate() != nullptr) {
+    Register scratch1 = x4;
+    Register scratch2 = x5;
+    Push(scratch1, scratch2);
+
+    Label get_pc;
+    Bind(&get_pc);
+    Adr(scratch2, &get_pc);
+
+    Mov(scratch1, ExternalReference::fast_c_call_caller_pc_address(isolate()));
+    Str(scratch2, MemOperand(scratch1));
+    Mov(scratch1, ExternalReference::fast_c_call_caller_fp_address(isolate()));
+    Str(fp, MemOperand(scratch1));
+
+    Pop(scratch2, scratch1);
+  }
+
   // Call directly. The function called cannot cause a GC, or allow preemption,
   // so the return address in the link register stays correct.
   Call(function);
 
+  if (isolate() != nullptr) {
+    // We don't unset the PC; the FP is the source of truth.
+    Register scratch = x4;
+    Push(scratch, xzr);
+    Mov(scratch, ExternalReference::fast_c_call_caller_fp_address(isolate()));
+    Str(xzr, MemOperand(scratch));
+    Pop(xzr, scratch);
+  }
+
   if (num_of_reg_args > kRegisterPassedArguments) {
     // Drop the register passed arguments.
     int claim_slots = RoundUp(num_of_reg_args - kRegisterPassedArguments, 2);
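
All of the per-architecture CallCFunction changes in this patch emit the same bookkeeping pattern around the C call; only the way the current PC is materialized differs (arm stores the pc register directly, arm64 uses Adr on a bound label, ia32 does a call/pop, x64 a leaq of a label, and mips/mips64 read ra after calling a local label). The following is a rough C++ sketch of that runtime effect, not code from the patch; CurrentProgramCounter() and CurrentFramePointer() are hypothetical stand-ins for those per-architecture tricks:

    // Sketch only: mirrors what the generated CallCFunction sequence does.
    void FastCCallBookkeepingSketch(Isolate* isolate, void (*target)()) {
      IsolateData* data = isolate->isolate_data();
      // Record the JS caller's PC and FP so a sampler can seed a stack walk.
      *data->fast_c_call_caller_pc_address() = CurrentProgramCounter();  // hypothetical
      *data->fast_c_call_caller_fp_address() = CurrentFramePointer();    // hypothetical
      target();  // the C call itself; it cannot call back into JS
      // Only the FP is cleared afterwards: a null FP is what signals "no fast
      // C call in progress", so the stale PC is never read.
      *data->fast_c_call_caller_fp_address() = kNullAddress;
    }
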
diff --git a/src/external-reference.cc b/src/external-reference.cc
index 24ec9f0429..e13b204d8a 100644
--- a/src/external-reference.cc
+++ b/src/external-reference.cc
@@ -832,6 +832,18 @@ ExternalReference ExternalReference::wasm_thread_in_wasm_flag_address_address(
       &isolate->thread_local_top()->thread_in_wasm_flag_address_));
 }
 
+ExternalReference ExternalReference::fast_c_call_caller_fp_address(
+    Isolate* isolate) {
+  return ExternalReference(
+      isolate->isolate_data()->fast_c_call_caller_fp_address());
+}
+
+ExternalReference ExternalReference::fast_c_call_caller_pc_address(
+    Isolate* isolate) {
+  return ExternalReference(
+      isolate->isolate_data()->fast_c_call_caller_pc_address());
+}
+
 ExternalReference ExternalReference::fixed_typed_array_base_data_offset() {
   return ExternalReference(reinterpret_cast<void*>(
       FixedTypedArrayBase::kDataOffset - kHeapObjectTag));
diff --git a/src/external-reference.h b/src/external-reference.h
index 2533262990..20c876132d 100644
--- a/src/external-reference.h
+++ b/src/external-reference.h
@@ -72,6 +72,10 @@ class StatsCounter;
   V(debug_restart_fp_address, "Debug::restart_fp_address()")                  \
   V(wasm_thread_in_wasm_flag_address_address,                                 \
     "&Isolate::thread_in_wasm_flag_address")                                  \
+  V(fast_c_call_caller_fp_address,                                            \
+    "IsolateData::fast_c_call_caller_fp_address")                             \
+  V(fast_c_call_caller_pc_address,                                            \
+    "IsolateData::fast_c_call_caller_pc_address")                             \
   EXTERNAL_REFERENCE_LIST_NON_INTERPRETED_REGEXP(V)
 
 #define EXTERNAL_REFERENCE_LIST(V)                                            \
diff --git a/src/frames.cc b/src/frames.cc
index 5ded2eb30f..c38a94e76a 100644
--- a/src/frames.cc
+++ b/src/frames.cc
@@ -226,7 +226,24 @@ SafeStackFrameIterator::SafeStackFrameIterator(
   StackFrame::Type type;
   ThreadLocalTop* top = isolate->thread_local_top();
   bool advance_frame = true;
-  if (IsValidTop(top)) {
+
+  Address fast_c_fp = isolate->isolate_data()->fast_c_call_caller_fp();
+  // 'Fast C calls' are a special type of C call where we call directly from JS
+  // to C without an exit frame in between. The CEntryStub is responsible for
+  // setting Isolate::c_entry_fp, meaning that it won't be set for fast C calls.
+  // To keep the stack iterable, we store the FP and PC of the caller of the
+  // fast C call on the isolate. This is guaranteed to be the topmost JS frame,
+  // because fast C calls cannot call back into JS. We start iterating the stack
+  // from this topmost JS frame.
+  if (fast_c_fp) {
+    DCHECK_NE(kNullAddress, isolate->isolate_data()->fast_c_call_caller_pc());
+    type = StackFrame::Type::OPTIMIZED;
+    top_frame_type_ = type;
+    state.fp = fast_c_fp;
+    state.sp = sp;
+    state.pc_address = isolate->isolate_data()->fast_c_call_caller_pc_address();
+    advance_frame = false;
+  } else if (IsValidTop(top)) {
     type = ExitFrame::GetStateForFramePointer(Isolate::c_entry_fp(top), &state);
     top_frame_type_ = type;
   } else if (IsValidStackAddress(fp)) {
diff --git a/src/ia32/macro-assembler-ia32.cc b/src/ia32/macro-assembler-ia32.cc
index 1902e7dd6c..9a2999440a 100644
--- a/src/ia32/macro-assembler-ia32.cc
+++ b/src/ia32/macro-assembler-ia32.cc
@@ -1830,7 +1830,39 @@ void TurboAssembler::CallCFunction(Register function, int num_arguments) {
     CheckStackAlignment();
   }
 
+  // Save the frame pointer and PC so that the stack layout remains iterable,
+  // even without an ExitFrame which normally exists between JS and C frames.
+  if (isolate() != nullptr) {
+    // Get the current PC via call, pop. This gets the return address pushed to
+    // the stack by call.
+    Label get_pc;
+    call(&get_pc);
+    bind(&get_pc);
+    // Find two caller-saved scratch registers.
+    Register scratch1 = eax;
+    Register scratch2 = ecx;
+    if (function == eax) scratch1 = edx;
+    if (function == ecx) scratch2 = edx;
+    pop(scratch1);
+    mov(ExternalReferenceAsOperand(
+            ExternalReference::fast_c_call_caller_pc_address(isolate()),
+            scratch2),
+        scratch1);
+    mov(ExternalReferenceAsOperand(
+            ExternalReference::fast_c_call_caller_fp_address(isolate()),
+            scratch2),
+        ebp);
+  }
+
   call(function);
+
+  if (isolate() != nullptr) {
+    // We don't unset the PC; the FP is the source of truth.
+    mov(ExternalReferenceAsOperand(
+            ExternalReference::fast_c_call_caller_fp_address(isolate()), edx),
+        Immediate(0));
+  }
+
   if (base::OS::ActivationFrameAlignment() != 0) {
     mov(esp, Operand(esp, num_arguments * kPointerSize));
   } else {
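
The frames.cc hunk above is the consumer of the two new slots. For illustration, here is a hedged sketch of the same check from a sampler's point of view, using the public v8::RegisterState type; the function name and namespace alias are illustrative and not part of the patch:

    namespace i = v8::internal;

    // Sketch only: SafeStackFrameIterator performs the real version of this.
    void SeedRegisterStateSketch(i::Isolate* isolate, v8::RegisterState* state) {
      i::Address fast_fp = isolate->isolate_data()->fast_c_call_caller_fp();
      if (fast_fp != i::kNullAddress) {
        // A fast C call is in progress: the topmost JS frame is the one whose
        // FP/PC were recorded right before TurboAssembler::CallCFunction.
        state->fp = reinterpret_cast<void*>(fast_fp);
        state->pc = reinterpret_cast<void*>(
            isolate->isolate_data()->fast_c_call_caller_pc());
      }
      // Otherwise fall back to c_entry_fp / the sampled registers as before.
    }
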
diff --git a/src/isolate-data.h b/src/isolate-data.h
index 318a65eb8b..8c689d8186 100644
--- a/src/isolate-data.h
+++ b/src/isolate-data.h
@@ -71,6 +71,12 @@ class IsolateData final {
     return kVirtualCallTargetRegisterOffset - kIsolateRootBias;
   }
 
+  // The FP and PC that are saved right before TurboAssembler::CallCFunction.
+  Address* fast_c_call_caller_fp_address() { return &fast_c_call_caller_fp_; }
+  Address* fast_c_call_caller_pc_address() { return &fast_c_call_caller_pc_; }
+  Address fast_c_call_caller_fp() { return fast_c_call_caller_fp_; }
+  Address fast_c_call_caller_pc() { return fast_c_call_caller_pc_; }
+
   // Returns true if this address points to data stored in this instance.
   // If it's the case then the value can be accessed indirectly through the
   // root register.
@@ -100,6 +106,8 @@
   V(kExternalReferenceTableOffset, ExternalReferenceTable::SizeInBytes())     \
   V(kBuiltinsTableOffset, Builtins::builtin_count* kPointerSize)              \
   V(kVirtualCallTargetRegisterOffset, kPointerSize)                           \
+  V(kFastCCallCallerFPOffset, kPointerSize)                                   \
+  V(kFastCCallCallerPCOffset, kPointerSize)                                   \
   /* This padding aligns IsolateData size by 8 bytes. */                      \
   V(kPaddingOffset,                                                           \
     8 + RoundUp<8>(static_cast<int>(kPaddingOffset)) - kPaddingOffset)        \
@@ -138,6 +146,13 @@ class IsolateData final {
   // ia32 (otherwise the arguments adaptor call runs out of registers).
   void* virtual_call_target_register_ = nullptr;
 
+  // Stores the state of the caller for TurboAssembler::CallCFunction so that
+  // the sampling CPU profiler can iterate the stack during such calls. These
+  // are stored on IsolateData so that they can be stored to with only one move
+  // instruction in compiled code.
+  Address fast_c_call_caller_fp_ = kNullAddress;
+  Address fast_c_call_caller_pc_ = kNullAddress;
+
   // Ensure the size is 8-byte aligned in order to make alignment of the field
   // following the IsolateData field predictable. This solves the issue with
   // C++ compilers for 32-bit platforms which are not consistent at aligning
@@ -177,6 +192,10 @@ void IsolateData::AssertPredictableLayout() {
                 kExternalMemoryLlimitOffset);
   STATIC_ASSERT(offsetof(IsolateData, external_memory_at_last_mark_compact_) ==
                 kExternalMemoryAtLastMarkCompactOffset);
+  STATIC_ASSERT(offsetof(IsolateData, fast_c_call_caller_fp_) ==
+                kFastCCallCallerFPOffset);
+  STATIC_ASSERT(offsetof(IsolateData, fast_c_call_caller_pc_) ==
+                kFastCCallCallerPCOffset);
   STATIC_ASSERT(sizeof(IsolateData) == IsolateData::kSize);
 }
 
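
The two slots live on IsolateData so that their offsets from the isolate root are compile-time constants, which is what lets the assemblers write to them with a single root-relative store (via ExternalReferenceAsOperand on ia32/x64). The helpers below are hypothetical and not added by this patch; they are written in the style of the existing IsolateData offset accessors purely to make that relationship explicit:

    // Hypothetical helpers (not part of this patch), shown as if declared
    // inside IsolateData next to the existing *_offset() accessors:
    static constexpr int fast_c_call_caller_fp_offset() {
      return kFastCCallCallerFPOffset - kIsolateRootBias;
    }
    static constexpr int fast_c_call_caller_pc_offset() {
      return kFastCCallCallerPCOffset - kIsolateRootBias;
    }
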
diff --git a/src/mips/macro-assembler-mips.cc b/src/mips/macro-assembler-mips.cc
index 259d744a8c..bd56f5f3a0 100644
--- a/src/mips/macro-assembler-mips.cc
+++ b/src/mips/macro-assembler-mips.cc
@@ -5399,7 +5399,38 @@ void TurboAssembler::CallCFunctionHelper(Register function_base,
       function_offset = 0;
     }
 
+    // Save the frame pointer and PC so that the stack layout remains iterable,
+    // even without an ExitFrame which normally exists between JS and C frames.
+    if (isolate() != nullptr) {
+      UseScratchRegisterScope temps(this);
+      Register scratch1 = temps.Acquire();
+      // 't' registers are caller-saved so this is safe as a scratch register.
+      Register scratch2 = t5;
+      DCHECK(!AreAliased(scratch1, scratch2, function_base));
+
+      Label get_pc;
+      mov(scratch1, ra);
+      Call(&get_pc);
+
+      bind(&get_pc);
+      mov(scratch2, ra);
+      mov(ra, scratch1);
+
+      li(scratch1, ExternalReference::fast_c_call_caller_pc_address(isolate()));
+      sw(scratch2, MemOperand(scratch1));
+      li(scratch1, ExternalReference::fast_c_call_caller_fp_address(isolate()));
+      sw(fp, MemOperand(scratch1));
+    }
+
     Call(function_base, function_offset);
+
+    if (isolate() != nullptr) {
+      // We don't unset the PC; the FP is the source of truth.
+      UseScratchRegisterScope temps(this);
+      Register scratch = temps.Acquire();
+      li(scratch, ExternalReference::fast_c_call_caller_fp_address(isolate()));
+      sw(zero_reg, MemOperand(scratch));
+    }
   }
 
   int stack_passed_arguments = CalculateStackPassedWords(
diff --git a/src/mips64/macro-assembler-mips64.cc b/src/mips64/macro-assembler-mips64.cc
index 65d919ff0a..bf73d3f3b1 100644
--- a/src/mips64/macro-assembler-mips64.cc
+++ b/src/mips64/macro-assembler-mips64.cc
@@ -5761,7 +5761,38 @@ void TurboAssembler::CallCFunctionHelper(Register function,
       function = t9;
     }
 
+    // Save the frame pointer and PC so that the stack layout remains iterable,
+    // even without an ExitFrame which normally exists between JS and C frames.
+    if (isolate() != nullptr) {
+      UseScratchRegisterScope temps(this);
+      Register scratch1 = temps.Acquire();
+      // 't' registers are caller-saved so this is safe as a scratch register.
+      Register scratch2 = t2;
+      DCHECK(!AreAliased(scratch1, scratch2, function));
+
+      Label get_pc;
+      mov(scratch1, ra);
+      Call(&get_pc);
+
+      bind(&get_pc);
+      mov(scratch2, ra);
+      mov(ra, scratch1);
+
+      li(scratch1, ExternalReference::fast_c_call_caller_pc_address(isolate()));
+      Sd(scratch2, MemOperand(scratch1));
+      li(scratch1, ExternalReference::fast_c_call_caller_fp_address(isolate()));
+      Sd(fp, MemOperand(scratch1));
+    }
+
     Call(function);
+
+    if (isolate() != nullptr) {
+      // We don't unset the PC; the FP is the source of truth.
+      UseScratchRegisterScope temps(this);
+      Register scratch = temps.Acquire();
+      li(scratch, ExternalReference::fast_c_call_caller_fp_address(isolate()));
+      Sd(zero_reg, MemOperand(scratch));
+    }
   }
 
   int stack_passed_arguments = CalculateStackPassedWords(
diff --git a/src/x64/macro-assembler-x64.cc b/src/x64/macro-assembler-x64.cc
index ace051be65..a62674ef22 100644
--- a/src/x64/macro-assembler-x64.cc
+++ b/src/x64/macro-assembler-x64.cc
@@ -2652,7 +2652,30 @@ void TurboAssembler::CallCFunction(Register function, int num_arguments) {
     CheckStackAlignment();
   }
 
+  // Save the frame pointer and PC so that the stack layout remains iterable,
+  // even without an ExitFrame which normally exists between JS and C frames.
+  if (isolate() != nullptr) {
+    Label get_pc;
+    DCHECK(!AreAliased(kScratchRegister, function));
+    leaq(kScratchRegister, Operand(&get_pc, 0));
+    bind(&get_pc);
+    movp(ExternalReferenceAsOperand(
+             ExternalReference::fast_c_call_caller_pc_address(isolate())),
+         kScratchRegister);
+    movp(ExternalReferenceAsOperand(
+             ExternalReference::fast_c_call_caller_fp_address(isolate())),
+         rbp);
+  }
+
   call(function);
+
+  if (isolate() != nullptr) {
+    // We don't unset the PC; the FP is the source of truth.
+    movp(ExternalReferenceAsOperand(
+             ExternalReference::fast_c_call_caller_fp_address(isolate())),
+         Immediate(0));
+  }
+
   DCHECK_NE(base::OS::ActivationFrameAlignment(), 0);
   DCHECK_GE(num_arguments, 0);
   int argument_slots_on_stack =
diff --git a/test/cctest/cctest.status b/test/cctest/cctest.status
index 3b75aa2c8d..76333b193c 100644
--- a/test/cctest/cctest.status
+++ b/test/cctest/cctest.status
@@ -93,9 +93,6 @@
   'test-cpu-profiler/TracingCpuProfiler': [SKIP],
   'test-sampler/LibSamplerCollectSample': [SKIP],
 
-  # BUG(7202). The test is flaky.
-  'test-cpu-profiler/NativeFrameStackTrace': [SKIP],
-
   # BUG(7054)
   'test-cpu-profiler/StaticCollectSampleAPI': [SKIP],
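
The cctest.status change re-enables the previously flaky NativeFrameStackTrace test, presumably because samples taken while a fast C call is in progress now resolve to the JS caller instead of an unwalkable stack. A hedged sketch of exercising that path through the public profiler API; the title, helper name and script choice are placeholders, and the real assertions live in test-cpu-profiler:

    #include "include/v8-profiler.h"

    // Sketch only: profile across code that reaches CallCFunction.
    void ProfileAcrossFastCCallSketch(v8::Isolate* isolate) {
      v8::CpuProfiler* profiler = v8::CpuProfiler::New(isolate);
      v8::Local<v8::String> title =
          v8::String::NewFromUtf8(isolate, "fast-c-call",
                                  v8::NewStringType::kNormal)
              .ToLocalChecked();
      profiler->StartProfiling(title, /*record_samples=*/true);
      // Run JS here that ends up in a builtin which calls out via
      // CallCFunction; samples taken inside the C function should now
      // attribute to the JS caller.
      v8::CpuProfile* profile = profiler->StopProfiling(title);
      profile->Delete();
      profiler->Dispose();
    }
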