[fastcall] Add CPU profiler support for fast calls
This CL introduces a new fast_api_call_target field on the isolate, which is set by Turbofan before making the fast call. It then uses the field when creating a stack sample and stores it in the existing external_callback_entry used for regular API callbacks. The CL also adds a cctest with simple usage scenario and introduces a minor refactoring in test-api.cc. Design doc: https://docs.google.com/document/d/1r32qlPzGz0P7nieisJ5h2qfSnWOs40Cigt0LXPipejE/edit Bug: chromium:1052746 Change-Id: I2dab1bc395ccab0c14088f7c354fb52b08df8d32 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2488683 Commit-Queue: Maya Lekova <mslekova@chromium.org> Reviewed-by: Georg Neis <neis@chromium.org> Reviewed-by: Peter Marshall <petermarshall@chromium.org> Cr-Commit-Position: refs/heads/master@{#71254}
This commit is contained in:
parent
18cbf05e8e
commit
7a62cceb72
@ -207,8 +207,10 @@ class Internals {
|
|||||||
kNumIsolateDataSlots * kApiSystemPointerSize;
|
kNumIsolateDataSlots * kApiSystemPointerSize;
|
||||||
static const int kIsolateFastCCallCallerPcOffset =
|
static const int kIsolateFastCCallCallerPcOffset =
|
||||||
kIsolateFastCCallCallerFpOffset + kApiSystemPointerSize;
|
kIsolateFastCCallCallerFpOffset + kApiSystemPointerSize;
|
||||||
static const int kIsolateStackGuardOffset =
|
static const int kIsolateFastApiCallTargetOffset =
|
||||||
kIsolateFastCCallCallerPcOffset + kApiSystemPointerSize;
|
kIsolateFastCCallCallerPcOffset + kApiSystemPointerSize;
|
||||||
|
static const int kIsolateStackGuardOffset =
|
||||||
|
kIsolateFastApiCallTargetOffset + kApiSystemPointerSize;
|
||||||
static const int kIsolateRootsOffset =
|
static const int kIsolateRootsOffset =
|
||||||
kIsolateStackGuardOffset + 7 * kApiSystemPointerSize;
|
kIsolateStackGuardOffset + 7 * kApiSystemPointerSize;
|
||||||
|
|
||||||
|
@ -826,6 +826,12 @@ ExternalReference ExternalReference::fast_c_call_caller_pc_address(
|
|||||||
isolate->isolate_data()->fast_c_call_caller_pc_address());
|
isolate->isolate_data()->fast_c_call_caller_pc_address());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns an ExternalReference built from the pointer that
// IsolateData::fast_api_call_target_address() hands out for |isolate|.
ExternalReference ExternalReference::fast_api_call_target_address(
    Isolate* isolate) {
  auto* const target_slot =
      isolate->isolate_data()->fast_api_call_target_address();
  return ExternalReference(target_slot);
}
|
||||||
|
|
||||||
ExternalReference ExternalReference::stack_is_iterable_address(
|
ExternalReference ExternalReference::stack_is_iterable_address(
|
||||||
Isolate* isolate) {
|
Isolate* isolate) {
|
||||||
return ExternalReference(
|
return ExternalReference(
|
||||||
|
@ -71,6 +71,7 @@ class StatsCounter;
|
|||||||
"IsolateData::fast_c_call_caller_fp_address") \
|
"IsolateData::fast_c_call_caller_fp_address") \
|
||||||
V(fast_c_call_caller_pc_address, \
|
V(fast_c_call_caller_pc_address, \
|
||||||
"IsolateData::fast_c_call_caller_pc_address") \
|
"IsolateData::fast_c_call_caller_pc_address") \
|
||||||
|
V(fast_api_call_target_address, "IsolateData::fast_api_call_target_address") \
|
||||||
V(stack_is_iterable_address, "IsolateData::stack_is_iterable_address") \
|
V(stack_is_iterable_address, "IsolateData::stack_is_iterable_address") \
|
||||||
V(address_of_regexp_stack_limit_address, \
|
V(address_of_regexp_stack_limit_address, \
|
||||||
"RegExpStack::limit_address_address()") \
|
"RegExpStack::limit_address_address()") \
|
||||||
|
@ -5085,9 +5085,16 @@ Node* EffectControlLinearizer::LowerFastApiCall(Node* node) {
|
|||||||
|
|
||||||
call_descriptor->SetCFunctionInfo(c_signature);
|
call_descriptor->SetCFunctionInfo(c_signature);
|
||||||
|
|
||||||
|
// CPU profiler support
|
||||||
|
Node* target_address = __ ExternalConstant(
|
||||||
|
ExternalReference::fast_api_call_target_address(isolate()));
|
||||||
|
__ Store(StoreRepresentation(MachineType::PointerRepresentation(),
|
||||||
|
kNoWriteBarrier),
|
||||||
|
target_address, 0, n.target());
|
||||||
|
|
||||||
Node** const inputs = graph()->zone()->NewArray<Node*>(
|
Node** const inputs = graph()->zone()->NewArray<Node*>(
|
||||||
c_arg_count + FastApiCallNode::kFastCallExtraInputCount);
|
c_arg_count + FastApiCallNode::kFastCallExtraInputCount);
|
||||||
inputs[0] = NodeProperties::GetValueInput(node, 0); // the target
|
inputs[0] = n.target();
|
||||||
for (int i = FastApiCallNode::kFastTargetInputCount;
|
for (int i = FastApiCallNode::kFastTargetInputCount;
|
||||||
i < c_arg_count + FastApiCallNode::kFastTargetInputCount; ++i) {
|
i < c_arg_count + FastApiCallNode::kFastTargetInputCount; ++i) {
|
||||||
if (c_signature->ArgumentInfo(i - 1).GetType() ==
|
if (c_signature->ArgumentInfo(i - 1).GetType() ==
|
||||||
@ -5099,12 +5106,17 @@ Node* EffectControlLinearizer::LowerFastApiCall(Node* node) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
inputs[c_arg_count + 1] = fast_api_call_stack_slot_;
|
inputs[c_arg_count + 1] = fast_api_call_stack_slot_;
|
||||||
|
|
||||||
inputs[c_arg_count + 2] = __ effect();
|
inputs[c_arg_count + 2] = __ effect();
|
||||||
inputs[c_arg_count + 3] = __ control();
|
inputs[c_arg_count + 3] = __ control();
|
||||||
|
|
||||||
__ Call(call_descriptor,
|
__ Call(call_descriptor,
|
||||||
c_arg_count + FastApiCallNode::kFastCallExtraInputCount, inputs);
|
c_arg_count + FastApiCallNode::kFastCallExtraInputCount, inputs);
|
||||||
|
|
||||||
|
__ Store(StoreRepresentation(MachineType::PointerRepresentation(),
|
||||||
|
kNoWriteBarrier),
|
||||||
|
target_address, 0, __ IntPtrConstant(0));
|
||||||
|
|
||||||
// Generate the load from `fast_api_call_stack_slot_`.
|
// Generate the load from `fast_api_call_stack_slot_`.
|
||||||
Node* load = __ Load(MachineType::Int32(), fast_api_call_stack_slot_, 0);
|
Node* load = __ Load(MachineType::Int32(), fast_api_call_stack_slot_, 0);
|
||||||
|
|
||||||
|
@ -301,13 +301,13 @@ SafeStackFrameIterator::SafeStackFrameIterator(Isolate* isolate, Address pc,
|
|||||||
frame_ = nullptr;
|
frame_ = nullptr;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// 'Fast C calls' are a special type of C call where we call directly from JS
|
// 'Fast C calls' are a special type of C call where we call directly from
|
||||||
// to C without an exit frame inbetween. The CEntryStub is responsible for
|
// JS to C without an exit frame inbetween. The CEntryStub is responsible
|
||||||
// setting Isolate::c_entry_fp, meaning that it won't be set for fast C calls.
|
// for setting Isolate::c_entry_fp, meaning that it won't be set for fast C
|
||||||
// To keep the stack iterable, we store the FP and PC of the caller of the
|
// calls. To keep the stack iterable, we store the FP and PC of the caller
|
||||||
// fast C call on the isolate. This is guaranteed to be the topmost JS frame,
|
// of the fast C call on the isolate. This is guaranteed to be the topmost
|
||||||
// because fast C calls cannot call back into JS. We start iterating the stack
|
// JS frame, because fast C calls cannot call back into JS. We start
|
||||||
// from this topmost JS frame.
|
// iterating the stack from this topmost JS frame.
|
||||||
if (fast_c_fp) {
|
if (fast_c_fp) {
|
||||||
DCHECK_NE(kNullAddress, isolate->isolate_data()->fast_c_call_caller_pc());
|
DCHECK_NE(kNullAddress, isolate->isolate_data()->fast_c_call_caller_pc());
|
||||||
type = StackFrame::Type::OPTIMIZED;
|
type = StackFrame::Type::OPTIMIZED;
|
||||||
@ -402,6 +402,7 @@ void SafeStackFrameIterator::AdvanceOneFrame() {
|
|||||||
DCHECK(!done());
|
DCHECK(!done());
|
||||||
StackFrame* last_frame = frame_;
|
StackFrame* last_frame = frame_;
|
||||||
Address last_sp = last_frame->sp(), last_fp = last_frame->fp();
|
Address last_sp = last_frame->sp(), last_fp = last_frame->fp();
|
||||||
|
|
||||||
// Before advancing to the next stack frame, perform pointer validity tests.
|
// Before advancing to the next stack frame, perform pointer validity tests.
|
||||||
if (!IsValidFrame(last_frame) || !IsValidCaller(last_frame)) {
|
if (!IsValidFrame(last_frame) || !IsValidCaller(last_frame)) {
|
||||||
frame_ = nullptr;
|
frame_ = nullptr;
|
||||||
|
@ -75,6 +75,10 @@ class IsolateData final {
|
|||||||
return kFastCCallCallerPCOffset - kIsolateRootBias;
|
return kFastCCallCallerPCOffset - kIsolateRootBias;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static constexpr int fast_api_call_target_offset() {
|
||||||
|
return kFastApiCallTargetOffset - kIsolateRootBias;
|
||||||
|
}
|
||||||
|
|
||||||
// Root-register-relative offset of the given builtin table entry.
|
// Root-register-relative offset of the given builtin table entry.
|
||||||
// TODO(ishell): remove in favour of typified id version.
|
// TODO(ishell): remove in favour of typified id version.
|
||||||
static int builtin_slot_offset(int builtin_index) {
|
static int builtin_slot_offset(int builtin_index) {
|
||||||
@ -90,10 +94,14 @@ class IsolateData final {
|
|||||||
// The FP and PC that are saved right before TurboAssembler::CallCFunction.
|
// The FP and PC that are saved right before TurboAssembler::CallCFunction.
|
||||||
Address* fast_c_call_caller_fp_address() { return &fast_c_call_caller_fp_; }
|
Address* fast_c_call_caller_fp_address() { return &fast_c_call_caller_fp_; }
|
||||||
Address* fast_c_call_caller_pc_address() { return &fast_c_call_caller_pc_; }
|
Address* fast_c_call_caller_pc_address() { return &fast_c_call_caller_pc_; }
|
||||||
|
// The address of the fast API callback right before it's executed from
|
||||||
|
// generated code.
|
||||||
|
Address* fast_api_call_target_address() { return &fast_api_call_target_; }
|
||||||
StackGuard* stack_guard() { return &stack_guard_; }
|
StackGuard* stack_guard() { return &stack_guard_; }
|
||||||
uint8_t* stack_is_iterable_address() { return &stack_is_iterable_; }
|
uint8_t* stack_is_iterable_address() { return &stack_is_iterable_; }
|
||||||
Address fast_c_call_caller_fp() { return fast_c_call_caller_fp_; }
|
Address fast_c_call_caller_fp() { return fast_c_call_caller_fp_; }
|
||||||
Address fast_c_call_caller_pc() { return fast_c_call_caller_pc_; }
|
Address fast_c_call_caller_pc() { return fast_c_call_caller_pc_; }
|
||||||
|
Address fast_api_call_target() { return fast_api_call_target_; }
|
||||||
uint8_t stack_is_iterable() { return stack_is_iterable_; }
|
uint8_t stack_is_iterable() { return stack_is_iterable_; }
|
||||||
|
|
||||||
// Returns true if this address points to data stored in this instance.
|
// Returns true if this address points to data stored in this instance.
|
||||||
@ -130,6 +138,7 @@ class IsolateData final {
|
|||||||
V(kEmbedderDataOffset, Internals::kNumIsolateDataSlots* kSystemPointerSize) \
|
V(kEmbedderDataOffset, Internals::kNumIsolateDataSlots* kSystemPointerSize) \
|
||||||
V(kFastCCallCallerFPOffset, kSystemPointerSize) \
|
V(kFastCCallCallerFPOffset, kSystemPointerSize) \
|
||||||
V(kFastCCallCallerPCOffset, kSystemPointerSize) \
|
V(kFastCCallCallerPCOffset, kSystemPointerSize) \
|
||||||
|
V(kFastApiCallTargetOffset, kSystemPointerSize) \
|
||||||
V(kStackGuardOffset, StackGuard::kSizeInBytes) \
|
V(kStackGuardOffset, StackGuard::kSizeInBytes) \
|
||||||
V(kRootsTableOffset, RootsTable::kEntriesCount* kSystemPointerSize) \
|
V(kRootsTableOffset, RootsTable::kEntriesCount* kSystemPointerSize) \
|
||||||
V(kExternalReferenceTableOffset, ExternalReferenceTable::kSizeInBytes) \
|
V(kExternalReferenceTableOffset, ExternalReferenceTable::kSizeInBytes) \
|
||||||
@ -166,9 +175,10 @@ class IsolateData final {
|
|||||||
// instruction in compiled code.
|
// instruction in compiled code.
|
||||||
Address fast_c_call_caller_fp_ = kNullAddress;
|
Address fast_c_call_caller_fp_ = kNullAddress;
|
||||||
Address fast_c_call_caller_pc_ = kNullAddress;
|
Address fast_c_call_caller_pc_ = kNullAddress;
|
||||||
|
Address fast_api_call_target_ = kNullAddress;
|
||||||
|
|
||||||
// Fields related to the system and JS stack. In particular, this contains the
|
// Fields related to the system and JS stack. In particular, this contains
|
||||||
// stack limit used by stack checks in generated code.
|
// the stack limit used by stack checks in generated code.
|
||||||
StackGuard stack_guard_;
|
StackGuard stack_guard_;
|
||||||
|
|
||||||
RootsTable roots_;
|
RootsTable roots_;
|
||||||
@ -232,6 +242,8 @@ void IsolateData::AssertPredictableLayout() {
|
|||||||
kFastCCallCallerFPOffset);
|
kFastCCallCallerFPOffset);
|
||||||
STATIC_ASSERT(offsetof(IsolateData, fast_c_call_caller_pc_) ==
|
STATIC_ASSERT(offsetof(IsolateData, fast_c_call_caller_pc_) ==
|
||||||
kFastCCallCallerPCOffset);
|
kFastCCallCallerPCOffset);
|
||||||
|
STATIC_ASSERT(offsetof(IsolateData, fast_api_call_target_) ==
|
||||||
|
kFastApiCallTargetOffset);
|
||||||
STATIC_ASSERT(offsetof(IsolateData, stack_guard_) == kStackGuardOffset);
|
STATIC_ASSERT(offsetof(IsolateData, stack_guard_) == kStackGuardOffset);
|
||||||
#ifdef V8_HEAP_SANDBOX
|
#ifdef V8_HEAP_SANDBOX
|
||||||
STATIC_ASSERT(offsetof(IsolateData, external_pointer_table_) ==
|
STATIC_ASSERT(offsetof(IsolateData, external_pointer_table_) ==
|
||||||
|
@ -2299,8 +2299,14 @@ void ExistingCodeLogger::LogExistingFunction(
|
|||||||
#if USES_FUNCTION_DESCRIPTORS
|
#if USES_FUNCTION_DESCRIPTORS
|
||||||
entry_point = *FUNCTION_ENTRYPOINT_ADDRESS(entry_point);
|
entry_point = *FUNCTION_ENTRYPOINT_ADDRESS(entry_point);
|
||||||
#endif
|
#endif
|
||||||
CALL_CODE_EVENT_HANDLER(
|
Handle<String> fun_name(shared->DebugName(), isolate_);
|
||||||
CallbackEvent(handle(shared->DebugName(), isolate_), entry_point))
|
CALL_CODE_EVENT_HANDLER(CallbackEvent(fun_name, entry_point))
|
||||||
|
|
||||||
|
// Fast API function.
|
||||||
|
Address c_function = v8::ToCData<Address>(fun_data.GetCFunction());
|
||||||
|
if (c_function != kNullAddress) {
|
||||||
|
CALL_CODE_EVENT_HANDLER(CallbackEvent(fun_name, c_function))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -156,14 +156,17 @@ DISABLE_ASAN void TickSample::Init(Isolate* v8_isolate,
|
|||||||
SampleInfo info;
|
SampleInfo info;
|
||||||
RegisterState regs = reg_state;
|
RegisterState regs = reg_state;
|
||||||
if (!GetStackSample(v8_isolate, &regs, record_c_entry_frame, stack,
|
if (!GetStackSample(v8_isolate, &regs, record_c_entry_frame, stack,
|
||||||
kMaxFramesCount, &info, use_simulator_reg_state)) {
|
kMaxFramesCount, &info, &state,
|
||||||
|
use_simulator_reg_state)) {
|
||||||
// It is executing JS but failed to collect a stack trace.
|
// It is executing JS but failed to collect a stack trace.
|
||||||
// Mark the sample as spoiled.
|
// Mark the sample as spoiled.
|
||||||
pc = nullptr;
|
pc = nullptr;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
state = info.vm_state;
|
if (state != StateTag::EXTERNAL) {
|
||||||
|
state = info.vm_state;
|
||||||
|
}
|
||||||
pc = regs.pc;
|
pc = regs.pc;
|
||||||
frames_count = static_cast<unsigned>(info.frames_count);
|
frames_count = static_cast<unsigned>(info.frames_count);
|
||||||
has_external_callback = info.external_callback_entry != nullptr;
|
has_external_callback = info.external_callback_entry != nullptr;
|
||||||
@ -193,6 +196,7 @@ bool TickSample::GetStackSample(Isolate* v8_isolate, RegisterState* regs,
|
|||||||
RecordCEntryFrame record_c_entry_frame,
|
RecordCEntryFrame record_c_entry_frame,
|
||||||
void** frames, size_t frames_limit,
|
void** frames, size_t frames_limit,
|
||||||
v8::SampleInfo* sample_info,
|
v8::SampleInfo* sample_info,
|
||||||
|
StateTag* out_state,
|
||||||
bool use_simulator_reg_state) {
|
bool use_simulator_reg_state) {
|
||||||
i::Isolate* isolate = reinterpret_cast<i::Isolate*>(v8_isolate);
|
i::Isolate* isolate = reinterpret_cast<i::Isolate*>(v8_isolate);
|
||||||
sample_info->frames_count = 0;
|
sample_info->frames_count = 0;
|
||||||
@ -243,6 +247,23 @@ bool TickSample::GetStackSample(Isolate* v8_isolate, RegisterState* regs,
|
|||||||
? nullptr
|
? nullptr
|
||||||
: reinterpret_cast<void*>(*external_callback_entry_ptr);
|
: reinterpret_cast<void*>(*external_callback_entry_ptr);
|
||||||
}
|
}
|
||||||
|
// 'Fast API calls' are similar to fast C calls (see frames.cc) in that
|
||||||
|
// they don't build an exit frame when entering C from JS. They have the
|
||||||
|
// added speciality of having separate "fast" and "default" callbacks, the
|
||||||
|
// latter being the regular API callback called before the JS function is
|
||||||
|
// optimized. When TurboFan optimizes the JS caller, the fast callback
|
||||||
|
// gets executed instead of the default one, therefore we need to store
|
||||||
|
// its address in the sample.
|
||||||
|
IsolateData* isolate_data = isolate->isolate_data();
|
||||||
|
Address fast_c_fp = isolate_data->fast_c_call_caller_fp();
|
||||||
|
if (fast_c_fp != kNullAddress &&
|
||||||
|
isolate_data->fast_api_call_target() != kNullAddress) {
|
||||||
|
sample_info->external_callback_entry =
|
||||||
|
reinterpret_cast<void*>(isolate_data->fast_api_call_target());
|
||||||
|
if (out_state) {
|
||||||
|
*out_state = StateTag::EXTERNAL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
i::SafeStackFrameIterator it(isolate, reinterpret_cast<i::Address>(regs->pc),
|
i::SafeStackFrameIterator it(isolate, reinterpret_cast<i::Address>(regs->pc),
|
||||||
reinterpret_cast<i::Address>(regs->fp),
|
reinterpret_cast<i::Address>(regs->fp),
|
||||||
|
@ -56,6 +56,13 @@ struct V8_EXPORT TickSample {
|
|||||||
* \param sample_info The sample info is filled up by the function
|
* \param sample_info The sample info is filled up by the function
|
||||||
* provides number of actual captured stack frames and
|
* provides number of actual captured stack frames and
|
||||||
* the current VM state.
|
* the current VM state.
|
||||||
|
* \param out_state Output parameter. If non-nullptr pointer is provided,
|
||||||
|
* and the execution is currently in a fast API call,
|
||||||
|
* records StateTag::EXTERNAL to it. The caller could then
|
||||||
|
* use this as a marker to not take into account the actual
|
||||||
|
* VM state recorded in |sample_info|. In the case of fast
|
||||||
|
* API calls, the VM state must be EXTERNAL, as the callback
|
||||||
|
* is always an external C++ function.
|
||||||
* \param use_simulator_reg_state When set to true and V8 is running under a
|
* \param use_simulator_reg_state When set to true and V8 is running under a
|
||||||
* simulator, the method will use the simulator
|
* simulator, the method will use the simulator
|
||||||
* register state rather than the one provided
|
* register state rather than the one provided
|
||||||
@ -69,6 +76,7 @@ struct V8_EXPORT TickSample {
|
|||||||
RecordCEntryFrame record_c_entry_frame,
|
RecordCEntryFrame record_c_entry_frame,
|
||||||
void** frames, size_t frames_limit,
|
void** frames, size_t frames_limit,
|
||||||
v8::SampleInfo* sample_info,
|
v8::SampleInfo* sample_info,
|
||||||
|
StateTag* out_state = nullptr,
|
||||||
bool use_simulator_reg_state = true);
|
bool use_simulator_reg_state = true);
|
||||||
|
|
||||||
void print() const;
|
void print() const;
|
||||||
|
@ -406,3 +406,11 @@ int main(int argc, char* argv[]) {
|
|||||||
|
|
||||||
RegisterThreadedTest* RegisterThreadedTest::first_ = nullptr;
|
RegisterThreadedTest* RegisterThreadedTest::first_ = nullptr;
|
||||||
int RegisterThreadedTest::count_ = 0;
|
int RegisterThreadedTest::count_ = 0;
|
||||||
|
|
||||||
|
// Returns true when the instance type reported by
// i::Internals::GetInstanceType() for |object| is one of kJSObjectType,
// kJSApiObjectType or kJSSpecialApiObjectType; returns false otherwise.
bool IsValidUnwrapObject(v8::Object* object) {
  // Load the address held in the slot that |object| points at.
  const i::Address raw_address = *reinterpret_cast<i::Address*>(object);
  const auto type = i::Internals::GetInstanceType(raw_address);
  if (type == i::Internals::kJSObjectType) return true;
  if (type == i::Internals::kJSApiObjectType) return true;
  return type == i::Internals::kJSSpecialApiObjectType;
}
|
||||||
|
@ -812,4 +812,26 @@ class SimulatorHelper {
|
|||||||
};
|
};
|
||||||
#endif // USE_SIMULATOR
|
#endif // USE_SIMULATOR
|
||||||
|
|
||||||
|
// The following should correspond to Chromium's kV8DOMWrapperTypeIndex and
|
||||||
|
// kV8DOMWrapperObjectIndex.
|
||||||
|
static const int kV8WrapperTypeIndex = 0;
|
||||||
|
static const int kV8WrapperObjectIndex = 1;
|
||||||
|
|
||||||
|
enum class ApiCheckerResult : uint8_t {
|
||||||
|
kNotCalled = 0,
|
||||||
|
kSlowCalled = 1 << 0,
|
||||||
|
kFastCalled = 1 << 1,
|
||||||
|
};
|
||||||
|
using ApiCheckerResultFlags = v8::base::Flags<ApiCheckerResult>;
|
||||||
|
DEFINE_OPERATORS_FOR_FLAGS(ApiCheckerResultFlags)
|
||||||
|
|
||||||
|
bool IsValidUnwrapObject(v8::Object* object);
|
||||||
|
|
||||||
|
template <typename T, int offset>
|
||||||
|
T* GetInternalField(v8::Object* wrapper) {
|
||||||
|
assert(offset < wrapper->InternalFieldCount());
|
||||||
|
return reinterpret_cast<T*>(
|
||||||
|
wrapper->GetAlignedPointerFromInternalField(offset));
|
||||||
|
}
|
||||||
|
|
||||||
#endif // ifndef CCTEST_H_
|
#endif // ifndef CCTEST_H_
|
||||||
|
@ -66,6 +66,7 @@
|
|||||||
#include "src/objects/string-inl.h"
|
#include "src/objects/string-inl.h"
|
||||||
#include "src/objects/synthetic-module-inl.h"
|
#include "src/objects/synthetic-module-inl.h"
|
||||||
#include "src/profiler/cpu-profiler.h"
|
#include "src/profiler/cpu-profiler.h"
|
||||||
|
#include "src/profiler/symbolizer.h"
|
||||||
#include "src/strings/unicode-inl.h"
|
#include "src/strings/unicode-inl.h"
|
||||||
#include "src/utils/utils.h"
|
#include "src/utils/utils.h"
|
||||||
#include "test/cctest/heap/heap-tester.h"
|
#include "test/cctest/heap/heap-tester.h"
|
||||||
@ -27454,10 +27455,6 @@ UNINITIALIZED_TEST(NestedIsolates) {
|
|||||||
|
|
||||||
#ifndef V8_LITE_MODE
|
#ifndef V8_LITE_MODE
|
||||||
namespace {
|
namespace {
|
||||||
// The following should correspond to Chromium's kV8DOMWrapperObjectIndex.
|
|
||||||
static const int kV8WrapperTypeIndex = 0;
|
|
||||||
static const int kV8WrapperObjectIndex = 1;
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
struct ConvertJSValue {
|
struct ConvertJSValue {
|
||||||
static Maybe<T> Get(v8::Local<v8::Value> value,
|
static Maybe<T> Get(v8::Local<v8::Value> value,
|
||||||
@ -27578,14 +27575,6 @@ struct ConvertJSValue<bool> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class ApiCheckerResult : uint8_t {
|
|
||||||
kNotCalled = 0,
|
|
||||||
kSlowCalled = 1 << 0,
|
|
||||||
kFastCalled = 1 << 1,
|
|
||||||
};
|
|
||||||
using ApiCheckerResultFlags = v8::base::Flags<ApiCheckerResult>;
|
|
||||||
DEFINE_OPERATORS_FOR_FLAGS(ApiCheckerResultFlags)
|
|
||||||
|
|
||||||
template <typename Value, typename Impl>
|
template <typename Value, typename Impl>
|
||||||
struct BasicApiChecker {
|
struct BasicApiChecker {
|
||||||
static void FastCallback(v8::ApiObject receiver, Value argument,
|
static void FastCallback(v8::ApiObject receiver, Value argument,
|
||||||
@ -27606,22 +27595,6 @@ struct BasicApiChecker {
|
|||||||
ApiCheckerResultFlags result_ = ApiCheckerResult::kNotCalled;
|
ApiCheckerResultFlags result_ = ApiCheckerResult::kNotCalled;
|
||||||
};
|
};
|
||||||
|
|
||||||
bool IsValidUnwrapObject(v8::Object* object) {
|
|
||||||
v8::internal::Address addr =
|
|
||||||
*reinterpret_cast<v8::internal::Address*>(object);
|
|
||||||
auto instance_type = v8::internal::Internals::GetInstanceType(addr);
|
|
||||||
return (instance_type == v8::internal::Internals::kJSObjectType ||
|
|
||||||
instance_type == v8::internal::Internals::kJSApiObjectType ||
|
|
||||||
instance_type == v8::internal::Internals::kJSSpecialApiObjectType);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T, int offset>
|
|
||||||
T* GetInternalField(v8::Object* wrapper) {
|
|
||||||
assert(offset < wrapper->InternalFieldCount());
|
|
||||||
return reinterpret_cast<T*>(
|
|
||||||
wrapper->GetAlignedPointerFromInternalField(offset));
|
|
||||||
}
|
|
||||||
|
|
||||||
enum class Behavior {
|
enum class Behavior {
|
||||||
kNoException,
|
kNoException,
|
||||||
kException, // An exception should be thrown by the callback function.
|
kException, // An exception should be thrown by the callback function.
|
||||||
|
@ -31,6 +31,7 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
#include "include/libplatform/v8-tracing.h"
|
#include "include/libplatform/v8-tracing.h"
|
||||||
|
#include "include/v8-fast-api-calls.h"
|
||||||
#include "include/v8-profiler.h"
|
#include "include/v8-profiler.h"
|
||||||
#include "src/api/api-inl.h"
|
#include "src/api/api-inl.h"
|
||||||
#include "src/base/platform/platform.h"
|
#include "src/base/platform/platform.h"
|
||||||
@ -52,6 +53,7 @@
|
|||||||
#include "test/cctest/cctest.h"
|
#include "test/cctest/cctest.h"
|
||||||
#include "test/cctest/heap/heap-utils.h"
|
#include "test/cctest/heap/heap-utils.h"
|
||||||
#include "test/cctest/profiler-extension.h"
|
#include "test/cctest/profiler-extension.h"
|
||||||
|
#include "test/common/flag-utils.h"
|
||||||
|
|
||||||
#ifdef V8_USE_PERFETTO
|
#ifdef V8_USE_PERFETTO
|
||||||
#include "protos/perfetto/trace/trace.pb.h"
|
#include "protos/perfetto/trace/trace.pb.h"
|
||||||
@ -3875,6 +3877,162 @@ UNINITIALIZED_TEST(DetailedSourcePositionAPI_Inlining) {
|
|||||||
isolate->Dispose();
|
isolate->Dispose();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
struct FastApiReceiver {
|
||||||
|
static void FastCallback(v8::ApiObject receiver, int argument,
|
||||||
|
int* fallback) {
|
||||||
|
v8::Object* receiver_obj = reinterpret_cast<v8::Object*>(&receiver);
|
||||||
|
if (!IsValidUnwrapObject(receiver_obj)) {
|
||||||
|
*fallback = 1;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
FastApiReceiver* receiver_ptr =
|
||||||
|
GetInternalField<FastApiReceiver, kV8WrapperObjectIndex>(receiver_obj);
|
||||||
|
|
||||||
|
receiver_ptr->result_ |= ApiCheckerResult::kFastCalled;
|
||||||
|
|
||||||
|
// Artificially slow down the callback with a predictable amount of time.
|
||||||
|
// This ensures the test has a relatively stable run time on various
|
||||||
|
// platforms and protects it from flakyness.
|
||||||
|
v8::base::OS::Sleep(v8::base::TimeDelta::FromMilliseconds(100));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void SlowCallback(const v8::FunctionCallbackInfo<v8::Value>& info) {
|
||||||
|
v8::Object* receiver_obj = v8::Object::Cast(*info.Holder());
|
||||||
|
if (!IsValidUnwrapObject(receiver_obj)) {
|
||||||
|
info.GetIsolate()->ThrowException(v8_str("Called with a non-object."));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
FastApiReceiver* receiver =
|
||||||
|
GetInternalField<FastApiReceiver, kV8WrapperObjectIndex>(receiver_obj);
|
||||||
|
|
||||||
|
receiver->result_ |= ApiCheckerResult::kSlowCalled;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool DidCallFast() const { return (result_ & ApiCheckerResult::kFastCalled); }
|
||||||
|
bool DidCallSlow() const { return (result_ & ApiCheckerResult::kSlowCalled); }
|
||||||
|
|
||||||
|
ApiCheckerResultFlags result_ = ApiCheckerResult::kNotCalled;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Compiles and runs a script that defines foo(arg), which calls
// receiver.api_func(i) for each i in [0, arg). The script prepares foo for
// optimization, calls it twice with 42, and marks it for optimization on the
// next call. Returns the function looked up under the name "foo" in
// |env|'s context.
v8::Local<v8::Function> CreateApiCode(LocalContext* env) {
  CompileRun(
      "function foo(arg) {"
      " for (let i = 0; i < arg; ++i) { receiver.api_func(i); }"
      "}"
      "%PrepareFunctionForOptimization(foo);"
      "foo(42); foo(42);"
      "%OptimizeFunctionOnNextCall(foo);");

  return GetFunction(env->local(), "foo");
}
|
||||||
|
|
||||||
|
// Profiles an optimized JS loop that invokes an API function with both a slow
// and a fast callback, then checks that the profile attributes the samples to
// a callback node under "foo" whose CodeEntry is the one registered for the
// fast C function.
TEST(FastApiCPUProfiler) {
#if !defined(V8_LITE_MODE) && !defined(USE_SIMULATOR)
  if (i::FLAG_jitless || i::FLAG_turboprop) return;

  FLAG_SCOPE_EXTERNAL(opt);
  FLAG_SCOPE_EXTERNAL(turbo_fast_api_calls);
  FLAG_SCOPE_EXTERNAL(allow_natives_syntax);
  // --always_opt must be off: with it on, the feedback needed to take the
  // "best optimization" path for the fast call would not have been generated.
  UNFLAG_SCOPE_EXTERNAL(always_opt);
  UNFLAG_SCOPE_EXTERNAL(prof_browser_mode);

  CcTest::InitializeVM();
  LocalContext env;
  v8::Isolate* isolate = CcTest::isolate();
  i::Isolate* internal_isolate = reinterpret_cast<i::Isolate*>(isolate);
  internal_isolate->set_embedder_wrapper_type_index(kV8WrapperTypeIndex);
  internal_isolate->set_embedder_wrapper_object_index(kV8WrapperObjectIndex);

  i::HandleScope handle_scope(internal_isolate);

  // Build the API function with both a slow and a fast callback.
  FastApiReceiver receiver;

  v8::TryCatch try_catch(isolate);

  v8::CFunction fast_c_function =
      v8::CFunction::MakeWithFallbackSupport(FastApiReceiver::FastCallback);

  Local<v8::FunctionTemplate> api_func_template = v8::FunctionTemplate::New(
      isolate, FastApiReceiver::SlowCallback, v8::Local<v8::Value>(),
      v8::Local<v8::Signature>(), 1, v8::ConstructorBehavior::kAllow,
      v8::SideEffectType::kHasSideEffect, &fast_c_function);

  const char* api_func_str = "api_func";
  v8::Local<v8::ObjectTemplate> wrapper_template =
      v8::ObjectTemplate::New(isolate);
  wrapper_template->SetInternalFieldCount(kV8WrapperObjectIndex + 1);
  wrapper_template->Set(isolate, api_func_str, api_func_template);

  // Instantiate the wrapper, point its object field at |receiver| and expose
  // it to script as the global "receiver".
  v8::Local<v8::Object> wrapper =
      wrapper_template->NewInstance(env.local()).ToLocalChecked();
  wrapper->SetAlignedPointerInInternalField(
      kV8WrapperObjectIndex, reinterpret_cast<void*>(&receiver));
  env->Global()->Set(env.local(), v8_str("receiver"), wrapper).Check();

  // Compile foo() and leave it marked for optimization.
  v8::Local<v8::Function> foo_function = CreateApiCode(&env);

  // Run foo(100) under the CPU profiler.
  int num_runs_arg = 100;
  v8::Local<v8::Value> args[] = {
      v8::Integer::New(env->GetIsolate(), num_runs_arg)};
  ProfilerHelper helper(env.local());
  // TODO(mslekova): We could tweak the following count to reduce test
  // runtime, while still keeping the test stable.
  unsigned external_samples = 1000;
  v8::CpuProfile* profile =
      helper.Run(foo_function, args, arraysize(args), 0, external_samples);

  // Both callbacks must have run, and no exception may be pending.
  CHECK(receiver.DidCallFast());
  CHECK(receiver.DidCallSlow());
  CHECK(!try_catch.HasCaught());

  // The profile must contain root -> foo -> api_func, with api_func reported
  // as a callback node.
  const v8::CpuProfileNode* root_node = profile->GetTopDownRoot();
  const v8::CpuProfileNode* foo_node = GetChild(env.local(), root_node, "foo");
  const v8::CpuProfileNode* api_func_node =
      GetChild(env.local(), foo_node, api_func_str);
  CHECK_NOT_NULL(api_func_node);
  CHECK_EQ(api_func_node->GetSourceType(), CpuProfileNode::kCallback);

  // The node's CodeEntry must be the one the code map holds for the address
  // of the fast callback.
  CodeEntry* actual_entry =
      reinterpret_cast<const ProfileNode*>(api_func_node)->entry();
  CodeMap* code_map = reinterpret_cast<CpuProfile*>(profile)
                          ->cpu_profiler()
                          ->code_map_for_test();
  CodeEntry* expected_entry = code_map->FindEntry(
      reinterpret_cast<Address>(fast_c_function.GetAddress()));
  CHECK_EQ(actual_entry, expected_entry);

  int foo_ticks = foo_node->GetHitCount();
  int api_func_ticks = api_func_node->GetHitCount();
  // Check that at least 80% of the samples in foo hit the fast callback.
  CHECK_LE(foo_ticks, api_func_ticks * 0.2);
  // |external_samples| above asks for at least 1000 EXTERNAL samples, and the
  // fast callback is the only thing producing samples of that kind, so its
  // node should hold close to 1000 ticks. The CPU profiler is
  // nondeterministic, hence the slack: 800 instead of 1000.
  CHECK_GE(api_func_ticks, 800);

  profile->Delete();
#endif
}
|
||||||
|
|
||||||
} // namespace test_cpu_profiler
|
} // namespace test_cpu_profiler
|
||||||
} // namespace internal
|
} // namespace internal
|
||||||
} // namespace v8
|
} // namespace v8
|
||||||
|
Loading…
Reference in New Issue
Block a user