From 7a62cceb72cd9d9b95cf2517accb29c665a60330 Mon Sep 17 00:00:00 2001 From: Maya Lekova Date: Wed, 18 Nov 2020 12:19:56 +0100 Subject: [PATCH] [fastcall] Add CPU profiler support for fast calls This CL introduces a new fast_api_call_target field on the isolate, which is set by Turbofan before making the fast call. It then uses the field when creating a stack sample and stores it in the existing external_callback_entry used for regular API callbacks. The CL also adds a cctest with simple usage scenario and introduces a minor refactoring in test-api.cc. Design doc: https://docs.google.com/document/d/1r32qlPzGz0P7nieisJ5h2qfSnWOs40Cigt0LXPipejE/edit Bug: chromium:1052746 Change-Id: I2dab1bc395ccab0c14088f7c354fb52b08df8d32 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2488683 Commit-Queue: Maya Lekova Reviewed-by: Georg Neis Reviewed-by: Peter Marshall Cr-Commit-Position: refs/heads/master@{#71254} --- include/v8-internal.h | 4 +- src/codegen/external-reference.cc | 6 + src/codegen/external-reference.h | 1 + src/compiler/effect-control-linearizer.cc | 14 +- src/execution/frames.cc | 15 +- src/execution/isolate-data.h | 16 ++- src/logging/log.cc | 10 +- src/profiler/tick-sample.cc | 25 +++- src/profiler/tick-sample.h | 8 ++ test/cctest/cctest.cc | 8 ++ test/cctest/cctest.h | 22 +++ test/cctest/test-api.cc | 29 +--- test/cctest/test-cpu-profiler.cc | 158 ++++++++++++++++++++++ 13 files changed, 273 insertions(+), 43 deletions(-) diff --git a/include/v8-internal.h b/include/v8-internal.h index 06846d7005..8abbcfb416 100644 --- a/include/v8-internal.h +++ b/include/v8-internal.h @@ -207,8 +207,10 @@ class Internals { kNumIsolateDataSlots * kApiSystemPointerSize; static const int kIsolateFastCCallCallerPcOffset = kIsolateFastCCallCallerFpOffset + kApiSystemPointerSize; - static const int kIsolateStackGuardOffset = + static const int kIsolateFastApiCallTargetOffset = kIsolateFastCCallCallerPcOffset + kApiSystemPointerSize; + static const int 
kIsolateStackGuardOffset = + kIsolateFastApiCallTargetOffset + kApiSystemPointerSize; static const int kIsolateRootsOffset = kIsolateStackGuardOffset + 7 * kApiSystemPointerSize; diff --git a/src/codegen/external-reference.cc b/src/codegen/external-reference.cc index ac04a25ae9..5f0313bbce 100644 --- a/src/codegen/external-reference.cc +++ b/src/codegen/external-reference.cc @@ -826,6 +826,12 @@ ExternalReference ExternalReference::fast_c_call_caller_pc_address( isolate->isolate_data()->fast_c_call_caller_pc_address()); } +ExternalReference ExternalReference::fast_api_call_target_address( + Isolate* isolate) { + return ExternalReference( + isolate->isolate_data()->fast_api_call_target_address()); +} + ExternalReference ExternalReference::stack_is_iterable_address( Isolate* isolate) { return ExternalReference( diff --git a/src/codegen/external-reference.h b/src/codegen/external-reference.h index 72a3397007..d2291ec14d 100644 --- a/src/codegen/external-reference.h +++ b/src/codegen/external-reference.h @@ -71,6 +71,7 @@ class StatsCounter; "IsolateData::fast_c_call_caller_fp_address") \ V(fast_c_call_caller_pc_address, \ "IsolateData::fast_c_call_caller_pc_address") \ + V(fast_api_call_target_address, "IsolateData::fast_api_call_target_address") \ V(stack_is_iterable_address, "IsolateData::stack_is_iterable_address") \ V(address_of_regexp_stack_limit_address, \ "RegExpStack::limit_address_address()") \ diff --git a/src/compiler/effect-control-linearizer.cc b/src/compiler/effect-control-linearizer.cc index fd8d128726..236ba121f0 100644 --- a/src/compiler/effect-control-linearizer.cc +++ b/src/compiler/effect-control-linearizer.cc @@ -5085,9 +5085,16 @@ Node* EffectControlLinearizer::LowerFastApiCall(Node* node) { call_descriptor->SetCFunctionInfo(c_signature); + // CPU profiler support + Node* target_address = __ ExternalConstant( + ExternalReference::fast_api_call_target_address(isolate())); + __ Store(StoreRepresentation(MachineType::PointerRepresentation(), + 
kNoWriteBarrier), + target_address, 0, n.target()); + Node** const inputs = graph()->zone()->NewArray( c_arg_count + FastApiCallNode::kFastCallExtraInputCount); - inputs[0] = NodeProperties::GetValueInput(node, 0); // the target + inputs[0] = n.target(); for (int i = FastApiCallNode::kFastTargetInputCount; i < c_arg_count + FastApiCallNode::kFastTargetInputCount; ++i) { if (c_signature->ArgumentInfo(i - 1).GetType() == @@ -5099,12 +5106,17 @@ Node* EffectControlLinearizer::LowerFastApiCall(Node* node) { } } inputs[c_arg_count + 1] = fast_api_call_stack_slot_; + inputs[c_arg_count + 2] = __ effect(); inputs[c_arg_count + 3] = __ control(); __ Call(call_descriptor, c_arg_count + FastApiCallNode::kFastCallExtraInputCount, inputs); + __ Store(StoreRepresentation(MachineType::PointerRepresentation(), + kNoWriteBarrier), + target_address, 0, __ IntPtrConstant(0)); + // Generate the load from `fast_api_call_stack_slot_`. Node* load = __ Load(MachineType::Int32(), fast_api_call_stack_slot_, 0); diff --git a/src/execution/frames.cc b/src/execution/frames.cc index 9eba6a5f5b..8c95813f5f 100644 --- a/src/execution/frames.cc +++ b/src/execution/frames.cc @@ -301,13 +301,13 @@ SafeStackFrameIterator::SafeStackFrameIterator(Isolate* isolate, Address pc, frame_ = nullptr; return; } - // 'Fast C calls' are a special type of C call where we call directly from JS - // to C without an exit frame inbetween. The CEntryStub is responsible for - // setting Isolate::c_entry_fp, meaning that it won't be set for fast C calls. - // To keep the stack iterable, we store the FP and PC of the caller of the - // fast C call on the isolate. This is guaranteed to be the topmost JS frame, - // because fast C calls cannot call back into JS. We start iterating the stack - // from this topmost JS frame. + // 'Fast C calls' are a special type of C call where we call directly from + // JS to C without an exit frame inbetween. 
The CEntryStub is responsible + // for setting Isolate::c_entry_fp, meaning that it won't be set for fast C + // calls. To keep the stack iterable, we store the FP and PC of the caller + // of the fast C call on the isolate. This is guaranteed to be the topmost + // JS frame, because fast C calls cannot call back into JS. We start + // iterating the stack from this topmost JS frame. if (fast_c_fp) { DCHECK_NE(kNullAddress, isolate->isolate_data()->fast_c_call_caller_pc()); type = StackFrame::Type::OPTIMIZED; @@ -402,6 +402,7 @@ void SafeStackFrameIterator::AdvanceOneFrame() { DCHECK(!done()); StackFrame* last_frame = frame_; Address last_sp = last_frame->sp(), last_fp = last_frame->fp(); + // Before advancing to the next stack frame, perform pointer validity tests. if (!IsValidFrame(last_frame) || !IsValidCaller(last_frame)) { frame_ = nullptr; diff --git a/src/execution/isolate-data.h b/src/execution/isolate-data.h index c875d92f09..146fcbbeb1 100644 --- a/src/execution/isolate-data.h +++ b/src/execution/isolate-data.h @@ -75,6 +75,10 @@ class IsolateData final { return kFastCCallCallerPCOffset - kIsolateRootBias; } + static constexpr int fast_api_call_target_offset() { + return kFastApiCallTargetOffset - kIsolateRootBias; + } + // Root-register-relative offset of the given builtin table entry. // TODO(ishell): remove in favour of typified id version. static int builtin_slot_offset(int builtin_index) { @@ -90,10 +94,14 @@ class IsolateData final { // The FP and PC that are saved right before TurboAssembler::CallCFunction. Address* fast_c_call_caller_fp_address() { return &fast_c_call_caller_fp_; } Address* fast_c_call_caller_pc_address() { return &fast_c_call_caller_pc_; } + // The address of the fast API callback right before it's executed from + // generated code. 
+ Address* fast_api_call_target_address() { return &fast_api_call_target_; } StackGuard* stack_guard() { return &stack_guard_; } uint8_t* stack_is_iterable_address() { return &stack_is_iterable_; } Address fast_c_call_caller_fp() { return fast_c_call_caller_fp_; } Address fast_c_call_caller_pc() { return fast_c_call_caller_pc_; } + Address fast_api_call_target() { return fast_api_call_target_; } uint8_t stack_is_iterable() { return stack_is_iterable_; } // Returns true if this address points to data stored in this instance. @@ -130,6 +138,7 @@ class IsolateData final { V(kEmbedderDataOffset, Internals::kNumIsolateDataSlots* kSystemPointerSize) \ V(kFastCCallCallerFPOffset, kSystemPointerSize) \ V(kFastCCallCallerPCOffset, kSystemPointerSize) \ + V(kFastApiCallTargetOffset, kSystemPointerSize) \ V(kStackGuardOffset, StackGuard::kSizeInBytes) \ V(kRootsTableOffset, RootsTable::kEntriesCount* kSystemPointerSize) \ V(kExternalReferenceTableOffset, ExternalReferenceTable::kSizeInBytes) \ @@ -166,9 +175,10 @@ class IsolateData final { // instruction in compiled code. Address fast_c_call_caller_fp_ = kNullAddress; Address fast_c_call_caller_pc_ = kNullAddress; + Address fast_api_call_target_ = kNullAddress; - // Fields related to the system and JS stack. In particular, this contains the - // stack limit used by stack checks in generated code. + // Fields related to the system and JS stack. In particular, this contains + // the stack limit used by stack checks in generated code. 
StackGuard stack_guard_; RootsTable roots_; @@ -232,6 +242,8 @@ void IsolateData::AssertPredictableLayout() { kFastCCallCallerFPOffset); STATIC_ASSERT(offsetof(IsolateData, fast_c_call_caller_pc_) == kFastCCallCallerPCOffset); + STATIC_ASSERT(offsetof(IsolateData, fast_api_call_target_) == + kFastApiCallTargetOffset); STATIC_ASSERT(offsetof(IsolateData, stack_guard_) == kStackGuardOffset); #ifdef V8_HEAP_SANDBOX STATIC_ASSERT(offsetof(IsolateData, external_pointer_table_) == diff --git a/src/logging/log.cc b/src/logging/log.cc index a3936acd52..2e00b6b53b 100644 --- a/src/logging/log.cc +++ b/src/logging/log.cc @@ -2299,8 +2299,14 @@ void ExistingCodeLogger::LogExistingFunction( #if USES_FUNCTION_DESCRIPTORS entry_point = *FUNCTION_ENTRYPOINT_ADDRESS(entry_point); #endif - CALL_CODE_EVENT_HANDLER( - CallbackEvent(handle(shared->DebugName(), isolate_), entry_point)) + Handle fun_name(shared->DebugName(), isolate_); + CALL_CODE_EVENT_HANDLER(CallbackEvent(fun_name, entry_point)) + + // Fast API function. + Address c_function = v8::ToCData
<Address>(fun_data.GetCFunction()); + if (c_function != kNullAddress) { + CALL_CODE_EVENT_HANDLER(CallbackEvent(fun_name, c_function)) + } } } } diff --git a/src/profiler/tick-sample.cc b/src/profiler/tick-sample.cc index 45fc911856..7cffbd3cda 100644 --- a/src/profiler/tick-sample.cc +++ b/src/profiler/tick-sample.cc @@ -156,14 +156,17 @@ DISABLE_ASAN void TickSample::Init(Isolate* v8_isolate, SampleInfo info; RegisterState regs = reg_state; if (!GetStackSample(v8_isolate, &regs, record_c_entry_frame, stack, - kMaxFramesCount, &info, use_simulator_reg_state)) { + kMaxFramesCount, &info, &state, + use_simulator_reg_state)) { // It is executing JS but failed to collect a stack trace. // Mark the sample as spoiled. pc = nullptr; return; } - state = info.vm_state; + if (state != StateTag::EXTERNAL) { + state = info.vm_state; + } pc = regs.pc; frames_count = static_cast<unsigned>(info.frames_count); has_external_callback = info.external_callback_entry != nullptr; @@ -193,6 +196,7 @@ bool TickSample::GetStackSample(Isolate* v8_isolate, RegisterState* regs, RecordCEntryFrame record_c_entry_frame, void** frames, size_t frames_limit, v8::SampleInfo* sample_info, + StateTag* out_state, bool use_simulator_reg_state) { i::Isolate* isolate = reinterpret_cast<i::Isolate*>(v8_isolate); sample_info->frames_count = 0; @@ -243,6 +247,23 @@ bool TickSample::GetStackSample(Isolate* v8_isolate, RegisterState* regs, ? nullptr : reinterpret_cast<void*>(*external_callback_entry_ptr); } + // 'Fast API calls' are similar to fast C calls (see frames.cc) in that + // they don't build an exit frame when entering C from JS. They have the + // added speciality of having separate "fast" and "default" callbacks, the + // latter being the regular API callback called before the JS function is + // optimized. When TurboFan optimizes the JS caller, the fast callback + // gets executed instead of the default one, therefore we need to store + // its address in the sample.
+ IsolateData* isolate_data = isolate->isolate_data(); + Address fast_c_fp = isolate_data->fast_c_call_caller_fp(); + if (fast_c_fp != kNullAddress && + isolate_data->fast_api_call_target() != kNullAddress) { + sample_info->external_callback_entry = + reinterpret_cast<void*>(isolate_data->fast_api_call_target()); + if (out_state) { + *out_state = StateTag::EXTERNAL; + } + } i::SafeStackFrameIterator it(isolate, reinterpret_cast<i::Address>(regs->pc), reinterpret_cast<i::Address>(regs->fp), diff --git a/src/profiler/tick-sample.h b/src/profiler/tick-sample.h index 69cedef0ba..777c3d192d 100644 --- a/src/profiler/tick-sample.h +++ b/src/profiler/tick-sample.h @@ -56,6 +56,13 @@ struct V8_EXPORT TickSample { * \param sample_info The sample info is filled up by the function * provides number of actual captured stack frames and * the current VM state. + * \param out_state Output parameter. If non-nullptr pointer is provided, + * and the execution is currently in a fast API call, + * records StateTag::EXTERNAL to it. The caller could then + * use this as a marker to not take into account the actual + * VM state recorded in |sample_info|. In the case of fast + * API calls, the VM state must be EXTERNAL, as the callback + * is always an external C++ function.
* \param use_simulator_reg_state When set to true and V8 is running under a * simulator, the method will use the simulator * register state rather than the one provided @@ -69,6 +76,7 @@ struct V8_EXPORT TickSample { RecordCEntryFrame record_c_entry_frame, void** frames, size_t frames_limit, v8::SampleInfo* sample_info, + StateTag* out_state = nullptr, bool use_simulator_reg_state = true); void print() const; diff --git a/test/cctest/cctest.cc b/test/cctest/cctest.cc index 44a0763f6b..438ce9162d 100644 --- a/test/cctest/cctest.cc +++ b/test/cctest/cctest.cc @@ -406,3 +406,11 @@ int main(int argc, char* argv[]) { RegisterThreadedTest* RegisterThreadedTest::first_ = nullptr; int RegisterThreadedTest::count_ = 0; + +bool IsValidUnwrapObject(v8::Object* object) { + i::Address addr = *reinterpret_cast<i::Address*>(object); + auto instance_type = i::Internals::GetInstanceType(addr); + return (instance_type == i::Internals::kJSObjectType || + instance_type == i::Internals::kJSApiObjectType || + instance_type == i::Internals::kJSSpecialApiObjectType); +} diff --git a/test/cctest/cctest.h b/test/cctest/cctest.h index 50cccab689..517aba3238 100644 --- a/test/cctest/cctest.h +++ b/test/cctest/cctest.h @@ -812,4 +812,26 @@ class SimulatorHelper { }; #endif // USE_SIMULATOR +// The following should correspond to Chromium's kV8DOMWrapperTypeIndex and +// kV8DOMWrapperObjectIndex.
+static const int kV8WrapperTypeIndex = 0; +static const int kV8WrapperObjectIndex = 1; + +enum class ApiCheckerResult : uint8_t { + kNotCalled = 0, + kSlowCalled = 1 << 0, + kFastCalled = 1 << 1, +}; +using ApiCheckerResultFlags = v8::base::Flags<ApiCheckerResult>; +DEFINE_OPERATORS_FOR_FLAGS(ApiCheckerResultFlags) + +bool IsValidUnwrapObject(v8::Object* object); + +template <typename T, int offset> +T* GetInternalField(v8::Object* wrapper) { + assert(offset < wrapper->InternalFieldCount()); + return reinterpret_cast<T*>( + wrapper->GetAlignedPointerFromInternalField(offset)); +} + #endif // ifndef CCTEST_H_ diff --git a/test/cctest/test-api.cc b/test/cctest/test-api.cc index d273c530ed..a27e0355e5 100644 --- a/test/cctest/test-api.cc +++ b/test/cctest/test-api.cc @@ -66,6 +66,7 @@ #include "src/objects/string-inl.h" #include "src/objects/synthetic-module-inl.h" #include "src/profiler/cpu-profiler.h" +#include "src/profiler/symbolizer.h" #include "src/strings/unicode-inl.h" #include "src/utils/utils.h" #include "test/cctest/heap/heap-tester.h" @@ -27454,10 +27455,6 @@ UNINITIALIZED_TEST(NestedIsolates) { #ifndef V8_LITE_MODE namespace { -// The following should correspond to Chromium's kV8DOMWrapperObjectIndex.
-static const int kV8WrapperTypeIndex = 0; -static const int kV8WrapperObjectIndex = 1; - template struct ConvertJSValue { static Maybe Get(v8::Local value, @@ -27578,14 +27575,6 @@ struct ConvertJSValue { } }; -enum class ApiCheckerResult : uint8_t { - kNotCalled = 0, - kSlowCalled = 1 << 0, - kFastCalled = 1 << 1, -}; -using ApiCheckerResultFlags = v8::base::Flags; -DEFINE_OPERATORS_FOR_FLAGS(ApiCheckerResultFlags) - template struct BasicApiChecker { static void FastCallback(v8::ApiObject receiver, Value argument, @@ -27606,22 +27595,6 @@ struct BasicApiChecker { ApiCheckerResultFlags result_ = ApiCheckerResult::kNotCalled; }; -bool IsValidUnwrapObject(v8::Object* object) { - v8::internal::Address addr = - *reinterpret_cast(object); - auto instance_type = v8::internal::Internals::GetInstanceType(addr); - return (instance_type == v8::internal::Internals::kJSObjectType || - instance_type == v8::internal::Internals::kJSApiObjectType || - instance_type == v8::internal::Internals::kJSSpecialApiObjectType); -} - -template -T* GetInternalField(v8::Object* wrapper) { - assert(offset < wrapper->InternalFieldCount()); - return reinterpret_cast( - wrapper->GetAlignedPointerFromInternalField(offset)); -} - enum class Behavior { kNoException, kException, // An exception should be thrown by the callback function. 
diff --git a/test/cctest/test-cpu-profiler.cc b/test/cctest/test-cpu-profiler.cc index 1db0a7b426..5a9ca2c2d2 100644 --- a/test/cctest/test-cpu-profiler.cc +++ b/test/cctest/test-cpu-profiler.cc @@ -31,6 +31,7 @@ #include #include "include/libplatform/v8-tracing.h" +#include "include/v8-fast-api-calls.h" #include "include/v8-profiler.h" #include "src/api/api-inl.h" #include "src/base/platform/platform.h" @@ -52,6 +53,7 @@ #include "test/cctest/cctest.h" #include "test/cctest/heap/heap-utils.h" #include "test/cctest/profiler-extension.h" +#include "test/common/flag-utils.h" #ifdef V8_USE_PERFETTO #include "protos/perfetto/trace/trace.pb.h" @@ -3875,6 +3877,162 @@ UNINITIALIZED_TEST(DetailedSourcePositionAPI_Inlining) { isolate->Dispose(); } +namespace { + +struct FastApiReceiver { + static void FastCallback(v8::ApiObject receiver, int argument, + int* fallback) { + v8::Object* receiver_obj = reinterpret_cast(&receiver); + if (!IsValidUnwrapObject(receiver_obj)) { + *fallback = 1; + return; + } + FastApiReceiver* receiver_ptr = + GetInternalField(receiver_obj); + + receiver_ptr->result_ |= ApiCheckerResult::kFastCalled; + + // Artificially slow down the callback with a predictable amount of time. + // This ensures the test has a relatively stable run time on various + // platforms and protects it from flakyness. 
+ v8::base::OS::Sleep(v8::base::TimeDelta::FromMilliseconds(100)); + } + + static void SlowCallback(const v8::FunctionCallbackInfo<v8::Value>& info) { + v8::Object* receiver_obj = v8::Object::Cast(*info.Holder()); + if (!IsValidUnwrapObject(receiver_obj)) { + info.GetIsolate()->ThrowException(v8_str("Called with a non-object.")); + return; + } + FastApiReceiver* receiver = + GetInternalField<FastApiReceiver, kV8WrapperObjectIndex>(receiver_obj); + + receiver->result_ |= ApiCheckerResult::kSlowCalled; + } + + bool DidCallFast() const { return (result_ & ApiCheckerResult::kFastCalled); } + bool DidCallSlow() const { return (result_ & ApiCheckerResult::kSlowCalled); } + + ApiCheckerResultFlags result_ = ApiCheckerResult::kNotCalled; +}; + +} // namespace + +v8::Local<v8::Function> CreateApiCode(LocalContext* env) { + const char* foo_name = "foo"; + const char* script = + "function foo(arg) {" + " for (let i = 0; i < arg; ++i) { receiver.api_func(i); }" + "}" + "%PrepareFunctionForOptimization(foo);" + "foo(42); foo(42);" + "%OptimizeFunctionOnNextCall(foo);"; + CompileRun(script); + + return GetFunction(env->local(), foo_name); +} + +TEST(FastApiCPUProfiler) { +#if !defined(V8_LITE_MODE) && !defined(USE_SIMULATOR) + if (i::FLAG_jitless) return; + if (i::FLAG_turboprop) return; + + FLAG_SCOPE_EXTERNAL(opt); + FLAG_SCOPE_EXTERNAL(turbo_fast_api_calls); + FLAG_SCOPE_EXTERNAL(allow_natives_syntax); + // Disable --always_opt, otherwise we haven't generated the necessary + // feedback to go down the "best optimization" path for the fast call. + UNFLAG_SCOPE_EXTERNAL(always_opt); + UNFLAG_SCOPE_EXTERNAL(prof_browser_mode); + + CcTest::InitializeVM(); + LocalContext env; + v8::Isolate* isolate = CcTest::isolate(); + i::Isolate* i_isolate = reinterpret_cast<i::Isolate*>(isolate); + i_isolate->set_embedder_wrapper_type_index(kV8WrapperTypeIndex); + i_isolate->set_embedder_wrapper_object_index(kV8WrapperObjectIndex); + + i::HandleScope scope(i_isolate); + + // Setup the fast call.
+ FastApiReceiver receiver; + + v8::TryCatch try_catch(isolate); + + v8::CFunction c_func = + v8::CFunction::MakeWithFallbackSupport(FastApiReceiver::FastCallback); + + Local<v8::FunctionTemplate> receiver_templ = v8::FunctionTemplate::New( + isolate, FastApiReceiver::SlowCallback, v8::Local<v8::Value>(), + v8::Local<v8::Signature>(), 1, v8::ConstructorBehavior::kAllow, + v8::SideEffectType::kHasSideEffect, &c_func); + + v8::Local<v8::ObjectTemplate> object_template = + v8::ObjectTemplate::New(isolate); + object_template->SetInternalFieldCount(kV8WrapperObjectIndex + 1); + const char* api_func_str = "api_func"; + object_template->Set(isolate, api_func_str, receiver_templ); + + v8::Local<v8::Object> object = + object_template->NewInstance(env.local()).ToLocalChecked(); + object->SetAlignedPointerInInternalField(kV8WrapperObjectIndex, + reinterpret_cast<void*>(&receiver)); + + int num_runs_arg = 100; + env->Global()->Set(env.local(), v8_str("receiver"), object).Check(); + + // Prepare the code. + v8::Local<v8::Function> function = CreateApiCode(&env); + + // Setup and start CPU profiler. + v8::Local<v8::Value> args[] = { + v8::Integer::New(env->GetIsolate(), num_runs_arg)}; + ProfilerHelper helper(env.local()); + // TODO(mslekova): We could tweak the following count to reduce test + // runtime, while still keeping the test stable. + unsigned external_samples = 1000; + v8::CpuProfile* profile = + helper.Run(function, args, arraysize(args), 0, external_samples); + + // Check if the fast and slow callbacks got executed. + CHECK(receiver.DidCallFast()); + CHECK(receiver.DidCallSlow()); + CHECK(!try_catch.HasCaught()); + + // Check that generated profile has the expected structure. + const v8::CpuProfileNode* root = profile->GetTopDownRoot(); + const v8::CpuProfileNode* foo_node = GetChild(env.local(), root, "foo"); + const v8::CpuProfileNode* api_func_node = + GetChild(env.local(), foo_node, api_func_str); + CHECK_NOT_NULL(api_func_node); + CHECK_EQ(api_func_node->GetSourceType(), CpuProfileNode::kCallback); + + // Check that the CodeEntry is the expected one, i.e.
the fast callback. + CodeEntry* code_entry = + reinterpret_cast<const ProfileNode*>(api_func_node)->entry(); + CodeMap* code_map = reinterpret_cast<CpuProfile*>(profile) + ->cpu_profiler() + ->code_map_for_test(); + CodeEntry* expected_code_entry = + code_map->FindEntry(reinterpret_cast
<Address>(c_func.GetAddress())); + CHECK_EQ(code_entry, expected_code_entry); + + int foo_ticks = foo_node->GetHitCount(); + int api_func_ticks = api_func_node->GetHitCount(); + // Check that at least 80% of the samples in foo hit the fast callback. + CHECK_LE(foo_ticks, api_func_ticks * 0.2); + // The following constant in the CHECK is because above we expect at least + // 1000 samples with EXTERNAL type (see external_samples). Since the only + // thing that generates those kind of samples is the fast callback, then + // we're supposed to have close to 1000 ticks in its node. Since the CPU + // profiler is nondeterministic, we've allowed for some slack, otherwise + // this could be 1000 instead of 800. + CHECK_GE(api_func_ticks, 800); + + profile->Delete(); +#endif +} + } // namespace test_cpu_profiler } // namespace internal } // namespace v8