[fastcall] Add CPU profiler support for fast calls
This CL introduces a new fast_api_call_target field on the isolate, which is
set by Turbofan before making the fast call. It then uses the field when
creating a stack sample and stores it in the existing external_callback_entry
used for regular API callbacks. The CL also adds a cctest with a simple usage
scenario and introduces a minor refactoring in test-api.cc.

Design doc: https://docs.google.com/document/d/1r32qlPzGz0P7nieisJ5h2qfSnWOs40Cigt0LXPipejE/edit

Bug: chromium:1052746
Change-Id: I2dab1bc395ccab0c14088f7c354fb52b08df8d32
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2488683
Commit-Queue: Maya Lekova <mslekova@chromium.org>
Reviewed-by: Georg Neis <neis@chromium.org>
Reviewed-by: Peter Marshall <petermarshall@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71254}
Parent: 18cbf05e8e
Commit: 7a62cceb72
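For context, the embedder-side setup that produces such fast calls looks roughly like the sketch below. It is modeled on the FastApiCPUProfiler cctest added in this CL; MyFastCallback, MySlowCallback, and InstallApiFunc are illustrative names, not part of the CL. Once TurboFan optimizes the JS caller, the fast callback is invoked instead of the slow one, and with this change samples taken inside it are attributed to the callback's code entry.

#include "v8-fast-api-calls.h"
#include "v8.h"

// Fast path: called directly from optimized code, without an exit frame.
static void MyFastCallback(v8::ApiObject receiver, int arg, int* fallback) {
  // Cheap work only; set *fallback = 1 to bail out to the slow path.
}

// Slow/default path: the regular API callback, used before optimization.
static void MySlowCallback(const v8::FunctionCallbackInfo<v8::Value>& info) {
  // Full implementation; unlike the fast path, it may call back into JS.
}

void InstallApiFunc(v8::Isolate* isolate, v8::Local<v8::ObjectTemplate> tmpl) {
  v8::CFunction c_func =
      v8::CFunction::MakeWithFallbackSupport(MyFastCallback);
  tmpl->Set(isolate, "api_func",
            v8::FunctionTemplate::New(
                isolate, MySlowCallback, v8::Local<v8::Value>(),
                v8::Local<v8::Signature>(), 1, v8::ConstructorBehavior::kAllow,
                v8::SideEffectType::kHasSideEffect, &c_func));
}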
@@ -207,8 +207,10 @@ class Internals {
       kNumIsolateDataSlots * kApiSystemPointerSize;
   static const int kIsolateFastCCallCallerPcOffset =
       kIsolateFastCCallCallerFpOffset + kApiSystemPointerSize;
-  static const int kIsolateStackGuardOffset =
-      kIsolateFastCCallCallerPcOffset + kApiSystemPointerSize;
+  static const int kIsolateFastApiCallTargetOffset =
+      kIsolateFastCCallCallerPcOffset + kApiSystemPointerSize;
+  static const int kIsolateStackGuardOffset =
+      kIsolateFastApiCallTargetOffset + kApiSystemPointerSize;
   static const int kIsolateRootsOffset =
       kIsolateStackGuardOffset + 7 * kApiSystemPointerSize;
@@ -826,6 +826,12 @@ ExternalReference ExternalReference::fast_c_call_caller_pc_address(
       isolate->isolate_data()->fast_c_call_caller_pc_address());
 }
 
+ExternalReference ExternalReference::fast_api_call_target_address(
+    Isolate* isolate) {
+  return ExternalReference(
+      isolate->isolate_data()->fast_api_call_target_address());
+}
+
 ExternalReference ExternalReference::stack_is_iterable_address(
     Isolate* isolate) {
   return ExternalReference(
@@ -71,6 +71,7 @@ class StatsCounter;
     "IsolateData::fast_c_call_caller_fp_address")                            \
   V(fast_c_call_caller_pc_address,                                           \
     "IsolateData::fast_c_call_caller_pc_address")                            \
+  V(fast_api_call_target_address, "IsolateData::fast_api_call_target_address") \
   V(stack_is_iterable_address, "IsolateData::stack_is_iterable_address")     \
   V(address_of_regexp_stack_limit_address,                                   \
     "RegExpStack::limit_address_address()")                                  \
@@ -5085,9 +5085,16 @@ Node* EffectControlLinearizer::LowerFastApiCall(Node* node) {
 
   call_descriptor->SetCFunctionInfo(c_signature);
 
+  // CPU profiler support
+  Node* target_address = __ ExternalConstant(
+      ExternalReference::fast_api_call_target_address(isolate()));
+  __ Store(StoreRepresentation(MachineType::PointerRepresentation(),
+                               kNoWriteBarrier),
+           target_address, 0, n.target());
+
   Node** const inputs = graph()->zone()->NewArray<Node*>(
       c_arg_count + FastApiCallNode::kFastCallExtraInputCount);
-  inputs[0] = NodeProperties::GetValueInput(node, 0);  // the target
+  inputs[0] = n.target();
   for (int i = FastApiCallNode::kFastTargetInputCount;
        i < c_arg_count + FastApiCallNode::kFastTargetInputCount; ++i) {
     if (c_signature->ArgumentInfo(i - 1).GetType() ==
@@ -5099,12 +5106,17 @@ Node* EffectControlLinearizer::LowerFastApiCall(Node* node) {
     }
   }
   inputs[c_arg_count + 1] = fast_api_call_stack_slot_;
 
   inputs[c_arg_count + 2] = __ effect();
   inputs[c_arg_count + 3] = __ control();
 
   __ Call(call_descriptor,
           c_arg_count + FastApiCallNode::kFastCallExtraInputCount, inputs);
 
+  __ Store(StoreRepresentation(MachineType::PointerRepresentation(),
+                               kNoWriteBarrier),
+           target_address, 0, __ IntPtrConstant(0));
+
   // Generate the load from `fast_api_call_stack_slot_`.
   Node* load = __ Load(MachineType::Int32(), fast_api_call_stack_slot_, 0);
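The two __ Store operations above bracket the fast call: the first publishes the callback's address in the isolate's fast_api_call_target slot right before the __ Call, and the second resets it to null afterwards, so a profiler interrupt that lands inside the call sees a non-null target and a later sample cannot pick up a stale address. A minimal stand-alone sketch of that protocol (FakeIsolateData and CallFastApi are illustrative stand-ins, not V8 types):

#include <cstdint>

using Address = uintptr_t;
constexpr Address kNullAddress = 0;

// Stand-in for the relevant part of IsolateData.
struct FakeIsolateData {
  Address fast_api_call_target = kNullAddress;
};

// Mirrors the emitted sequence: store the target, make the call, reset.
template <typename Fn, typename... Args>
void CallFastApi(FakeIsolateData* isolate_data, Fn* callback, Args... args) {
  isolate_data->fast_api_call_target = reinterpret_cast<Address>(callback);
  callback(args...);  // corresponds to the __ Call above
  isolate_data->fast_api_call_target = kNullAddress;
}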
@@ -301,13 +301,13 @@ SafeStackFrameIterator::SafeStackFrameIterator(Isolate* isolate, Address pc,
     frame_ = nullptr;
     return;
   }
-  // 'Fast C calls' are a special type of C call where we call directly from JS
-  // to C without an exit frame inbetween. The CEntryStub is responsible for
-  // setting Isolate::c_entry_fp, meaning that it won't be set for fast C calls.
-  // To keep the stack iterable, we store the FP and PC of the caller of the
-  // fast C call on the isolate. This is guaranteed to be the topmost JS frame,
-  // because fast C calls cannot call back into JS. We start iterating the stack
-  // from this topmost JS frame.
+  // 'Fast C calls' are a special type of C call where we call directly from
+  // JS to C without an exit frame inbetween. The CEntryStub is responsible
+  // for setting Isolate::c_entry_fp, meaning that it won't be set for fast C
+  // calls. To keep the stack iterable, we store the FP and PC of the caller
+  // of the fast C call on the isolate. This is guaranteed to be the topmost
+  // JS frame, because fast C calls cannot call back into JS. We start
+  // iterating the stack from this topmost JS frame.
   if (fast_c_fp) {
     DCHECK_NE(kNullAddress, isolate->isolate_data()->fast_c_call_caller_pc());
     type = StackFrame::Type::OPTIMIZED;
@@ -402,6 +402,7 @@ void SafeStackFrameIterator::AdvanceOneFrame() {
   DCHECK(!done());
   StackFrame* last_frame = frame_;
   Address last_sp = last_frame->sp(), last_fp = last_frame->fp();
+
   // Before advancing to the next stack frame, perform pointer validity tests.
   if (!IsValidFrame(last_frame) || !IsValidCaller(last_frame)) {
     frame_ = nullptr;
@@ -75,6 +75,10 @@ class IsolateData final {
     return kFastCCallCallerPCOffset - kIsolateRootBias;
   }
 
+  static constexpr int fast_api_call_target_offset() {
+    return kFastApiCallTargetOffset - kIsolateRootBias;
+  }
+
   // Root-register-relative offset of the given builtin table entry.
   // TODO(ishell): remove in favour of typified id version.
   static int builtin_slot_offset(int builtin_index) {
@@ -90,10 +94,14 @@ class IsolateData final {
   // The FP and PC that are saved right before TurboAssembler::CallCFunction.
   Address* fast_c_call_caller_fp_address() { return &fast_c_call_caller_fp_; }
   Address* fast_c_call_caller_pc_address() { return &fast_c_call_caller_pc_; }
+  // The address of the fast API callback right before it's executed from
+  // generated code.
+  Address* fast_api_call_target_address() { return &fast_api_call_target_; }
   StackGuard* stack_guard() { return &stack_guard_; }
   uint8_t* stack_is_iterable_address() { return &stack_is_iterable_; }
   Address fast_c_call_caller_fp() { return fast_c_call_caller_fp_; }
   Address fast_c_call_caller_pc() { return fast_c_call_caller_pc_; }
+  Address fast_api_call_target() { return fast_api_call_target_; }
   uint8_t stack_is_iterable() { return stack_is_iterable_; }
 
   // Returns true if this address points to data stored in this instance.
@@ -130,6 +138,7 @@ class IsolateData final {
   V(kEmbedderDataOffset, Internals::kNumIsolateDataSlots* kSystemPointerSize) \
   V(kFastCCallCallerFPOffset, kSystemPointerSize)                             \
   V(kFastCCallCallerPCOffset, kSystemPointerSize)                             \
+  V(kFastApiCallTargetOffset, kSystemPointerSize)                             \
   V(kStackGuardOffset, StackGuard::kSizeInBytes)                              \
   V(kRootsTableOffset, RootsTable::kEntriesCount* kSystemPointerSize)         \
   V(kExternalReferenceTableOffset, ExternalReferenceTable::kSizeInBytes)      \
@@ -166,9 +175,10 @@ class IsolateData final {
   // instruction in compiled code.
   Address fast_c_call_caller_fp_ = kNullAddress;
   Address fast_c_call_caller_pc_ = kNullAddress;
+  Address fast_api_call_target_ = kNullAddress;
 
-  // Fields related to the system and JS stack. In particular, this contains the
-  // stack limit used by stack checks in generated code.
+  // Fields related to the system and JS stack. In particular, this contains
+  // the stack limit used by stack checks in generated code.
   StackGuard stack_guard_;
 
   RootsTable roots_;
@@ -232,6 +242,8 @@ void IsolateData::AssertPredictableLayout() {
                 kFastCCallCallerFPOffset);
   STATIC_ASSERT(offsetof(IsolateData, fast_c_call_caller_pc_) ==
                 kFastCCallCallerPCOffset);
+  STATIC_ASSERT(offsetof(IsolateData, fast_api_call_target_) ==
+                kFastApiCallTargetOffset);
   STATIC_ASSERT(offsetof(IsolateData, stack_guard_) == kStackGuardOffset);
 #ifdef V8_HEAP_SANDBOX
   STATIC_ASSERT(offsetof(IsolateData, external_pointer_table_) ==
@@ -2299,8 +2299,14 @@ void ExistingCodeLogger::LogExistingFunction(
 #if USES_FUNCTION_DESCRIPTORS
       entry_point = *FUNCTION_ENTRYPOINT_ADDRESS(entry_point);
 #endif
-      CALL_CODE_EVENT_HANDLER(
-          CallbackEvent(handle(shared->DebugName(), isolate_), entry_point))
+      Handle<String> fun_name(shared->DebugName(), isolate_);
+      CALL_CODE_EVENT_HANDLER(CallbackEvent(fun_name, entry_point))
+
+      // Fast API function.
+      Address c_function = v8::ToCData<Address>(fun_data.GetCFunction());
+      if (c_function != kNullAddress) {
+        CALL_CODE_EVENT_HANDLER(CallbackEvent(fun_name, c_function))
+      }
     }
   }
 }
@@ -156,14 +156,17 @@ DISABLE_ASAN void TickSample::Init(Isolate* v8_isolate,
   SampleInfo info;
   RegisterState regs = reg_state;
   if (!GetStackSample(v8_isolate, &regs, record_c_entry_frame, stack,
-                      kMaxFramesCount, &info, use_simulator_reg_state)) {
+                      kMaxFramesCount, &info, &state,
+                      use_simulator_reg_state)) {
     // It is executing JS but failed to collect a stack trace.
     // Mark the sample as spoiled.
     pc = nullptr;
     return;
   }
 
-  state = info.vm_state;
+  if (state != StateTag::EXTERNAL) {
+    state = info.vm_state;
+  }
   pc = regs.pc;
   frames_count = static_cast<unsigned>(info.frames_count);
   has_external_callback = info.external_callback_entry != nullptr;
@@ -193,6 +196,7 @@ bool TickSample::GetStackSample(Isolate* v8_isolate, RegisterState* regs,
                                 RecordCEntryFrame record_c_entry_frame,
                                 void** frames, size_t frames_limit,
                                 v8::SampleInfo* sample_info,
+                                StateTag* out_state,
                                 bool use_simulator_reg_state) {
   i::Isolate* isolate = reinterpret_cast<i::Isolate*>(v8_isolate);
   sample_info->frames_count = 0;
@@ -243,6 +247,23 @@ bool TickSample::GetStackSample(Isolate* v8_isolate, RegisterState* regs,
             ? nullptr
             : reinterpret_cast<void*>(*external_callback_entry_ptr);
   }
+  // 'Fast API calls' are similar to fast C calls (see frames.cc) in that
+  // they don't build an exit frame when entering C from JS. They have the
+  // added speciality of having separate "fast" and "default" callbacks, the
+  // latter being the regular API callback called before the JS function is
+  // optimized. When TurboFan optimizes the JS caller, the fast callback
+  // gets executed instead of the default one, therefore we need to store
+  // its address in the sample.
+  IsolateData* isolate_data = isolate->isolate_data();
+  Address fast_c_fp = isolate_data->fast_c_call_caller_fp();
+  if (fast_c_fp != kNullAddress &&
+      isolate_data->fast_api_call_target() != kNullAddress) {
+    sample_info->external_callback_entry =
+        reinterpret_cast<void*>(isolate_data->fast_api_call_target());
+    if (out_state) {
+      *out_state = StateTag::EXTERNAL;
+    }
+  }
 
   i::SafeStackFrameIterator it(isolate, reinterpret_cast<i::Address>(regs->pc),
                                reinterpret_cast<i::Address>(regs->fp),
@@ -56,6 +56,13 @@ struct V8_EXPORT TickSample {
    * \param sample_info The sample info is filled up by the function
    *                    provides number of actual captured stack frames and
    *                    the current VM state.
+   * \param out_state Output parameter. If non-nullptr pointer is provided,
+   *                  and the execution is currently in a fast API call,
+   *                  records StateTag::EXTERNAL to it. The caller could then
+   *                  use this as a marker to not take into account the actual
+   *                  VM state recorded in |sample_info|. In the case of fast
+   *                  API calls, the VM state must be EXTERNAL, as the callback
+   *                  is always an external C++ function.
    * \param use_simulator_reg_state When set to true and V8 is running under a
    *                                simulator, the method will use the simulator
    *                                register state rather than the one provided
@@ -69,6 +76,7 @@ struct V8_EXPORT TickSample {
                              RecordCEntryFrame record_c_entry_frame,
                              void** frames, size_t frames_limit,
                              v8::SampleInfo* sample_info,
+                             StateTag* out_state = nullptr,
                              bool use_simulator_reg_state = true);
 
   void print() const;
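The interplay between the new out_state parameter and sample_info->vm_state, as used by TickSample::Init above, can be summarized with a small stand-alone sketch; the StateTag and SampleInfo definitions here are simplified stand-ins rather than the V8 types:

#include <cstddef>

enum class StateTag { JS, GC, EXTERNAL, OTHER };  // simplified stand-in

struct SampleInfo {  // stand-in for v8::SampleInfo
  size_t frames_count = 0;
  StateTag vm_state = StateTag::OTHER;
};

// If the stack sampler reported EXTERNAL through out_state (a fast API call
// was in progress), that value wins; otherwise fall back to the VM state
// recorded in the SampleInfo.
StateTag ResolveSampleState(bool fast_api_call_in_progress,
                            const SampleInfo& info) {
  StateTag state = StateTag::OTHER;
  if (fast_api_call_in_progress) state = StateTag::EXTERNAL;  // *out_state
  if (state != StateTag::EXTERNAL) state = info.vm_state;
  return state;
}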
@@ -406,3 +406,11 @@ int main(int argc, char* argv[]) {
 
 RegisterThreadedTest* RegisterThreadedTest::first_ = nullptr;
 int RegisterThreadedTest::count_ = 0;
+
+bool IsValidUnwrapObject(v8::Object* object) {
+  i::Address addr = *reinterpret_cast<i::Address*>(object);
+  auto instance_type = i::Internals::GetInstanceType(addr);
+  return (instance_type == i::Internals::kJSObjectType ||
+          instance_type == i::Internals::kJSApiObjectType ||
+          instance_type == i::Internals::kJSSpecialApiObjectType);
+}
@@ -812,4 +812,26 @@ class SimulatorHelper {
 };
 #endif  // USE_SIMULATOR
 
+// The following should correspond to Chromium's kV8DOMWrapperTypeIndex and
+// kV8DOMWrapperObjectIndex.
+static const int kV8WrapperTypeIndex = 0;
+static const int kV8WrapperObjectIndex = 1;
+
+enum class ApiCheckerResult : uint8_t {
+  kNotCalled = 0,
+  kSlowCalled = 1 << 0,
+  kFastCalled = 1 << 1,
+};
+using ApiCheckerResultFlags = v8::base::Flags<ApiCheckerResult>;
+DEFINE_OPERATORS_FOR_FLAGS(ApiCheckerResultFlags)
+
+bool IsValidUnwrapObject(v8::Object* object);
+
+template <typename T, int offset>
+T* GetInternalField(v8::Object* wrapper) {
+  assert(offset < wrapper->InternalFieldCount());
+  return reinterpret_cast<T*>(
+      wrapper->GetAlignedPointerFromInternalField(offset));
+}
+
 #endif  // ifndef CCTEST_H_
@@ -66,6 +66,7 @@
 #include "src/objects/string-inl.h"
 #include "src/objects/synthetic-module-inl.h"
 #include "src/profiler/cpu-profiler.h"
 #include "src/profiler/symbolizer.h"
 #include "src/strings/unicode-inl.h"
 #include "src/utils/utils.h"
 #include "test/cctest/heap/heap-tester.h"
@@ -27454,10 +27455,6 @@ UNINITIALIZED_TEST(NestedIsolates) {
 
 #ifndef V8_LITE_MODE
 namespace {
-// The following should correspond to Chromium's kV8DOMWrapperObjectIndex.
-static const int kV8WrapperTypeIndex = 0;
-static const int kV8WrapperObjectIndex = 1;
-
 template <typename T>
 struct ConvertJSValue {
   static Maybe<T> Get(v8::Local<v8::Value> value,
@@ -27578,14 +27575,6 @@ struct ConvertJSValue<bool> {
   }
 };
 
-enum class ApiCheckerResult : uint8_t {
-  kNotCalled = 0,
-  kSlowCalled = 1 << 0,
-  kFastCalled = 1 << 1,
-};
-using ApiCheckerResultFlags = v8::base::Flags<ApiCheckerResult>;
-DEFINE_OPERATORS_FOR_FLAGS(ApiCheckerResultFlags)
-
 template <typename Value, typename Impl>
 struct BasicApiChecker {
   static void FastCallback(v8::ApiObject receiver, Value argument,
@@ -27606,22 +27595,6 @@ struct BasicApiChecker {
   ApiCheckerResultFlags result_ = ApiCheckerResult::kNotCalled;
 };
 
-bool IsValidUnwrapObject(v8::Object* object) {
-  v8::internal::Address addr =
-      *reinterpret_cast<v8::internal::Address*>(object);
-  auto instance_type = v8::internal::Internals::GetInstanceType(addr);
-  return (instance_type == v8::internal::Internals::kJSObjectType ||
-          instance_type == v8::internal::Internals::kJSApiObjectType ||
-          instance_type == v8::internal::Internals::kJSSpecialApiObjectType);
-}
-
-template <typename T, int offset>
-T* GetInternalField(v8::Object* wrapper) {
-  assert(offset < wrapper->InternalFieldCount());
-  return reinterpret_cast<T*>(
-      wrapper->GetAlignedPointerFromInternalField(offset));
-}
-
 enum class Behavior {
   kNoException,
   kException,  // An exception should be thrown by the callback function.
@@ -31,6 +31,7 @@
 #include <memory>
 
 #include "include/libplatform/v8-tracing.h"
+#include "include/v8-fast-api-calls.h"
 #include "include/v8-profiler.h"
 #include "src/api/api-inl.h"
 #include "src/base/platform/platform.h"
@@ -52,6 +53,7 @@
 #include "test/cctest/cctest.h"
 #include "test/cctest/heap/heap-utils.h"
 #include "test/cctest/profiler-extension.h"
+#include "test/common/flag-utils.h"
 
 #ifdef V8_USE_PERFETTO
 #include "protos/perfetto/trace/trace.pb.h"
@@ -3875,6 +3877,162 @@ UNINITIALIZED_TEST(DetailedSourcePositionAPI_Inlining) {
   isolate->Dispose();
 }
 
+namespace {
+
+struct FastApiReceiver {
+  static void FastCallback(v8::ApiObject receiver, int argument,
+                           int* fallback) {
+    v8::Object* receiver_obj = reinterpret_cast<v8::Object*>(&receiver);
+    if (!IsValidUnwrapObject(receiver_obj)) {
+      *fallback = 1;
+      return;
+    }
+    FastApiReceiver* receiver_ptr =
+        GetInternalField<FastApiReceiver, kV8WrapperObjectIndex>(receiver_obj);
+
+    receiver_ptr->result_ |= ApiCheckerResult::kFastCalled;
+
+    // Artificially slow down the callback with a predictable amount of time.
+    // This ensures the test has a relatively stable run time on various
+    // platforms and protects it from flakyness.
+    v8::base::OS::Sleep(v8::base::TimeDelta::FromMilliseconds(100));
+  }
+
+  static void SlowCallback(const v8::FunctionCallbackInfo<v8::Value>& info) {
+    v8::Object* receiver_obj = v8::Object::Cast(*info.Holder());
+    if (!IsValidUnwrapObject(receiver_obj)) {
+      info.GetIsolate()->ThrowException(v8_str("Called with a non-object."));
+      return;
+    }
+    FastApiReceiver* receiver =
+        GetInternalField<FastApiReceiver, kV8WrapperObjectIndex>(receiver_obj);
+
+    receiver->result_ |= ApiCheckerResult::kSlowCalled;
+  }
+
+  bool DidCallFast() const { return (result_ & ApiCheckerResult::kFastCalled); }
+  bool DidCallSlow() const { return (result_ & ApiCheckerResult::kSlowCalled); }
+
+  ApiCheckerResultFlags result_ = ApiCheckerResult::kNotCalled;
+};
+
+}  // namespace
+
+v8::Local<v8::Function> CreateApiCode(LocalContext* env) {
+  const char* foo_name = "foo";
+  const char* script =
+      "function foo(arg) {"
+      "  for (let i = 0; i < arg; ++i) { receiver.api_func(i); }"
+      "}"
+      "%PrepareFunctionForOptimization(foo);"
+      "foo(42); foo(42);"
+      "%OptimizeFunctionOnNextCall(foo);";
+  CompileRun(script);
+
+  return GetFunction(env->local(), foo_name);
+}
+
+TEST(FastApiCPUProfiler) {
+#if !defined(V8_LITE_MODE) && !defined(USE_SIMULATOR)
+  if (i::FLAG_jitless) return;
+  if (i::FLAG_turboprop) return;
+
+  FLAG_SCOPE_EXTERNAL(opt);
+  FLAG_SCOPE_EXTERNAL(turbo_fast_api_calls);
+  FLAG_SCOPE_EXTERNAL(allow_natives_syntax);
+  // Disable --always_opt, otherwise we haven't generated the necessary
+  // feedback to go down the "best optimization" path for the fast call.
+  UNFLAG_SCOPE_EXTERNAL(always_opt);
+  UNFLAG_SCOPE_EXTERNAL(prof_browser_mode);
+
+  CcTest::InitializeVM();
+  LocalContext env;
+  v8::Isolate* isolate = CcTest::isolate();
+  i::Isolate* i_isolate = reinterpret_cast<i::Isolate*>(isolate);
+  i_isolate->set_embedder_wrapper_type_index(kV8WrapperTypeIndex);
+  i_isolate->set_embedder_wrapper_object_index(kV8WrapperObjectIndex);
+
+  i::HandleScope scope(i_isolate);
+
+  // Setup the fast call.
+  FastApiReceiver receiver;
+
+  v8::TryCatch try_catch(isolate);
+
+  v8::CFunction c_func =
+      v8::CFunction::MakeWithFallbackSupport(FastApiReceiver::FastCallback);
+
+  Local<v8::FunctionTemplate> receiver_templ = v8::FunctionTemplate::New(
+      isolate, FastApiReceiver::SlowCallback, v8::Local<v8::Value>(),
+      v8::Local<v8::Signature>(), 1, v8::ConstructorBehavior::kAllow,
+      v8::SideEffectType::kHasSideEffect, &c_func);
+
+  v8::Local<v8::ObjectTemplate> object_template =
+      v8::ObjectTemplate::New(isolate);
+  object_template->SetInternalFieldCount(kV8WrapperObjectIndex + 1);
+  const char* api_func_str = "api_func";
+  object_template->Set(isolate, api_func_str, receiver_templ);
+
+  v8::Local<v8::Object> object =
+      object_template->NewInstance(env.local()).ToLocalChecked();
+  object->SetAlignedPointerInInternalField(kV8WrapperObjectIndex,
+                                           reinterpret_cast<void*>(&receiver));
+
+  int num_runs_arg = 100;
+  env->Global()->Set(env.local(), v8_str("receiver"), object).Check();
+
+  // Prepare the code.
+  v8::Local<v8::Function> function = CreateApiCode(&env);
+
+  // Setup and start CPU profiler.
+  v8::Local<v8::Value> args[] = {
+      v8::Integer::New(env->GetIsolate(), num_runs_arg)};
+  ProfilerHelper helper(env.local());
+  // TODO(mslekova): We could tweak the following count to reduce test
+  // runtime, while still keeping the test stable.
+  unsigned external_samples = 1000;
+  v8::CpuProfile* profile =
+      helper.Run(function, args, arraysize(args), 0, external_samples);
+
+  // Check if the fast and slow callbacks got executed.
+  CHECK(receiver.DidCallFast());
+  CHECK(receiver.DidCallSlow());
+  CHECK(!try_catch.HasCaught());
+
+  // Check that generated profile has the expected structure.
+  const v8::CpuProfileNode* root = profile->GetTopDownRoot();
+  const v8::CpuProfileNode* foo_node = GetChild(env.local(), root, "foo");
+  const v8::CpuProfileNode* api_func_node =
+      GetChild(env.local(), foo_node, api_func_str);
+  CHECK_NOT_NULL(api_func_node);
+  CHECK_EQ(api_func_node->GetSourceType(), CpuProfileNode::kCallback);
+
+  // Check that the CodeEntry is the expected one, i.e. the fast callback.
+  CodeEntry* code_entry =
+      reinterpret_cast<const ProfileNode*>(api_func_node)->entry();
+  CodeMap* code_map = reinterpret_cast<CpuProfile*>(profile)
+                          ->cpu_profiler()
+                          ->code_map_for_test();
+  CodeEntry* expected_code_entry =
+      code_map->FindEntry(reinterpret_cast<Address>(c_func.GetAddress()));
+  CHECK_EQ(code_entry, expected_code_entry);
+
+  int foo_ticks = foo_node->GetHitCount();
+  int api_func_ticks = api_func_node->GetHitCount();
+  // Check that at least 80% of the samples in foo hit the fast callback.
+  CHECK_LE(foo_ticks, api_func_ticks * 0.2);
+  // The following constant in the CHECK is because above we expect at least
+  // 1000 samples with EXTERNAL type (see external_samples). Since the only
+  // thing that generates those kind of samples is the fast callback, then
+  // we're supposed to have close to 1000 ticks in its node. Since the CPU
+  // profiler is nondeterministic, we've allowed for some slack, otherwise
+  // this could be 1000 instead of 800.
+  CHECK_GE(api_func_ticks, 800);
+
+  profile->Delete();
+#endif
+}
+
 }  // namespace test_cpu_profiler
 }  // namespace internal
 }  // namespace v8