Support higher CPU profiler sampling rate on posix systems

A new flag is added that allows specifying the CPU profiler sampling rate in microseconds as a command line argument. It was tested to work fine with a 100us interval (currently it is 1ms). Default values are kept the same as in the current implementation. The new implementation is enabled only on POSIX platforms which use signals to collect samples. Other platforms that pause the thread being sampled are to follow.

SIGPROF signals are now sent on the profiler event processor thread to make sure that the processing thread does not fall far behind the sampling.

The patch is based on the previous one that was rolled out in r13851. The main difference is that the circular queue is not modified for now.

On Linux sampling for CPU profiler is initiated on the profiler event processor thread, other platforms to follow.

CPU profiler continues to use SamplingCircularQueue, we will probably replace it with a single sample buffer when Mac and Win ports support profiling on the event processing thread.

When --prof option is specified profiling is initiated either on the profiler event processor thread if CPU profiler is on or on the SignalSender thread as it used to be if no CPU profiles are being collected.

ProfilerEventsProcessor::ProcessEventsAndDoSample now waits in a tight loop, processing collected samples until the sampling interval expires. To save CPU resources, I'm planning to change that to use nanosleep, as only one sample is expected in the queue at any point.

BUG=v8:2814
R=bmeurer@chromium.org

Review URL: https://codereview.chromium.org/21101002

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@16310 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
yurys@chromium.org 2013-08-26 07:17:12 +00:00
parent bb19c48faa
commit 4db1f475ec
6 changed files with 115 additions and 23 deletions

View File

@ -43,10 +43,15 @@ namespace internal {
static const int kProfilerStackSize = 64 * KB;
ProfilerEventsProcessor::ProfilerEventsProcessor(ProfileGenerator* generator)
ProfilerEventsProcessor::ProfilerEventsProcessor(
ProfileGenerator* generator,
Sampler* sampler,
int period_in_useconds)
: Thread(Thread::Options("v8:ProfEvntProc", kProfilerStackSize)),
generator_(generator),
sampler_(sampler),
running_(true),
period_in_useconds_(period_in_useconds),
last_code_event_id_(0), last_processed_code_event_id_(0) {
}
@ -118,15 +123,39 @@ bool ProfilerEventsProcessor::ProcessTicks() {
}
void ProfilerEventsProcessor::Run() {
while (running_) {
// Process ticks until we have any.
// Processes queued tick and code events for one sampling period, then asks
// the sampler to take the next sample. Called repeatedly from Run() when
// Sampler::CanSampleOnProfilerEventsProcessorThread() is true.
void ProfilerEventsProcessor::ProcessEventsAndDoSample() {
// NOTE(review): adding period_in_useconds_ directly to OS::Ticks() assumes
// OS::Ticks() counts microseconds -- confirm against the platform layer.
int64_t stop_time = OS::Ticks() + period_in_useconds_;
// Keep processing existing events until we need to do next sample.
// This is a polling loop: it yields the CPU on every iteration but does not
// sleep, so the thread stays runnable for the whole period.
while (OS::Ticks() < stop_time) {
if (ProcessTicks()) {
// All ticks of the current last_processed_code_event_id_ are processed,
// All ticks of the current dequeue_order are processed,
// proceed to the next code event.
ProcessCodeEvent();
}
YieldCPU();
}
// Schedule next sample. sampler_ is NULL in tests.
if (sampler_) sampler_->DoSample();
}
// Performs a single round of event processing and then gives up the CPU.
// Used by Run() on platforms where sampling is not driven from this thread.
void ProfilerEventsProcessor::ProcessEventsAndYield() {
  // Handle the ticks that are currently available.
  const bool ticks_done = ProcessTicks();
  if (ticks_done) {
    // Every tick belonging to the current dequeue_order has been handled,
    // so it is time to move on to the next code event.
    ProcessCodeEvent();
  }
  YieldCPU();
}
void ProfilerEventsProcessor::Run() {
while (running_) {
if (Sampler::CanSampleOnProfilerEventsProcessorThread()) {
ProcessEventsAndDoSample();
} else {
ProcessEventsAndYield();
}
}
// Process remaining tick events.
@ -403,7 +432,9 @@ void CpuProfiler::StartProcessorIfNotStarted() {
saved_logging_nesting_ = logger->logging_nesting_;
logger->logging_nesting_ = 0;
generator_ = new ProfileGenerator(profiles_);
processor_ = new ProfilerEventsProcessor(generator_);
Sampler* sampler = logger->sampler();
processor_ = new ProfilerEventsProcessor(
generator_, sampler, FLAG_cpu_profiler_sampling_interval);
is_profiling_ = true;
processor_->StartSynchronously();
// Enumerate stuff we already have in the heap.
@ -415,7 +446,9 @@ void CpuProfiler::StartProcessorIfNotStarted() {
logger->LogAccessorCallbacks();
LogBuiltins();
// Enable stack sampling.
Sampler* sampler = logger->sampler();
if (Sampler::CanSampleOnProfilerEventsProcessorThread()) {
sampler->SetHasProcessingThread(true);
}
sampler->IncreaseProfilingDepth();
if (!sampler->IsActive()) {
sampler->Start();
@ -453,16 +486,19 @@ void CpuProfiler::StopProcessor() {
Logger* logger = isolate_->logger();
Sampler* sampler = reinterpret_cast<Sampler*>(logger->ticker_);
sampler->DecreaseProfilingDepth();
if (need_to_stop_sampler_) {
sampler->Stop();
need_to_stop_sampler_ = false;
}
is_profiling_ = false;
processor_->StopSynchronously();
delete processor_;
delete generator_;
processor_ = NULL;
generator_ = NULL;
if (Sampler::CanSampleOnProfilerEventsProcessorThread()) {
sampler->SetHasProcessingThread(false);
}
if (need_to_stop_sampler_) {
sampler->Stop();
need_to_stop_sampler_ = false;
}
logger->logging_nesting_ = saved_logging_nesting_;
}

View File

@ -136,7 +136,9 @@ class CodeEventsContainer {
// methods called by event producers: VM and stack sampler threads.
class ProfilerEventsProcessor : public Thread {
public:
explicit ProfilerEventsProcessor(ProfileGenerator* generator);
ProfilerEventsProcessor(ProfileGenerator* generator,
Sampler* sampler,
int period_in_useconds);
virtual ~ProfilerEventsProcessor() {}
// Thread control.
@ -160,8 +162,14 @@ class ProfilerEventsProcessor : public Thread {
bool ProcessCodeEvent();
bool ProcessTicks();
void ProcessEventsAndDoSample();
void ProcessEventsAndYield();
ProfileGenerator* generator_;
Sampler* sampler_;
bool running_;
// Sampling period in microseconds.
const int period_in_useconds_;
UnboundQueue<CodeEventsContainer> events_buffer_;
static const size_t kTickSampleBufferSize = 1 * MB;
static const size_t kTickSampleQueueLength =

View File

@ -462,6 +462,18 @@ DEFINE_bool(compilation_cache, true, "enable compilation cache")
DEFINE_bool(cache_prototype_transitions, true, "cache prototype transitions")
// cpu-profiler.cc
// Default CPU profiler sampling interval, in microseconds (see the flag's
// help string below). DEFAULT_INTERVAL is a temporary macro that exists only
// to select the per-platform default for the flag definition.
#if defined(ANDROID)
// Phones and tablets have processors that are much slower than desktop
// and laptop computers for which current heuristics are tuned.
#define DEFAULT_INTERVAL 5000
#else
#define DEFAULT_INTERVAL 1000
#endif
DEFINE_int(cpu_profiler_sampling_interval, DEFAULT_INTERVAL,
"CPU profiler sampling interval in microseconds")
// The helper macro is no longer needed once the flag has been defined.
#undef DEFAULT_INTERVAL
// debug.cc
DEFINE_bool(trace_debug_json, false, "trace debugging JSON request/response")
DEFINE_bool(trace_js_array_abuse, false,

View File

@ -174,7 +174,8 @@ class PlatformDataCommon : public Malloced {
class Sampler::PlatformData : public PlatformDataCommon {
public:
PlatformData() : vm_tid_(pthread_self()) {}
pthread_t vm_tid() const { return vm_tid_; }
void SendProfilingSignal() const;
private:
pthread_t vm_tid_;
@ -492,9 +493,7 @@ class SamplerThread : public Thread {
#if defined(USE_SIGNALS)
void SampleContext(Sampler* sampler) {
if (!SignalHandler::Installed()) return;
pthread_t tid = sampler->platform_data()->vm_tid();
pthread_kill(tid, SIGPROF);
sampler->platform_data()->SendProfilingSignal();
}
#elif defined(__MACH__)
@ -607,6 +606,14 @@ Mutex* SamplerThread::mutex_ = NULL;
SamplerThread* SamplerThread::instance_ = NULL;
#if defined(USE_SIGNALS)
// Delivers SIGPROF to the thread recorded in vm_tid_. Nothing is sent unless
// the profiler's signal handler reports itself as installed.
void Sampler::PlatformData::SendProfilingSignal() const {
  if (SignalHandler::Installed()) {
    pthread_kill(vm_tid_, SIGPROF);
  }
}
#endif
//
// StackTracer implementation
//
@ -665,6 +672,7 @@ Sampler::Sampler(Isolate* isolate, int interval)
: isolate_(isolate),
interval_(interval),
profiling_(false),
has_processing_thread_(false),
active_(false),
is_counting_samples_(false),
js_and_external_sample_count_(0) {
@ -708,4 +716,20 @@ void Sampler::SampleStack(const RegisterState& state) {
}
}
// Reports whether this build can initiate samples from the profiler events
// processor thread. That is the case exactly when USE_SIGNALS is defined.
bool Sampler::CanSampleOnProfilerEventsProcessorThread() {
#if defined(USE_SIGNALS)
  const bool can_sample_on_processor_thread = true;
#else
  const bool can_sample_on_processor_thread = false;
#endif
  return can_sample_on_processor_thread;
}
// Requests one sample by sending the profiling signal through the platform
// data. Compiles to a no-op when USE_SIGNALS is not defined.
void Sampler::DoSample() {
#ifdef USE_SIGNALS
  this->platform_data()->SendProfilingSignal();
#endif
}
} } // namespace v8::internal

View File

@ -94,14 +94,25 @@ class Sampler {
void Start();
void Stop();
// Is the sampler used for profiling?
bool IsProfiling() const { return NoBarrier_Load(&profiling_) > 0; }
// Whether the sampling thread should use this Sampler for CPU profiling?
bool IsProfiling() const {
  // Not profiling at all: the profiling depth counter is not positive.
  if (NoBarrier_Load(&profiling_) <= 0) return false;
  // Profiling is on, but report false while a processing thread is
  // registered via SetHasProcessingThread(true).
  return !NoBarrier_Load(&has_processing_thread_);
}
void IncreaseProfilingDepth() { NoBarrier_AtomicIncrement(&profiling_, 1); }
void DecreaseProfilingDepth() { NoBarrier_AtomicIncrement(&profiling_, -1); }
// Whether the sampler is running (that is, consumes resources).
bool IsActive() const { return NoBarrier_Load(&active_); }
// If true next sample must be initiated on the profiler event processor
// thread right after latest sample is processed.
static bool CanSampleOnProfilerEventsProcessorThread();
void DoSample();
// Records whether a processing thread is attached to this sampler. While the
// flag is set, IsProfiling() returns false.
void SetHasProcessingThread(bool value) {
  NoBarrier_Store(&has_processing_thread_, value ? 1 : 0);
}
// Used in tests to make sure that stack sampling is performed.
unsigned js_and_external_sample_count() const {
return js_and_external_sample_count_;
@ -125,6 +136,7 @@ class Sampler {
Isolate* isolate_;
const int interval_;
Atomic32 profiling_;
Atomic32 has_processing_thread_;
Atomic32 active_;
PlatformData* data_; // Platform specific data.
bool is_counting_samples_;

View File

@ -51,7 +51,7 @@ TEST(StartStop) {
CpuProfilesCollection profiles;
ProfileGenerator generator(&profiles);
SmartPointer<ProfilerEventsProcessor> processor(
new ProfilerEventsProcessor(&generator));
new ProfilerEventsProcessor(&generator, NULL, 100));
processor->Start();
processor->StopSynchronously();
}
@ -143,7 +143,7 @@ TEST(CodeEvents) {
profiles->StartProfiling("", 1, false);
ProfileGenerator generator(profiles);
SmartPointer<ProfilerEventsProcessor> processor(
new ProfilerEventsProcessor(&generator));
new ProfilerEventsProcessor(&generator, NULL, 100));
processor->Start();
CpuProfiler profiler(isolate, profiles, &generator, *processor);
@ -205,7 +205,7 @@ TEST(TickEvents) {
profiles->StartProfiling("", 1, false);
ProfileGenerator generator(profiles);
SmartPointer<ProfilerEventsProcessor> processor(
new ProfilerEventsProcessor(&generator));
new ProfilerEventsProcessor(&generator, NULL, 100));
processor->Start();
CpuProfiler profiler(isolate, profiles, &generator, *processor);
@ -274,7 +274,7 @@ TEST(Issue1398) {
profiles->StartProfiling("", 1, false);
ProfileGenerator generator(profiles);
SmartPointer<ProfilerEventsProcessor> processor(
new ProfilerEventsProcessor(&generator));
new ProfilerEventsProcessor(&generator, NULL, 100));
processor->Start();
CpuProfiler profiler(isolate, profiles, &generator, *processor);