CPU profiler: make code events handling scalable.

I changed the implementation of a queue between the VM and processor
thread to be unbounded and lock-free, using Herb Sutter's example from
DDJ article: http://www.ddj.com/high-performance-computing/210604448
This had brought back profiling overhead to a minimum for the page
from Chromium's issue 16184.

BUG=714

Review URL: http://codereview.chromium.org/2091019

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4706 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
mikhail.naganov@gmail.com 2010-05-22 05:27:19 +00:00
parent 2c48283185
commit decd0fed78
21 changed files with 285 additions and 123 deletions

View File

@ -34,54 +34,6 @@ namespace v8 {
namespace internal {
template<typename Record>
CircularQueue<Record>::CircularQueue(int desired_buffer_size_in_bytes)
: buffer_(NewArray<Record>(desired_buffer_size_in_bytes / sizeof(Record))),
buffer_end_(buffer_ + desired_buffer_size_in_bytes / sizeof(Record)),
enqueue_semaphore_(
OS::CreateSemaphore(static_cast<int>(buffer_end_ - buffer_) - 1)),
enqueue_pos_(buffer_),
dequeue_pos_(buffer_) {
// To be able to distinguish between a full and an empty queue
// state, the queue must be capable of containing at least 2
// records.
ASSERT((buffer_end_ - buffer_) >= 2);
}
template<typename Record>
CircularQueue<Record>::~CircularQueue() {
DeleteArray(buffer_);
delete enqueue_semaphore_;
}
template<typename Record>
void CircularQueue<Record>::Dequeue(Record* rec) {
ASSERT(!IsEmpty());
*rec = *dequeue_pos_;
dequeue_pos_ = Next(dequeue_pos_);
// Tell we have a spare record.
enqueue_semaphore_->Signal();
}
template<typename Record>
void CircularQueue<Record>::Enqueue(const Record& rec) {
// Wait until we have at least one spare record.
enqueue_semaphore_->Wait();
ASSERT(Next(enqueue_pos_) != dequeue_pos_);
*enqueue_pos_ = rec;
enqueue_pos_ = Next(enqueue_pos_);
}
template<typename Record>
Record* CircularQueue<Record>::Next(Record* curr) {
return ++curr != buffer_end_ ? curr : buffer_;
}
void* SamplingCircularQueue::Enqueue() {
WrapPositionIfNeeded(&producer_pos_->enqueue_pos);
void* result = producer_pos_->enqueue_pos;

View File

@ -32,32 +32,6 @@ namespace v8 {
namespace internal {
// Lock-based blocking circular queue for small records. Intended for
// transfer of small records between a single producer and a single
// consumer. Blocks on enqueue operation if the queue is full.
template<typename Record>
class CircularQueue {
public:
inline explicit CircularQueue(int desired_buffer_size_in_bytes);
inline ~CircularQueue();
INLINE(void Dequeue(Record* rec));
INLINE(void Enqueue(const Record& rec));
INLINE(bool IsEmpty()) { return enqueue_pos_ == dequeue_pos_; }
private:
INLINE(Record* Next(Record* curr));
Record* buffer_;
Record* const buffer_end_;
Semaphore* enqueue_semaphore_;
Record* enqueue_pos_;
Record* dequeue_pos_;
DISALLOW_COPY_AND_ASSIGN(CircularQueue);
};
// Lock-free cache-friendly sampling circular queue for large
// records. Intended for fast transfer of large records between a
// single producer and a single consumer. If the queue is full,

View File

@ -34,6 +34,7 @@
#include "circular-queue-inl.h"
#include "profile-generator-inl.h"
#include "unbound-queue-inl.h"
namespace v8 {
namespace internal {

View File

@ -46,7 +46,6 @@ static const int kTickSamplesBufferChunksCount = 16;
ProfilerEventsProcessor::ProfilerEventsProcessor(ProfileGenerator* generator)
: generator_(generator),
running_(false),
events_buffer_(kEventsBufferSize),
ticks_buffer_(sizeof(TickSampleEventRecord),
kTickSamplesBufferChunkSize,
kTickSamplesBufferChunksCount),

View File

@ -31,6 +31,7 @@
#ifdef ENABLE_LOGGING_AND_PROFILING
#include "circular-queue.h"
#include "unbound-queue.h"
namespace v8 {
namespace internal {
@ -181,7 +182,7 @@ class ProfilerEventsProcessor : public Thread {
ProfileGenerator* generator_;
bool running_;
CircularQueue<CodeEventsContainer> events_buffer_;
UnboundQueue<CodeEventsContainer> events_buffer_;
SamplingCircularQueue ticks_buffer_;
unsigned enqueue_order_;
};

View File

@ -165,6 +165,28 @@ int OS::ActivationFrameAlignment() {
}
#ifdef V8_TARGET_ARCH_ARM
// 0xffff0fa0 is the hard coded address of a function provided by
// the kernel which implements a memory barrier. On older
// ARM architecture revisions (pre-v6) this may be implemented using
// a syscall. This address is stable, and in active use (hard coded)
// by at least glibc-2.7 and the Android C library.
typedef void (*LinuxKernelMemoryBarrierFunc)(void);
LinuxKernelMemoryBarrierFunc pLinuxKernelMemoryBarrier __attribute__((weak)) =
(LinuxKernelMemoryBarrierFunc) 0xffff0fa0;
#endif
void OS::ReleaseStore(volatile AtomicWord* ptr, AtomicWord value) {
#ifdef V8_TARGET_ARCH_ARM
pLinuxKernelMemoryBarrier();
#else
__asm__ __volatile__("" : : : "memory");
// An x86 store acts as a release barrier.
#endif
*ptr = value;
}
const char* OS::LocalTimezone(double time) {
if (isnan(time)) return "";
time_t tv = static_cast<time_t>(floor(time/msPerSecond));

View File

@ -39,6 +39,7 @@
#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <libkern/OSAtomic.h>
#include <mach/mach.h>
#include <mach/semaphore.h>
#include <mach/task.h>
@ -259,6 +260,12 @@ int OS::ActivationFrameAlignment() {
}
void OS::ReleaseStore(volatile AtomicWord* ptr, AtomicWord value) {
OSMemoryBarrier();
*ptr = value;
}
const char* OS::LocalTimezone(double time) {
if (isnan(time)) return "";
time_t tv = static_cast<time_t>(floor(time/msPerSecond));

View File

@ -1340,6 +1340,12 @@ int OS::ActivationFrameAlignment() {
}
void OS::ReleaseStore(volatile AtomicWord* ptr, AtomicWord value) {
MemoryBarrier();
*ptr = value;
}
bool VirtualMemory::IsReserved() {
return address_ != NULL;
}

View File

@ -277,6 +277,8 @@ class OS {
// the platform doesn't care. Guaranteed to be a power of two.
static int ActivationFrameAlignment();
static void ReleaseStore(volatile AtomicWord* ptr, AtomicWord value);
private:
static const int msPerSecond = 1000;

87
src/unbound-queue-inl.h Normal file
View File

@ -0,0 +1,87 @@
// Copyright 2010 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef V8_UNBOUND_QUEUE_INL_H_
#define V8_UNBOUND_QUEUE_INL_H_
#include "unbound-queue.h"
namespace v8 {
namespace internal {
template<typename Record>
struct UnboundQueue<Record>::Node: public Malloced {
explicit Node(const Record& value)
: value(value), next(NULL) {
}
Record value;
Node* next;
};
template<typename Record>
UnboundQueue<Record>::UnboundQueue() {
first_ = new Node(Record());
divider_ = last_ = reinterpret_cast<AtomicWord>(first_);
}
template<typename Record>
UnboundQueue<Record>::~UnboundQueue() {
while (first_ != NULL) DeleteFirst();
}
template<typename Record>
void UnboundQueue<Record>::DeleteFirst() {
Node* tmp = first_;
first_ = tmp->next;
delete tmp;
}
template<typename Record>
void UnboundQueue<Record>::Dequeue(Record* rec) {
ASSERT(divider_ != last_);
Node* next = reinterpret_cast<Node*>(divider_)->next;
*rec = next->value;
OS::ReleaseStore(&divider_, reinterpret_cast<AtomicWord>(next));
}
template<typename Record>
void UnboundQueue<Record>::Enqueue(const Record& rec) {
Node*& next = reinterpret_cast<Node*>(last_)->next;
next = new Node(rec);
OS::ReleaseStore(&last_, reinterpret_cast<AtomicWord>(next));
while (first_ != reinterpret_cast<Node*>(divider_)) DeleteFirst();
}
} } // namespace v8::internal
#endif // V8_UNBOUND_QUEUE_INL_H_

66
src/unbound-queue.h Normal file
View File

@ -0,0 +1,66 @@
// Copyright 2010 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef V8_UNBOUND_QUEUE_
#define V8_UNBOUND_QUEUE_
namespace v8 {
namespace internal {
// Lock-free unbound queue for small records. Intended for
// transferring small records between a Single producer and a Single
// consumer. Doesn't have restrictions on the number of queued
// elements, so producer never blocks. Implemented after Herb
// Sutter's article:
// http://www.ddj.com/high-performance-computing/210604448
template<typename Record>
class UnboundQueue BASE_EMBEDDED {
public:
inline UnboundQueue();
inline ~UnboundQueue();
INLINE(void Dequeue(Record* rec));
INLINE(void Enqueue(const Record& rec));
INLINE(bool IsEmpty()) { return divider_ == last_; }
private:
INLINE(void DeleteFirst());
struct Node;
Node* first_;
AtomicWord divider_; // Node*
AtomicWord last_; // Node*
DISALLOW_COPY_AND_ASSIGN(UnboundQueue);
};
} } // namespace v8::internal
#endif // V8_UNBOUND_QUEUE_

View File

@ -149,10 +149,10 @@ void V8::TearDown() {
Top::TearDown();
CpuProfiler::TearDown();
Heap::TearDown();
CpuProfiler::TearDown();
Logger::TearDown();
is_running_ = false;

View File

@ -71,6 +71,7 @@ SOURCES = {
'test-strings.cc',
'test-threads.cc',
'test-thread-termination.cc',
'test-unbound-queue.cc',
'test-utils.cc',
'test-version.cc'
],

View File

@ -1,6 +1,6 @@
// Copyright 2010 the V8 project authors. All rights reserved.
//
// Tests of circular queues.
// Tests of the circular queue.
#include "v8.h"
#include "circular-queue-inl.h"
@ -8,53 +8,9 @@
namespace i = v8::internal;
using i::CircularQueue;
using i::SamplingCircularQueue;
TEST(SingleRecordCircularQueue) {
typedef int Record;
CircularQueue<Record> cq(sizeof(Record) * 2);
CHECK(cq.IsEmpty());
cq.Enqueue(1);
CHECK(!cq.IsEmpty());
Record rec = 0;
cq.Dequeue(&rec);
CHECK_EQ(1, rec);
CHECK(cq.IsEmpty());
}
TEST(MultipleRecordsCircularQueue) {
typedef int Record;
const int kQueueSize = 10;
CircularQueue<Record> cq(sizeof(Record) * (kQueueSize + 1));
CHECK(cq.IsEmpty());
cq.Enqueue(1);
CHECK(!cq.IsEmpty());
for (int i = 2; i <= 5; ++i) {
cq.Enqueue(i);
CHECK(!cq.IsEmpty());
}
Record rec = 0;
for (int i = 1; i <= 4; ++i) {
CHECK(!cq.IsEmpty());
cq.Dequeue(&rec);
CHECK_EQ(i, rec);
}
for (int i = 6; i <= 12; ++i) {
cq.Enqueue(i);
CHECK(!cq.IsEmpty());
}
for (int i = 5; i <= 12; ++i) {
CHECK(!cq.IsEmpty());
cq.Dequeue(&rec);
CHECK_EQ(i, rec);
}
CHECK(cq.IsEmpty());
}
TEST(SamplingCircularQueue) {
typedef SamplingCircularQueue::Cell Record;
const int kRecordsPerChunk = 4;

View File

@ -0,0 +1,54 @@
// Copyright 2010 the V8 project authors. All rights reserved.
//
// Tests of the unbound queue.
#include "v8.h"
#include "unbound-queue-inl.h"
#include "cctest.h"
namespace i = v8::internal;
using i::UnboundQueue;
TEST(SingleRecord) {
typedef int Record;
UnboundQueue<Record> cq;
CHECK(cq.IsEmpty());
cq.Enqueue(1);
CHECK(!cq.IsEmpty());
Record rec = 0;
cq.Dequeue(&rec);
CHECK_EQ(1, rec);
CHECK(cq.IsEmpty());
}
TEST(MultipleRecords) {
typedef int Record;
UnboundQueue<Record> cq;
CHECK(cq.IsEmpty());
cq.Enqueue(1);
CHECK(!cq.IsEmpty());
for (int i = 2; i <= 5; ++i) {
cq.Enqueue(i);
CHECK(!cq.IsEmpty());
}
Record rec = 0;
for (int i = 1; i <= 4; ++i) {
CHECK(!cq.IsEmpty());
cq.Dequeue(&rec);
CHECK_EQ(i, rec);
}
for (int i = 6; i <= 12; ++i) {
cq.Enqueue(i);
CHECK(!cq.IsEmpty());
}
for (int i = 5; i <= 12; ++i) {
CHECK(!cq.IsEmpty());
cq.Dequeue(&rec);
CHECK_EQ(i, rec);
}
CHECK(cq.IsEmpty());
}

View File

@ -412,6 +412,8 @@
'../../src/top.h',
'../../src/type-info.cc',
'../../src/type-info.h',
'../../src/unbound-queue-inl.h',
'../../src/unbound-queue.h',
'../../src/unicode-inl.h',
'../../src/unicode.cc',
'../../src/unicode.h',

View File

@ -627,6 +627,8 @@
9FBE03E410BD412600F8BFBA /* fast-codegen-arm.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = "fast-codegen-arm.cc"; path = "arm/fast-codegen-arm.cc"; sourceTree = "<group>"; };
9FC86ABB0F5FEDAC00F22668 /* oprofile-agent.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = "oprofile-agent.cc"; sourceTree = "<group>"; };
9FC86ABC0F5FEDAC00F22668 /* oprofile-agent.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "oprofile-agent.h"; sourceTree = "<group>"; };
9FF7A28211A642EA0051B8F2 /* unbound-queue-inl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "unbound-queue-inl.h"; sourceTree = "<group>"; };
9FF7A28311A642EA0051B8F2 /* unbound-queue.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "unbound-queue.h"; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
@ -970,6 +972,8 @@
897FF1910E719B8F00D62E90 /* top.h */,
9FA38BAE1175B2D200C4CD55 /* type-info.cc */,
9FA38BAF1175B2D200C4CD55 /* type-info.h */,
9FF7A28211A642EA0051B8F2 /* unbound-queue-inl.h */,
9FF7A28311A642EA0051B8F2 /* unbound-queue.h */,
897FF1920E719B8F00D62E90 /* unicode-inl.h */,
897FF1930E719B8F00D62E90 /* unicode.cc */,
897FF1940E719B8F00D62E90 /* unicode.h */,

View File

@ -960,6 +960,14 @@
RelativePath="..\..\src\type-info.h"
>
</File>
<File
RelativePath="..\..\src\unbound-queue-inl.h"
>
</File>
<File
RelativePath="..\..\src\unbound-queue.h"
>
</File>
<File
RelativePath="..\..\src\unicode-inl.h"
>

View File

@ -952,6 +952,14 @@
RelativePath="..\..\src\type-info.h"
>
</File>
<File
RelativePath="..\..\src\unbound-queue-inl.h"
>
</File>
<File
RelativePath="..\..\src\unbound-queue.h"
>
</File>
<File
RelativePath="..\..\src\unicode-inl.h"
>

View File

@ -937,6 +937,14 @@
RelativePath="..\..\src\type-info.h"
>
</File>
<File
RelativePath="..\..\src\unbound-queue-inl.h"
>
</File>
<File
RelativePath="..\..\src\unbound-queue.h"
>
</File>
<File
RelativePath="..\..\src\unicode-inl.h"
>

View File

@ -247,6 +247,10 @@
RelativePath="..\..\test\cctest\test-strings.cc"
>
</File>
<File
RelativePath="..\..\test\cctest\test-unbound-queue.cc"
>
</File>
<File
RelativePath="..\..\test\cctest\test-utils.cc"
>