Improve queuing for concurrent OSR.
Specifically, this includes: - Encapsulating data structure for osr buffer into CyclicBuffer - Use the new CyclicQueue instead of UnboundedQueue to queue new jobs. We can enqueue and dequeue a CyclicQueue on both ends in O(1). This allows us to add OSR jobs to the front for lower compile latency. - Dispose osr buffer by one stale job per GC to avoid leak R=titzer@chromium.org BUG= Review URL: https://codereview.chromium.org/25505002 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@17244 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
0a6d2dec9f
commit
b8dd056fa2
@ -450,6 +450,10 @@ void Heap::GarbageCollectionPrologue() {
|
|||||||
#endif // DEBUG
|
#endif // DEBUG
|
||||||
|
|
||||||
store_buffer()->GCPrologue();
|
store_buffer()->GCPrologue();
|
||||||
|
|
||||||
|
if (FLAG_concurrent_osr) {
|
||||||
|
isolate()->optimizing_compiler_thread()->AgeBufferedOsrJobs();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -93,12 +93,20 @@ void OptimizingCompilerThread::Run() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
RecompileJob* OptimizingCompilerThread::NextInput() {
|
||||||
|
LockGuard<Mutex> access_input_queue_(&input_queue_mutex_);
|
||||||
|
if (input_queue_length_ == 0) return NULL;
|
||||||
|
RecompileJob* job = input_queue_[InputQueueIndex(0)];
|
||||||
|
ASSERT_NE(NULL, job);
|
||||||
|
input_queue_shift_ = InputQueueIndex(1);
|
||||||
|
input_queue_length_--;
|
||||||
|
return job;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void OptimizingCompilerThread::CompileNext() {
|
void OptimizingCompilerThread::CompileNext() {
|
||||||
RecompileJob* job = NULL;
|
RecompileJob* job = NextInput();
|
||||||
bool result = input_queue_.Dequeue(&job);
|
ASSERT_NE(NULL, job);
|
||||||
USE(result);
|
|
||||||
ASSERT(result);
|
|
||||||
Barrier_AtomicIncrement(&queue_length_, static_cast<Atomic32>(-1));
|
|
||||||
|
|
||||||
// The function may have already been optimized by OSR. Simply continue.
|
// The function may have already been optimized by OSR. Simply continue.
|
||||||
RecompileJob::Status status = job->OptimizeGraph();
|
RecompileJob::Status status = job->OptimizeGraph();
|
||||||
@ -131,7 +139,7 @@ static void DisposeRecompileJob(RecompileJob* job,
|
|||||||
|
|
||||||
void OptimizingCompilerThread::FlushInputQueue(bool restore_function_code) {
|
void OptimizingCompilerThread::FlushInputQueue(bool restore_function_code) {
|
||||||
RecompileJob* job;
|
RecompileJob* job;
|
||||||
while (input_queue_.Dequeue(&job)) {
|
while ((job = NextInput())) {
|
||||||
// This should not block, since we have one signal on the input queue
|
// This should not block, since we have one signal on the input queue
|
||||||
// semaphore corresponding to each element in the input queue.
|
// semaphore corresponding to each element in the input queue.
|
||||||
input_queue_semaphore_.Wait();
|
input_queue_semaphore_.Wait();
|
||||||
@ -140,7 +148,6 @@ void OptimizingCompilerThread::FlushInputQueue(bool restore_function_code) {
|
|||||||
DisposeRecompileJob(job, restore_function_code);
|
DisposeRecompileJob(job, restore_function_code);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Release_Store(&queue_length_, static_cast<AtomicWord>(0));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -156,12 +163,12 @@ void OptimizingCompilerThread::FlushOutputQueue(bool restore_function_code) {
|
|||||||
|
|
||||||
|
|
||||||
void OptimizingCompilerThread::FlushOsrBuffer(bool restore_function_code) {
|
void OptimizingCompilerThread::FlushOsrBuffer(bool restore_function_code) {
|
||||||
RecompileJob* job;
|
for (int i = 0; i < osr_buffer_capacity_; i++) {
|
||||||
for (int i = 0; i < osr_buffer_size_; i++) {
|
if (osr_buffer_[i] != NULL) {
|
||||||
job = osr_buffer_[i];
|
DisposeRecompileJob(osr_buffer_[i], restore_function_code);
|
||||||
if (job != NULL) DisposeRecompileJob(job, restore_function_code);
|
osr_buffer_[i] = NULL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
osr_cursor_ = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -187,10 +194,9 @@ void OptimizingCompilerThread::Stop() {
|
|||||||
stop_semaphore_.Wait();
|
stop_semaphore_.Wait();
|
||||||
|
|
||||||
if (FLAG_concurrent_recompilation_delay != 0) {
|
if (FLAG_concurrent_recompilation_delay != 0) {
|
||||||
// Barrier when loading queue length is not necessary since the write
|
// At this point the optimizing compiler thread's event loop has stopped.
|
||||||
// happens in CompileNext on the same thread.
|
// There is no need for a mutex when reading input_queue_length_.
|
||||||
// This is used only for testing.
|
while (input_queue_length_ > 0) CompileNext();
|
||||||
while (NoBarrier_Load(&queue_length_) > 0) CompileNext();
|
|
||||||
InstallOptimizedFunctions();
|
InstallOptimizedFunctions();
|
||||||
} else {
|
} else {
|
||||||
FlushInputQueue(false);
|
FlushInputQueue(false);
|
||||||
@ -239,7 +245,6 @@ void OptimizingCompilerThread::InstallOptimizedFunctions() {
|
|||||||
void OptimizingCompilerThread::QueueForOptimization(RecompileJob* job) {
|
void OptimizingCompilerThread::QueueForOptimization(RecompileJob* job) {
|
||||||
ASSERT(IsQueueAvailable());
|
ASSERT(IsQueueAvailable());
|
||||||
ASSERT(!IsOptimizerThread());
|
ASSERT(!IsOptimizerThread());
|
||||||
Barrier_AtomicIncrement(&queue_length_, static_cast<Atomic32>(1));
|
|
||||||
CompilationInfo* info = job->info();
|
CompilationInfo* info = job->info();
|
||||||
if (info->is_osr()) {
|
if (info->is_osr()) {
|
||||||
if (FLAG_trace_concurrent_recompilation) {
|
if (FLAG_trace_concurrent_recompilation) {
|
||||||
@ -247,13 +252,24 @@ void OptimizingCompilerThread::QueueForOptimization(RecompileJob* job) {
|
|||||||
info->closure()->PrintName();
|
info->closure()->PrintName();
|
||||||
PrintF(" for concurrent on-stack replacement.\n");
|
PrintF(" for concurrent on-stack replacement.\n");
|
||||||
}
|
}
|
||||||
AddToOsrBuffer(job);
|
|
||||||
osr_attempts_++;
|
osr_attempts_++;
|
||||||
BackEdgeTable::AddStackCheck(info);
|
BackEdgeTable::AddStackCheck(info);
|
||||||
|
AddToOsrBuffer(job);
|
||||||
|
// Add job to the front of the input queue.
|
||||||
|
LockGuard<Mutex> access_input_queue(&input_queue_mutex_);
|
||||||
|
ASSERT_LT(input_queue_length_, input_queue_capacity_);
|
||||||
|
// Move shift_ back by one.
|
||||||
|
input_queue_shift_ = InputQueueIndex(input_queue_capacity_ - 1);
|
||||||
|
input_queue_[InputQueueIndex(0)] = job;
|
||||||
|
input_queue_length_++;
|
||||||
} else {
|
} else {
|
||||||
info->closure()->MarkInRecompileQueue();
|
info->closure()->MarkInRecompileQueue();
|
||||||
|
// Add job to the back of the input queue.
|
||||||
|
LockGuard<Mutex> access_input_queue(&input_queue_mutex_);
|
||||||
|
ASSERT_LT(input_queue_length_, input_queue_capacity_);
|
||||||
|
input_queue_[InputQueueIndex(input_queue_length_)] = job;
|
||||||
|
input_queue_length_++;
|
||||||
}
|
}
|
||||||
input_queue_.Enqueue(job);
|
|
||||||
if (FLAG_block_concurrent_recompilation) {
|
if (FLAG_block_concurrent_recompilation) {
|
||||||
blocked_jobs_++;
|
blocked_jobs_++;
|
||||||
} else {
|
} else {
|
||||||
@ -274,15 +290,14 @@ void OptimizingCompilerThread::Unblock() {
|
|||||||
RecompileJob* OptimizingCompilerThread::FindReadyOSRCandidate(
|
RecompileJob* OptimizingCompilerThread::FindReadyOSRCandidate(
|
||||||
Handle<JSFunction> function, uint32_t osr_pc_offset) {
|
Handle<JSFunction> function, uint32_t osr_pc_offset) {
|
||||||
ASSERT(!IsOptimizerThread());
|
ASSERT(!IsOptimizerThread());
|
||||||
RecompileJob* result = NULL;
|
for (int i = 0; i < osr_buffer_capacity_; i++) {
|
||||||
for (int i = 0; i < osr_buffer_size_; i++) {
|
RecompileJob* current = osr_buffer_[i];
|
||||||
result = osr_buffer_[i];
|
if (current != NULL &&
|
||||||
if (result == NULL) continue;
|
current->IsWaitingForInstall() &&
|
||||||
if (result->IsWaitingForInstall() &&
|
current->info()->HasSameOsrEntry(function, osr_pc_offset)) {
|
||||||
result->info()->HasSameOsrEntry(function, osr_pc_offset)) {
|
|
||||||
osr_hits_++;
|
osr_hits_++;
|
||||||
osr_buffer_[i] = NULL;
|
osr_buffer_[i] = NULL;
|
||||||
return result;
|
return current;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -292,10 +307,11 @@ RecompileJob* OptimizingCompilerThread::FindReadyOSRCandidate(
|
|||||||
bool OptimizingCompilerThread::IsQueuedForOSR(Handle<JSFunction> function,
|
bool OptimizingCompilerThread::IsQueuedForOSR(Handle<JSFunction> function,
|
||||||
uint32_t osr_pc_offset) {
|
uint32_t osr_pc_offset) {
|
||||||
ASSERT(!IsOptimizerThread());
|
ASSERT(!IsOptimizerThread());
|
||||||
for (int i = 0; i < osr_buffer_size_; i++) {
|
for (int i = 0; i < osr_buffer_capacity_; i++) {
|
||||||
if (osr_buffer_[i] != NULL &&
|
RecompileJob* current = osr_buffer_[i];
|
||||||
osr_buffer_[i]->info()->HasSameOsrEntry(function, osr_pc_offset)) {
|
if (current != NULL &&
|
||||||
return !osr_buffer_[i]->IsWaitingForInstall();
|
current->info()->HasSameOsrEntry(function, osr_pc_offset)) {
|
||||||
|
return !current->IsWaitingForInstall();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
@ -304,10 +320,10 @@ bool OptimizingCompilerThread::IsQueuedForOSR(Handle<JSFunction> function,
|
|||||||
|
|
||||||
bool OptimizingCompilerThread::IsQueuedForOSR(JSFunction* function) {
|
bool OptimizingCompilerThread::IsQueuedForOSR(JSFunction* function) {
|
||||||
ASSERT(!IsOptimizerThread());
|
ASSERT(!IsOptimizerThread());
|
||||||
for (int i = 0; i < osr_buffer_size_; i++) {
|
for (int i = 0; i < osr_buffer_capacity_; i++) {
|
||||||
if (osr_buffer_[i] != NULL &&
|
RecompileJob* current = osr_buffer_[i];
|
||||||
*osr_buffer_[i]->info()->closure() == function) {
|
if (current != NULL && *current->info()->closure() == function) {
|
||||||
return !osr_buffer_[i]->IsWaitingForInstall();
|
return !current->IsWaitingForInstall();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
@ -316,27 +332,27 @@ bool OptimizingCompilerThread::IsQueuedForOSR(JSFunction* function) {
|
|||||||
|
|
||||||
void OptimizingCompilerThread::AddToOsrBuffer(RecompileJob* job) {
|
void OptimizingCompilerThread::AddToOsrBuffer(RecompileJob* job) {
|
||||||
ASSERT(!IsOptimizerThread());
|
ASSERT(!IsOptimizerThread());
|
||||||
// Store into next empty slot or replace next stale OSR job that's waiting
|
// Find the next slot that is empty or has a stale job.
|
||||||
// in vain. Dispose in the latter case.
|
|
||||||
RecompileJob* stale;
|
|
||||||
while (true) {
|
while (true) {
|
||||||
stale = osr_buffer_[osr_cursor_];
|
RecompileJob* stale = osr_buffer_[osr_buffer_cursor_];
|
||||||
if (stale == NULL) break;
|
if (stale == NULL || stale->IsWaitingForInstall()) break;
|
||||||
if (stale->IsWaitingForInstall()) {
|
osr_buffer_cursor_ = (osr_buffer_cursor_ + 1) % osr_buffer_capacity_;
|
||||||
CompilationInfo* info = stale->info();
|
|
||||||
if (FLAG_trace_osr) {
|
|
||||||
PrintF("[COSR - Discarded ");
|
|
||||||
info->closure()->PrintName();
|
|
||||||
PrintF(", AST id %d]\n", info->osr_ast_id().ToInt());
|
|
||||||
}
|
|
||||||
DisposeRecompileJob(stale, false);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
AdvanceOsrCursor();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
osr_buffer_[osr_cursor_] = job;
|
// Add to found slot and dispose the evicted job.
|
||||||
AdvanceOsrCursor();
|
RecompileJob* evicted = osr_buffer_[osr_buffer_cursor_];
|
||||||
|
if (evicted != NULL) {
|
||||||
|
ASSERT(evicted->IsWaitingForInstall());
|
||||||
|
CompilationInfo* info = evicted->info();
|
||||||
|
if (FLAG_trace_osr) {
|
||||||
|
PrintF("[COSR - Discarded ");
|
||||||
|
info->closure()->PrintName();
|
||||||
|
PrintF(", AST id %d]\n", info->osr_ast_id().ToInt());
|
||||||
|
}
|
||||||
|
DisposeRecompileJob(evicted, false);
|
||||||
|
}
|
||||||
|
osr_buffer_[osr_buffer_cursor_] = job;
|
||||||
|
osr_buffer_cursor_ = (osr_buffer_cursor_ + 1) % osr_buffer_capacity_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -53,21 +53,29 @@ class OptimizingCompilerThread : public Thread {
|
|||||||
isolate_(isolate),
|
isolate_(isolate),
|
||||||
stop_semaphore_(0),
|
stop_semaphore_(0),
|
||||||
input_queue_semaphore_(0),
|
input_queue_semaphore_(0),
|
||||||
osr_cursor_(0),
|
input_queue_capacity_(FLAG_concurrent_recompilation_queue_length),
|
||||||
|
input_queue_length_(0),
|
||||||
|
input_queue_shift_(0),
|
||||||
|
osr_buffer_capacity_(FLAG_concurrent_recompilation_queue_length + 4),
|
||||||
|
osr_buffer_cursor_(0),
|
||||||
osr_hits_(0),
|
osr_hits_(0),
|
||||||
osr_attempts_(0),
|
osr_attempts_(0),
|
||||||
blocked_jobs_(0) {
|
blocked_jobs_(0) {
|
||||||
NoBarrier_Store(&stop_thread_, static_cast<AtomicWord>(CONTINUE));
|
NoBarrier_Store(&stop_thread_, static_cast<AtomicWord>(CONTINUE));
|
||||||
NoBarrier_Store(&queue_length_, static_cast<AtomicWord>(0));
|
input_queue_ = NewArray<RecompileJob*>(input_queue_capacity_);
|
||||||
if (FLAG_concurrent_osr) {
|
osr_buffer_ = NewArray<RecompileJob*>(osr_buffer_capacity_);
|
||||||
osr_buffer_size_ = FLAG_concurrent_recompilation_queue_length + 4;
|
// Mark OSR buffer slots as empty.
|
||||||
osr_buffer_ = NewArray<RecompileJob*>(osr_buffer_size_);
|
for (int i = 0; i < osr_buffer_capacity_; i++) osr_buffer_[i] = NULL;
|
||||||
for (int i = 0; i < osr_buffer_size_; i++) osr_buffer_[i] = NULL;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
~OptimizingCompilerThread() {
|
~OptimizingCompilerThread() {
|
||||||
if (FLAG_concurrent_osr) DeleteArray(osr_buffer_);
|
ASSERT_EQ(0, input_queue_length_);
|
||||||
|
#ifdef DEBUG
|
||||||
|
for (int i = 0; i < osr_buffer_capacity_; i++) {
|
||||||
|
CHECK_EQ(NULL, osr_buffer_[i]);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
DeleteArray(osr_buffer_);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Run();
|
void Run();
|
||||||
@ -83,17 +91,15 @@ class OptimizingCompilerThread : public Thread {
|
|||||||
bool IsQueuedForOSR(JSFunction* function);
|
bool IsQueuedForOSR(JSFunction* function);
|
||||||
|
|
||||||
inline bool IsQueueAvailable() {
|
inline bool IsQueueAvailable() {
|
||||||
// We don't need a barrier since we have a data dependency right
|
LockGuard<Mutex> access_input_queue(&input_queue_mutex_);
|
||||||
// after.
|
return input_queue_length_ < input_queue_capacity_;
|
||||||
Atomic32 current_length = NoBarrier_Load(&queue_length_);
|
}
|
||||||
|
|
||||||
// This can be queried only from the execution thread.
|
inline void AgeBufferedOsrJobs() {
|
||||||
ASSERT(!IsOptimizerThread());
|
// Advance cursor of the cyclic buffer to next empty slot or stale OSR job.
|
||||||
// Since only the execution thread increments queue_length_ and
|
// Dispose said OSR job in the latter case. Calling this on every GC
|
||||||
// only one thread can run inside an Isolate at one time, a direct
|
// should make sure that we do not hold onto stale jobs indefinitely.
|
||||||
// doesn't introduce a race -- queue_length_ may decreased in
|
AddToOsrBuffer(NULL);
|
||||||
// meantime, but not increased.
|
|
||||||
return (current_length < FLAG_concurrent_recompilation_queue_length);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
@ -107,12 +113,17 @@ class OptimizingCompilerThread : public Thread {
|
|||||||
void FlushOutputQueue(bool restore_function_code);
|
void FlushOutputQueue(bool restore_function_code);
|
||||||
void FlushOsrBuffer(bool restore_function_code);
|
void FlushOsrBuffer(bool restore_function_code);
|
||||||
void CompileNext();
|
void CompileNext();
|
||||||
|
RecompileJob* NextInput();
|
||||||
|
|
||||||
// Add a recompilation task for OSR to the cyclic buffer, awaiting OSR entry.
|
// Add a recompilation task for OSR to the cyclic buffer, awaiting OSR entry.
|
||||||
// Tasks evicted from the cyclic buffer are discarded.
|
// Tasks evicted from the cyclic buffer are discarded.
|
||||||
void AddToOsrBuffer(RecompileJob* compiler);
|
void AddToOsrBuffer(RecompileJob* compiler);
|
||||||
void AdvanceOsrCursor() {
|
|
||||||
osr_cursor_ = (osr_cursor_ + 1) % osr_buffer_size_;
|
inline int InputQueueIndex(int i) {
|
||||||
|
int result = (i + input_queue_shift_) % input_queue_capacity_;
|
||||||
|
ASSERT_LE(0, result);
|
||||||
|
ASSERT_LT(result, input_queue_capacity_);
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
@ -124,20 +135,22 @@ class OptimizingCompilerThread : public Thread {
|
|||||||
Semaphore stop_semaphore_;
|
Semaphore stop_semaphore_;
|
||||||
Semaphore input_queue_semaphore_;
|
Semaphore input_queue_semaphore_;
|
||||||
|
|
||||||
// Queue of incoming recompilation tasks (including OSR).
|
// Circular queue of incoming recompilation tasks (including OSR).
|
||||||
UnboundQueue<RecompileJob*> input_queue_;
|
RecompileJob** input_queue_;
|
||||||
|
int input_queue_capacity_;
|
||||||
|
int input_queue_length_;
|
||||||
|
int input_queue_shift_;
|
||||||
|
Mutex input_queue_mutex_;
|
||||||
|
|
||||||
// Queue of recompilation tasks ready to be installed (excluding OSR).
|
// Queue of recompilation tasks ready to be installed (excluding OSR).
|
||||||
UnboundQueue<RecompileJob*> output_queue_;
|
UnboundQueue<RecompileJob*> output_queue_;
|
||||||
|
|
||||||
// Cyclic buffer of recompilation tasks for OSR.
|
// Cyclic buffer of recompilation tasks for OSR.
|
||||||
// TODO(yangguo): This may keep zombie tasks indefinitely, holding on to
|
|
||||||
// a lot of memory. Fix this.
|
|
||||||
RecompileJob** osr_buffer_;
|
RecompileJob** osr_buffer_;
|
||||||
// Cursor for the cyclic buffer.
|
int osr_buffer_capacity_;
|
||||||
int osr_cursor_;
|
int osr_buffer_cursor_;
|
||||||
int osr_buffer_size_;
|
|
||||||
|
|
||||||
volatile AtomicWord stop_thread_;
|
volatile AtomicWord stop_thread_;
|
||||||
volatile Atomic32 queue_length_;
|
|
||||||
TimeDelta time_spent_compiling_;
|
TimeDelta time_spent_compiling_;
|
||||||
TimeDelta time_spent_total_;
|
TimeDelta time_spent_total_;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user