From 4ee49181959795c83bcef4358e7b421ed4185092 Mon Sep 17 00:00:00 2001
From: Bill Budge
Date: Mon, 19 Jun 2017 23:06:35 +0000
Subject: [PATCH] Revert "[wasm] Throttle the amount of unfinished work to
 avoid OOM"

This reverts commit 1280954d3a2f1775e77964c1ef530b246839383a.

Reason for revert: Speculative, GC stress bots started taking much longer
after this change.

Original change's description:
> [wasm] Throttle the amount of unfinished work to avoid OOM
>
> It is possible that the foreground task is unable to clear the
> scheduled unfinished work, eventually leading to an OOM.
>
> We use either code_range on 64 bit, or the capacity of the code space,
> as a heuristic for how much memory to use for compilation.
>
> Bug: v8:6492, chromium:732010
> Change-Id: I1e4c0825351a42fa0b8369ccc41800ac3445563d
> Reviewed-on: https://chromium-review.googlesource.com/535017
> Commit-Queue: Brad Nelson
> Reviewed-by: Brad Nelson
> Cr-Commit-Position: refs/heads/master@{#46017}

TBR=bradnelson@chromium.org,mtrofin@chromium.org,ahaas@chromium.org

Change-Id: I8883cee7f77667530bc50f91bfb468c485e6f7f2
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: v8:6492, chromium:732010
Reviewed-on: https://chromium-review.googlesource.com/540270
Reviewed-by: Bill Budge
Commit-Queue: Bill Budge
Cr-Commit-Position: refs/heads/master@{#46020}
---
 src/compiler.h                |   1 -
 src/compiler/pipeline.cc      |   6 --
 src/compiler/wasm-compiler.cc |   6 --
 src/compiler/wasm-compiler.h  |   5 +-
 src/compiler/zone-stats.cc    |   6 +-
 src/compiler/zone-stats.h     |   6 +-
 src/wasm/module-compiler.cc   | 116 +++++++---------------------------
 src/wasm/module-compiler.h    |  18 +-----
 8 files changed, 32 insertions(+), 132 deletions(-)

diff --git a/src/compiler.h b/src/compiler.h
index de1ffbcc0d..b51b272bfc 100644
--- a/src/compiler.h
+++ b/src/compiler.h
@@ -208,7 +208,6 @@ class V8_EXPORT_PRIVATE CompilationJob {
   State state() const { return state_; }
   CompilationInfo* info() const { return info_; }
   Isolate* isolate() const;
-  virtual size_t AllocatedMemory() const { return 0; }
 
  protected:
   // Overridden by the actual implementation.
diff --git a/src/compiler/pipeline.cc b/src/compiler/pipeline.cc
index 0802744aa0..306479c1c1 100644
--- a/src/compiler/pipeline.cc
+++ b/src/compiler/pipeline.cc
@@ -690,8 +690,6 @@ class PipelineWasmCompilationJob final : public CompilationJob {
   Status FinalizeJobImpl() final;
 
  private:
-  size_t AllocatedMemory() const override;
-
   ZoneStats zone_stats_;
   std::unique_ptr<PipelineStatistics> pipeline_statistics_;
   PipelineData data_;
@@ -738,10 +736,6 @@ PipelineWasmCompilationJob::ExecuteJobImpl() {
   return SUCCEEDED;
 }
 
-size_t PipelineWasmCompilationJob::AllocatedMemory() const {
-  return pipeline_.data_->zone_stats()->GetCurrentAllocatedBytes();
-}
-
 PipelineWasmCompilationJob::Status
 PipelineWasmCompilationJob::FinalizeJobImpl() {
   pipeline_.AssembleCode(&linkage_);
diff --git a/src/compiler/wasm-compiler.cc b/src/compiler/wasm-compiler.cc
index 23dfb88af2..8b841a8ef9 100644
--- a/src/compiler/wasm-compiler.cc
+++ b/src/compiler/wasm-compiler.cc
@@ -3975,12 +3975,6 @@ void WasmCompilationUnit::ExecuteCompilation() {
         isolate_->counters()->wasm_compile_function_time());
   }
   ExecuteCompilationInternal();
-  // Record the memory cost this unit places on the system until
-  // it is finalized. That may be "0" in error cases.
-  if (job_) {
-    size_t cost = job_->AllocatedMemory();
-    set_memory_cost(cost);
-  }
 }
 
 void WasmCompilationUnit::ExecuteCompilationInternal() {
diff --git a/src/compiler/wasm-compiler.h b/src/compiler/wasm-compiler.h
index 47b65387e5..afda0156f9 100644
--- a/src/compiler/wasm-compiler.h
+++ b/src/compiler/wasm-compiler.h
@@ -64,9 +64,6 @@ class WasmCompilationUnit final {
                                           wasm::ModuleBytesEnv* module_env,
                                           const wasm::WasmFunction* function);
 
-  void set_memory_cost(size_t memory_cost) { memory_cost_ = memory_cost; }
-  size_t memory_cost() const { return memory_cost_; }
-
  private:
   SourcePositionTable* BuildGraphForWasmFunction(double* decode_ms);
 
@@ -88,7 +85,7 @@ class WasmCompilationUnit final {
   int func_index_;
   wasm::Result<wasm::DecodeStruct*> graph_construction_result_;
   bool ok_ = true;
-  size_t memory_cost_ = 0;
+
   void ExecuteCompilationInternal();
 
   DISALLOW_COPY_AND_ASSIGN(WasmCompilationUnit);
diff --git a/src/compiler/zone-stats.cc b/src/compiler/zone-stats.cc
index 626ad4072c..8942df5555 100644
--- a/src/compiler/zone-stats.cc
+++ b/src/compiler/zone-stats.cc
@@ -68,11 +68,11 @@ ZoneStats::~ZoneStats() {
   DCHECK(stats_.empty());
 }
 
-size_t ZoneStats::GetMaxAllocatedBytes() const {
+size_t ZoneStats::GetMaxAllocatedBytes() {
   return std::max(max_allocated_bytes_, GetCurrentAllocatedBytes());
 }
 
-size_t ZoneStats::GetCurrentAllocatedBytes() const {
+size_t ZoneStats::GetCurrentAllocatedBytes() {
   size_t total = 0;
   for (Zone* zone : zones_) {
     total += static_cast<size_t>(zone->allocation_size());
@@ -80,7 +80,7 @@ size_t ZoneStats::GetCurrentAllocatedBytes() const {
   return total;
 }
 
-size_t ZoneStats::GetTotalAllocatedBytes() const {
+size_t ZoneStats::GetTotalAllocatedBytes() {
   return total_deleted_bytes_ + GetCurrentAllocatedBytes();
 }
 
diff --git a/src/compiler/zone-stats.h b/src/compiler/zone-stats.h
index 6e0cd5fe4e..39adca3693 100644
--- a/src/compiler/zone-stats.h
+++ b/src/compiler/zone-stats.h
@@ -66,9 +66,9 @@ class V8_EXPORT_PRIVATE ZoneStats final {
   explicit ZoneStats(AccountingAllocator* allocator);
   ~ZoneStats();
 
-  size_t GetMaxAllocatedBytes() const;
-  size_t GetTotalAllocatedBytes() const;
-  size_t GetCurrentAllocatedBytes() const;
+  size_t GetMaxAllocatedBytes();
+  size_t GetTotalAllocatedBytes();
+  size_t GetCurrentAllocatedBytes();
 
  private:
   Zone* NewEmptyZone(const char* zone_name);
diff --git a/src/wasm/module-compiler.cc b/src/wasm/module-compiler.cc
index 64ca5dfb17..ad7d552765 100644
--- a/src/wasm/module-compiler.cc
+++ b/src/wasm/module-compiler.cc
@@ -39,31 +39,14 @@ namespace internal {
 namespace wasm {
 
 ModuleCompiler::CodeGenerationSchedule::CodeGenerationSchedule(
-    base::RandomNumberGenerator* random_number_generator, size_t max_memory)
-    : random_number_generator_(random_number_generator),
-      max_memory_(max_memory) {
+    base::RandomNumberGenerator* random_number_generator)
+    : random_number_generator_(random_number_generator) {
   DCHECK_NOT_NULL(random_number_generator_);
-  DCHECK_GT(max_memory_, 0);
 }
 
 void ModuleCompiler::CodeGenerationSchedule::Schedule(
     std::unique_ptr<WasmCompilationUnit>&& item) {
-  size_t cost = item->memory_cost();
   schedule_.push_back(std::move(item));
-  allocated_memory_.Increment(cost);
-}
-
-void ModuleCompiler::CodeGenerationSchedule::WaitUntilCanWork() {
-  if (!throttle_ || allocated_memory_.Value() <= max_memory_) return;
-
-  base::LockGuard<base::Mutex> guard(&accept_work_mutex_);
-  while (throttle_ && allocated_memory_.Value() > max_memory_) {
-    accept_work_.Wait(&accept_work_mutex_);
-  }
-  // We waited for the used memory to drop, and maybe other threads were, too.
-  // Notify another one. See note about threshold being overpassed by more than
-  // one thread in {FetchAndExecuteCompilationUnit}
-  accept_work_.NotifyOne();
 }
 
 std::unique_ptr<WasmCompilationUnit>
@@ -73,12 +56,6 @@ ModuleCompiler::CodeGenerationSchedule::GetNext() {
   auto ret = std::move(schedule_[index]);
   std::swap(schedule_[schedule_.size() - 1], schedule_[index]);
   schedule_.pop_back();
-  if (throttle_ &&
-      allocated_memory_.Decrement(ret->memory_cost()) <= max_memory_) {
-    // There may be threads waiting. Notify one. In turn, it will notify another
-    // one. This avoids all waiting the threads contending for the mutex.
-    accept_work_.NotifyOne();
-  }
   return ret;
 }
 
@@ -96,12 +73,7 @@ ModuleCompiler::ModuleCompiler(Isolate* isolate,
       module_(std::move(module)),
       counters_shared_(isolate->counters_shared()),
       is_sync_(is_sync),
-      executed_units_(
-          isolate->random_number_generator(),
-          (isolate->heap()->memory_allocator()->code_range()->valid()
-               ? isolate->heap()->memory_allocator()->code_range()->size()
-               : isolate->heap()->code_space()->Capacity()) /
-              2) {
+      executed_units_(isolate->random_number_generator()) {
   counters_ = counters_shared_.get();
 }
 
@@ -115,32 +87,10 @@ void ModuleCompiler::CompilationTask::RunInternal() {
   compiler_->module_->pending_tasks.get()->Signal();
 }
 
-void ModuleCompiler::CompileOnMainThread() {
-  DisallowHeapAllocation no_allocation;
-  DisallowHandleAllocation no_handles;
-  DisallowHandleDereference no_deref;
-  DisallowCodeDependencyChange no_dependency_change;
-
-  // - 1 because AtomicIncrement returns the value after the atomic increment.
-  size_t index = next_unit_.Increment(1) - 1;
-  if (index >= compilation_units_.size()) {
-    return;
-  }
-
-  std::unique_ptr<WasmCompilationUnit> unit =
-      std::move(compilation_units_.at(index));
-  unit->ExecuteCompilation();
-
-  // Schedule the result, bypassing the threshold.
-  {
-    base::LockGuard<base::Mutex> guard(&result_mutex_);
-    executed_units_.Schedule(std::move(unit));
-  }
-}
-
-// Run by each compilation task The no_finisher_callback is called
-// within the result_mutex_ lock when no finishing task is running,
-// i.e. when the finisher_is_running_ flag is not set.
+// Run by each compilation task and by the main thread. The
+// no_finisher_callback is called within the result_mutex_ lock when no
+// finishing task is running, i.e. when the finisher_is_running_ flag is not
+// set.
 bool ModuleCompiler::FetchAndExecuteCompilationUnit(
     std::function<void()> no_finisher_callback) {
   DisallowHeapAllocation no_allocation;
@@ -149,28 +99,11 @@ bool ModuleCompiler::FetchAndExecuteCompilationUnit(
   DisallowCodeDependencyChange no_dependency_change;
 
   // - 1 because AtomicIncrement returns the value after the atomic increment.
-  // Bail out fast if there's no work to do.
   size_t index = next_unit_.Increment(1) - 1;
   if (index >= compilation_units_.size()) {
     return false;
   }
 
-  // TODO(wasm): this is a no-op for async. Hopefully we can
-  // avoid it if we unify the parallel and async pipelines.
-
-  // The thread observes if the memory threshold wasn't passed.
-  // It is possible that a few threads make this observation concurrently,
-  // then only one ends up being sufficient to go over the threshold.
-  // We don't know ahead of time how much memory will be allocated at compile
-  // time, and we don't want to span a critical section until we find out -
-  // since the whole point is for compilation to happen in parallel. So, we
-  // tolerate some elasticity in the system: it's OK if the threshold is passed
-  // because a number of units are scheduled like that - the extra amount
-  // is bound by the number of threads.
-  // We can lower the threshold if the current margin heuristics proves too
-  // high.
-  executed_units_.WaitUntilCanWork();
-
   std::unique_ptr<WasmCompilationUnit> unit =
       std::move(compilation_units_.at(index));
   unit->ExecuteCompilation();
@@ -207,8 +140,6 @@ uint32_t* ModuleCompiler::StartCompilationTasks() {
   num_background_tasks_ =
       Min(static_cast<size_t>(FLAG_wasm_num_compilation_tasks),
           V8::GetCurrentPlatform()->NumberOfAvailableBackgroundThreads());
-  executed_units_.EnableThrottling();
-
   uint32_t* task_ids = new uint32_t[num_background_tasks_];
   for (size_t i = 0; i < num_background_tasks_; ++i) {
     CompilationTask* task = new CompilationTask(this);
@@ -230,19 +161,16 @@ void ModuleCompiler::WaitForCompilationTasks(uint32_t* task_ids) {
   }
 }
 
-size_t ModuleCompiler::FinishCompilationUnits(
-    std::vector<Handle<Code>>& results, ErrorThrower* thrower) {
-  size_t finished = 0;
+void ModuleCompiler::FinishCompilationUnits(std::vector<Handle<Code>>& results,
+                                            ErrorThrower* thrower) {
   SetFinisherIsRunning(true);
   while (true) {
     int func_index = -1;
     Handle<Code> result = FinishCompilationUnit(thrower, &func_index);
     if (func_index < 0) break;
     results[func_index] = result;
-    ++finished;
   }
   SetFinisherIsRunning(false);
-  return finished;
 }
 
 void ModuleCompiler::SetFinisherIsRunning(bool value) {
@@ -293,30 +221,30 @@ void ModuleCompiler::CompileInParallel(ModuleBytesEnv* module_env,
   //    and stores them in the vector {compilation_units}.
   InitializeParallelCompilation(module->functions, *module_env);
 
+  // Objects for the synchronization with the background threads.
+  base::AtomicNumber<size_t> next_unit(
+      static_cast<size_t>(FLAG_skip_compiling_wasm_funcs));
+
   // 2) The main thread spawns {CompilationTask} instances which run on
   //    the background threads.
   std::unique_ptr<uint32_t[]> task_ids(StartCompilationTasks());
 
-  size_t finished_functions = 0;
-  while (finished_functions < compilation_units_.size()) {
-    // 3.a) The background threads and the main thread pick one compilation
-    //      unit at a time and execute the parallel phase of the compilation
-    //      unit. After finishing the execution of the parallel phase, the
-    //      result is enqueued in {executed_units}.
-    //      The foreground task bypasses waiting on memory threshold, because
-    //      its results will immediately be converted to code (below).
-    CompileOnMainThread();
-
+  // 3.a) The background threads and the main thread pick one compilation
+  //      unit at a time and execute the parallel phase of the compilation
+  //      unit. After finishing the execution of the parallel phase, the
+  //      result is enqueued in {executed_units}.
+  while (FetchAndExecuteCompilationUnit()) {
     // 3.b) If {executed_units} contains a compilation unit, the main thread
     //      dequeues it and finishes the compilation unit. Compilation units
     //      are finished concurrently to the background threads to save
     //      memory.
-    finished_functions += FinishCompilationUnits(results, thrower);
+    FinishCompilationUnits(results, thrower);
   }
   // 4) After the parallel phase of all compilation units has started, the
-  //    main thread waits for all {CompilationTask} instances to finish - which
-  //    happens once they all realize there's no next work item to process.
+  //    main thread waits for all {CompilationTask} instances to finish.
   WaitForCompilationTasks(task_ids.get());
+  // Finish the compilation of the remaining compilation units.
+  FinishCompilationUnits(results, thrower);
 }
 
 void ModuleCompiler::CompileSequentially(ModuleBytesEnv* module_env,
diff --git a/src/wasm/module-compiler.h b/src/wasm/module-compiler.h
index 79a1618d02..de4948f30e 100644
--- a/src/wasm/module-compiler.h
+++ b/src/wasm/module-compiler.h
@@ -42,8 +42,7 @@ class ModuleCompiler {
   class CodeGenerationSchedule {
    public:
     explicit CodeGenerationSchedule(
-        base::RandomNumberGenerator* random_number_generator,
-        size_t max_memory = 0);
+        base::RandomNumberGenerator* random_number_generator);
 
     void Schedule(std::unique_ptr<WasmCompilationUnit>&& item);
 
@@ -51,20 +50,11 @@ class ModuleCompiler {
 
     std::unique_ptr<WasmCompilationUnit> GetNext();
 
-    void WaitUntilCanWork();
-
-    void EnableThrottling() { throttle_ = true; }
-
    private:
     size_t GetRandomIndexInSchedule();
 
     base::RandomNumberGenerator* random_number_generator_ = nullptr;
     std::vector<std::unique_ptr<WasmCompilationUnit>> schedule_;
-    base::Mutex accept_work_mutex_;
-    base::ConditionVariable accept_work_;
-    const size_t max_memory_;
-    bool throttle_ = false;
-    base::AtomicNumber<size_t> allocated_memory_{0};
   };
 
   Isolate* isolate_;
@@ -88,8 +78,6 @@ class ModuleCompiler {
   bool FetchAndExecuteCompilationUnit(
       std::function<void()> no_finisher_callback = [] {});
 
-  void CompileOnMainThread();
-
   size_t InitializeParallelCompilation(
       const std::vector<WasmFunction>& functions, ModuleBytesEnv& module_env);
 
@@ -97,8 +85,8 @@ class ModuleCompiler {
 
   void WaitForCompilationTasks(uint32_t* task_ids);
 
-  size_t FinishCompilationUnits(std::vector<Handle<Code>>& results,
-                                ErrorThrower* thrower);
+  void FinishCompilationUnits(std::vector<Handle<Code>>& results,
+                              ErrorThrower* thrower);
 
   void SetFinisherIsRunning(bool value);
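
For readers of this revert who have not seen the original change: the sketch below is not part of the patch and does not use V8's real classes (ThrottledSchedule, Unit, and max_memory are invented for illustration, and std::mutex/std::condition_variable stand in for base::Mutex, base::ConditionVariable, and base::AtomicNumber). It only illustrates the backpressure pattern that the reverted CodeGenerationSchedule implemented: producer threads block in WaitUntilCanWork() while the estimated memory held by unfinished units exceeds a budget, and the dequeue path wakes one waiter (which chains the notification) once the total drops back below it.

// Standalone C++ illustration, assuming invented names; not V8 code.
#include <condition_variable>
#include <cstddef>
#include <memory>
#include <mutex>
#include <utility>
#include <vector>

struct Unit {
  size_t memory_cost = 0;  // estimated bytes held until the unit is finalized
};

class ThrottledSchedule {
 public:
  explicit ThrottledSchedule(size_t max_memory) : max_memory_(max_memory) {}

  // Producer side: block while the backlog of unfinished units exceeds the
  // memory budget.
  void WaitUntilCanWork() {
    std::unique_lock<std::mutex> lock(mutex_);
    while (allocated_ > max_memory_) cv_.wait(lock);
    // The threshold may have admitted several waiters; wake one more so the
    // notification chains instead of all threads contending at once.
    cv_.notify_one();
  }

  void Schedule(std::unique_ptr<Unit> unit) {
    std::lock_guard<std::mutex> lock(mutex_);
    allocated_ += unit->memory_cost;
    queue_.push_back(std::move(unit));
  }

  // Consumer side (the finisher): pop a unit and release its share of the
  // budget, then let one blocked producer resume.
  std::unique_ptr<Unit> GetNext() {
    std::lock_guard<std::mutex> lock(mutex_);
    if (queue_.empty()) return nullptr;
    std::unique_ptr<Unit> unit = std::move(queue_.back());
    queue_.pop_back();
    allocated_ -= unit->memory_cost;
    if (allocated_ <= max_memory_) cv_.notify_one();
    return unit;
  }

 private:
  const size_t max_memory_;
  size_t allocated_ = 0;
  std::vector<std::unique_ptr<Unit>> queue_;
  std::mutex mutex_;
  std::condition_variable cv_;
};

Folding all of the bookkeeping under one mutex keeps the sketch short; the reverted code instead tracked the running total with an atomic counter and only took the mutex when a thread actually had to wait.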