SkExecutor

Refactoring to refamiliarize myself with SkTaskGroup and SkThreadPool.

This adds an SkExecutor interface to describe how we use SkThreadPool,
with a global setter and getter for a default instance.  Then I rewrote
SkTaskGroup to work with any executor, the global default by default.

I also think I've made the SkTaskGroup::wait() borrow logic clearer
with the addition of SkSemaphore::try_wait().  This lets me keep the
semaphore count and actual count of work in perfect sync.

Change-Id: I6bbdfaeb0e2c3a43daff6192d34bc4a3f7210178
Reviewed-on: https://skia-review.googlesource.com/8836
Reviewed-by: Mike Reed <reed@google.com>
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Mike Klein <mtklein@chromium.org>, 2017-02-21 22:53:16 -05:00, committed by Skia Commit-Bot
parent d44dd4c356, commit 384b90af5c
7 changed files with 210 additions and 203 deletions
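For orientation before the per-file diffs, here is a sketch (an editor's illustration, not code from this CL) of what the new surface looks like to a client; it assumes only the SkExecutor.h and SkTaskGroup.h APIs shown below:

    #include "SkExecutor.h"
    #include "SkTaskGroup.h"

    void sketch() {
        // By default, tasks run on SkExecutor::GetDefault().
        SkTaskGroup simple;
        simple.add([] { /* runs immediately unless a pool was installed */ });
        simple.wait();

        // Or hand any executor to a task group explicitly.
        std::unique_ptr<SkExecutor> pool = SkExecutor::MakeThreadPool(2);
        SkTaskGroup onPool(*pool);
        onPool.batch(10, [](int i) { /* item i, likely on a pool thread */ });
        onPool.wait();
    }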

gn/core.gni

@@ -127,6 +127,7 @@ skia_core_sources = [
   "$_src/core/SkEdgeClipper.h",
   "$_src/core/SkEmptyShader.h",
   "$_src/core/SkEndian.h",
+  "$_src/core/SkExecutor.cpp",
   "$_src/core/SkAnalyticEdge.cpp",
   "$_src/core/SkFDot6Constants.h",
   "$_src/core/SkEdge.cpp",

include/core/SkExecutor.h (new file, 32 lines)

@@ -0,0 +1,32 @@
/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SkExecutor_DEFINED
#define SkExecutor_DEFINED

#include <functional>
#include <memory>

class SkExecutor {
public:
    virtual ~SkExecutor();

    // Create a thread pool SkExecutor with a fixed thread count, by default the number of cores.
    static std::unique_ptr<SkExecutor> MakeThreadPool(int threads = 0);

    // There is always a default SkExecutor available by calling SkExecutor::GetDefault().
    static SkExecutor& GetDefault();
    static void SetDefault(SkExecutor*);  // Does not take ownership.  Not thread safe.

    // Add work to execute.
    virtual void add(std::function<void(void)>) = 0;

    // If it makes sense for this executor, use this thread to execute work for a little while.
    virtual void borrow() {}
};

#endif//SkExecutor_DEFINED
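Because add() is the only pure virtual, the interface is cheap to implement. A minimal sketch of a custom executor (hypothetical, not part of this CL; assumes <vector> and a single-threaded caller):

    #include <vector>

    // Queues work and runs nothing until flush() is called.
    class FlushingExecutor final : public SkExecutor {
    public:
        void add(std::function<void(void)> work) override {
            fQueue.push_back(std::move(work));
        }
        void flush() {
            for (auto& work : fQueue) { work(); }
            fQueue.clear();
        }
    private:
        std::vector<std::function<void(void)>> fQueue;
    };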

include/private/SkSemaphore.h

@@ -25,6 +25,9 @@ public:
     // then if the counter is <= 0, sleep this thread until the counter is > 0.
     void wait();
 
+    // If the counter is positive, decrement it by 1 and return true, otherwise return false.
+    bool try_wait();
+
     // SkBaseSemaphore has no destructor. Call this to clean it up.
     void cleanup();

src/core/SkExecutor.cpp (new file, 124 lines)

@@ -0,0 +1,124 @@
/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "SkExecutor.h"
#include "SkMakeUnique.h"
#include "SkMutex.h"
#include "SkSemaphore.h"
#include "SkSpinlock.h"
#include "SkTArray.h"
#include "SkThreadUtils.h"

#if defined(_MSC_VER)
    #include <windows.h>
    static int num_cores() {
        SYSTEM_INFO sysinfo;
        GetNativeSystemInfo(&sysinfo);
        return (int)sysinfo.dwNumberOfProcessors;
    }
#else
    #include <unistd.h>
    static int num_cores() {
        return (int)sysconf(_SC_NPROCESSORS_ONLN);
    }
#endif
SkExecutor::~SkExecutor() {}

// The default default SkExecutor is an SkTrivialExecutor, which just runs the work right away.
class SkTrivialExecutor final : public SkExecutor {
    void add(std::function<void(void)> work) override {
        work();
    }
};

static SkTrivialExecutor gTrivial;
static SkExecutor* gDefaultExecutor = &gTrivial;

SkExecutor& SkExecutor::GetDefault() {
    return *gDefaultExecutor;
}

void SkExecutor::SetDefault(SkExecutor* executor) {
    gDefaultExecutor = executor ? executor : &gTrivial;
}
// An SkThreadPool is an executor that runs work on a fixed pool of OS threads.
class SkThreadPool final : public SkExecutor {
public:
    explicit SkThreadPool(int threads) {
        for (int i = 0; i < threads; i++) {
            fThreads.emplace_back(new SkThread(&Loop, this));
            fThreads.back()->start();
        }
    }

    ~SkThreadPool() {
        // Signal each thread that it's time to shut down.
        for (int i = 0; i < fThreads.count(); i++) {
            this->add(nullptr);
        }
        // Wait for each thread to shut down.
        for (int i = 0; i < fThreads.count(); i++) {
            fThreads[i]->join();
        }
    }

    void add(std::function<void(void)> work) override {
        // Add some work to our pile of work to do.
        {
            SkAutoExclusive lock(fWorkLock);
            fWork.emplace_back(std::move(work));
        }
        // Tell the Loop() threads to pick it up.
        fWorkAvailable.signal(1);
    }

    void borrow() override {
        // If there is work waiting, do it.
        if (fWorkAvailable.try_wait()) {
            SkAssertResult(this->do_work());
        }
    }

private:
    // This method should be called only when fWorkAvailable indicates there's work to do.
    bool do_work() {
        std::function<void(void)> work;
        {
            SkAutoExclusive lock(fWorkLock);
            SkASSERT(!fWork.empty());  // TODO: if (fWork.empty()) { return true; } ?
            work = std::move(fWork.back());
            fWork.pop_back();
        }

        if (!work) {
            return false;  // This is Loop()'s signal to shut down.
        }

        work();
        return true;
    }

    static void Loop(void* ctx) {
        auto pool = (SkThreadPool*)ctx;
        do {
            pool->fWorkAvailable.wait();
        } while (pool->do_work());
    }

    // Both SkMutex and SkSpinlock can work here.
    using Lock = SkMutex;

    SkTArray<std::unique_ptr<SkThread>> fThreads;
    SkTArray<std::function<void(void)>> fWork;
    Lock                                fWorkLock;
    SkSemaphore                         fWorkAvailable;
};

std::unique_ptr<SkExecutor> SkExecutor::MakeThreadPool(int threads) {
    return skstd::make_unique<SkThreadPool>(threads > 0 ? threads : num_cores());
}
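A note on direct use (editor's gloss): SkExecutor::add() gives no way to wait for completion, so a caller that needs to block must arrange its own signal; SkTaskGroup, below, packages exactly that bookkeeping. A sketch under that assumption (run_one_and_wait is a hypothetical helper, for illustration only):

    void run_one_and_wait() {
        std::unique_ptr<SkExecutor> pool = SkExecutor::MakeThreadPool(4);
        SkSemaphore done;
        pool->add([&] {
            // ... expensive work on a pool thread ...
            done.signal();
        });
        done.wait();   // block until the task has run
    }                  // ~SkThreadPool() sends poison pills and joins here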

src/core/SkSemaphore.cpp

@@ -71,3 +71,11 @@ void SkBaseSemaphore::osWait() {
 void SkBaseSemaphore::cleanup() {
     delete fOSSemaphore;
 }
+
+bool SkBaseSemaphore::try_wait() {
+    int count = fCount.load(std::memory_order_relaxed);
+    if (count > 0) {
+        return fCount.compare_exchange_weak(count, count-1, std::memory_order_acquire);
+    }
+    return false;
+}
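(Editor's note: compare_exchange_weak may fail spuriously even when the count is still positive, but that is safe here. try_wait() promises nothing more than "maybe", and a caller such as SkThreadPool::borrow() simply skips helping on a false return, so no work is lost and no thread blocks.)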

src/core/SkTaskGroup.cpp

@@ -5,206 +5,43 @@
  * found in the LICENSE file.
  */
 
-#include "SkLeanWindows.h"
-#include "SkOnce.h"
-#include "SkSemaphore.h"
-#include "SkSpinlock.h"
-#include "SkTArray.h"
-#include "SkTDArray.h"
+#include "SkExecutor.h"
 #include "SkTaskGroup.h"
-#include "SkThreadUtils.h"
-
-#if defined(SK_BUILD_FOR_WIN32)
-    static void query_num_cores(int* cores) {
-        SYSTEM_INFO sysinfo;
-        GetNativeSystemInfo(&sysinfo);
-        *cores = sysinfo.dwNumberOfProcessors;
-    }
-#else
-    #include <unistd.h>
-    static void query_num_cores(int* cores) {
-        *cores = (int)sysconf(_SC_NPROCESSORS_ONLN);
-    }
-#endif
-
-static int num_cores() {
-    // We cache num_cores() so we only query the OS once.
-    static int cores = 0;
-    static SkOnce once;
-    once(query_num_cores, &cores);
-    SkASSERT(cores > 0);
-    return cores;
-}
+SkTaskGroup::SkTaskGroup(SkExecutor& executor) : fPending(0), fExecutor(executor) {}
 
-namespace {
-
-class ThreadPool : SkNoncopyable {
-public:
-    static void Add(std::function<void(void)> fn, SkAtomic<int32_t>* pending) {
-        if (!gGlobal) {
-            return fn();
-        }
-        gGlobal->add(fn, pending);
-    }
+void SkTaskGroup::add(std::function<void(void)> fn) {
+    fPending.fetch_add(+1, sk_memory_order_relaxed);
+    fExecutor.add([=] {
+        fn();
+        fPending.fetch_add(-1, sk_memory_order_release);
+    });
+}
 
-    static void Batch(int N, std::function<void(int)> fn, SkAtomic<int32_t>* pending) {
-        if (!gGlobal) {
-            for (int i = 0; i < N; i++) { fn(i); }
-            return;
-        }
-        gGlobal->batch(N, fn, pending);
-    }
+void SkTaskGroup::batch(int N, std::function<void(int)> fn) {
+    // TODO: I really thought we had some sort of more clever chunking logic.
+    fPending.fetch_add(+N, sk_memory_order_relaxed);
+    for (int i = 0; i < N; i++) {
+        fExecutor.add([=] {
+            fn(i);
+            fPending.fetch_add(-1, sk_memory_order_release);
+        });
+    }
+}
 
-    static void Wait(SkAtomic<int32_t>* pending) {
-        if (!gGlobal) {  // If we have no threads, the work must already be done.
-            SkASSERT(pending->load(sk_memory_order_relaxed) == 0);
-            return;
-        }
-        // Acquire pairs with decrement release here or in Loop.
-        while (pending->load(sk_memory_order_acquire) > 0) {
-            // Lend a hand until our SkTaskGroup of interest is done.
-            Work work;
-            {
-                // We're stealing work opportunistically,
-                // so we never call fWorkAvailable.wait(), which could sleep us if there's no work.
-                // This means fWorkAvailable is only an upper bound on fWork.count().
-                AutoLock lock(&gGlobal->fWorkLock);
-                if (gGlobal->fWork.empty()) {
-                    // Someone has picked up all the work (including ours). How nice of them!
-                    // (They may still be working on it, so we can't assert *pending == 0 here.)
-                    continue;
-                }
-                work = gGlobal->fWork.back();
-                gGlobal->fWork.pop_back();
-            }
-            // This Work isn't necessarily part of our SkTaskGroup of interest, but that's fine.
-            // We threads gotta stick together. We're always making forward progress.
-            work.fn();
-            work.pending->fetch_add(-1, sk_memory_order_release);  // Pairs with load above.
-        }
-    }
+void SkTaskGroup::wait() {
+    // Actively help the executor do work until our task group is done.
+    // This lets SkTaskGroups nest arbitrarily deep on a single SkExecutor:
+    // no thread ever blocks waiting for others to do its work.
+    // (We may end up doing work that's not part of our task group. That's fine.)
+    while (fPending.load(sk_memory_order_acquire) > 0) {
+        fExecutor.borrow();
+    }
+}
 
-private:
-    struct AutoLock {
-        AutoLock(SkSpinlock* lock) : fLock(lock) { fLock->acquire(); }
-        ~AutoLock() { fLock->release(); }
-    private:
-        SkSpinlock* fLock;
-    };
-
-    struct Work {
-        std::function<void(void)> fn;  // A function to call
-        SkAtomic<int32_t>* pending;    // then decrement pending afterwards.
-    };
-
-    explicit ThreadPool(int threads) {
-        if (threads == -1) {
-            threads = num_cores();
-        }
-        for (int i = 0; i < threads; i++) {
-            fThreads.push(new SkThread(&ThreadPool::Loop, this));
-            fThreads.top()->start();
-        }
-    }
-
-    ~ThreadPool() {
-        SkASSERT(fWork.empty());  // All SkTaskGroups should be destroyed by now.
-
-        // Send a poison pill to each thread.
-        SkAtomic<int> dummy(0);
-        for (int i = 0; i < fThreads.count(); i++) {
-            this->add(nullptr, &dummy);
-        }
-        // Wait for them all to swallow the pill and die.
-        for (int i = 0; i < fThreads.count(); i++) {
-            fThreads[i]->join();
-        }
-        SkASSERT(fWork.empty());  // Can't hurt to double check.
-        fThreads.deleteAll();
-    }
-
-    void add(std::function<void(void)> fn, SkAtomic<int32_t>* pending) {
-        Work work = { fn, pending };
-        pending->fetch_add(+1, sk_memory_order_relaxed);  // No barrier needed.
-        {
-            AutoLock lock(&fWorkLock);
-            fWork.push_back(work);
-        }
-        fWorkAvailable.signal(1);
-    }
-
-    void batch(int N, std::function<void(int)> fn, SkAtomic<int32_t>* pending) {
-        pending->fetch_add(+N, sk_memory_order_relaxed);  // No barrier needed.
-        {
-            AutoLock lock(&fWorkLock);
-            for (int i = 0; i < N; i++) {
-                Work work = { [i, fn]() { fn(i); }, pending };
-                fWork.push_back(work);
-            }
-        }
-        fWorkAvailable.signal(N);
-    }
-
-    static void Loop(void* arg) {
-        ThreadPool* pool = (ThreadPool*)arg;
-        Work work;
-        while (true) {
-            // Sleep until there's work available, and claim one unit of Work as we wake.
-            pool->fWorkAvailable.wait();
-            {
-                AutoLock lock(&pool->fWorkLock);
-                if (pool->fWork.empty()) {
-                    // Someone in Wait() stole our work (fWorkAvailable is an upper bound).
-                    // Well, that's fine, back to sleep for us.
-                    continue;
-                }
-                work = pool->fWork.back();
-                pool->fWork.pop_back();
-            }
-            if (!work.fn) {
-                return;  // Poison pill. Time... to die.
-            }
-            work.fn();
-            work.pending->fetch_add(-1, sk_memory_order_release);  // Pairs with load in Wait().
-        }
-    }
-
-    // fWorkLock must be held when reading or modifying fWork.
-    SkSpinlock fWorkLock;
-    SkTArray<Work> fWork;
-
-    // A thread-safe upper bound for fWork.count().
-    //
-    // We'd have it be an exact count but for the loop in Wait():
-    // we never want that to block, so it can't call fWorkAvailable.wait(),
-    // and that's the only way to decrement fWorkAvailable.
-    // So fWorkAvailable may overcount the work that's actually available.
-    // We make do, but this means some worker threads may wake spuriously.
-    SkSemaphore fWorkAvailable;
-
-    // These are only changed in a single-threaded context.
-    SkTDArray<SkThread*> fThreads;
-    static ThreadPool* gGlobal;
-
-    friend struct SkTaskGroup::Enabler;
-};
-ThreadPool* ThreadPool::gGlobal = nullptr;
-
-}  // namespace
-
 SkTaskGroup::Enabler::Enabler(int threads) {
-    SkASSERT(ThreadPool::gGlobal == nullptr);
-    if (threads != 0) {
-        ThreadPool::gGlobal = new ThreadPool(threads);
+    if (threads) {
+        fThreadPool = SkExecutor::MakeThreadPool(threads);
+        SkExecutor::SetDefault(fThreadPool.get());
     }
 }
-
-SkTaskGroup::Enabler::~Enabler() { delete ThreadPool::gGlobal; }
-
-SkTaskGroup::SkTaskGroup() : fPending(0) {}
-
-void SkTaskGroup::wait() { ThreadPool::Wait(&fPending); }
-void SkTaskGroup::add(std::function<void(void)> fn) { ThreadPool::Add(fn, &fPending); }
-void SkTaskGroup::batch(int N, std::function<void(int)> fn) {
-    ThreadPool::Batch(N, fn, &fPending);
-}
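Because wait() lends a hand via borrow() rather than sleeping, task groups can nest on one executor without deadlocking, even when the pool has fewer threads than waiting groups. A sketch (editor's illustration, not from this CL; process_grid is a hypothetical helper):

    #include "SkTaskGroup.h"

    void process_grid(int rows, int cols) {
        SkTaskGroup outer;
        outer.batch(rows, [=](int r) {
            SkTaskGroup inner;
            inner.batch(cols, [=](int c) {
                // ... work item (r, c) ...
            });
            inner.wait();   // helps execute queued work; never just blocks
        });
        outer.wait();
    }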

src/core/SkTaskGroup.h

@@ -8,24 +8,18 @@
 #ifndef SkTaskGroup_DEFINED
 #define SkTaskGroup_DEFINED
 
-#include <functional>
-#include "SkTypes.h"
-#include "SkAtomics.h"
-#include "SkTemplates.h"
+#include "SkExecutor.h"
+#include "SkTypes.h"
+#include <functional>
 
 class SkTaskGroup : SkNoncopyable {
 public:
-    // Create one of these in main() to enable SkTaskGroups globally.
-    struct Enabler : SkNoncopyable {
-        explicit Enabler(int threads = -1);  // Default is system-reported core count.
-        ~Enabler();
-    };
-
-    SkTaskGroup();
+    // Tasks added to this SkTaskGroup will run on its executor.
+    explicit SkTaskGroup(SkExecutor& executor = SkExecutor::GetDefault());
     ~SkTaskGroup() { this->wait(); }
 
-    // Add a task to this SkTaskGroup.  It will likely run on another thread.
+    // Add a task to this SkTaskGroup.
     void add(std::function<void(void)> fn);
 
     // Add a batch of N tasks, all calling fn with different arguments.
@@ -35,8 +29,16 @@ public:
     // You may safely reuse this SkTaskGroup after wait() returns.
     void wait();
 
+    // A convenience for testing tools.
+    // Creates and owns a thread pool, and passes it to SkExecutor::SetDefault().
+    struct Enabler {
+        explicit Enabler(int threads = -1);  // -1 -> num_cores, 0 -> noop
+        std::unique_ptr<SkExecutor> fThreadPool;
+    };
+
 private:
     SkAtomic<int32_t> fPending;
+    SkExecutor&       fExecutor;
 };
 
 #endif//SkTaskGroup_DEFINED
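Typical Enabler use in a testing tool's main() (a sketch, not from this CL):

    #include "SkTaskGroup.h"

    int main(int argc, char** argv) {
        SkTaskGroup::Enabler enabled(-1);   // -1 -> one thread per core; 0 -> no-op
        SkTaskGroup().batch(100, [](int i) {
            // ... test case i ...
        });
        return 0;   // the temporary's ~SkTaskGroup already called wait()
    }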