SkExecutor

Refactoring to refamiliarize myself with SkTaskGroup and SkThreadPool.

This adds an SkExecutor interface to describe how we use SkThreadPool,
with a global setter and getter for a default instance.  Then I rewrote
SkTaskGroup to work with any executor, the global default by default.

I also think I've made the SkTaskGroup::wait() borrow logic clearer
with the addition of SkSemaphore::try_wait().  This lets me keep the
semaphore count and actual count of work in perfect sync.

Change-Id: I6bbdfaeb0e2c3a43daff6192d34bc4a3f7210178
Reviewed-on: https://skia-review.googlesource.com/8836
Reviewed-by: Mike Reed <reed@google.com>
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Mike Klein <mtklein@chromium.org>, 2017-02-21 22:53:16 -05:00, committed by Skia Commit-Bot
parent d44dd4c356, commit 384b90af5c
7 changed files with 210 additions and 203 deletions
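For orientation before the per-file diffs, here is a sketch (an editor's illustration, not code from this CL) of what the new surface looks like to a client; it assumes only the SkExecutor.h and SkTaskGroup.h APIs shown below:

    #include "SkExecutor.h"
    #include "SkTaskGroup.h"

    void sketch() {
        // By default, tasks run on SkExecutor::GetDefault().
        SkTaskGroup simple;
        simple.add([] { /* runs immediately unless a pool was installed */ });
        simple.wait();

        // Or hand any executor to a task group explicitly.
        std::unique_ptr<SkExecutor> pool = SkExecutor::MakeThreadPool(2);
        SkTaskGroup onPool(*pool);
        onPool.batch(10, [](int i) { /* item i, likely on a pool thread */ });
        onPool.wait();
    }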

gn/core.gni

@@ -127,6 +127,7 @@ skia_core_sources = [
   "$_src/core/SkEdgeClipper.h",
   "$_src/core/SkEmptyShader.h",
   "$_src/core/SkEndian.h",
+  "$_src/core/SkExecutor.cpp",
   "$_src/core/SkAnalyticEdge.cpp",
   "$_src/core/SkFDot6Constants.h",
   "$_src/core/SkEdge.cpp",

include/core/SkExecutor.h (new file, 32 lines)

@@ -0,0 +1,32 @@
/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SkExecutor_DEFINED
#define SkExecutor_DEFINED

#include <functional>
#include <memory>

class SkExecutor {
public:
    virtual ~SkExecutor();

    // Create a thread pool SkExecutor with a fixed thread count, by default the number of cores.
    static std::unique_ptr<SkExecutor> MakeThreadPool(int threads = 0);

    // There is always a default SkExecutor available by calling SkExecutor::GetDefault().
    static SkExecutor& GetDefault();
    static void SetDefault(SkExecutor*);  // Does not take ownership.  Not thread safe.

    // Add work to execute.
    virtual void add(std::function<void(void)>) = 0;

    // If it makes sense for this executor, use this thread to execute work for a little while.
    virtual void borrow() {}
};

#endif//SkExecutor_DEFINED
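Because add() is the only pure virtual, the interface is cheap to implement. A minimal sketch of a custom executor (hypothetical, not part of this CL; assumes <vector> and a single-threaded caller):

    #include <vector>

    // Queues work and runs nothing until flush() is called.
    class FlushingExecutor final : public SkExecutor {
    public:
        void add(std::function<void(void)> work) override {
            fQueue.push_back(std::move(work));
        }
        void flush() {
            for (auto& work : fQueue) { work(); }
            fQueue.clear();
        }
    private:
        std::vector<std::function<void(void)>> fQueue;
    };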

include/private/SkSemaphore.h

@@ -25,6 +25,9 @@ public:
     // then if the counter is <= 0, sleep this thread until the counter is > 0.
     void wait();
 
+    // If the counter is positive, decrement it by 1 and return true, otherwise return false.
+    bool try_wait();
+
     // SkBaseSemaphore has no destructor. Call this to clean it up.
     void cleanup();

src/core/SkExecutor.cpp (new file, 124 lines)

@@ -0,0 +1,124 @@
/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "SkExecutor.h"
#include "SkMakeUnique.h"
#include "SkMutex.h"
#include "SkSemaphore.h"
#include "SkSpinlock.h"
#include "SkTArray.h"
#include "SkThreadUtils.h"

#if defined(_MSC_VER)
    #include <windows.h>
    static int num_cores() {
        SYSTEM_INFO sysinfo;
        GetNativeSystemInfo(&sysinfo);
        return (int)sysinfo.dwNumberOfProcessors;
    }
#else
    #include <unistd.h>
    static int num_cores() {
        return (int)sysconf(_SC_NPROCESSORS_ONLN);
    }
#endif
SkExecutor::~SkExecutor() {}

// The default default SkExecutor is an SkTrivialExecutor, which just runs the work right away.
class SkTrivialExecutor final : public SkExecutor {
    void add(std::function<void(void)> work) override {
        work();
    }
};

static SkTrivialExecutor gTrivial;
static SkExecutor* gDefaultExecutor = &gTrivial;

SkExecutor& SkExecutor::GetDefault() {
    return *gDefaultExecutor;
}

void SkExecutor::SetDefault(SkExecutor* executor) {
    gDefaultExecutor = executor ? executor : &gTrivial;
}
// An SkThreadPool is an executor that runs work on a fixed pool of OS threads.
class SkThreadPool final : public SkExecutor {
public:
    explicit SkThreadPool(int threads) {
        for (int i = 0; i < threads; i++) {
            fThreads.emplace_back(new SkThread(&Loop, this));
            fThreads.back()->start();
        }
    }

    ~SkThreadPool() {
        // Signal each thread that it's time to shut down.
        for (int i = 0; i < fThreads.count(); i++) {
            this->add(nullptr);
        }
        // Wait for each thread to shut down.
        for (int i = 0; i < fThreads.count(); i++) {
            fThreads[i]->join();
        }
    }

    void add(std::function<void(void)> work) override {
        // Add some work to our pile of work to do.
        {
            SkAutoExclusive lock(fWorkLock);
            fWork.emplace_back(std::move(work));
        }
        // Tell the Loop() threads to pick it up.
        fWorkAvailable.signal(1);
    }

    void borrow() override {
        // If there is work waiting, do it.
        if (fWorkAvailable.try_wait()) {
            SkAssertResult(this->do_work());
        }
    }

private:
    // This method should be called only when fWorkAvailable indicates there's work to do.
    bool do_work() {
        std::function<void(void)> work;
        {
            SkAutoExclusive lock(fWorkLock);
            SkASSERT(!fWork.empty());  // TODO: if (fWork.empty()) { return true; } ?
            work = std::move(fWork.back());
            fWork.pop_back();
        }

        if (!work) {
            return false;  // This is Loop()'s signal to shut down.
        }

        work();
        return true;
    }

    static void Loop(void* ctx) {
        auto pool = (SkThreadPool*)ctx;
        do {
            pool->fWorkAvailable.wait();
        } while (pool->do_work());
    }

    // Both SkMutex and SkSpinlock can work here.
    using Lock = SkMutex;

    SkTArray<std::unique_ptr<SkThread>> fThreads;
    SkTArray<std::function<void(void)>> fWork;
    Lock                                fWorkLock;
    SkSemaphore                         fWorkAvailable;
};

std::unique_ptr<SkExecutor> SkExecutor::MakeThreadPool(int threads) {
    return skstd::make_unique<SkThreadPool>(threads > 0 ? threads : num_cores());
}
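A note on direct use (editor's gloss): SkExecutor::add() gives no way to wait for completion, so a caller that needs to block must arrange its own signal; SkTaskGroup, below, packages exactly that bookkeeping. A sketch under that assumption (run_one_and_wait is a hypothetical helper, for illustration only):

    void run_one_and_wait() {
        std::unique_ptr<SkExecutor> pool = SkExecutor::MakeThreadPool(4);
        SkSemaphore done;
        pool->add([&] {
            // ... expensive work on a pool thread ...
            done.signal();
        });
        done.wait();   // block until the task has run
    }                  // ~SkThreadPool() sends poison pills and joins here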

src/core/SkSemaphore.cpp

@@ -71,3 +71,11 @@ void SkBaseSemaphore::osWait() {
 void SkBaseSemaphore::cleanup() {
     delete fOSSemaphore;
 }
+
+bool SkBaseSemaphore::try_wait() {
+    int count = fCount.load(std::memory_order_relaxed);
+    if (count > 0) {
+        return fCount.compare_exchange_weak(count, count-1, std::memory_order_acquire);
+    }
+    return false;
+}
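(Editor's note: compare_exchange_weak may fail spuriously even when the count is still positive, but that is safe here. try_wait() promises nothing more than "maybe", and a caller such as SkThreadPool::borrow() simply skips helping on a false return, so no work is lost and no thread blocks.)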

src/core/SkTaskGroup.cpp

@@ -5,206 +5,43 @@
  * found in the LICENSE file.
  */
 
-#include "SkLeanWindows.h"
-#include "SkOnce.h"
-#include "SkSemaphore.h"
-#include "SkSpinlock.h"
-#include "SkTArray.h"
-#include "SkTDArray.h"
+#include "SkExecutor.h"
 #include "SkTaskGroup.h"
-#include "SkThreadUtils.h"
-
-#if defined(SK_BUILD_FOR_WIN32)
-    static void query_num_cores(int* cores) {
-        SYSTEM_INFO sysinfo;
-        GetNativeSystemInfo(&sysinfo);
-        *cores = sysinfo.dwNumberOfProcessors;
-    }
-#else
-    #include <unistd.h>
-    static void query_num_cores(int* cores) {
-        *cores = (int)sysconf(_SC_NPROCESSORS_ONLN);
-    }
-#endif
-
-static int num_cores() {
-    // We cache num_cores() so we only query the OS once.
-    static int cores = 0;
-    static SkOnce once;
-    once(query_num_cores, &cores);
-    SkASSERT(cores > 0);
-    return cores;
-}
+SkTaskGroup::SkTaskGroup(SkExecutor& executor) : fPending(0), fExecutor(executor) {}
 
-namespace {
-
-class ThreadPool : SkNoncopyable {
-public:
-    static void Add(std::function<void(void)> fn, SkAtomic<int32_t>* pending) {
-        if (!gGlobal) {
-            return fn();
-        }
-        gGlobal->add(fn, pending);
-    }
+void SkTaskGroup::add(std::function<void(void)> fn) {
+    fPending.fetch_add(+1, sk_memory_order_relaxed);
+    fExecutor.add([=] {
+        fn();
+        fPending.fetch_add(-1, sk_memory_order_release);
+    });
+}
 
-    static void Batch(int N, std::function<void(int)> fn, SkAtomic<int32_t>* pending) {
-        if (!gGlobal) {
-            for (int i = 0; i < N; i++) { fn(i); }
-            return;
-        }
-        gGlobal->batch(N, fn, pending);
-    }
+void SkTaskGroup::batch(int N, std::function<void(int)> fn) {
+    // TODO: I really thought we had some sort of more clever chunking logic.
+    fPending.fetch_add(+N, sk_memory_order_relaxed);
+    for (int i = 0; i < N; i++) {
+        fExecutor.add([=] {
+            fn(i);
+            fPending.fetch_add(-1, sk_memory_order_release);
+        });
+    }
+}
 
-    static void Wait(SkAtomic<int32_t>* pending) {
-        if (!gGlobal) {  // If we have no threads, the work must already be done.
-            SkASSERT(pending->load(sk_memory_order_relaxed) == 0);
-            return;
-        }
-        // Acquire pairs with decrement release here or in Loop.
-        while (pending->load(sk_memory_order_acquire) > 0) {
-            // Lend a hand until our SkTaskGroup of interest is done.
-            Work work;
-            {
-                // We're stealing work opportunistically,
-                // so we never call fWorkAvailable.wait(), which could sleep us if there's no work.
-                // This means fWorkAvailable is only an upper bound on fWork.count().
-                AutoLock lock(&gGlobal->fWorkLock);
-                if (gGlobal->fWork.empty()) {
-                    // Someone has picked up all the work (including ours). How nice of them!
-                    // (They may still be working on it, so we can't assert *pending == 0 here.)
-                    continue;
-                }
-                work = gGlobal->fWork.back();
-                gGlobal->fWork.pop_back();
-            }
-            // This Work isn't necessarily part of our SkTaskGroup of interest, but that's fine.
-            // We threads gotta stick together. We're always making forward progress.
-            work.fn();
-            work.pending->fetch_add(-1, sk_memory_order_release);  // Pairs with load above.
-        }
-    }
+void SkTaskGroup::wait() {
+    // Actively help the executor do work until our task group is done.
+    // This lets SkTaskGroups nest arbitrarily deep on a single SkExecutor:
+    // no thread ever blocks waiting for others to do its work.
+    // (We may end up doing work that's not part of our task group. That's fine.)
+    while (fPending.load(sk_memory_order_acquire) > 0) {
+        fExecutor.borrow();
+    }
+}
 
-private:
-    struct AutoLock {
-        AutoLock(SkSpinlock* lock) : fLock(lock) { fLock->acquire(); }
-        ~AutoLock() { fLock->release(); }
-    private:
-        SkSpinlock* fLock;
-    };
-
-    struct Work {
-        std::function<void(void)> fn;  // A function to call
-        SkAtomic<int32_t>* pending;    // then decrement pending afterwards.
-    };
-
-    explicit ThreadPool(int threads) {
-        if (threads == -1) {
-            threads = num_cores();
-        }
-        for (int i = 0; i < threads; i++) {
-            fThreads.push(new SkThread(&ThreadPool::Loop, this));
-            fThreads.top()->start();
-        }
-    }
-
-    ~ThreadPool() {
-        SkASSERT(fWork.empty());  // All SkTaskGroups should be destroyed by now.
-
-        // Send a poison pill to each thread.
-        SkAtomic<int> dummy(0);
-        for (int i = 0; i < fThreads.count(); i++) {
-            this->add(nullptr, &dummy);
-        }
-        // Wait for them all to swallow the pill and die.
-        for (int i = 0; i < fThreads.count(); i++) {
-            fThreads[i]->join();
-        }
-        SkASSERT(fWork.empty());  // Can't hurt to double check.
-        fThreads.deleteAll();
-    }
-
-    void add(std::function<void(void)> fn, SkAtomic<int32_t>* pending) {
-        Work work = { fn, pending };
-        pending->fetch_add(+1, sk_memory_order_relaxed);  // No barrier needed.
-        {
-            AutoLock lock(&fWorkLock);
-            fWork.push_back(work);
-        }
-        fWorkAvailable.signal(1);
-    }
-
-    void batch(int N, std::function<void(int)> fn, SkAtomic<int32_t>* pending) {
-        pending->fetch_add(+N, sk_memory_order_relaxed);  // No barrier needed.
-        {
-            AutoLock lock(&fWorkLock);
-            for (int i = 0; i < N; i++) {
-                Work work = { [i, fn]() { fn(i); }, pending };
-                fWork.push_back(work);
-            }
-        }
-        fWorkAvailable.signal(N);
-    }
-
-    static void Loop(void* arg) {
-        ThreadPool* pool = (ThreadPool*)arg;
-        Work work;
-        while (true) {
-            // Sleep until there's work available, and claim one unit of Work as we wake.
-            pool->fWorkAvailable.wait();
-            {
-                AutoLock lock(&pool->fWorkLock);
-                if (pool->fWork.empty()) {
-                    // Someone in Wait() stole our work (fWorkAvailable is an upper bound).
-                    // Well, that's fine, back to sleep for us.
-                    continue;
-                }
-                work = pool->fWork.back();
-                pool->fWork.pop_back();
-            }
-            if (!work.fn) {
-                return;  // Poison pill. Time... to die.
-            }
-            work.fn();
-            work.pending->fetch_add(-1, sk_memory_order_release);  // Pairs with load in Wait().
-        }
-    }
-
-    // fWorkLock must be held when reading or modifying fWork.
-    SkSpinlock fWorkLock;
-    SkTArray<Work> fWork;
-
-    // A thread-safe upper bound for fWork.count().
-    //
-    // We'd have it be an exact count but for the loop in Wait():
-    // we never want that to block, so it can't call fWorkAvailable.wait(),
-    // and that's the only way to decrement fWorkAvailable.
-    // So fWorkAvailable may overcount the work that's actually available.
-    // We make do, but this means some worker threads may wake spuriously.
-    SkSemaphore fWorkAvailable;
-
-    // These are only changed in a single-threaded context.
-    SkTDArray<SkThread*> fThreads;
-    static ThreadPool* gGlobal;
-
-    friend struct SkTaskGroup::Enabler;
-};
-ThreadPool* ThreadPool::gGlobal = nullptr;
-
-}  // namespace
-
 SkTaskGroup::Enabler::Enabler(int threads) {
-    SkASSERT(ThreadPool::gGlobal == nullptr);
-    if (threads != 0) {
-        ThreadPool::gGlobal = new ThreadPool(threads);
+    if (threads) {
+        fThreadPool = SkExecutor::MakeThreadPool(threads);
+        SkExecutor::SetDefault(fThreadPool.get());
     }
 }
-
-SkTaskGroup::Enabler::~Enabler() { delete ThreadPool::gGlobal; }
-
-SkTaskGroup::SkTaskGroup() : fPending(0) {}
-
-void SkTaskGroup::wait() { ThreadPool::Wait(&fPending); }
-void SkTaskGroup::add(std::function<void(void)> fn) { ThreadPool::Add(fn, &fPending); }
-void SkTaskGroup::batch(int N, std::function<void(int)> fn) {
-    ThreadPool::Batch(N, fn, &fPending);
-}
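Because wait() lends a hand via borrow() rather than sleeping, task groups can nest on one executor without deadlocking, even when the pool has fewer threads than waiting groups. A sketch (editor's illustration, not from this CL; process_grid is a hypothetical helper):

    #include "SkTaskGroup.h"

    void process_grid(int rows, int cols) {
        SkTaskGroup outer;
        outer.batch(rows, [=](int r) {
            SkTaskGroup inner;
            inner.batch(cols, [=](int c) {
                // ... work item (r, c) ...
            });
            inner.wait();   // helps execute queued work; never just blocks
        });
        outer.wait();
    }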

src/core/SkTaskGroup.h

@@ -8,24 +8,18 @@
 #ifndef SkTaskGroup_DEFINED
 #define SkTaskGroup_DEFINED
 
-#include <functional>
-#include "SkTypes.h"
-#include "SkAtomics.h"
-#include "SkTemplates.h"
+#include "SkExecutor.h"
+#include "SkTypes.h"
+#include <functional>
 
 class SkTaskGroup : SkNoncopyable {
 public:
-    // Create one of these in main() to enable SkTaskGroups globally.
-    struct Enabler : SkNoncopyable {
-        explicit Enabler(int threads = -1);  // Default is system-reported core count.
-        ~Enabler();
-    };
-
-    SkTaskGroup();
+    // Tasks added to this SkTaskGroup will run on its executor.
+    explicit SkTaskGroup(SkExecutor& executor = SkExecutor::GetDefault());
     ~SkTaskGroup() { this->wait(); }
 
-    // Add a task to this SkTaskGroup.  It will likely run on another thread.
+    // Add a task to this SkTaskGroup.
     void add(std::function<void(void)> fn);
 
     // Add a batch of N tasks, all calling fn with different arguments.
@@ -35,8 +29,16 @@ public:
     // You may safely reuse this SkTaskGroup after wait() returns.
     void wait();
 
+    // A convenience for testing tools.
+    // Creates and owns a thread pool, and passes it to SkExecutor::SetDefault().
+    struct Enabler {
+        explicit Enabler(int threads = -1);  // -1 -> num_cores, 0 -> noop
+        std::unique_ptr<SkExecutor> fThreadPool;
+    };
+
 private:
     SkAtomic<int32_t> fPending;
+    SkExecutor&       fExecutor;
 };
 
 #endif//SkTaskGroup_DEFINED
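Typical Enabler use in a testing tool's main() (a sketch, not from this CL):

    #include "SkTaskGroup.h"

    int main(int argc, char** argv) {
        SkTaskGroup::Enabler enabled(-1);   // -1 -> one thread per core; 0 -> no-op
        SkTaskGroup().batch(100, [](int i) {
            // ... test case i ...
        });
        return 0;   // the temporary's ~SkTaskGroup already called wait()
    }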