SkOnce: 2 bytes -> 1 byte

This uses the same logic we worked out for SkOncePtr to reduce the memory footprint of SkOnce from a done byte and lock byte to a single 3-state byte: - NotStarted: no thread has tried to run fn() yet - Active: a thread is running fn() - Done: fn() is complete Threads which see Done return immediately. Threads which see NotStarted try to move to Active, run fn(), then move to Done. Threads which see Active spin until the active thread moves to Done. This additionally fixes a too-weak memory order bug in SkOncePtr, and adds a big note to explain. BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1904483003 Committed: https://skia.googlesource.com/skia/+/df02d338be8e3c1c50b48a3a9faa582703a39c07 Review URL: https://codereview.chromium.org/1904483003
2016-04-20 13:49:15 -07:00 · 2016-04-20 13:49:15 -07:00 · 650f9e9a26
commit 650f9e9a26
parent 9134686fd9
2 changed files with 43 additions and 12 deletions
--- a/include/private/SkOnce.h
+++ b/include/private/SkOnce.h
@ -8,9 +8,9 @@
 #ifndef SkOnce_DEFINED
 #define SkOnce_DEFINED

-#include "../private/SkSpinlock.h"
 #include <atomic>
 #include <utility>
+#include "SkTypes.h"

 // SkOnce provides call-once guarantees for Skia, much like std::once_flag/std::call_once().
 //
@ -21,20 +21,50 @@ class SkOnce {
 public:
    template <typename Fn, typename... Args>
    void operator()(Fn&& fn, Args&&... args) {
-        // Vanilla double-checked locking.
-        if (!fDone.load(std::memory_order_acquire)) {
-            fLock.acquire();
-            if (!fDone.load(std::memory_order_relaxed)) {
-                fn(std::forward<Args>(args)...);
-                fDone.store(true, std::memory_order_release);
-            }
-            fLock.release();
+        auto state = fState.load(std::memory_order_acquire);
+
+        if (state == Done) {
+            return;
        }
+
+        if (state == NotStarted) {
+            // Try to claim the job of calling fn() by swapping from NotStarted to Calling.
+            // See [1] below for why we use std::memory_order_acquire instead of relaxed.
+            if (fState.compare_exchange_strong(state, Calling, std::memory_order_acquire)) {
+                // Claimed!  Call fn(), then mark this SkOnce as Done.
+                fn(std::forward<Args>(args)...);
+                return fState.store(Done, std::memory_order_release);
+            }
+        }
+
+        while (state == Calling) {
+            // Some other thread is calling fn().  Wait for them to finish.
+            state = fState.load(std::memory_order_acquire);
+        }
+        SkASSERT(state == Done);
    }

 private:
-    std::atomic<bool> fDone{false};
-    SkSpinlock        fLock;
+    enum State : uint8_t { NotStarted, Calling, Done};
+    std::atomic<uint8_t> fState{NotStarted};
 };

+/* [1]  Why do we compare_exchange_strong() with std::memory_order_acquire instead of relaxed?
+ *
+ * If we succeed, we really only need a relaxed compare_exchange_strong()... we're the ones
+ * who are about to do a release store, so there's certainly nothing yet for an acquire to
+ * synchronize with.
+ *
+ * If that compare_exchange_strong() fails, we're either in Calling or Done state.
+ * Again, if we're in Calling state, relaxed would have been fine: the spin loop will
+ * acquire up to the Calling thread's release store.
+ *
+ * But if that compare_exchange_strong() fails and we find ourselves in the Done state,
+ * we've never done an acquire load to sync up to the store of that Done state.
+ *
+ * So on failure we need an acquire load.  Generally the failure memory order cannot be
+ * stronger than the success memory order, so we need acquire on success too.  The single
+ * memory order version of compare_exchange_strong() uses the same acquire order for both.
+ */
+
 #endif  // SkOnce_DEFINED
--- a/include/private/SkOncePtr.h
+++ b/include/private/SkOncePtr.h
@ -66,8 +66,9 @@ public:
            if (state == 0) {
                // It looks like no one has tried to create our pointer yet.
                // We try to claim that task by atomically swapping our state from '0' to '1'.
+                // See SkOnce.h for why we use an acquire memory order here rather than relaxed.
                if (sk_atomic_compare_exchange(
-                    &fState, &state, (uintptr_t)1, sk_memory_order_relaxed, sk_memory_order_relaxed)) {
+                    &fState, &state, (uintptr_t)1, sk_memory_order_acquire, sk_memory_order_acquire)) {
                    // We've claimed it.  Create our pointer and store it into fState.
                    state = (uintptr_t)f();
                    SkASSERT(state > 1);